diff options
Diffstat (limited to 'src/plugins/memif/device.c')
-rw-r--r-- | src/plugins/memif/device.c | 281 |
1 files changed, 278 insertions, 3 deletions
diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c index f049a7be38e..ff6068f8243 100644 --- a/src/plugins/memif/device.c +++ b/src/plugins/memif/device.c @@ -369,6 +369,270 @@ no_free_slots: return n_left; } /* DMA completion callback. The batch cookie carries the interface index in its upper bits (cookie >> 16) and the tx queue index in its low 16 bits. Frees the buffers recorded in the dma_info entry at dma_info_head, marks that entry finished, publishes its dma_tail to ring->tail with release ordering, then advances dma_info_head (wrapping at dma_info_size) and clears dma_info_full. */ +CLIB_MARCH_FN (memif_tx_dma_completion_cb, void, vlib_main_t *vm, + vlib_dma_batch_t *b) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16); + memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, b->cookie & 0xffff); + memif_dma_info_t *dma_info = mq->dma_info + mq->dma_info_head; + memif_per_thread_data_t *ptd = &dma_info->data; + + vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers)); + + dma_info->finished = 1; + vec_reset_length (ptd->buffers); + vec_reset_length (ptd->copy_ops); + + __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE); + + mq->dma_info_head++; + if (mq->dma_info_head == mq->dma_info_size) + mq->dma_info_head = 0; + mq->dma_info_full = 0; +} + /* Plain entry point, compiled only when CLIB_MARCH_VARIANT is not defined; forwards both arguments to the function obtained through CLIB_MARCH_FN_SELECT. */ +#ifndef CLIB_MARCH_VARIANT +void +memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b) +{ + return CLIB_MARCH_FN_SELECT (memif_tx_dma_completion_cb) (vm, b); +} +#endif + /* DMA transmit path. Translates up to n_left buffers (following buffer chains) into copy ops that target ring descriptors starting at mq->dma_tail, hands those copy ops to a DMA batch (or to clib_memcpy_fast when fallback is set), retries up to n_retries times while buffers remain, and returns the number of buffers that were not consumed. Per-batch bookkeeping lives in the dma_info entry at mq->dma_info_tail. */ +static_always_inline uword +memif_interface_tx_dma_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 *buffers, memif_if_t *mif, + memif_ring_type_t type, memif_queue_t *mq, + u32 n_left) +{ + memif_ring_t *ring; + u32 n_copy_op; + u16 ring_size, mask, slot, free_slots; + int n_retries = 5, fallback = 0; + vlib_buffer_t *b0, *b1, *b2, *b3; + memif_copy_op_t *co; + memif_region_index_t last_region = ~0; + void *last_region_shm = 0; + u16 head, tail; + memif_dma_info_t *dma_info; + memif_per_thread_data_t *ptd; + memif_main_t *mm = &memif_main; + u16 mif_id = mif - mm->interfaces; + + ring = mq->ring; + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + + dma_info = mq->dma_info + mq->dma_info_tail; + ptd = &dma_info->data; + + /* do software fallback if dma info ring is full */ + u16 dma_mask = 
mq->dma_info_size - 1; + if ((((mq->dma_info_tail + 1) & dma_mask) == mq->dma_info_head) || + ((mq->dma_info_head == dma_mask) && (mq->dma_info_tail == 0))) + { + if (!mq->dma_info_full) + mq->dma_info_full = 1; + else + fallback = 1; + } + + vlib_dma_batch_t *b = NULL; + if (PREDICT_TRUE (!fallback)) + b = vlib_dma_batch_new (vm, mif->dma_tx_config); /* NOTE(review): b is only assigned when fallback == 0, so with fallback == 1 the check below returns before any of the fallback (memcpy) branches further down can run - confirm this is intended. */ + if (!b) + return n_left; + +retry: + + slot = tail = mq->dma_tail; + head = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE); + mq->last_tail += tail - mq->last_tail; + free_slots = head - mq->dma_tail; + + while (n_left && free_slots) + { + memif_desc_t *d0; + void *mb0; + i32 src_off; + u32 bi0, dst_off, src_left, dst_left, bytes_to_copy; + u32 saved_ptd_copy_ops_len = _vec_len (ptd->copy_ops); + u32 saved_ptd_buffers_len = _vec_len (ptd->buffers); + u16 saved_slot = slot; + + clib_prefetch_load (&ring->desc[(slot + 8) & mask]); + + d0 = &ring->desc[slot & mask]; + if (PREDICT_FALSE (last_region != d0->region)) + { + last_region_shm = mif->regions[d0->region].shm; + last_region = d0->region; + } + mb0 = last_region_shm + d0->offset; + + dst_off = 0; + + /* slave is the producer, so it should be able to reset buffer length */ + dst_left = d0->length; + + if (PREDICT_TRUE (n_left >= 4)) + vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD); + bi0 = buffers[0]; + + next_in_chain: + + b0 = vlib_get_buffer (vm, bi0); + src_off = b0->current_data; + src_left = b0->current_length; + + while (src_left) + { + if (PREDICT_FALSE (dst_left == 0)) + { + if (free_slots) + { + d0->length = dst_off; + d0->flags = MEMIF_DESC_FLAG_NEXT; + d0 = &ring->desc[slot & mask]; + dst_off = 0; + dst_left = (type == MEMIF_RING_S2M) ? 
mif->run.buffer_size : + d0->length; + + if (PREDICT_FALSE (last_region != d0->region)) + { + last_region_shm = mif->regions[d0->region].shm; + last_region = d0->region; + } + mb0 = last_region_shm + d0->offset; + } + else + { + /* we need to rollback vectors before bailing out */ + vec_set_len (ptd->buffers, saved_ptd_buffers_len); + vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len); + vlib_error_count (vm, node->node_index, + MEMIF_TX_ERROR_ROLLBACK, 1); + slot = saved_slot; + goto no_free_slots; + } + } + bytes_to_copy = clib_min (src_left, dst_left); + memif_add_copy_op (ptd, mb0 + dst_off, bytes_to_copy, src_off, + vec_len (ptd->buffers)); + src_off += bytes_to_copy; + dst_off += bytes_to_copy; + src_left -= bytes_to_copy; + dst_left -= bytes_to_copy; + } + /* chained buffer: consume one more slot and continue with the next buffer of the chain */ + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + slot++; + free_slots--; + bi0 = b0->next_buffer; + goto next_in_chain; + } + + vec_add1_aligned (ptd->buffers, buffers[0], CLIB_CACHE_LINE_BYTES); + d0->length = dst_off; + d0->flags = 0; + + free_slots -= 1; + slot += 1; + + buffers++; + n_left--; + } +no_free_slots: + + /* copy data */ + n_copy_op = vec_len (ptd->copy_ops); + co = ptd->copy_ops; /* four copy ops are issued per iteration while the data pointers of the following four are prefetched, hence the >= 8 bound */ + while (n_copy_op >= 8) + { + clib_prefetch_load (co[4].data); + clib_prefetch_load (co[5].data); + clib_prefetch_load (co[6].data); + clib_prefetch_load (co[7].data); + + b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]); + b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]); + b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]); + b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]); + + if (PREDICT_TRUE (!fallback)) + { + vlib_dma_batch_add (vm, b, co[0].data, + b0->data + co[0].buffer_offset, co[0].data_len); + vlib_dma_batch_add (vm, b, co[1].data, + b1->data + co[1].buffer_offset, co[1].data_len); + vlib_dma_batch_add (vm, b, co[2].data, + b2->data + co[2].buffer_offset, co[2].data_len); + vlib_dma_batch_add (vm, b, co[3].data, + b3->data + 
co[3].buffer_offset, co[3].data_len); + } + else + { + clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset, + co[0].data_len); + clib_memcpy_fast (co[1].data, b1->data + co[1].buffer_offset, + co[1].data_len); + clib_memcpy_fast (co[2].data, b2->data + co[2].buffer_offset, + co[2].data_len); + clib_memcpy_fast (co[3].data, b3->data + co[3].buffer_offset, + co[3].data_len); + } + + co += 4; + n_copy_op -= 4; + } + while (n_copy_op) + { + b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]); + if (PREDICT_TRUE (!fallback)) + vlib_dma_batch_add (vm, b, co[0].data, b0->data + co[0].buffer_offset, + co[0].data_len); + else + clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset, + co[0].data_len); + co += 1; + n_copy_op -= 1; + } + + /* save dma info before retry */ + dma_info->dma_tail = slot; + mq->dma_tail = slot; + vec_reset_length (ptd->copy_ops); + + if (n_left && n_retries--) + goto retry; + + if (PREDICT_TRUE (!fallback)) + { /* cookie layout must match the decode in memif_tx_dma_completion_cb: interface index << 16 | tx queue index */ + vlib_dma_batch_set_cookie (vm, b, + (mif_id << 16) | (mq - mif->tx_queues)); + vlib_dma_batch_submit (vm, b); + dma_info->finished = 0; + + if (b->n_enq) + { + mq->dma_info_tail++; + if (mq->dma_info_tail == mq->dma_info_size) + mq->dma_info_tail = 0; + } + } + else if (fallback && dma_info->finished) + { + /* if dma has been completed, update ring immediately */ + vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers)); + vec_reset_length (ptd->buffers); + __atomic_store_n (&mq->ring->tail, slot, __ATOMIC_RELEASE); + } + + return n_left; +} + VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -399,8 +663,14 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm, n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_S2M, mq, ptd, n_left); else - n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_M2S, - mq, ptd, n_left); + { /* M2S ring: the DMA path is taken only when the interface has MEMIF_IF_FLAG_USE_DMA set and dma_tx_config is non-negative; otherwise the regular copy path is used */ + if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0)) + 
n_left = memif_interface_tx_dma_inline (vm, node, from, mif, + MEMIF_RING_M2S, mq, n_left); + else + n_left = memif_interface_tx_inline (vm, node, from, mif, + MEMIF_RING_M2S, mq, ptd, n_left); + } if (tf->shared_queue) clib_spinlock_unlock (&mq->lockp); @@ -416,7 +686,12 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm, mq->int_count++; } - if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0) + if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0)) + { /* DMA path: only the n_left unsent buffers at the end of the frame are freed here */ + if (n_left) + vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left); + } + else if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0) vlib_buffer_free (vm, from, frame->n_vectors); else if (n_left) vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left); |