diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/plugins/dma_intel/CMakeLists.txt | 9 | ||||
-rw-r--r-- | src/plugins/dma_intel/dsa.c | 424 | ||||
-rw-r--r-- | src/plugins/dma_intel/dsa_intel.h | 160 | ||||
-rw-r--r-- | src/plugins/dma_intel/format.c | 15 | ||||
-rw-r--r-- | src/plugins/dma_intel/main.c | 272 | ||||
-rw-r--r-- | src/plugins/dpdk/device/init.c | 10 | ||||
-rw-r--r-- | src/vlib/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/vlib/dma/cli.c | 160 | ||||
-rw-r--r-- | src/vlib/dma/dma.c | 82 | ||||
-rw-r--r-- | src/vlib/dma/dma.h | 131 | ||||
-rw-r--r-- | src/vlib/dma/dma.rst | 70 | ||||
-rw-r--r-- | src/vpp/conf/startup.conf | 6 |
12 files changed, 1341 insertions, 0 deletions
diff --git a/src/plugins/dma_intel/CMakeLists.txt b/src/plugins/dma_intel/CMakeLists.txt new file mode 100644 index 00000000000..8fd8cd1933f --- /dev/null +++ b/src/plugins/dma_intel/CMakeLists.txt @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright(c) 2022 Cisco Systems, Inc. + +add_vpp_plugin(dma_intel + SOURCES + dsa.c + format.c + main.c +) diff --git a/src/plugins/dma_intel/dsa.c b/src/plugins/dma_intel/dsa.c new file mode 100644 index 00000000000..7e1cdc2de01 --- /dev/null +++ b/src/plugins/dma_intel/dsa.c @@ -0,0 +1,424 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. + * Copyright (c) 2022 Intel and/or its affiliates. + */ + +#include <vlib/vlib.h> +#include <vlib/pci/pci.h> +#include <vlib/dma/dma.h> +#include <vppinfra/heap.h> +#include <vppinfra/atomics.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> +#include <dma_intel/dsa_intel.h> + +extern vlib_node_registration_t intel_dsa_node; + +VLIB_REGISTER_LOG_CLASS (intel_dsa_log, static) = { + .class_name = "intel_dsa", + .subclass_name = "dsa", +}; + +static void +intel_dsa_channel_lock (intel_dsa_channel_t *ch) +{ + u8 expected = 0; + if (ch->n_threads < 2) + return; + + /* channel is used by multiple threads so we need to lock it */ + while (!__atomic_compare_exchange_n (&ch->lock, &expected, + /* desired */ 1, /* weak */ 0, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + { + while (__atomic_load_n (&ch->lock, __ATOMIC_RELAXED)) + CLIB_PAUSE (); + expected = 0; + } +} + +static void +intel_dsa_channel_unlock (intel_dsa_channel_t *ch) +{ + if (ch->n_threads < 2) + return; + + __atomic_store_n (&ch->lock, 0, __ATOMIC_RELEASE); +} + +static vlib_dma_batch_t * +intel_dsa_batch_new (vlib_main_t *vm, struct vlib_dma_config_data *cd) +{ + intel_dsa_main_t *idm = &intel_dsa_main; + intel_dsa_config_t *idc; + intel_dsa_batch_t *b; + + idc = vec_elt_at_index (idm->dsa_config_heap, + cd->private_data + vm->thread_index); + + if (vec_len (idc->freelist) > 
0) + b = vec_pop (idc->freelist); + else + { + clib_spinlock_lock (&idm->lock); + b = vlib_physmem_alloc (vm, idc->alloc_size); + clib_spinlock_unlock (&idm->lock); + /* if no free space in physmem, force quit */ + ASSERT (b != NULL); + *b = idc->batch_template; + b->max_transfers = idc->max_transfers; + + u32 def_flags = (INTEL_DSA_OP_MEMMOVE << INTEL_DSA_OP_SHIFT) | + INTEL_DSA_FLAG_CACHE_CONTROL; + if (b->ch->block_on_fault) + def_flags |= INTEL_DSA_FLAG_BLOCK_ON_FAULT; + for (int i = 0; i < idc->max_transfers; i++) + { + intel_dsa_desc_t *dsa_desc = b->descs + i; + dsa_desc->op_flags = def_flags; + } + } + + return &b->batch; +} + +#if defined(__x86_64__) || defined(i386) +static_always_inline void +__movdir64b (volatile void *dst, const void *src) +{ + asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" + : + : "a"(dst), "d"(src) + : "memory"); +} +#endif + +static_always_inline void +intel_dsa_batch_fallback (vlib_main_t *vm, intel_dsa_batch_t *b, + intel_dsa_channel_t *ch) +{ + for (u16 i = 0; i < b->batch.n_enq; i++) + { + intel_dsa_desc_t *desc = &b->descs[i]; + clib_memcpy_fast (desc->dst, desc->src, desc->size); + } + b->status = INTEL_DSA_STATUS_CPU_SUCCESS; + ch->submitted++; + return; +} + +int +intel_dsa_batch_submit (vlib_main_t *vm, struct vlib_dma_batch *vb) +{ + intel_dsa_main_t *idm = &intel_dsa_main; + intel_dsa_batch_t *b = (intel_dsa_batch_t *) vb; + intel_dsa_channel_t *ch = b->ch; + if (PREDICT_FALSE (vb->n_enq == 0)) + { + vec_add1 (idm->dsa_config_heap[b->config_heap_index].freelist, b); + return 0; + } + + intel_dsa_channel_lock (ch); + if (ch->n_enq >= ch->size) + { + if (!b->sw_fallback) + { + intel_dsa_channel_unlock (ch); + return 0; + } + /* skip channel limitation if first pending finished */ + intel_dsa_batch_t *lb = NULL; + u32 n_pendings = + vec_len (idm->dsa_threads[vm->thread_index].pending_batches); + if (n_pendings) + lb = + idm->dsa_threads[vm->thread_index].pending_batches[n_pendings - 1]; + + if (!lb || lb->status != 
INTEL_DSA_STATUS_SUCCESS) + { + intel_dsa_batch_fallback (vm, b, ch); + goto done; + } + } + + b->status = INTEL_DSA_STATUS_BUSY; + if (PREDICT_FALSE (vb->n_enq == 1)) + { + intel_dsa_desc_t *desc = &b->descs[0]; + desc->completion = (u64) &b->completion_cl; + desc->op_flags |= INTEL_DSA_FLAG_COMPLETION_ADDR_VALID | + INTEL_DSA_FLAG_REQUEST_COMPLETION; +#if defined(__x86_64__) || defined(i386) + _mm_sfence (); /* fence before writing desc to device */ + __movdir64b (ch->portal, (void *) desc); +#endif + } + else + { + intel_dsa_desc_t *batch_desc = &b->descs[b->max_transfers]; + batch_desc->op_flags = (INTEL_DSA_OP_BATCH << INTEL_DSA_OP_SHIFT) | + INTEL_DSA_FLAG_COMPLETION_ADDR_VALID | + INTEL_DSA_FLAG_REQUEST_COMPLETION; + batch_desc->desc_addr = (void *) (b->descs); + batch_desc->size = vb->n_enq; + batch_desc->completion = (u64) &b->completion_cl; +#if defined(__x86_64__) || defined(i386) + _mm_sfence (); /* fence before writing desc to device */ + __movdir64b (ch->portal, (void *) batch_desc); +#endif + } + + ch->submitted++; + ch->n_enq++; + +done: + intel_dsa_channel_unlock (ch); + vec_add1 (idm->dsa_threads[vm->thread_index].pending_batches, b); + vlib_node_set_interrupt_pending (vm, intel_dsa_node.index); + return 1; +} + +static int +intel_dsa_check_channel (intel_dsa_channel_t *ch, vlib_dma_config_data_t *cd) +{ + if (!ch) + { + dsa_log_error ("no available dsa channel"); + return 1; + } + vlib_dma_config_t supported_cfg = { + .barrier_before_last = 1, + .sw_fallback = 1, + }; + + if (cd->cfg.features & ~supported_cfg.features) + { + dsa_log_error ("unsupported feature requested"); + return 1; + } + + if (cd->cfg.max_transfers > ch->max_transfers) + { + dsa_log_error ("transfer number (%u) too big", cd->cfg.max_transfers); + return 1; + } + + if (cd->cfg.max_transfer_size > ch->max_transfer_size) + { + dsa_log_error ("transfer size (%u) too big", cd->cfg.max_transfer_size); + return 1; + } + return 0; +} + +static int +intel_dsa_config_add_fn (vlib_main_t 
*vm, vlib_dma_config_data_t *cd) +{ + intel_dsa_main_t *idm = &intel_dsa_main; + intel_dsa_config_t *idc; + u32 index, n_threads = vlib_get_n_threads (); + + vec_validate (idm->dsa_config_heap_handle_by_config_index, cd->config_index); + index = heap_alloc_aligned ( + idm->dsa_config_heap, n_threads, CLIB_CACHE_LINE_BYTES, + idm->dsa_config_heap_handle_by_config_index[cd->config_index]); + + cd->batch_new_fn = intel_dsa_batch_new; + cd->private_data = index; + + for (u32 thread = 0; thread < n_threads; thread++) + { + intel_dsa_batch_t *idb; + vlib_dma_batch_t *b; + idc = vec_elt_at_index (idm->dsa_config_heap, index + thread); + + /* size of physmem allocation for this config */ + idc->max_transfers = cd->cfg.max_transfers; + idc->alloc_size = sizeof (intel_dsa_batch_t) + + sizeof (intel_dsa_desc_t) * (idc->max_transfers + 1); + /* fill batch template */ + idb = &idc->batch_template; + idb->ch = idm->dsa_threads[thread].ch; + if (intel_dsa_check_channel (idb->ch, cd)) + return 0; + + dsa_log_debug ("config %d in thread %d using channel %u/%u", + cd->config_index, thread, idb->ch->did, idb->ch->qid); + idb->config_heap_index = index + thread; + idb->config_index = cd->config_index; + idb->batch.callback_fn = cd->cfg.callback_fn; + idb->features = cd->cfg.features; + b = &idb->batch; + b->stride = sizeof (intel_dsa_desc_t); + b->src_ptr_off = STRUCT_OFFSET_OF (intel_dsa_batch_t, descs[0].src); + b->dst_ptr_off = STRUCT_OFFSET_OF (intel_dsa_batch_t, descs[0].dst); + b->size_off = STRUCT_OFFSET_OF (intel_dsa_batch_t, descs[0].size); + b->submit_fn = intel_dsa_batch_submit; + dsa_log_debug ( + "config %d in thread %d stride %d src/dst/size offset %d-%d-%d", + cd->config_index, thread, b->stride, b->src_ptr_off, b->dst_ptr_off, + b->size_off); + } + + dsa_log_info ("config %u added", cd->private_data); + + return 1; +} + +static void +intel_dsa_config_del_fn (vlib_main_t *vm, vlib_dma_config_data_t *cd) +{ + intel_dsa_main_t *idm = &intel_dsa_main; + intel_dsa_thread_t 
/*
 * Interrupt-mode input node that reaps this thread's pending DSA batches.
 *
 * Walks t->pending_batches in order and, per batch:
 *  - status SUCCESS / CPU_SUCCESS (and no barrier in effect): invoke the
 *    completion callback, restore the descriptor fields that submit
 *    modified for single-transfer batches, return the batch to its config
 *    freelist and update the channel counters;
 *  - status BUSY: keep it pending; if the batch has barrier_before_last
 *    set, raise `glitch` so later batches are not completed in this pass;
 *  - any other status (and no barrier in effect): perform the copies on
 *    the CPU via intel_dsa_batch_fallback();
 *  - otherwise (barrier in effect): keep it pending.
 *
 * The node re-arms its own interrupt while batches remain pending.
 * Returns the number of batches removed from the pending list.
 */
static uword
intel_dsa_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
                   vlib_frame_t *frame)
{
  intel_dsa_main_t *idm = &intel_dsa_main;
  intel_dsa_thread_t *t =
    vec_elt_at_index (idm->dsa_threads, vm->thread_index);
  /* n counts batches that stay pending; they are compacted to the front */
  u32 n_pending = 0, n = 0;
  /* set once a still-unfinished batch with barrier_before_last is seen */
  u8 glitch = 0;

  if (!t->pending_batches)
    return 0;

  n_pending = vec_len (t->pending_batches);

  for (u32 i = 0; i < n_pending; i++)
    {
      intel_dsa_batch_t *b = t->pending_batches[i];
      intel_dsa_channel_t *ch = b->ch;

      if ((b->status == INTEL_DSA_STATUS_SUCCESS ||
           b->status == INTEL_DSA_STATUS_CPU_SUCCESS) &&
          !glitch)
        {
          /* callback */
          if (b->batch.callback_fn)
            b->batch.callback_fn (vm, &b->batch);

          /* restore last descriptor fields: a single-transfer submit wrote
           * the completion address and completion flags into descs[0] */
          if (b->batch.n_enq == 1)
            {
              b->descs[0].completion = 0;
              b->descs[0].op_flags =
                (INTEL_DSA_OP_MEMMOVE << INTEL_DSA_OP_SHIFT) |
                INTEL_DSA_FLAG_CACHE_CONTROL;
              if (b->ch->block_on_fault)
                b->descs[0].op_flags |= INTEL_DSA_FLAG_BLOCK_ON_FAULT;
            }
          /* add to freelist */
          vec_add1 (idm->dsa_config_heap[b->config_heap_index].freelist, b);

          /* channel counters may be shared between threads */
          intel_dsa_channel_lock (ch);
          if (b->status == INTEL_DSA_STATUS_SUCCESS)
            {
              /* hardware completion frees one work-queue slot */
              ch->n_enq--;
              ch->completed++;
            }
          else
            ch->sw_fallback++;
          intel_dsa_channel_unlock (ch);

          b->batch.n_enq = 0;
          b->status = INTEL_DSA_STATUS_IDLE;
        }
      else if (b->status == INTEL_DSA_STATUS_BUSY)
        {
          glitch = 1 & b->barrier_before_last;
          t->pending_batches[n++] = b;
        }
      else if (!glitch)
        {
          /* fallback to software if exception happened */
          /* NOTE(review): the batch is not put back into pending_batches
           * here, so after the CPU copy it is never passed to callback_fn
           * nor returned to the freelist, and ch->n_enq is not decremented
           * for the failed hardware submission - confirm this is intended */
          intel_dsa_batch_fallback (vm, b, ch);
          glitch = 1 & b->barrier_before_last;
        }
      else
        {
          t->pending_batches[n++] = b;
        }
    }
  vec_set_len (t->pending_batches, n);

  /* poll again while anything is still outstanding */
  if (n)
    {
      vlib_node_set_interrupt_pending (vm, intel_dsa_node.index);
    }

  return n_pending - n;
}
/*
 * Per work-queue state. One instance is allocated for every "dev wqX.Y"
 * entry in the startup configuration; the attribute fields are populated
 * from sysfs and the portal is mapped from the work queue's device node.
 */
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  volatile void *portal; /* portal exposed by dedicated work queue */
  u64 submitted;         /* batches submitted (hardware or CPU fallback) */
  u64 completed;         /* batches completed by hardware */
  u64 sw_fallback;       /* batches completed by CPU copy */
  u32 max_transfer_size; /* maximum size of each transfer */
  u16 max_transfers;     /* maximum number referenced in a batch */
  u16 n_threads;         /* number of threads using this channel */
  u16 n_enq;             /* number of batches currently enqueued */
  union
  {
    u16 wq_control;
    struct
    {
      u16 type : 2;           /* intel_dsa_wq_type_t */
      u16 state : 1;          /* 1 when sysfs state is "enabled" */
      u16 ats_disable : 1;    /* sysfs ats_disable attribute */
      u16 block_on_fault : 1; /* sysfs block_on_fault attribute */
      u16 mode : 1;           /* 1 when sysfs mode is "dedicated" */
    };
  };
  u8 lock; /* spinlock, only used if n_threads > 1 */
  u8 numa; /* numa node */
  u8 size; /* size of work queue */
  u8 did;  /* dsa device id */
  u8 qid;  /* work queue id */
} intel_dsa_channel_t;
+ CLIB_CACHE_LINE_ALIGN_MARK (start); + vlib_dma_batch_t batch; /* must be first */ + intel_dsa_channel_t *ch; + u32 config_heap_index; + u32 max_transfers; + u32 config_index; + union + { + struct + { + u32 barrier_before_last : 1; + u32 sw_fallback : 1; + }; + u32 features; + }; + CLIB_CACHE_LINE_ALIGN_MARK (completion_cl); +#define INTEL_DSA_STATUS_IDLE 0x0 +#define INTEL_DSA_STATUS_SUCCESS 0x1 +#define INTEL_DSA_STATUS_BUSY 0xa +#define INTEL_DSA_STATUS_CPU_SUCCESS 0xb + u8 status; + /* to avoid read-modify-write completion is written as 64-byte + * DMA FILL operation */ + CLIB_CACHE_LINE_ALIGN_MARK (descriptors); + intel_dsa_desc_t descs[0]; +} intel_dsa_batch_t; + +STATIC_ASSERT_OFFSET_OF (intel_dsa_batch_t, batch, 0); + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + intel_dsa_batch_t batch_template; + u32 alloc_size; + u32 max_transfers; + intel_dsa_batch_t **freelist; +} intel_dsa_config_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + intel_dsa_channel_t *ch; /* channel used by this thread */ + intel_dsa_batch_t **pending_batches; +} intel_dsa_thread_t; + +typedef struct +{ + intel_dsa_channel_t ***channels; + intel_dsa_thread_t *dsa_threads; + intel_dsa_config_t *dsa_config_heap; + uword *dsa_config_heap_handle_by_config_index; + /* spin lock protect pmem */ + clib_spinlock_t lock; +} intel_dsa_main_t; + +extern intel_dsa_main_t intel_dsa_main; +extern vlib_dma_backend_t intel_dsa_backend; +format_function_t format_intel_dsa_addr; + +#define dsa_log_debug(f, ...) \ + vlib_log (VLIB_LOG_LEVEL_DEBUG, intel_dsa_log.class, "%s: " f, __func__, \ + ##__VA_ARGS__) + +#define dsa_log_info(f, ...) \ + vlib_log (VLIB_LOG_LEVEL_INFO, intel_dsa_log.class, "%s: " f, __func__, \ + ##__VA_ARGS__) + +#define dsa_log_error(f, ...) 
\ + vlib_log (VLIB_LOG_LEVEL_ERR, intel_dsa_log.class, "%s: " f, __func__, \ + ##__VA_ARGS__) + +#endif diff --git a/src/plugins/dma_intel/format.c b/src/plugins/dma_intel/format.c new file mode 100644 index 00000000000..b05a06fb3b1 --- /dev/null +++ b/src/plugins/dma_intel/format.c @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2022 Intel and/or its affiliates. + */ +#include <vlib/vlib.h> +#include <vlib/pci/pci.h> +#include <vlib/dma/dma.h> +#include <vnet/plugin/plugin.h> +#include <dma_intel/dsa_intel.h> + +u8 * +format_intel_dsa_addr (u8 *s, va_list *va) +{ + intel_dsa_channel_t *ch = va_arg (*va, intel_dsa_channel_t *); + return format (s, "wq%d.%d", ch->did, ch->qid); +} diff --git a/src/plugins/dma_intel/main.c b/src/plugins/dma_intel/main.c new file mode 100644 index 00000000000..4e9df04ea9e --- /dev/null +++ b/src/plugins/dma_intel/main.c @@ -0,0 +1,272 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2022 Cisco Systems, Inc. + * Copyright (c) 2022 Intel and/or its affiliates. + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <vlib/vlib.h> +#include <vlib/pci/pci.h> +#include <vlib/dma/dma.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> +#include <vppinfra/linux/sysfs.h> +#include <dma_intel/dsa_intel.h> + +VLIB_REGISTER_LOG_CLASS (intel_dsa_log, static) = { + .class_name = "intel_dsa", +}; + +intel_dsa_main_t intel_dsa_main; + +void +intel_dsa_assign_channels (vlib_main_t *vm) +{ + intel_dsa_main_t *idm = &intel_dsa_main; + intel_dsa_channel_t *ch, **chv = 0; + u16 n_threads; + int n; + + vec_foreach_index (n, idm->channels) + vec_append (chv, idm->channels[n]); + + vec_validate (idm->dsa_threads, vlib_get_n_threads () - 1); + + if (vec_len (chv) == 0) + { + dsa_log_debug ("No DSA channels found"); + goto done; + } + + if (vec_len (chv) >= vlib_get_n_threads ()) + n_threads = 1; + else + n_threads = vlib_get_n_threads () % vec_len (chv) ? 
+ vlib_get_n_threads () / vec_len (chv) + 1 : + vlib_get_n_threads () / vec_len (chv); + + for (int i = 0; i < vlib_get_n_threads (); i++) + { + vlib_main_t *tvm = vlib_get_main_by_index (i); + ch = *vec_elt_at_index (chv, i / n_threads); + idm->dsa_threads[i].ch = ch; + ch->n_threads = n_threads; + dsa_log_debug ("Assigning channel %u/%u to thread %u (numa %u)", ch->did, + ch->qid, i, tvm->numa_node); + } + +done: + /* free */ + vec_free (chv); +} + +static clib_error_t * +intel_dsa_map_region (intel_dsa_channel_t *ch) +{ + static clib_error_t *error = NULL; + /* map one page */ + uword size = 0x1000; + uword offset = 0; + char path[256] = { 0 }; + + snprintf (path, sizeof (path), "%s/wq%d.%d", DSA_DEV_PATH, ch->did, ch->qid); + int fd = open (path, O_RDWR); + if (fd < 0) + return clib_error_return (0, "failed to open dsa device %s", path); + + ch->portal = + clib_mem_vm_map_shared (0, size, fd, offset, "%s", (char *) path); + if (ch->portal == CLIB_MEM_VM_MAP_FAILED) + { + error = clib_error_return (0, "mmap portal %s failed", path); + close (fd); + return error; + } + + return NULL; +} + +static clib_error_t * +intel_dsa_get_info (intel_dsa_channel_t *ch, clib_error_t **error) +{ + clib_error_t *err; + u8 *tmpstr; + u8 *dev_dir_name = 0, *wq_dir_name = 0; + + u8 *f = 0; + dev_dir_name = format (0, "%s/dsa%d", SYS_DSA_PATH, ch->did); + + vec_reset_length (f); + f = format (f, "%v/numa_node%c", dev_dir_name, 0); + err = clib_sysfs_read ((char *) f, "%s", &tmpstr); + if (err) + goto error; + ch->numa = atoi ((char *) tmpstr); + + wq_dir_name = format (0, "%s/%U", SYS_DSA_PATH, format_intel_dsa_addr, ch); + + vec_reset_length (f); + f = format (f, "%v/max_transfer_size%c", wq_dir_name, 0); + err = clib_sysfs_read ((char *) f, "%s", &tmpstr); + if (err) + goto error; + ch->max_transfer_size = atoi ((char *) tmpstr); + + vec_reset_length (f); + f = format (f, "%v/max_batch_size%c", wq_dir_name, 0); + err = clib_sysfs_read ((char *) f, "%s", &tmpstr); + if (err) + goto 
error; + ch->max_transfers = atoi ((char *) tmpstr); + + vec_reset_length (f); + f = format (f, "%v/size%c", wq_dir_name, 0); + err = clib_sysfs_read ((char *) f, "%s", &tmpstr); + if (err) + goto error; + ch->size = atoi ((char *) tmpstr); + + vec_reset_length (f); + f = format (f, "%v/type%c", wq_dir_name, 0); + err = clib_sysfs_read ((char *) f, "%s", &tmpstr); + if (err) + goto error; + if (tmpstr) + { + if (!clib_strcmp ((char *) tmpstr, "enabled")) + ch->type = INTEL_DSA_DEVICE_TYPE_UNKNOWN; + else if (!clib_strcmp ((char *) tmpstr, "user")) + ch->type = INTEL_DSA_DEVICE_TYPE_USER; + else if (!clib_strcmp ((char *) tmpstr, "mdev")) + ch->type = INTEL_DSA_DEVICE_TYPE_KERNEL; + else + ch->type = INTEL_DSA_DEVICE_TYPE_UNKNOWN; + vec_free (tmpstr); + } + + vec_reset_length (f); + f = format (f, "%v/state%c", wq_dir_name, 0); + err = clib_sysfs_read ((char *) f, "%s", &tmpstr); + if (err) + goto error; + if (tmpstr) + { + if (!clib_strcmp ((char *) tmpstr, "enabled")) + ch->state = 1; + else + ch->state = 0; + vec_free (tmpstr); + } + + vec_reset_length (f); + f = format (f, "%v/ats_disable%c", wq_dir_name, 0); + err = clib_sysfs_read ((char *) f, "%s", &tmpstr); + if (err) + goto error; + ch->ats_disable = atoi ((char *) tmpstr); + + vec_reset_length (f); + f = format (f, "%v/block_on_fault%c", wq_dir_name, 0); + err = clib_sysfs_read ((char *) f, "%s", &tmpstr); + if (err) + goto error; + ch->block_on_fault = atoi ((char *) tmpstr); + + vec_reset_length (f); + f = format (f, "%v/mode%c", wq_dir_name, 0); + err = clib_sysfs_read ((char *) f, "%s", &tmpstr); + if (err) + goto error; + if (tmpstr) + { + if (!clib_strcmp ((char *) tmpstr, "dedicated")) + ch->mode = 1; + else + ch->mode = 0; + vec_free (tmpstr); + } + + vec_free (f); + vec_free (dev_dir_name); + vec_free (wq_dir_name); + return NULL; + +error: + vec_free (f); + vec_free (dev_dir_name); + vec_free (wq_dir_name); + + return err; +} + +clib_error_t * +intel_dsa_add_channel (vlib_main_t *vm, 
intel_dsa_channel_t *ch) +{ + intel_dsa_main_t *dm = &intel_dsa_main; + clib_error_t *err = 0; + + if (intel_dsa_map_region (ch)) + return clib_error_return (0, "dsa open device failed"); + + if (intel_dsa_get_info (ch, &err)) + return clib_error_return (err, "dsa info not scanned"); + + vec_validate (dm->channels, ch->numa); + vec_add1 (dm->channels[ch->numa], ch); + + return err; +} + +static clib_error_t * +dsa_config (vlib_main_t *vm, unformat_input_t *input) +{ + clib_error_t *error = 0; + intel_dsa_channel_t *ch; + u8 did, qid; + + if (intel_dsa_main.lock == 0) + clib_spinlock_init (&(intel_dsa_main.lock)); + + if ((error = vlib_dma_register_backend (vm, &intel_dsa_backend))) + goto done; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "dev wq%d.%d", &did, &qid)) + { + ch = clib_mem_alloc_aligned (sizeof (*ch), CLIB_CACHE_LINE_BYTES); + clib_memset (ch, 0, sizeof (*ch)); + ch->did = did; + ch->qid = qid; + if (intel_dsa_add_channel (vm, ch)) + clib_mem_free (ch); + } + else if (unformat_skip_white_space (input)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + +done: + return error; +} + +VLIB_CONFIG_FUNCTION (dsa_config, "dsa"); + +clib_error_t * +intel_dsa_num_workers_change (vlib_main_t *vm) +{ + intel_dsa_assign_channels (vm); + return 0; +} + +VLIB_NUM_WORKERS_CHANGE_FN (intel_dsa_num_workers_change); + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Intel DSA Backend", +}; diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 4863c8c12b6..23a26a4157b 100644 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -987,6 +987,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) int eal_no_hugetlb = 0; u8 no_pci = 0; u8 no_vmbus = 0; + u8 no_dsa = 0; u8 file_prefix = 0; u8 *socket_mem = 0; u8 *huge_dir_path = 0; @@ -1095,6 +1096,8 @@ dpdk_config 
(vlib_main_t * vm, unformat_input_t * input) tmp = format (0, "--no-pci%c", 0); vec_add1 (conf->eal_init_args, tmp); } + else if (unformat (input, "no-dsa")) + no_dsa = 1; else if (unformat (input, "blacklist %U", unformat_vlib_vmbus_addr, &vmbus_addr)) { @@ -1304,6 +1307,13 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) vm = vlib_get_main (); + if (no_dsa) + { + struct rte_bus *bus; + bus = rte_bus_find_by_name ("dsa"); + if (bus) + rte_bus_unregister (bus); + } /* make copy of args as rte_eal_init tends to mess up with arg array */ for (i = 1; i < vec_len (conf->eal_init_args); i++) conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", diff --git a/src/vlib/CMakeLists.txt b/src/vlib/CMakeLists.txt index bb43abdc63e..69bd18b6db6 100644 --- a/src/vlib/CMakeLists.txt +++ b/src/vlib/CMakeLists.txt @@ -105,6 +105,8 @@ add_vpp_library(vlib unix/util.c vmbus/vmbus.c ${VMBUS_SOURCE} + dma/dma.c + dma/cli.c MULTIARCH_SOURCES buffer_funcs.c diff --git a/src/vlib/dma/cli.c b/src/vlib/dma/cli.c new file mode 100644 index 00000000000..0ca4a9b6778 --- /dev/null +++ b/src/vlib/dma/cli.c @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2022 Cisco Systems, Inc. 
+ */ + +#include <vlib/vlib.h> +#include <vlib/physmem_funcs.h> +#include <vlib/dma/dma.h> + +static clib_error_t * +show_dma_backends_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vlib_dma_main_t *dm = &vlib_dma_main; + + if (vec_len (dm->backends)) + { + vlib_dma_backend_t *b; + vec_foreach (b, dm->backends) + vlib_cli_output (vm, "%s", b->name); + } + else + vlib_cli_output (vm, "No active DMA backends"); + + return 0; +} + +VLIB_CLI_COMMAND (avf_create_command, static) = { + .path = "show dma backends", + .short_help = "show dma backends", + .function = show_dma_backends_command_fn, +}; + +static void +test_dma_cb_fn (vlib_main_t *vm, vlib_dma_batch_t *b) +{ + fformat (stderr, "%s: cb %p cookie %lx\n", __func__, b, + vlib_dma_batch_get_cookie (vm, b)); +} + +static clib_error_t * +fill_random_data (void *buffer, uword size) +{ + uword seed = random_default_seed (); + + uword remain = size; + const uword p = clib_mem_get_page_size (); + uword offset = 0; + + clib_random_buffer_t rb; + clib_random_buffer_init (&rb, seed); + + while (remain > 0) + { + uword fill_size = clib_min (p, remain); + + clib_random_buffer_fill (&rb, fill_size); + void *rbuf = clib_random_buffer_get_data (&rb, fill_size); + clib_memcpy_fast (buffer + offset, rbuf, fill_size); + clib_random_buffer_free (&rb); + + offset += fill_size; + remain -= fill_size; + } + + return 0; +} + +static clib_error_t * +test_dma_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + clib_error_t *err = 0; + vlib_dma_batch_t *b; + int config_index = -1; + u32 rsz, n_alloc, v; + u8 *from = 0, *to = 0; + vlib_dma_config_t cfg = { .max_transfers = 256, + .max_transfer_size = 4096, + .callback_fn = test_dma_cb_fn }; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "transfers %u", &v)) + cfg.max_transfers = v; + else if (unformat (input, "size %u", &v)) + cfg.max_transfer_size = v; + else + return 
clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if ((config_index = vlib_dma_config_add (vm, &cfg)) < 0) + { + err = clib_error_return (0, "Unable to allocate dma config"); + return err; + } + + rsz = round_pow2 (cfg.max_transfer_size, CLIB_CACHE_LINE_BYTES); + n_alloc = rsz * cfg.max_transfers * 2; + + if ((from = vlib_physmem_alloc_aligned_on_numa ( + vm, n_alloc, CLIB_CACHE_LINE_BYTES, vm->numa_node)) == 0) + { + err = clib_error_return (0, "Unable to allocate %u bytes of physmem", + n_alloc); + return err; + } + to = from + n_alloc / 2; + + u32 port_allocator_seed; + + fill_random_data (from, cfg.max_transfers * rsz); + + b = vlib_dma_batch_new (vm, config_index); + vlib_dma_batch_set_cookie (vm, b, 0x12345678); + + port_allocator_seed = clib_cpu_time_now (); + int transfers = random_u32 (&port_allocator_seed) % cfg.max_transfers; + if (!transfers) + transfers = 1; + for (int i = 0; i < transfers; i++) + vlib_dma_batch_add (vm, b, to + i * rsz, from + i * rsz, + cfg.max_transfer_size); + + vlib_dma_batch_submit (vm, b); + return err; +} + +static clib_error_t * +test_show_dma_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + clib_error_t *err = 0; + int config_index = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "config %u", &config_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + for (u32 i = 0; i < vlib_get_n_threads (); i++) + vlib_cli_output (vm, "Config %d %U", config_index, vlib_dma_config_info, + config_index, vlib_get_main_by_index (i)); + return err; +} + +VLIB_CLI_COMMAND (test_dma_command, static) = { + .path = "test dma", + .short_help = "test dma [transfers <x> size <x>]", + .function = test_dma_command_fn, +}; + +VLIB_CLI_COMMAND (show_dma_command, static) = { + .path = "show dma", + .short_help = "show dma [config <x>]", + .function = test_show_dma_fn, +}; diff --git 
a/src/vlib/dma/dma.c b/src/vlib/dma/dma.c
new file mode 100644
index 00000000000..cc8ed94ea8f
--- /dev/null
+++ b/src/vlib/dma/dma.c
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/log.h>
+#include <vlib/dma/dma.h>
+
+VLIB_REGISTER_LOG_CLASS (dma_log) = {
+  .class_name = "dma",
+};
+
+vlib_dma_main_t vlib_dma_main = {};
+
+/* Append a copy of backend descriptor 'b' to the global backend vector.
+ * Always returns 0 (no error). */
+clib_error_t *
+vlib_dma_register_backend (vlib_main_t *vm, vlib_dma_backend_t *b)
+{
+  vlib_dma_main_t *dm = &vlib_dma_main;
+  vec_add1 (dm->backends, *b);
+  dma_log_info ("backend '%s' registered", b->name);
+  return 0;
+}
+
+/* Allocate a config slot holding a copy of 'c' and offer it to each
+ * registered backend in registration order; the first backend whose
+ * config_add_fn returns non-zero becomes the config's backend.
+ * Returns the config index, or -1 (slot released) if none accepts it. */
+int
+vlib_dma_config_add (vlib_main_t *vm, vlib_dma_config_t *c)
+{
+  vlib_dma_main_t *dm = &vlib_dma_main;
+  vlib_dma_backend_t *b;
+  vlib_dma_config_data_t *cd;
+
+  pool_get_zero (dm->configs, cd);
+  cd->config_index = cd - dm->configs;
+
+  clib_memcpy (&cd->cfg, c, sizeof (vlib_dma_config_t));
+
+  vec_foreach (b, dm->backends)
+    {
+      dma_log_info ("calling '%s' config_add_fn", b->name);
+      if (b->config_add_fn (vm, cd))
+        {
+          /* pass the u32 index: a pointer difference does not match %u */
+          dma_log_info ("config %u added into backend %s", cd->config_index,
+                        b->name);
+          cd->backend_index = b - dm->backends;
+          return cd - dm->configs;
+        }
+    }
+
+  pool_put (dm->configs, cd);
+  return -1;
+}
+
+/* Delete config 'config_index': call the owning backend's config_del_fn
+ * if it has one, then return the slot to the pool. The index is not
+ * validated here. */
+void
+vlib_dma_config_del (vlib_main_t *vm, u32 config_index)
+{
+  vlib_dma_main_t *dm = &vlib_dma_main;
+  vlib_dma_config_data_t *cd = pool_elt_at_index (dm->configs, config_index);
+  vlib_dma_backend_t *b = vec_elt_at_index (dm->backends, cd->backend_index);
+
+  if (b->config_del_fn)
+    b->config_del_fn (vm, cd);
+
+  pool_put (dm->configs, cd);
+  dma_log_info ("config %u deleted from backend %s", config_index, b->name);
+}
+
+/* Format helper: reads an int config index from 'args' and delegates to
+ * the info_fn of that config's backend, passing 's' and the remaining
+ * 'args'. Appends "not found" when the index names no live config and
+ * returns 's' unchanged when the backend has no info_fn. */
+u8 *
+vlib_dma_config_info (u8 *s, va_list *args)
+{
+  vlib_dma_main_t *dm = &vlib_dma_main;
+  int config_index = va_arg (*args, int);
+
+  /* pool_elts () counts live elements and says nothing about whether this
+   * particular slot is in use, so test the slot itself */
+  if (config_index < 0 || pool_is_free_index (dm->configs, config_index))
+    return format (s, "%s", "not found");
+
+  vlib_dma_config_data_t *cd = pool_elt_at_index (dm->configs, config_index);
+  vlib_dma_backend_t *b = vec_elt_at_index (dm->backends, cd->backend_index);
+
+  if (b->info_fn)
+    return b->info_fn (s, args);
+
+  /* returning 0 here would drop the caller's vector */
+  return s;
+}
diff --git a/src/vlib/dma/dma.h b/src/vlib/dma/dma.h
new file mode 100644
index 00000000000..eaeba8b13b7
--- /dev/null
+++ b/src/vlib/dma/dma.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#ifndef included_vlib_dma_h
+#define included_vlib_dma_h
+#include <vlib/vlib.h>
+
+#define dma_log_debug(f, ...)                                                 \
+  vlib_log (VLIB_LOG_LEVEL_DEBUG, dma_log.class, "%s: " f, __func__,          \
+            ##__VA_ARGS__)
+
+#define dma_log_info(f, ...)                                                  \
+  vlib_log (VLIB_LOG_LEVEL_INFO, dma_log.class, "%s: " f, __func__,           \
+            ##__VA_ARGS__)
+
+struct vlib_dma_batch;
+struct vlib_dma_config_data;
+
+typedef int (vlib_dma_config_add_fn) (vlib_main_t *vm,
+                                      struct vlib_dma_config_data *cfg);
+typedef void (vlib_dma_config_del_fn) (vlib_main_t *vm,
+                                       struct vlib_dma_config_data *cfg);
+typedef struct vlib_dma_batch *(vlib_dma_batch_new_fn) (
+  vlib_main_t *vm, struct vlib_dma_config_data *);
+typedef int (vlib_dma_batch_submit_fn) (vlib_main_t *vm,
+                                        struct vlib_dma_batch *b);
+typedef void (vlib_dma_batch_callback_fn) (vlib_main_t *vm,
+                                           struct vlib_dma_batch *b);
+/* Settings an application passes to vlib_dma_config_add (). */
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      u32 barrier_before_last : 1;
+      u32 sw_fallback : 1;
+    };
+    u32 features;
+  };
+  u16 max_transfers;
+  u32 max_transfer_size;
+  vlib_dma_batch_callback_fn *callback_fn;
+} vlib_dma_config_t;
+
+/* Batch header. vlib_dma_batch_add () writes transfer n at byte offset
+ * n * stride from this struct, using dst_ptr_off, src_ptr_off and size_off. */
+typedef struct vlib_dma_batch
+{
+  vlib_dma_batch_submit_fn *submit_fn;
+  vlib_dma_batch_callback_fn *callback_fn;
+  uword cookie;
+  u16 src_ptr_off;
+  u16 dst_ptr_off;
+  u16 size_off;
+  u16 stride;
+  u16 n_enq;
+} vlib_dma_batch_t;
+
+/* Backend descriptor; config_del_fn and info_fn may be NULL. */
+typedef struct
+{
+  char *name;
+  vlib_dma_config_add_fn *config_add_fn;
+  vlib_dma_config_del_fn *config_del_fn;
+  format_function_t *info_fn;
+} vlib_dma_backend_t;
+
+/* Per-config state kept in the vlib_dma_main.configs pool. */
+typedef struct vlib_dma_config_data
+{
+  vlib_dma_config_t cfg;
+  vlib_dma_batch_new_fn *batch_new_fn;
+  uword private_data;
+  u32 backend_index;
+  u32 config_index;
+} vlib_dma_config_data_t;
+
+typedef struct
+{
+  vlib_dma_backend_t *backends;
+  vlib_dma_config_data_t *configs;
+} vlib_dma_main_t;
+
+extern vlib_dma_main_t vlib_dma_main;
+
+clib_error_t *vlib_dma_register_backend (vlib_main_t *vm,
+                                         vlib_dma_backend_t *b);
+
+int vlib_dma_config_add (vlib_main_t *vm, vlib_dma_config_t *b);
+void vlib_dma_config_del (vlib_main_t *vm, u32 config_index);
+u8 *vlib_dma_config_info (u8 *s, va_list *args);
+
+/* Create a batch via the batch_new_fn stored in config 'config_index'. */
+static_always_inline vlib_dma_batch_t *
+vlib_dma_batch_new (vlib_main_t *vm, u32 config_index)
+{
+  vlib_dma_main_t *dm = &vlib_dma_main;
+  vlib_dma_config_data_t *cd = pool_elt_at_index (dm->configs, config_index);
+
+  return cd->batch_new_fn (vm, cd);
+}
+
+static_always_inline void
+vlib_dma_batch_set_cookie (vlib_main_t *vm, vlib_dma_batch_t *batch,
+                           uword cookie)
+{
+  batch->cookie = cookie;
+}
+
+static_always_inline uword
+vlib_dma_batch_get_cookie (vlib_main_t *vm, vlib_dma_batch_t *batch)
+{
+  return batch->cookie;
+}
+
+/* Store one transfer (dst, src, size) in slot n_enq of 'batch' and advance
+ * n_enq. No capacity check is done here. */
+static_always_inline void
+vlib_dma_batch_add (vlib_main_t *vm, vlib_dma_batch_t *batch, void *dst,
+                    void *src, u32 size)
+{
+  u8 *p = (u8 *) batch + batch->n_enq * batch->stride;
+
+  *((void **) (p + batch->dst_ptr_off)) = dst;
+  *((void **) (p + batch->src_ptr_off)) = src;
+  *((u32 *) (p + batch->size_off)) = size;
+
+  batch->n_enq++;
+}
+
+/* Pass 'batch' to its submit_fn; submit_fn's int result is discarded. */
+static_always_inline void
+vlib_dma_batch_submit (vlib_main_t *vm, vlib_dma_batch_t *batch)
+{
+  batch->submit_fn (vm, batch);
+}
+
+#endif
diff --git a/src/vlib/dma/dma.rst b/src/vlib/dma/dma.rst
new file mode 100644
index 00000000000..4048d49b218
--- /dev/null
+++ b/src/vlib/dma/dma.rst
@@ -0,0 +1,70 @@
+.. _dma_plugin:
+
+.. toctree::
+
+DMA plugin
+==========
+
+Overview
+--------
+This plugin utilizes platform DMA accelerators like CBDMA/DSA for streaming
+data movement.
Modern DMA accelerators have high memory bandwidth and benefit +cross-NUMA traffic. An accelerator like DSA is capable of IO page +fault recovery, which saves IOMMU setup for memory that is not pinned. + +Terminology & Usage +------------------- + +A ``backend`` is an abstraction of a resource derived from a DMA device; +it supports the operations needed for DMA offloading, such as configuration, +DMA requests and result queries. + +A ``config`` is an abstraction of an application's DMA capability. An +application can request a config instance through the DMA node. The DMA node +checks the application's requirements and binds a suitable backend to it. + +Enable DSA work queue: +---------------------- + +.. code-block:: console + # configure 1 group with one engine + accel-config config-engine dsa0/engine0.0 --group-id=0 + + # configure 1 queue in that group, so that it + # is backed by a single engine + accel-config config-wq dsa0/wq0.0 --group-id=0 --type=user \ + --priority=10 --max-batch-size=1024 --mode=dedicated -b 1 -a 0 --name=vpp1 + +DMA transfer: +------------- + +In this sample, the application requests from the DMA node a DMA capability +that can hold a batch of at most 256 transfers, each of at most 4K bytes. +A non-negative config_index value means the resource has +been allocated and the DMA engine is ready to serve. + +.. 
code-block:: c + void dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b); + + vlib_dma_config_t args = {}; + args.max_transfers = 256; + args.max_transfer_size = 4096; + args.sw_fallback = 1; + args.barrier_before_last = 1; + args.callback_fn = dma_completion_cb; + int config_index = vlib_dma_config_add (vm, &args); + if (config_index < 0) /* -1: no backend accepted the config */ + return; + + u8 *dst[n_transfers]; + u8 *src[n_transfers]; + u32 i = 0, size = 4096; + + vlib_dma_batch_t *b; + b = vlib_dma_batch_new (vm, config_index); + while (n_transfers) { + vlib_dma_batch_add (vm, b, dst[i], src[i], size); + n_transfers --; + i ++; + } + vlib_dma_batch_submit (vm, b); diff --git a/src/vpp/conf/startup.conf b/src/vpp/conf/startup.conf index 52c94339051..929106a4c8a 100644 --- a/src/vpp/conf/startup.conf +++ b/src/vpp/conf/startup.conf @@ -108,6 +108,12 @@ cpu { # page-size default-hugepage # } +# dsa { + ## DSA work queue address + # dev wq0.0 + # dev wq0.1 +# } + # dpdk { ## Change default settings for all interfaces # dev default { |