vlib: introduce DMA infrastructure

This patch introduces DMA infrastructure into vlib. It is well known that large amounts of memory movement drain core resources. Nowadays more and more hardware accelerators are designed to free cores from this burden. Meanwhile, some restrictions still remain when utilizing hardware accelerators, e.g. cross-NUMA throughput drops significantly compared to same-node throughput. Normally the number of hardware accelerator instances is smaller than the number of cores, not to mention that the number of applications may even exceed the number of cores. Some hardware may support sharing virtual addresses with cores, while other hardware does not. Here we introduce a new DMA infrastructure which can fulfill the requirements of VPP applications like session and memif while dealing with hardware limitations. Here is some design background: A backend is the abstraction of a resource which is allocated from a DMA device and can perform basic operations like configuration, DMA copy and result query. A config is the abstraction of an application's DMA requirements. An application needs to request a unique config index from the DMA infrastructure. This unique config index is associated with a backend resource. Two options, cpu fallback and barrier before last, can be specified in a config. A DMA transfer will be performed by the CPU when the backend is busy if the cpu fallback option is enabled. DMA transfer callbacks will be in order if the barrier before last option is enabled. Everything a DMA transfer request needs is assembled into a DMA batch. It contains the pattern of DMA descriptors and function pointers for submission and callback. One DMA transfer request needs multiple batch updates and one batch submission. DMA backends are assigned equally to a config's worker threads. A lock is used for thread safety if the same backend is assigned to multiple threads. The backend node checks all pending requests in the worker thread and invokes the callback with a pointer to the DMA batch when the transfer has completed. 
Applications can use the cookie in the DMA batch for their own purposes.

DMA architecture:

   +----------+   +----------+           +----------+   +----------+
   | Config1  |   | Config2  |           | Config1  |   | Config2  |
   +----------+   +----------+           +----------+   +----------+
        ||             ||                     ||             ||
   +-------------------------+           +-------------------------+
   |  DMA polling thread A   |           |  DMA polling thread B   |
   +-------------------------+           +-------------------------+
               ||                                     ||
          +----------+                           +----------+
          | Backend1 |                           | Backend2 |
          +----------+                           +----------+

Type: feature

Signed-off-by: Marvin Liu <yong.liu@intel.com>
Change-Id: I1725e0c26687985aac29618c9abe4f5e0de08ebf
author: Marvin Liu <yong.liu@intel.com> 2022-08-17 09:38:40 +0800
committer: Damjan Marion <dmarion@0xa5.net> 2022-08-25 19:05:40 +0000
commit: abd5669422c5805da5135496d5e5a394fa5aa602 (patch)
tree: a464eb14b5e04b19042e92bb83ca7b8567731f19 /src/vlib
parent: 9a6ad01c0d443f002eafa709813d021bf0c98eac (diff)
5 files changed, 445 insertions, 0 deletions
diff --git a/src/vlib/CMakeLists.txt b/src/vlib/CMakeLists.txt
index bb43abdc63e..69bd18b6db6 100644
--- a/src/vlib/CMakeLists.txt
+++ b/src/vlib/CMakeLists.txt
@@ -105,6 +105,8 @@ add_vpp_library(vlib
   unix/util.c
   vmbus/vmbus.c
   ${VMBUS_SOURCE}
+  dma/dma.c
+  dma/cli.c
 
   MULTIARCH_SOURCES
   buffer_funcs.c
diff --git a/src/vlib/dma/cli.c b/src/vlib/dma/cli.c
new file mode 100644
index 00000000000..0ca4a9b6778
--- /dev/null
+++ b/src/vlib/dma/cli.c
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/physmem_funcs.h>
+#include <vlib/dma/dma.h>
+
+/* CLI handler for "show dma backends": prints the name of every registered
+ * DMA backend, one per line, or a notice when none is registered.
+ * Always returns 0 (no error). */
+static clib_error_t *
+show_dma_backends_command_fn (vlib_main_t *vm, unformat_input_t *input,
+			      vlib_cli_command_t *cmd)
+{
+  vlib_dma_main_t *dm = &vlib_dma_main;
+  vlib_dma_backend_t *backend;
+
+  if (vec_len (dm->backends) == 0)
+    {
+      vlib_cli_output (vm, "No active DMA backends");
+      return 0;
+    }
+
+  vec_foreach (backend, dm->backends)
+    vlib_cli_output (vm, "%s", backend->name);
+
+  return 0;
+}
+
+/* Registers the "show dma backends" CLI command. The registration symbol is
+ * file-local and named after the command it registers. */
+VLIB_CLI_COMMAND (show_dma_backends_command, static) = {
+  .path = "show dma backends",
+  .short_help = "show dma backends",
+  .function = show_dma_backends_command_fn,
+};
+
+/* Completion callback installed by the "test dma" command: writes the batch
+ * pointer and the batch cookie to stderr. */
+static void
+test_dma_cb_fn (vlib_main_t *vm, vlib_dma_batch_t *b)
+{
+  uword cookie = vlib_dma_batch_get_cookie (vm, b);
+
+  fformat (stderr, "%s: cb %p cookie %lx\n", __func__, b, cookie);
+}
+
+/* Fills the first `size` bytes of `buffer` with pseudo-random data, working
+ * in chunks of at most clib_mem_get_page_size () bytes.
+ * Always returns 0 (no error). */
+static clib_error_t *
+fill_random_data (void *buffer, uword size)
+{
+  uword seed = random_default_seed ();
+  const uword page_size = clib_mem_get_page_size ();
+  clib_random_buffer_t rb;
+  u8 *dst = buffer;
+  uword left;
+
+  clib_random_buffer_init (&rb, seed);
+
+  for (left = size; left > 0;)
+    {
+      uword chunk = clib_min (page_size, left);
+      void *random_bytes;
+
+      clib_random_buffer_fill (&rb, chunk);
+      random_bytes = clib_random_buffer_get_data (&rb, chunk);
+      clib_memcpy_fast (dst, random_bytes, chunk);
+      /* The random buffer is freed after every chunk, not once at the end. */
+      clib_random_buffer_free (&rb);
+
+      dst += chunk;
+      left -= chunk;
+    }
+
+  return 0;
+}
+
+/* CLI handler for "test dma [transfers <x> size <x>]".
+ *
+ * Creates a DMA config (defaults: 256 transfers of 4096 bytes), allocates one
+ * physmem region split into a source half and a destination half, fills the
+ * source half with random data and submits one batch containing a random
+ * number (1 .. max_transfers - 1, at least 1) of copies from the source half
+ * to the destination half. Completion is reported by test_dma_cb_fn.
+ *
+ * Returns 0 on success or a clib error when the arguments are invalid or a
+ * resource cannot be allocated; resources acquired before a failure are
+ * released again. */
+static clib_error_t *
+test_dma_command_fn (vlib_main_t *vm, unformat_input_t *input,
+		     vlib_cli_command_t *cmd)
+{
+  vlib_dma_batch_t *b;
+  int config_index = -1;
+  u32 rsz, n_alloc, n_transfers, seed, v;
+  u8 *from = 0, *to = 0;
+  vlib_dma_config_t cfg = { .max_transfers = 256,
+			    .max_transfer_size = 4096,
+			    .callback_fn = test_dma_cb_fn };
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "transfers %u", &v))
+	{
+	  /* max_transfers is a u16 and is used as a modulus below, so it
+	   * must be non-zero and must not be truncated on assignment. */
+	  if (v == 0 || v > 0xffff)
+	    return clib_error_return (
+	      0, "transfers must be in the range 1-65535");
+	  cfg.max_transfers = v;
+	}
+      else if (unformat (input, "size %u", &v))
+	{
+	  if (v == 0)
+	    return clib_error_return (0, "size must be greater than zero");
+	  cfg.max_transfer_size = v;
+	}
+      else
+	return clib_error_return (0, "unknown input `%U'",
+				  format_unformat_error, input);
+    }
+
+  /* Per-transfer stride, rounded up to a cache line. Reject sizes for which
+   * the rounding wraps or the total allocation (two halves) would not fit
+   * into the u32 byte count. Done before any resource is acquired. */
+  rsz = round_pow2 (cfg.max_transfer_size, CLIB_CACHE_LINE_BYTES);
+  if (rsz < cfg.max_transfer_size ||
+      rsz > (0xffffffffU / 2) / cfg.max_transfers)
+    return clib_error_return (0, "transfers * size is too large");
+  n_alloc = rsz * cfg.max_transfers * 2;
+
+  if ((config_index = vlib_dma_config_add (vm, &cfg)) < 0)
+    return clib_error_return (0, "Unable to allocate dma config");
+
+  from = vlib_physmem_alloc_aligned_on_numa (
+    vm, n_alloc, CLIB_CACHE_LINE_BYTES, vm->numa_node);
+  if (from == 0)
+    {
+      /* Do not leak the config that was just added. */
+      vlib_dma_config_del (vm, config_index);
+      return clib_error_return (0, "Unable to allocate %u bytes of physmem",
+				n_alloc);
+    }
+  to = from + n_alloc / 2;
+
+  fill_random_data (from, cfg.max_transfers * rsz);
+
+  b = vlib_dma_batch_new (vm, config_index);
+  if (b == 0)
+    {
+      vlib_physmem_free (vm, from);
+      vlib_dma_config_del (vm, config_index);
+      return clib_error_return (0, "Unable to allocate dma batch");
+    }
+  vlib_dma_batch_set_cookie (vm, b, 0x12345678);
+
+  /* Pick a random transfer count below max_transfers, but at least one. */
+  seed = clib_cpu_time_now ();
+  n_transfers = random_u32 (&seed) % cfg.max_transfers;
+  if (n_transfers == 0)
+    n_transfers = 1;
+
+  for (u32 i = 0; i < n_transfers; i++)
+    vlib_dma_batch_add (vm, b, to + i * rsz, from + i * rsz,
+			cfg.max_transfer_size);
+
+  /* NOTE(review): on the success path neither the physmem region nor the
+   * config is released; presumably both must outlive the submitted batch --
+   * confirm, and release them from the completion callback if appropriate. */
+  vlib_dma_batch_submit (vm, b);
+  return 0;
+}
+
+/* CLI handler for "show dma [config <x>]": for every vlib thread, prints one
+ * line with the config index (default 0) followed by the output of
+ * vlib_dma_config_info for that config and that thread's vlib_main_t.
+ * Returns an error only for unparsable input. */
+static clib_error_t *
+test_show_dma_fn (vlib_main_t *vm, unformat_input_t *input,
+		  vlib_cli_command_t *cmd)
+{
+  int config_index = 0;
+  u32 thread_index;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat (input, "config %u", &config_index))
+	return clib_error_return (0, "unknown input `%U'",
+				  format_unformat_error, input);
+    }
+
+  for (thread_index = 0; thread_index < vlib_get_n_threads ();
+       thread_index++)
+    {
+      vlib_main_t *thread_vm = vlib_get_main_by_index (thread_index);
+
+      vlib_cli_output (vm, "Config %d %U", config_index,
+		       vlib_dma_config_info, config_index, thread_vm);
+    }
+
+  return 0;
+}
+
+/* Registers the "test dma" CLI command, handled by test_dma_command_fn.
+ * Both "transfers <x>" and "size <x>" are optional. */
+VLIB_CLI_COMMAND (test_dma_command, static) = {
+  .path = "test dma",
+  .short_help = "test dma [transfers <x> size <x>]",
+  .function = test_dma_command_fn,
+};
+
+/* Registers the "show dma" CLI command, handled by test_show_dma_fn.
+ * The optional "config <x>" selects the config index to display. */
+VLIB_CLI_COMMAND (show_dma_command, static) = {
+  .path = "show dma",
+  .short_help = "show dma [config <x>]",
+  .function = test_show_dma_fn,
+};
diff --git a/src/vlib/dma/dma.c b/src/vlib/dma/dma.c
new file mode 100644
index 00000000000..cc8ed94ea8f
--- /dev/null
+++ b/src/vlib/dma/dma.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/log.h>
+#include <vlib/dma/dma.h>
+
+/* Log class named "dma"; the dma_log_debug/dma_log_info macros log through
+ * dma_log.class. */
+VLIB_REGISTER_LOG_CLASS (dma_log) = {
+  .class_name = "dma",
+};
+
+/* Global DMA state: the vector of registered backends and the pool of
+ * configs. */
+vlib_dma_main_t vlib_dma_main = {};
+
+/* Registers a DMA backend by appending a copy of *b to the global backend
+ * vector and logging the registration. The name pointer is copied as-is, not
+ * duplicated. Always returns 0 (no error). */
+clib_error_t *
+vlib_dma_register_backend (vlib_main_t *vm, vlib_dma_backend_t *b)
+{
+  vec_add1 (vlib_dma_main.backends, *b);
+
+  dma_log_info ("backend '%s' registered", b->name);
+
+  return 0;
+}
+
+/* Allocates a zeroed config slot, copies *c into it and offers it to the
+ * registered backends in registration order. The first backend whose
+ * config_add_fn returns non-zero becomes the owner of the config.
+ *
+ * Returns the config index (>= 0) on success, or -1 when no backend accepted
+ * the config, in which case the slot is returned to the pool. */
+int
+vlib_dma_config_add (vlib_main_t *vm, vlib_dma_config_t *c)
+{
+  vlib_dma_main_t *dm = &vlib_dma_main;
+  vlib_dma_backend_t *b;
+  vlib_dma_config_data_t *cd;
+  u32 config_index;
+
+  pool_get_zero (dm->configs, cd);
+  config_index = cd - dm->configs;
+  cd->config_index = config_index;
+  cd->cfg = *c;
+
+  vec_foreach (b, dm->backends)
+    {
+      /* A backend without a config_add_fn cannot accept a config. */
+      if (b->config_add_fn == 0)
+	continue;
+
+      dma_log_info ("calling '%s' config_add_fn", b->name);
+      if (b->config_add_fn (vm, cd))
+	{
+	  /* Pass the u32 index to "%u" rather than a raw pointer difference,
+	   * whose type is wider than what the conversion consumes. */
+	  dma_log_info ("config %u added into backend %s", config_index,
+			b->name);
+	  cd->backend_index = b - dm->backends;
+	  return config_index;
+	}
+    }
+
+  pool_put (dm->configs, cd);
+  return -1;
+}
+
+/* Deletes the config at config_index: calls the owning backend's
+ * config_del_fn when the backend provides one, returns the slot to the
+ * config pool and logs the deletion. */
+void
+vlib_dma_config_del (vlib_main_t *vm, u32 config_index)
+{
+  vlib_dma_main_t *dm = &vlib_dma_main;
+  vlib_dma_config_data_t *config_data;
+  vlib_dma_backend_t *backend;
+
+  config_data = pool_elt_at_index (dm->configs, config_index);
+  backend = vec_elt_at_index (dm->backends, config_data->backend_index);
+
+  /* config_del_fn is optional for a backend. */
+  if (backend->config_del_fn != 0)
+    backend->config_del_fn (vm, config_data);
+
+  pool_put (dm->configs, config_data);
+
+  dma_log_info ("config %u deleted from backend %s", config_index,
+		backend->name);
+}
+
+/* Format function (for "%U") describing a DMA config.
+ *
+ * Consumes one int argument, the config index. When the index does not refer
+ * to an allocated config, "not found" is appended. Otherwise the owning
+ * backend's info_fn, if any, is called with the remaining arguments; without
+ * an info_fn the string is returned unchanged.
+ *
+ * NOTE(review): on the "not found" and "no info_fn" paths any further
+ * arguments supplied by the caller are left unconsumed.
+ *
+ * Returns the (possibly extended) vector s; never NULL unless s was NULL and
+ * nothing was appended. */
+u8 *
+vlib_dma_config_info (u8 *s, va_list *args)
+{
+  vlib_dma_main_t *dm = &vlib_dma_main;
+  int config_index = va_arg (*args, int);
+  vlib_dma_config_data_t *cd;
+  vlib_dma_backend_t *b;
+
+  /* pool_elts () counts used elements and is not an upper bound for valid
+   * indices once the pool has holes; test the slot itself instead. */
+  if (config_index < 0 || pool_is_free_index (dm->configs, config_index))
+    return format (s, "%s", "not found");
+
+  cd = pool_elt_at_index (dm->configs, config_index);
+  b = vec_elt_at_index (dm->backends, cd->backend_index);
+
+  if (b->info_fn)
+    return b->info_fn (s, args);
+
+  /* Hand the accumulated string back; returning 0 would drop it. */
+  return s;
+}
diff --git a/src/vlib/dma/dma.h b/src/vlib/dma/dma.h
new file mode 100644
index 00000000000..eaeba8b13b7
--- /dev/null
+++ b/src/vlib/dma/dma.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#ifndef included_vlib_dma_h
+#define included_vlib_dma_h
+#include <vlib/vlib.h>
+
+/* Logs a DEBUG-level message through dma_log.class, prefixed with the name
+ * of the calling function. `f` must be a string literal (it is concatenated
+ * with the prefix) and a `dma_log` object must be visible at the call site. */
+#define dma_log_debug(f, ...)                                                 \
+  vlib_log (VLIB_LOG_LEVEL_DEBUG, dma_log.class, "%s: " f, __func__,          \
+	    ##__VA_ARGS__)
+
+/* Same as dma_log_debug, but at INFO level. */
+#define dma_log_info(f, ...)                                                  \
+  vlib_log (VLIB_LOG_LEVEL_INFO, dma_log.class, "%s: " f, __func__,           \
+	    ##__VA_ARGS__)
+
+/* Forward declarations so the callback types below can refer to them. */
+struct vlib_dma_batch;
+struct vlib_dma_config_data;
+
+/* Backend hook offered a new config; vlib_dma_config_add treats a non-zero
+ * return value as "config accepted by this backend". */
+typedef int (vlib_dma_config_add_fn) (vlib_main_t *vm,
+				      struct vlib_dma_config_data *cfg);
+/* Backend hook called by vlib_dma_config_del before the config is freed. */
+typedef void (vlib_dma_config_del_fn) (vlib_main_t *vm,
+				       struct vlib_dma_config_data *cfg);
+/* Hook returning a new batch for a config; called by vlib_dma_batch_new. */
+typedef struct vlib_dma_batch *(vlib_dma_batch_new_fn) (
+  vlib_main_t *vm, struct vlib_dma_config_data *);
+/* Hook submitting a batch; called by vlib_dma_batch_submit. */
+typedef int (vlib_dma_batch_submit_fn) (vlib_main_t *vm,
+					struct vlib_dma_batch *b);
+/* Application callback receiving the batch pointer. */
+typedef void (vlib_dma_batch_callback_fn) (vlib_main_t *vm,
+					   struct vlib_dma_batch *b);
+/* Application-supplied description of its DMA requirements. */
+typedef struct
+{
+  /* Feature flags, accessible individually or as one u32 word. */
+  union
+  {
+    struct
+    {
+      /* NOTE(review): presumably requests in-order completion callbacks
+       * ("barrier before last" in the commit description) -- confirm. */
+      u32 barrier_before_last : 1;
+      /* NOTE(review): presumably lets the CPU perform the copy when the
+       * backend is busy ("cpu fallback") -- confirm. */
+      u32 sw_fallback : 1;
+    };
+    u32 features;
+  };
+  /* Maximum number of transfers in one batch (16-bit). */
+  u16 max_transfers;
+  /* Maximum size of a single transfer; presumably in bytes. */
+  u32 max_transfer_size;
+  /* Callback receiving the batch pointer. */
+  vlib_dma_batch_callback_fn *callback_fn;
+} vlib_dma_config_t;
+
+/* Header of a DMA batch. vlib_dma_batch_add addresses transfer slots
+ * relative to the start of this structure: slot n begins at byte offset
+ * n * stride, and the *_off fields locate the fields inside a slot. */
+typedef struct vlib_dma_batch
+{
+  /* Called by vlib_dma_batch_submit. */
+  vlib_dma_batch_submit_fn *submit_fn;
+  /* Callback receiving the batch pointer. */
+  vlib_dma_batch_callback_fn *callback_fn;
+  /* Opaque value owned by the application (see the cookie accessors). */
+  uword cookie;
+  /* Byte offset of the source pointer within a slot. */
+  u16 src_ptr_off;
+  /* Byte offset of the destination pointer within a slot. */
+  u16 dst_ptr_off;
+  /* Byte offset of the u32 transfer size within a slot. */
+  u16 size_off;
+  /* Distance in bytes between consecutive slots. */
+  u16 stride;
+  /* Number of transfers enqueued so far; also the next slot index. */
+  u16 n_enq;
+} vlib_dma_batch_t;
+
+/* Description of a DMA backend as passed to vlib_dma_register_backend. */
+typedef struct
+{
+  /* Backend name, printed by "show dma backends" and in log messages. */
+  char *name;
+  /* Called for every new config until one backend accepts it. */
+  vlib_dma_config_add_fn *config_add_fn;
+  /* Optional; called when a config owned by this backend is deleted. */
+  vlib_dma_config_del_fn *config_del_fn;
+  /* Optional format function used by vlib_dma_config_info. */
+  format_function_t *info_fn;
+} vlib_dma_backend_t;
+
+/* Pool element holding one config together with its backend binding. */
+typedef struct vlib_dma_config_data
+{
+  /* Copy of the application's config. */
+  vlib_dma_config_t cfg;
+  /* Called by vlib_dma_batch_new; not set by the code in dma.c, so
+   * presumably filled in by the backend's config_add_fn -- confirm. */
+  vlib_dma_batch_new_fn *batch_new_fn;
+  /* Not touched by the code in dma.c; presumably backend-owned. */
+  uword private_data;
+  /* Index of the owning backend in vlib_dma_main.backends. */
+  u32 backend_index;
+  /* Index of this element in vlib_dma_main.configs. */
+  u32 config_index;
+} vlib_dma_config_data_t;
+
+/* Global DMA state. */
+typedef struct
+{
+  /* Vector of registered backends. */
+  vlib_dma_backend_t *backends;
+  /* Pool of configs, indexed by config index. */
+  vlib_dma_config_data_t *configs;
+} vlib_dma_main_t;
+
+/* Global DMA state, defined in dma.c. */
+extern vlib_dma_main_t vlib_dma_main;
+
+/* Registers a copy of backend *b. Always returns 0. */
+clib_error_t *vlib_dma_register_backend (vlib_main_t *vm,
+					 vlib_dma_backend_t *b);
+
+/* Adds config *c; returns its index (>= 0), or -1 when no registered backend
+ * accepts it. */
+int vlib_dma_config_add (vlib_main_t *vm, vlib_dma_config_t *c);
+/* Deletes the config at config_index. */
+void vlib_dma_config_del (vlib_main_t *vm, u32 config_index);
+/* Format function for "%U"; consumes an int config index from args. */
+u8 *vlib_dma_config_info (u8 *s, va_list *args);
+
+/* Returns a new batch for the config at config_index by calling that
+ * config's batch_new_fn. Neither the index nor the function pointer is
+ * checked here beyond what pool_elt_at_index does. */
+static_always_inline vlib_dma_batch_t *
+vlib_dma_batch_new (vlib_main_t *vm, u32 config_index)
+{
+  vlib_dma_main_t *dm = &vlib_dma_main;
+  vlib_dma_config_data_t *cd = pool_elt_at_index (dm->configs, config_index);
+
+  return cd->batch_new_fn (vm, cd);
+}
+
+/* Stores an application-defined cookie in the batch. vm is unused. */
+static_always_inline void
+vlib_dma_batch_set_cookie (vlib_main_t *vm, vlib_dma_batch_t *batch,
+			   uword cookie)
+{
+  batch->cookie = cookie;
+}
+
+/* Returns the cookie stored in the batch. vm is unused. */
+static_always_inline uword
+vlib_dma_batch_get_cookie (vlib_main_t *vm, vlib_dma_batch_t *batch)
+{
+  return batch->cookie;
+}
+
+/* Appends one transfer (copy `size` units from src to dst) to the batch by
+ * writing dst, src and size into the next slot and incrementing n_enq.
+ * No bound check is performed on n_enq in this function. */
+static_always_inline void
+vlib_dma_batch_add (vlib_main_t *vm, vlib_dma_batch_t *batch, void *dst,
+		    void *src, u32 size)
+{
+  /* Slot n_enq starts n_enq * stride bytes after the batch header address. */
+  u8 *p = (u8 *) batch + batch->n_enq * batch->stride;
+
+  /* Field positions inside the slot are given by the batch's offsets. */
+  *((void **) (p + batch->dst_ptr_off)) = dst;
+  *((void **) (p + batch->src_ptr_off)) = src;
+  *((u32 *) (p + batch->size_off)) = size;
+
+  batch->n_enq++;
+}
+
+/* Submits the batch through its submit_fn. The int returned by submit_fn is
+ * discarded. */
+static_always_inline void
+vlib_dma_batch_submit (vlib_main_t *vm, vlib_dma_batch_t *batch)
+{
+  batch->submit_fn (vm, batch);
+}
+
+#endif
diff --git a/src/vlib/dma/dma.rst b/src/vlib/dma/dma.rst
new file mode 100644
index 00000000000..4048d49b218
--- /dev/null
+++ b/src/vlib/dma/dma.rst
@@ -0,0 +1,70 @@
+.. _dma_plugin:
+
+.. toctree::
+
+DMA plugin
+==========
+
+Overview
+--------
+This plugin utilizes platform DMA accelerators like CBDMA/DSA for streaming
+data movement. Modern DMA accelerators have high memory bandwidth and benefit
+cross-NUMA traffic. Accelerators like DSA are capable of IO page fault
+recovery, which saves the IOMMU setup for memory that is not pinned.
+
+Terminology & Usage
+-------------------
+
+A ``backend`` is the abstraction of a resource inherited from a DMA device.
+It supports the operations necessary for DMA offloading, such as
+configuration, DMA request and result query.
+
+A ``config`` is the abstraction of an application's DMA capability. An
+application can request a config instance through the DMA node. The DMA node
+checks the requirements of the application and binds a suitable backend to it.
+
+Enable DSA work queue:
+----------------------
+
+.. code-block:: console
+
+  # configure 1 group with one engine
+  accel-config config-engine dsa0/engine0.0 --group-id=0
+
+  # configure 1 queue and put it in that group, so that it is backed by a
+  # single engine
+  accel-config config-wq dsa0/wq0.0 --group-id=0 --type=user  \
+    --priority=10 --max-batch-size=1024 --mode=dedicated -b 1 -a 0 --name=vpp1
+
+DMA transfer:
+-------------
+
+In this sample, the application requests from the DMA node a DMA capability
+which can hold a batch of at most 256 transfers, each transfer holding at most
+4K bytes. If the returned config_index value is not negative, the resource has
+been allocated and the DMA engine is ready to serve.
+
+.. code-block:: c
+
+  void dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b);
+
+  vlib_dma_config_t args = {};
+  args.max_transfers = 256;
+  args.max_transfer_size = 4096;
+  args.sw_fallback = 1;
+  args.barrier_before_last = 1;
+  args.callback_fn = dma_completion_cb;
+  int config_index = vlib_dma_config_add (vm, &args);
+  if (config_index < 0)
+    return;
+
+  u8 *dst[n_transfers];
+  u8 *src[n_transfers];
+  u32 i = 0, size = 4096;
+
+  vlib_dma_batch_t *b;
+  b = vlib_dma_batch_new (vm, config_index);
+  while (n_transfers) {
+    vlib_dma_batch_add (vm, b, dst[i], src[i], size);
+    n_transfers --;
+    i ++;
+  }
+  vlib_dma_batch_submit (vm, b);
author	Marvin Liu <yong.liu@intel.com>	2022-08-17 09:38:40 +0800
committer	Damjan Marion <dmarion@0xa5.net>	2022-08-25 19:05:40 +0000
commit	abd5669422c5805da5135496d5e5a394fa5aa602 (patch)
tree	a464eb14b5e04b19042e92bb83ca7b8567731f19 /src/vlib
parent	9a6ad01c0d443f002eafa709813d021bf0c98eac (diff)