aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/plugins/dpdk/buffer.c 85
-rwxr-xr-x src/plugins/dpdk/device/init.c 122
2 files changed, 144 insertions, 63 deletions
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c
index e09d80194ed..c23a42070b1 100644
--- a/src/plugins/dpdk/buffer.c
+++ b/src/plugins/dpdk/buffer.c
@@ -409,13 +409,26 @@ dpdk_packet_template_init (vlib_main_t * vm,
vlib_worker_thread_barrier_release (vm);
}
+/*
+ * Private data area of a mempool created by dpdk_buffer_pool_create ():
+ * the pool is created with sizeof (dpdk_mempool_private_t) as its
+ * private data size, and region_index is filled in once the pool has
+ * been populated.
+ */
+typedef struct
+{
+ /* must be first (presumably because DPDK reads the start of the
+  * private area as a struct rte_pktmbuf_pool_private -- confirm) */
+ struct rte_pktmbuf_pool_private mbp_priv;
+ /* index of the vlib physmem region whose pages back the pool */
+ vlib_physmem_region_index_t region_index;
+} dpdk_mempool_private_t;
+
clib_error_t *
dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs,
unsigned socket_id)
{
dpdk_main_t *dm = &dpdk_main;
struct rte_mempool *rmp;
- int i;
+ dpdk_mempool_private_t priv;
+ vlib_physmem_region_t *pr;
+ vlib_physmem_region_index_t pri;
+ u8 *pool_name;
+ unsigned elt_size;
+ u32 size;
+ i32 i, ret;
vec_validate_aligned (dm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES);
@@ -423,29 +436,64 @@ dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs,
if (dm->pktmbuf_pools[socket_id])
return 0;
- u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0);
+ pool_name = format (0, "dpdk_mbuf_pool_socket%u%c", socket_id, 0);
+
+ elt_size = sizeof (struct rte_mbuf) +
+ VLIB_BUFFER_HDR_SIZE /* priv size */ +
+ VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE; /*data room size */
- rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */
- num_mbufs, /* number of mbufs */
- 512, /* cache size */
- VLIB_BUFFER_HDR_SIZE, /* priv size */
- VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */
- socket_id); /* cpu socket */
+ size = rte_mempool_xmem_size (num_mbufs, elt_size, 21);
+ clib_error_t *error = 0;
+ error =
+ vlib_physmem_region_alloc (vm, (char *) pool_name, size, socket_id,
+ VLIB_PHYSMEM_F_HAVE_BUFFERS, &pri);
+ if (error)
+ clib_error_report (error);
+
+ pr = vlib_physmem_get_region (vm, pri);
+
+ priv.mbp_priv.mbuf_data_room_size = VLIB_BUFFER_PRE_DATA_SIZE +
+ VLIB_BUFFER_DATA_SIZE;
+ priv.mbp_priv.mbuf_priv_size = VLIB_BUFFER_HDR_SIZE;
+
+#if 0
+ /* Check that pg_shift parameter is valid. */
+ if (pg_shift > MEMPOOL_PG_SHIFT_MAX)
+ {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+#endif
+ rmp = rte_mempool_create_empty ((char *) pool_name, /* pool name */
+ num_mbufs, /* number of mbufs */
+ elt_size, 512, /* cache size */
+ sizeof (dpdk_mempool_private_t), /* private data size */
+ socket_id, 0); /* flags */
if (rmp)
{
- {
- struct rte_mempool_memhdr *memhdr;
+ rte_mempool_set_ops_byname (rmp, RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL);
- STAILQ_FOREACH (memhdr, &rmp->mem_list, next)
- vlib_buffer_add_mem_range (vm, (uword) memhdr->addr, memhdr->len);
- }
- if (rmp)
+ /* call the mempool priv initializer */
+ rte_pktmbuf_pool_init (rmp, &priv);
+
+ ret = rte_mempool_populate_phys_tab (rmp, pr->mem, pr->page_table,
+ pr->n_pages, pr->log2_page_size,
+ NULL, NULL);
+ if (ret == (i32) rmp->size)
{
+ /* call the object initializers */
+ rte_mempool_obj_iter (rmp, rte_pktmbuf_init, 0);
+
+ dpdk_mempool_private_t *privp = rte_mempool_get_priv (rmp);
+ privp->region_index = pri;
+
dm->pktmbuf_pools[socket_id] = rmp;
- vec_free (pool_name);
+
return 0;
}
+
+ rte_mempool_free (rmp);
}
vec_free (pool_name);
@@ -455,10 +503,9 @@ dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs,
{
if (dm->pktmbuf_pools[i])
{
- clib_warning
- ("WARNING: Failed to allocate mempool for CPU socket %u. "
- "Threads running on socket %u will use socket %u mempool.",
- socket_id, socket_id, i);
+ clib_warning ("WARNING: Failed to allocate mempool for CPU socket "
+ "%u. Threads running on socket %u will use socket %u "
+ "mempool.", socket_id, socket_id, i);
dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i];
return 0;
}
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index acf712ffced..e94d2fa8588 100755
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -24,6 +24,8 @@
#include <dpdk/device/dpdk.h>
#include <vlib/pci/pci.h>
+#include <rte_ring.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -137,6 +139,60 @@ dpdk_device_lock_init (dpdk_device_t * xd)
}
}
+/*
+ * Find a DPDK mempool handler by name.
+ *
+ * Walks rte_mempool_ops_table in index order and returns a pointer to
+ * the first entry whose name compares equal to ops_name, or 0 when no
+ * entry matches.
+ */
+static struct rte_mempool_ops *
+get_ops_by_name (i8 * ops_name)
+{
+  struct rte_mempool_ops *found = 0;
+  u32 idx = 0;
+
+  while (found == 0 && idx < rte_mempool_ops_table.num_ops)
+    {
+      struct rte_mempool_ops *candidate = &rte_mempool_ops_table.ops[idx];
+
+      if (strcmp (ops_name, candidate->name) == 0)
+	found = candidate;
+
+      idx++;
+    }
+
+  return found;
+}
+
+/*
+ * Mempool handler 'alloc' callback that backs the pool's ring with
+ * memory taken from clib_mem_alloc_aligned ().
+ *
+ * Builds the ring name from mp->name, derives the ring flags from the
+ * mempool's single-producer / single-consumer flags, allocates and
+ * initializes the ring and stores it in mp->pool_data.
+ *
+ * Returns 0 on success, -ENAMETOOLONG if the ring name does not fit,
+ * -ENOMEM if the ring memory cannot be allocated, or the negative
+ * error reported by rte_ring_get_memsize () / rte_ring_init ().
+ */
+static int
+dpdk_ring_alloc (struct rte_mempool *mp)
+{
+  u32 rg_flags = 0, count;
+  i32 ret;
+  i64 ring_bytes;
+  i8 rg_name[RTE_RING_NAMESIZE];
+  struct rte_ring *r;
+
+  ret = snprintf (rg_name, sizeof (rg_name), RTE_MEMPOOL_MZ_FORMAT, mp->name);
+  if (ret < 0 || ret >= (i32) sizeof (rg_name))
+    return -ENAMETOOLONG;
+
+  /* ring flags */
+  if (mp->flags & MEMPOOL_F_SP_PUT)
+    rg_flags |= RING_F_SP_ENQ;
+  if (mp->flags & MEMPOOL_F_SC_GET)
+    rg_flags |= RING_F_SC_DEQ;
+
+  /* ring size is the next power of two above the number of objects */
+  count = rte_align32pow2 (mp->size + 1);
+
+  /* a negative size is an error code; never pass it on as a length */
+  ring_bytes = rte_ring_get_memsize (count);
+  if (ring_bytes < 0)
+    return (int) ring_bytes;
+
+  /*
+   * Allocate the ring that will be used to store objects.
+   * Ring functions will return appropriate errors if we are
+   * running as a secondary process etc., so no checks made
+   * in this function for that condition.
+   */
+  /* XXX can we get memory from the right socket? */
+  r = clib_mem_alloc_aligned (ring_bytes, CLIB_CACHE_LINE_BYTES);
+  if (r == 0)
+    return -ENOMEM;
+
+  /* XXX rte_ring_lookup will not work */
+
+  ret = rte_ring_init (r, rg_name, count, rg_flags);
+  if (ret)
+    {
+      /* the ring was never published, so release it here to avoid a leak */
+      clib_mem_free (r);
+      return ret;
+    }
+
+  mp->pool_data = r;
+
+  return 0;
+}
+
static clib_error_t *
dpdk_lib_init (dpdk_main_t * dm)
{
@@ -420,10 +476,6 @@ dpdk_lib_init (dpdk_main_t * dm)
xd->port_type = VNET_DPDK_PORT_TYPE_VIRTIO_USER;
break;
- case VNET_DPDK_PMD_VHOST_ETHER:
- xd->port_type = VNET_DPDK_PORT_TYPE_VHOST_ETHER;
- break;
-
default:
xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
}
@@ -987,9 +1039,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
{
u32 x, *mem_by_socket = 0;
uword c = 0;
- u8 use_1g = 1;
- u8 use_2m = 1;
- u8 less_than_1g = 1;
int rv;
umount ((char *) huge_dir_path);
@@ -1011,9 +1060,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
break;
vec_add1 (mem_by_socket, x);
-
- if (x > 1023)
- less_than_1g = 0;
}
/* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */
unformat_free (&in);
@@ -1025,39 +1071,22 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
{
vec_validate(mem_by_socket, c);
- mem_by_socket[c] = 256; /* default per-socket mem */
+ mem_by_socket[c] = 64; /* default per-socket mem */
}
));
/* *INDENT-ON* */
}
- /* check if available enough 1GB pages for each socket */
/* *INDENT-OFF* */
clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
{
- int pages_avail, page_size, mem;
- clib_error_t *e = 0;
+ clib_error_t *e;
vec_validate(mem_by_socket, c);
- mem = mem_by_socket[c];
-
- page_size = 1024;
- e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail);
-
- if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem)
- use_1g = 0;
+ e = clib_sysfs_prealloc_hugepages(c, 2 << 10, mem_by_socket[c] / 2);
if (e)
- clib_error_free (e);
-
- page_size = 2;
- e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail);
-
- if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem)
- use_2m = 0;
-
- if (e)
- clib_error_free (e);
+ clib_error_report (e);
}));
/* *INDENT-ON* */
@@ -1082,19 +1111,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
goto done;
}
- if (use_1g && !(less_than_1g && use_2m))
- {
- rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0,
- "pagesize=1G");
- }
- else if (use_2m)
- {
- rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL);
- }
- else
- {
- return clib_error_return (0, "not enough free huge pages");
- }
+ rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL);
if (rv)
{
@@ -1229,6 +1246,23 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
fprintf (stdout, "DPDK physical memory layout:\n");
rte_dump_physmem_layout (stdout);
+  /*
+   * set custom ring memory allocator
+   *
+   * Point the 'alloc' callback of each ring-based mempool handler at
+   * dpdk_ring_alloc. get_ops_by_name () returns 0 for a name that is
+   * not in the handler table, so each result is checked before it is
+   * written through.
+   */
+  {
+    char *ring_ops_names[] = {
+      "ring_sp_sc",
+      "ring_mp_sc",
+      "ring_sp_mc",
+      "ring_mp_mc",
+    };
+    struct rte_mempool_ops *ops = NULL;
+    u32 k;
+
+    for (k = 0; k < sizeof (ring_ops_names) / sizeof (ring_ops_names[0]); k++)
+      {
+	ops = get_ops_by_name ((i8 *) ring_ops_names[k]);
+	if (ops)
+	  ops->alloc = dpdk_ring_alloc;
+	else
+	  clib_warning ("mempool ops '%s' not found, "
+			"custom ring allocator not installed",
+			ring_ops_names[k]);
+      }
+  }
+
+
/* main thread 1st */
error = dpdk_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ());
if (error)