diff options
-rw-r--r-- | src/plugins/dpdk/buffer.c | 85 | ||||
-rwxr-xr-x | src/plugins/dpdk/device/init.c | 122 |
2 files changed, 144 insertions, 63 deletions
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c index e09d80194ed..c23a42070b1 100644 --- a/src/plugins/dpdk/buffer.c +++ b/src/plugins/dpdk/buffer.c @@ -409,13 +409,26 @@ dpdk_packet_template_init (vlib_main_t * vm, vlib_worker_thread_barrier_release (vm); } +typedef struct +{ + /* must be first */ + struct rte_pktmbuf_pool_private mbp_priv; + vlib_physmem_region_index_t region_index; +} dpdk_mempool_private_t; + clib_error_t * dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, unsigned socket_id) { dpdk_main_t *dm = &dpdk_main; struct rte_mempool *rmp; - int i; + dpdk_mempool_private_t priv; + vlib_physmem_region_t *pr; + vlib_physmem_region_index_t pri; + u8 *pool_name; + unsigned elt_size; + u32 size; + i32 i, ret; vec_validate_aligned (dm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); @@ -423,29 +436,64 @@ dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, if (dm->pktmbuf_pools[socket_id]) return 0; - u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); + pool_name = format (0, "dpdk_mbuf_pool_socket%u%c", socket_id, 0); + + elt_size = sizeof (struct rte_mbuf) + + VLIB_BUFFER_HDR_SIZE /* priv size */ + + VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE; /*data room size */ - rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ - num_mbufs, /* number of mbufs */ - 512, /* cache size */ - VLIB_BUFFER_HDR_SIZE, /* priv size */ - VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ - socket_id); /* cpu socket */ + size = rte_mempool_xmem_size (num_mbufs, elt_size, 21); + clib_error_t *error = 0; + error = + vlib_physmem_region_alloc (vm, (char *) pool_name, size, socket_id, + VLIB_PHYSMEM_F_HAVE_BUFFERS, &pri); + if (error) + clib_error_report (error); + + pr = vlib_physmem_get_region (vm, pri); + + priv.mbp_priv.mbuf_data_room_size = VLIB_BUFFER_PRE_DATA_SIZE + + VLIB_BUFFER_DATA_SIZE; + priv.mbp_priv.mbuf_priv_size = VLIB_BUFFER_HDR_SIZE; + +#if 0 + /* Check that pg_shift parameter is valid. */ + if (pg_shift > MEMPOOL_PG_SHIFT_MAX) + { + rte_errno = EINVAL; + return NULL; + } +#endif + rmp = rte_mempool_create_empty ((char *) pool_name, /* pool name */ + num_mbufs, /* number of mbufs */ + elt_size, 512, /* cache size */ + sizeof (dpdk_mempool_private_t), /* private data size */ + socket_id, 0); /* flags */ if (rmp) { - { - struct rte_mempool_memhdr *memhdr; + rte_mempool_set_ops_byname (rmp, RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL); - STAILQ_FOREACH (memhdr, &rmp->mem_list, next) - vlib_buffer_add_mem_range (vm, (uword) memhdr->addr, memhdr->len); - } - if (rmp) + /* call the mempool priv initializer */ + rte_pktmbuf_pool_init (rmp, &priv); + + ret = rte_mempool_populate_phys_tab (rmp, pr->mem, pr->page_table, + pr->n_pages, pr->log2_page_size, + NULL, NULL); + if (ret == (i32) rmp->size) { + /* call the object initializers */ + rte_mempool_obj_iter (rmp, rte_pktmbuf_init, 0); + + dpdk_mempool_private_t *privp = rte_mempool_get_priv (rmp); + privp->region_index = pri; + dm->pktmbuf_pools[socket_id] = rmp; - vec_free (pool_name); + return 0; } + + rte_mempool_free (rmp); } vec_free (pool_name); @@ -455,10 +503,9 @@ dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, { if (dm->pktmbuf_pools[i]) { - clib_warning - ("WARNING: Failed to allocate mempool for CPU socket %u. " - "Threads running on socket %u will use socket %u mempool.", - socket_id, socket_id, i); + clib_warning ("WARNING: Failed to allocate mempool for CPU socket " + "%u. Threads running on socket %u will use socket %u " + "mempool.", socket_id, socket_id, i); dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i]; return 0; } diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index acf712ffced..e94d2fa8588 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -24,6 +24,8 @@ #include <dpdk/device/dpdk.h> #include <vlib/pci/pci.h> +#include <rte_ring.h> + #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -137,6 +139,60 @@ dpdk_device_lock_init (dpdk_device_t * xd) } } +static struct rte_mempool_ops * +get_ops_by_name (i8 * ops_name) +{ + u32 i; + + for (i = 0; i < rte_mempool_ops_table.num_ops; i++) + { + if (!strcmp (ops_name, rte_mempool_ops_table.ops[i].name)) + return &rte_mempool_ops_table.ops[i]; + } + + return 0; +} + +static int +dpdk_ring_alloc (struct rte_mempool *mp) +{ + u32 rg_flags = 0, count; + i32 ret; + i8 rg_name[RTE_RING_NAMESIZE]; + struct rte_ring *r; + + ret = snprintf (rg_name, sizeof (rg_name), RTE_MEMPOOL_MZ_FORMAT, mp->name); + if (ret < 0 || ret >= (i32) sizeof (rg_name)) + return -ENAMETOOLONG; + + /* ring flags */ + if (mp->flags & MEMPOOL_F_SP_PUT) + rg_flags |= RING_F_SP_ENQ; + if (mp->flags & MEMPOOL_F_SC_GET) + rg_flags |= RING_F_SC_DEQ; + + count = rte_align32pow2 (mp->size + 1); + /* + * Allocate the ring that will be used to store objects. + * Ring functions will return appropriate errors if we are + * running as a secondary process etc., so no checks made + * in this function for that condition. + */ + /* XXX can we get memory from the right socket? */ + r = clib_mem_alloc_aligned (rte_ring_get_memsize (count), + CLIB_CACHE_LINE_BYTES); + + /* XXX rte_ring_lookup will not work */ + + ret = rte_ring_init (r, rg_name, count, rg_flags); + if (ret) + return ret; + + mp->pool_data = r; + + return 0; +} + static clib_error_t * dpdk_lib_init (dpdk_main_t * dm) { @@ -420,10 +476,6 @@ dpdk_lib_init (dpdk_main_t * dm) xd->port_type = VNET_DPDK_PORT_TYPE_VIRTIO_USER; break; - case VNET_DPDK_PMD_VHOST_ETHER: - xd->port_type = VNET_DPDK_PORT_TYPE_VHOST_ETHER; - break; - default: xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; } @@ -987,9 +1039,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) { u32 x, *mem_by_socket = 0; uword c = 0; - u8 use_1g = 1; - u8 use_2m = 1; - u8 less_than_1g = 1; int rv; umount ((char *) huge_dir_path); @@ -1011,9 +1060,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) break; vec_add1 (mem_by_socket, x); - - if (x > 1023) - less_than_1g = 0; } /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */ unformat_free (&in); @@ -1025,39 +1071,22 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( { vec_validate(mem_by_socket, c); - mem_by_socket[c] = 256; /* default per-socket mem */ + mem_by_socket[c] = 64; /* default per-socket mem */ } )); /* *INDENT-ON* */ } - /* check if available enough 1GB pages for each socket */ /* *INDENT-OFF* */ clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( { - int pages_avail, page_size, mem; - clib_error_t *e = 0; + clib_error_t *e; vec_validate(mem_by_socket, c); - mem = mem_by_socket[c]; - - page_size = 1024; - e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); - - if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem) - use_1g = 0; + e = clib_sysfs_prealloc_hugepages(c, 2 << 10, mem_by_socket[c] / 2); if (e) - clib_error_free (e); - - page_size = 2; - e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); - - if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem) - use_2m = 0; - - if (e) - clib_error_free (e); + clib_error_report (e); })); /* *INDENT-ON* */ @@ -1082,19 +1111,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) goto done; } - if (use_1g && !(less_than_1g && use_2m)) - { - rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, - "pagesize=1G"); - } - else if (use_2m) - { - rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL); - } - else - { - return clib_error_return (0, "not enough free huge pages"); - } + rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL); if (rv) { @@ -1229,6 +1246,23 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) fprintf (stdout, "DPDK physical memory layout:\n"); rte_dump_physmem_layout (stdout); + /* set custom ring memory allocator */ + { + struct rte_mempool_ops *ops = NULL; + + ops = get_ops_by_name ("ring_sp_sc"); + ops->alloc = dpdk_ring_alloc; + + ops = get_ops_by_name ("ring_mp_sc"); + ops->alloc = dpdk_ring_alloc; + + ops = get_ops_by_name ("ring_sp_mc"); + ops->alloc = dpdk_ring_alloc; + + ops = get_ops_by_name ("ring_mp_mc"); + ops->alloc = dpdk_ring_alloc; + } + /* main thread 1st */ error = dpdk_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); if (error) |