diff options
Diffstat (limited to 'lib/librte_eal/linuxapp/eal')
-rw-r--r-- | lib/librte_eal/linuxapp/eal/Makefile | 6 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal.c | 25 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_interrupts.c | 1 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_memory.c | 176 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_pci.c | 29 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 2 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_thread.c | 10 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_xen_memory.c | 2 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/rte_eal_version.map | 44 |
9 files changed, 219 insertions, 76 deletions
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 640afd08..90bca4d6 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -37,7 +37,7 @@ ARCH_DIR ?= $(RTE_ARCH) EXPORT_MAP := rte_eal_version.map VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) -LIBABIVER := 4 +LIBABIVER := 5 VPATH += $(RTE_SDK)/lib/librte_eal/common @@ -50,6 +50,9 @@ LDLIBS += -ldl LDLIBS += -lpthread LDLIBS += -lgcc_s LDLIBS += -lrt +ifeq ($(CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES),y) +LDLIBS += -lnuma +endif # specific to linuxapp exec-env SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c @@ -96,6 +99,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 7c78f2dc..48f12f44 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -46,7 +46,6 @@ #include <stddef.h> #include <errno.h> #include <limits.h> -#include <errno.h> #include <sys/mman.h> #include <sys/queue.h> #include <sys/stat.h> @@ -64,6 +63,7 @@ #include <rte_errno.h> #include <rte_per_lcore.h> #include <rte_lcore.h> +#include <rte_service_component.h> #include <rte_log.h> #include <rte_random.h> #include <rte_cycles.h> @@ -74,7 +74,6 @@ #include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> -#include <rte_common.h> #include <rte_version.h> #include <rte_atomic.h> #include <malloc_heap.h> @@ -890,6 +889,11 @@ rte_eal_init(int argc, char **argv) return -1; } + if (eal_option_device_parse()) { + rte_errno = ENODEV; + return -1; + } + if (rte_bus_scan()) { rte_eal_init_alert("Cannot scan the buses for devices\n"); rte_errno = ENODEV; @@ -932,6 +936,14 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* initialize services so vdevs register service during bus_probe. */ + ret = rte_service_init(); + if (ret) { + rte_eal_init_alert("rte_service_init() failed\n"); + rte_errno = ENOEXEC; + return -1; + } + /* Probe all the buses and devices/drivers on them */ if (rte_bus_probe()) { rte_eal_init_alert("Cannot probe devices\n"); @@ -939,6 +951,15 @@ rte_eal_init(int argc, char **argv) return -1; } + /* initialize default service/lcore mappings and start running. Ignore + * -ENOTSUP, as it indicates no service coremask passed to EAL. + */ + ret = rte_service_start_with_defaults(); + if (ret < 0 && ret != -ENOTSUP) { + rte_errno = ENOEXEC; + return -1; + } + rte_eal_mcfg_complete(); return fctret; diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 2e3bd12a..3e9ac41e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -64,6 +64,7 @@ #include <rte_malloc.h> #include <rte_errno.h> #include <rte_spinlock.h> +#include <rte_pause.h> #include "eal_private.h" #include "eal_vfio.h" diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index ebe06833..52791282 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -2,6 +2,7 @@ * BSD LICENSE * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2013 6WIND. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,36 +31,6 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* BSD LICENSE - * - * Copyright(c) 2013 6WIND. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ #define _FILE_OFFSET_BITS 64 #include <errno.h> @@ -70,7 +41,6 @@ #include <stdint.h> #include <inttypes.h> #include <string.h> -#include <stdarg.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> @@ -78,11 +48,14 @@ #include <sys/file.h> #include <unistd.h> #include <limits.h> -#include <errno.h> #include <sys/ioctl.h> #include <sys/time.h> #include <signal.h> #include <setjmp.h> +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES +#include <numa.h> +#include <numaif.h> +#endif #include <rte_log.h> #include <rte_memory.h> @@ -137,6 +110,13 @@ test_phys_addrs_available(void) if (rte_xen_dom0_supported()) return; + if (!rte_eal_has_hugepages()) { + RTE_LOG(ERR, EAL, + "Started without hugepages support, physical addresses not available\n"); + phys_addrs_available = false; + return; + } + physaddr = rte_mem_virt2phy(&tmp); if (physaddr == RTE_BAD_PHYS_ADDR) { RTE_LOG(ERR, EAL, @@ -147,16 +127,6 @@ test_phys_addrs_available(void) } } -/* Lock page in physical memory and prevent from swapping. */ -int -rte_mem_lock_page(const void *virt) -{ - unsigned long virtual = (unsigned long)virt; - int page_size = getpagesize(); - unsigned long aligned = (virtual & ~ (page_size - 1)); - return mlock((void*)aligned, page_size); -} - /* * Get physical address of any mapped virtual address in the current process. */ @@ -387,6 +357,14 @@ static int huge_wrap_sigsetjmp(void) return sigsetjmp(huge_jmpenv, 1); } +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES +/* Callback for numa library. */ +void numa_error(char *where) +{ + RTE_LOG(ERR, EAL, "%s failed: %s\n", where, strerror(errno)); +} +#endif + /* * Mmap all hugepages of hugepage table: it first open a file in * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the @@ -395,18 +373,78 @@ static int huge_wrap_sigsetjmp(void) * map continguous physical blocks in contiguous virtual blocks. */ static unsigned -map_all_hugepages(struct hugepage_file *hugepg_tbl, - struct hugepage_info *hpi, int orig) +map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, + uint64_t *essential_memory __rte_unused, int orig) { int fd; unsigned i; void *virtaddr; void *vma_addr = NULL; size_t vma_len = 0; +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + int node_id = -1; + int essential_prev = 0; + int oldpolicy; + struct bitmask *oldmask = numa_allocate_nodemask(); + bool have_numa = true; + unsigned long maxnode = 0; + + /* Check if kernel supports NUMA. */ + if (numa_available() != 0) { + RTE_LOG(DEBUG, EAL, "NUMA is not supported.\n"); + have_numa = false; + } + + if (orig && have_numa) { + RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n"); + if (get_mempolicy(&oldpolicy, oldmask->maskp, + oldmask->size + 1, 0, 0) < 0) { + RTE_LOG(ERR, EAL, + "Failed to get current mempolicy: %s. " + "Assuming MPOL_DEFAULT.\n", strerror(errno)); + oldpolicy = MPOL_DEFAULT; + } + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + if (internal_config.socket_mem[i]) + maxnode = i + 1; + } +#endif for (i = 0; i < hpi->num_pages[0]; i++) { uint64_t hugepage_sz = hpi->hugepage_sz; +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + if (maxnode) { + unsigned int j; + + for (j = 0; j < maxnode; j++) + if (essential_memory[j]) + break; + + if (j == maxnode) { + node_id = (node_id + 1) % maxnode; + while (!internal_config.socket_mem[node_id]) { + node_id++; + node_id %= maxnode; + } + essential_prev = 0; + } else { + node_id = j; + essential_prev = essential_memory[j]; + + if (essential_memory[j] < hugepage_sz) + essential_memory[j] = 0; + else + essential_memory[j] -= hugepage_sz; + } + + RTE_LOG(DEBUG, EAL, + "Setting policy MPOL_PREFERRED for socket %d\n", + node_id); + numa_set_preferred(node_id); + } +#endif + if (orig) { hugepg_tbl[i].file_id = i; hugepg_tbl[i].size = hugepage_sz; @@ -461,7 +499,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, if (fd < 0) { RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); - return i; + goto out; } /* map the segment, and populate page tables, @@ -472,7 +510,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); close(fd); - return i; + goto out; } if (orig) { @@ -497,7 +535,12 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, munmap(virtaddr, hugepage_sz); close(fd); unlink(hugepg_tbl[i].filepath); - return i; +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + if (maxnode) + essential_memory[node_id] = + essential_prev; +#endif + goto out; } *(int *)virtaddr = 0; } @@ -508,7 +551,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, RTE_LOG(DEBUG, EAL, "%s(): Locking file failed:%s \n", __func__, strerror(errno)); close(fd); - return i; + goto out; } close(fd); @@ -517,6 +560,22 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, vma_len -= hugepage_sz; } +out: +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + if (maxnode) { + RTE_LOG(DEBUG, EAL, + "Restoring previous memory policy: %d\n", oldpolicy); + if (oldpolicy == MPOL_DEFAULT) { + numa_set_localalloc(); + } else if (set_mempolicy(oldpolicy, oldmask->maskp, + oldmask->size + 1) < 0) { + RTE_LOG(ERR, EAL, "Failed to restore mempolicy: %s\n", + strerror(errno)); + numa_set_localalloc(); + } + } + numa_free_cpumask(oldmask); +#endif return i; } @@ -551,8 +610,8 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) f = fopen("/proc/self/numa_maps", "r"); if (f == NULL) { - RTE_LOG(NOTICE, EAL, "cannot open /proc/self/numa_maps," - " consider that all memory is in socket_id 0\n"); + RTE_LOG(NOTICE, EAL, "NUMA support not available" + " consider that all memory is in socket_id 0\n"); return 0; } @@ -601,6 +660,11 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) if (hugepg_tbl[i].orig_va == va) { hugepg_tbl[i].socket_id = socket_id; hp_count++; +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + RTE_LOG(DEBUG, EAL, + "Hugepage %s is on socket %d\n", + hugepg_tbl[i].filepath, socket_id); +#endif } } } @@ -995,7 +1059,7 @@ rte_eal_hugepage_init(void) strerror(errno)); return -1; } - mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; + mcfg->memseg[0].phys_addr = RTE_BAD_PHYS_ADDR; mcfg->memseg[0].addr = addr; mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; mcfg->memseg[0].len = internal_config.memory; @@ -1039,6 +1103,11 @@ rte_eal_hugepage_init(void) huge_register_sigbus(); + /* make a copy of socket_mem, needed for balanced allocation. */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + memory[i] = internal_config.socket_mem[i]; + + /* map all hugepages and sort them */ for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){ unsigned pages_old, pages_new; @@ -1056,7 +1125,8 @@ rte_eal_hugepage_init(void) /* map all hugepages available */ pages_old = hpi->num_pages[0]; - pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1); + pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, + memory, 1); if (pages_new < pages_old) { RTE_LOG(DEBUG, EAL, "%d not %d hugepages of size %u MB allocated\n", @@ -1099,7 +1169,7 @@ rte_eal_hugepage_init(void) sizeof(struct hugepage_file), cmp_physaddr); /* remap all hugepages */ - if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) != + if (map_all_hugepages(&tmp_hp[hp_offset], hpi, NULL, 0) != hpi->num_pages[0]) { RTE_LOG(ERR, EAL, "Failed to remap %u MB pages\n", (unsigned)(hpi->hugepage_sz / 0x100000)); diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index 595622b2..8951ce74 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -310,22 +310,20 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) dev->max_vfs = (uint16_t)tmp; } - /* get numa node */ + /* get numa node, default to 0 if not present */ snprintf(filename, sizeof(filename), "%s/numa_node", dirname); - if (access(filename, R_OK) != 0) { - /* if no NUMA support, set default to 0 */ - dev->device.numa_node = 0; + + if (access(filename, F_OK) != -1) { + if (eal_parse_sysfs_value(filename, &tmp) == 0) + dev->device.numa_node = tmp; + else + dev->device.numa_node = -1; } else { - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->device.numa_node = tmp; + dev->device.numa_node = 0; } - rte_pci_device_name(addr, dev->name, sizeof(dev->name)); - dev->device.name = dev->name; + pci_name_set(dev); /* parse resources */ snprintf(filename, sizeof(filename), "%s/resource", dirname); @@ -373,6 +371,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; + pci_name_set(dev2); memmove(dev2->mem_resource, dev->mem_resource, sizeof(dev->mem_resource)); free(dev); @@ -430,10 +429,10 @@ parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) /* now convert to int values */ errno = 0; - addr->domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); - addr->bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); - addr->devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); - addr->function = (uint8_t)strtoul(splitaddr.function, NULL, 10); + addr->domain = strtoul(splitaddr.domain, NULL, 16); + addr->bus = strtoul(splitaddr.bus, NULL, 16); + addr->devid = strtoul(splitaddr.devid, NULL, 16); + addr->function = strtoul(splitaddr.function, NULL, 10); if (errno != 0) goto error; diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c index 2be13195..aa9d96ed 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -214,7 +214,7 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) intr_idx = VFIO_PCI_NUM_IRQS; /* get interrupt type from internal config (MSI-X by default, can be - * overriden from the command line + * overridden from the command line */ switch (internal_config.vfio_intr_mode) { case RTE_INTR_MODE_MSIX: diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c index 9f88530e..6481eeea 100644 --- a/lib/librte_eal/linuxapp/eal/eal_thread.c +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c @@ -49,7 +49,6 @@ #include <rte_memzone.h> #include <rte_per_lcore.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_lcore.h> #include "eal_private.h" @@ -184,7 +183,14 @@ eal_thread_loop(__attribute__((unused)) void *arg) ret = lcore_config[lcore_id].f(fct_arg); lcore_config[lcore_id].ret = ret; rte_wmb(); - lcore_config[lcore_id].state = FINISHED; + + /* when a service core returns, it should go directly to WAIT + * state, because the application will not lcore_wait() for it. + */ + if (lcore_config[lcore_id].core_role == ROLE_SERVICE) + lcore_config[lcore_id].state = WAIT; + else + lcore_config[lcore_id].state = FINISHED; } /* never reached */ diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c index bddbdb07..19db1cb5 100644 --- a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c @@ -38,7 +38,6 @@ #include <stdint.h> #include <inttypes.h> #include <string.h> -#include <stdarg.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> @@ -46,7 +45,6 @@ #include <sys/file.h> #include <unistd.h> #include <limits.h> -#include <errno.h> #include <sys/ioctl.h> #include <sys/time.h> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map index 670bab3a..3a8f1540 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -198,3 +198,47 @@ DPDK_17.05 { vfio_get_group_no; } DPDK_17.02; + +DPDK_17.08 { + global: + + rte_bus_find; + rte_bus_find_by_device; + rte_bus_find_by_name; + rte_log_get_level; + +} DPDK_17.05; + +EXPERIMENTAL { + global: + + rte_eal_devargs_insert; + rte_eal_devargs_parse; + rte_eal_devargs_remove; + rte_eal_hotplug_add; + rte_eal_hotplug_remove; + rte_service_disable_on_lcore; + rte_service_dump; + rte_service_enable_on_lcore; + rte_service_get_by_id; + rte_service_get_by_name; + rte_service_get_count; + rte_service_get_enabled_on_lcore; + rte_service_is_running; + rte_service_lcore_add; + rte_service_lcore_count; + rte_service_lcore_del; + rte_service_lcore_list; + rte_service_lcore_reset_all; + rte_service_lcore_start; + rte_service_lcore_stop; + rte_service_probe_capability; + rte_service_register; + rte_service_reset; + rte_service_set_stats_enable; + rte_service_start; + rte_service_start_with_defaults; + rte_service_stop; + rte_service_unregister; + +} DPDK_17.08; |