diff options
Diffstat (limited to 'lib/librte_eal/linuxapp')
26 files changed, 491 insertions, 251 deletions
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index e1093619..30b30f33 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -44,9 +44,12 @@ VPATH += $(RTE_SDK)/lib/librte_eal/common CFLAGS += -I$(SRCDIR)/include CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include +ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y) +# workaround for circular dependency eal -> ivshmem -> ring/mempool -> eal CFLAGS += -I$(RTE_SDK)/lib/librte_ring CFLAGS += -I$(RTE_SDK)/lib/librte_mempool CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem +endif CFLAGS += $(WERROR_FLAGS) -O3 LDLIBS += -ldl diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 8aafd519..543ef869 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -465,24 +465,6 @@ eal_parse_vfio_intr(const char *mode) return -1; } -static inline size_t -eal_get_hugepage_mem_size(void) -{ - uint64_t size = 0; - unsigned i, j; - - for (i = 0; i < internal_config.num_hugepage_sizes; i++) { - struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) { - for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { - size += hpi->hugepage_sz * hpi->num_pages[j]; - } - } - } - - return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX; -} - /* Parse the arguments for --log-level only */ static void eal_log_level_parse(int argc, char **argv) @@ -715,12 +697,8 @@ rte_eal_iopl_init(void) #if defined(RTE_ARCH_X86) if (iopl(3) != 0) return -1; - return 0; -#elif defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) - return 0; /* iopl syscall not supported for ARM/ARM64 */ -#else - return -1; #endif + return 0; } /* Launch threads, called at application init(). 
*/ @@ -766,8 +744,6 @@ rte_eal_init(int argc, char **argv) if (internal_config.memory == 0 && internal_config.force_sockets == 0) { if (internal_config.no_hugetlbfs) internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE; - else - internal_config.memory = eal_get_hugepage_mem_size(); } if (internal_config.vmware_tsc_map == 1) { @@ -863,7 +839,7 @@ rte_eal_init(int argc, char **argv) ret = rte_thread_setname(lcore_config[i].thread_id, thread_name); if (ret != 0) - RTE_LOG(ERR, EAL, + RTE_LOG(DEBUG, EAL, "Cannot set name for lcore thread\n"); } diff --git a/lib/librte_eal/linuxapp/eal/eal_debug.c b/lib/librte_eal/linuxapp/eal/eal_debug.c index 907fbfa7..5fbc17c5 100644 --- a/lib/librte_eal/linuxapp/eal/eal_debug.c +++ b/lib/librte_eal/linuxapp/eal/eal_debug.c @@ -77,9 +77,6 @@ void __rte_panic(const char *funcname, const char *format, ...) { va_list ap; - /* disable history */ - rte_log_set_history(0); - rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname); va_start(ap, format); rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); @@ -98,9 +95,6 @@ rte_exit(int exit_code, const char *format, ...) 
{ va_list ap; - /* disable history */ - rte_log_set_history(0); - if (exit_code != 0) RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n" " Cause: ", exit_code); diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 06b26a9e..47a3b20a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -57,10 +57,8 @@ #include <rte_lcore.h> #include <rte_atomic.h> #include <rte_branch_prediction.h> -#include <rte_ring.h> #include <rte_debug.h> #include <rte_log.h> -#include <rte_mempool.h> #include <rte_pci.h> #include <rte_malloc.h> #include <rte_errno.h> @@ -889,7 +887,7 @@ rte_eal_intr_init(void) "eal-intr-thread"); ret_1 = rte_thread_setname(intr_thread, thread_name); if (ret_1 != 0) - RTE_LOG(ERR, EAL, + RTE_LOG(DEBUG, EAL, "Failed to set thread name for interrupt handling\n"); } diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c index 07aec694..67b3caf2 100644 --- a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c +++ b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c @@ -49,7 +49,6 @@ #include <rte_string_fns.h> #include <rte_errno.h> #include <rte_ring.h> -#include <rte_mempool.h> #include <rte_malloc.h> #include <rte_common.h> #include <rte_ivshmem.h> @@ -184,21 +183,21 @@ overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2) i_end2 = mz2->ioremap_addr + mz2->len; /* check for overlap in virtual addresses */ - if (start1 > start2 && start1 < end2) + if (start1 >= start2 && start1 < end2) result |= VIRT; if (start2 >= start1 && start2 < end1) result |= VIRT; /* check for overlap in physical addresses */ - if (p_start1 > p_start2 && p_start1 < p_end2) + if (p_start1 >= p_start2 && p_start1 < p_end2) result |= PHYS; - if (p_start2 > p_start1 && p_start2 < p_end1) + if (p_start2 >= p_start1 && p_start2 < p_end1) result |= PHYS; /* check for overlap in ioremap addresses */ - if (i_start1 > i_start2 && i_start1 
< i_end2) + if (i_start1 >= i_start2 && i_start1 < i_end2) result |= IOREMAP; - if (i_start2 > i_start1 && i_start2 < i_end1) + if (i_start2 >= i_start1 && i_start2 < i_end1) result |= IOREMAP; return result; diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c index 0b133c3e..d3911004 100644 --- a/lib/librte_eal/linuxapp/eal/eal_log.c +++ b/lib/librte_eal/linuxapp/eal/eal_log.c @@ -50,8 +50,7 @@ #include "eal_private.h" /* - * default log function, used once mempool (hence log history) is - * available + * default log function */ static ssize_t console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) @@ -60,9 +59,6 @@ console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) ssize_t ret; uint32_t loglevel; - /* add this log in history */ - rte_log_add_in_history(buf, size); - /* write on stdout */ ret = fwrite(buf, 1, size, stdout); fflush(stdout); @@ -110,8 +106,7 @@ rte_eal_log_init(const char *id, int facility) /* early logs */ /* - * early log function, used during boot when mempool (hence log - * history) is not available + * early log function, used before rte_eal_log_init */ static ssize_t early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 5b9132c6..5578c254 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -80,6 +80,8 @@ #include <errno.h> #include <sys/ioctl.h> #include <sys/time.h> +#include <signal.h> +#include <setjmp.h> #include <rte_log.h> #include <rte_memory.h> @@ -309,6 +311,22 @@ get_virtual_area(size_t *size, size_t hugepage_sz) return addr; } +static sigjmp_buf huge_jmpenv; + +static void huge_sigbus_handler(int signo __rte_unused) +{ + siglongjmp(huge_jmpenv, 1); +} + +/* Put setjmp into a wrap method to avoid compiling error. 
Any non-volatile, + * non-static local variable in the stack frame calling sigsetjmp might be + * clobbered by a call to longjmp. + */ +static int huge_wrap_sigsetjmp(void) +{ + return sigsetjmp(huge_jmpenv, 1); +} + /* * Mmap all hugepages of hugepage table: it first open a file in * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the @@ -316,7 +334,7 @@ get_virtual_area(size_t *size, size_t hugepage_sz) * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to * map continguous physical blocks in contiguous virtual blocks. */ -static int +static unsigned map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, int orig) { @@ -394,9 +412,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, /* try to create hugepage file */ fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755); if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, + RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); - return -1; + return i; } /* map the segment, and populate page tables, @@ -404,10 +422,10 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); if (virtaddr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, + RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); close(fd); - return -1; + return i; } if (orig) { @@ -417,12 +435,33 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, hugepg_tbl[i].final_va = virtaddr; } + if (orig) { + /* In linux, hugetlb limitations, like cgroup, are + * enforced at fault time instead of mmap(), even + * with the option of MAP_POPULATE. Kernel will send + * a SIGBUS signal. To avoid to be killed, save stack + * environment here, if SIGBUS happens, we can jump + * back here. 
+ */ + if (huge_wrap_sigsetjmp()) { + RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more " + "hugepages of size %u MB\n", + (unsigned)(hugepage_sz / 0x100000)); + munmap(virtaddr, hugepage_sz); + close(fd); + unlink(hugepg_tbl[i].filepath); + return i; + } + *(int *)virtaddr = 0; + } + + /* set shared flock on the file. */ if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n", + RTE_LOG(DEBUG, EAL, "%s(): Locking file failed:%s \n", __func__, strerror(errno)); close(fd); - return -1; + return i; } close(fd); @@ -430,7 +469,8 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, vma_addr = (char *)vma_addr + hugepage_sz; vma_len -= hugepage_sz; } - return 0; + + return i; } #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS @@ -1036,6 +1076,51 @@ calc_num_pages_per_socket(uint64_t * memory, return total_num_pages; } +static inline size_t +eal_get_hugepage_mem_size(void) +{ + uint64_t size = 0; + unsigned i, j; + + for (i = 0; i < internal_config.num_hugepage_sizes; i++) { + struct hugepage_info *hpi = &internal_config.hugepage_info[i]; + if (hpi->hugedir != NULL) { + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + size += hpi->hugepage_sz * hpi->num_pages[j]; + } + } + } + + return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX; +} + +static struct sigaction huge_action_old; +static int huge_need_recover; + +static void +huge_register_sigbus(void) +{ + sigset_t mask; + struct sigaction action; + + sigemptyset(&mask); + sigaddset(&mask, SIGBUS); + action.sa_flags = 0; + action.sa_mask = mask; + action.sa_handler = huge_sigbus_handler; + + huge_need_recover = !sigaction(SIGBUS, &action, &huge_action_old); +} + +static void +huge_recover_sigbus(void) +{ + if (huge_need_recover) { + sigaction(SIGBUS, &huge_action_old, NULL); + huge_need_recover = 0; + } +} + /* * Prepare physical memory mapping: fill configuration structure with * these infos, return 0 on success. 
@@ -1122,8 +1207,11 @@ rte_eal_hugepage_init(void) hp_offset = 0; /* where we start the current page size entries */ + huge_register_sigbus(); + /* map all hugepages and sort them */ for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){ + unsigned pages_old, pages_new; struct hugepage_info *hpi; /* @@ -1137,10 +1225,28 @@ rte_eal_hugepage_init(void) continue; /* map all hugepages available */ - if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0){ - RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); + pages_old = hpi->num_pages[0]; + pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1); + if (pages_new < pages_old) { +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + RTE_LOG(ERR, EAL, + "%d not %d hugepages of size %u MB allocated\n", + pages_new, pages_old, + (unsigned)(hpi->hugepage_sz / 0x100000)); goto fail; +#else + RTE_LOG(DEBUG, EAL, + "%d not %d hugepages of size %u MB allocated\n", + pages_new, pages_old, + (unsigned)(hpi->hugepage_sz / 0x100000)); + + int pages = pages_old - pages_new; + + nr_hugepages -= pages; + hpi->num_pages[0] = pages_new; + if (pages_new == 0) + continue; +#endif } /* find physical addresses and sockets for each hugepage */ @@ -1172,8 +1278,9 @@ rte_eal_hugepage_init(void) hp_offset += new_pages_count[i]; #else /* remap all hugepages */ - if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){ - RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", + if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) != + hpi->num_pages[0]) { + RTE_LOG(ERR, EAL, "Failed to remap %u MB pages\n", (unsigned)(hpi->hugepage_sz / 0x100000)); goto fail; } @@ -1187,6 +1294,11 @@ rte_eal_hugepage_init(void) #endif } + huge_recover_sigbus(); + + if (internal_config.memory == 0 && internal_config.force_sockets == 0) + internal_config.memory = eal_get_hugepage_mem_size(); + #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS nr_hugefiles = 0; for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { @@ -1373,6 +1485,7 
@@ rte_eal_hugepage_init(void) return 0; fail: + huge_recover_sigbus(); free(tmp_hp); return -1; } @@ -1399,7 +1512,7 @@ int rte_eal_hugepage_attach(void) { const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - const struct hugepage_file *hp = NULL; + struct hugepage_file *hp = NULL; unsigned num_hp = 0; unsigned i, s = 0; /* s used to track the segment number */ off_t size; @@ -1417,7 +1530,7 @@ rte_eal_hugepage_attach(void) if (internal_config.xen_dom0_support) { #ifdef RTE_LIBRTE_XEN_DOM0 if (rte_xen_dom0_memory_attach() < 0) { - RTE_LOG(ERR, EAL,"Failed to attach memory setments of primay " + RTE_LOG(ERR, EAL, "Failed to attach memory segments of primary " "process\n"); return -1; } @@ -1481,7 +1594,7 @@ rte_eal_hugepage_attach(void) size = getFileSize(fd_hugepage); hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0); - if (hp == NULL) { + if (hp == MAP_FAILED) { RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); goto error; } @@ -1545,12 +1658,19 @@ rte_eal_hugepage_attach(void) s++; } /* unmap the hugepage config file, since we are done using it */ - munmap((void *)(uintptr_t)hp, size); + munmap(hp, size); close(fd_zero); close(fd_hugepage); return 0; error: + s = 0; + while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0) { + munmap(mcfg->memseg[s].addr, mcfg->memseg[s].len); + s++; + } + if (hp != NULL && hp != MAP_FAILED) + munmap(hp, size); if (fd_zero >= 0) close(fd_zero); if (fd_hugepage >= 0) diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index dbf12a84..f9c3efd2 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -66,8 +66,8 @@ pci_unbind_kernel_driver(struct rte_pci_device *dev) /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */ snprintf(filename, sizeof(filename), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind", - loc->domain, loc->bus, loc->devid, loc->function); + "%s/" PCI_PRI_FMT "/driver/unbind", 
pci_get_sysfs_path(), + loc->domain, loc->bus, loc->devid, loc->function); f = fopen(filename, "w"); if (f == NULL) /* device was not bound */ @@ -190,12 +190,13 @@ pci_find_max_end_va(void) return RTE_PTR_ADD(last->addr, last->len); } -/* parse the "resource" sysfs file */ -static int -pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) +/* parse one line of the "resource" sysfs file (note that the 'line' + * string is modified) + */ +int +pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, + uint64_t *end_addr, uint64_t *flags) { - FILE *f; - char buf[BUFSIZ]; union pci_resource_info { struct { char *phys_addr; @@ -204,6 +205,31 @@ pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) }; char *ptrs[PCI_RESOURCE_FMT_NVAL]; } res_info; + + if (rte_strsplit(line, len, res_info.ptrs, 3, ' ') != 3) { + RTE_LOG(ERR, EAL, + "%s(): bad resource format\n", __func__); + return -1; + } + errno = 0; + *phys_addr = strtoull(res_info.phys_addr, NULL, 16); + *end_addr = strtoull(res_info.end_addr, NULL, 16); + *flags = strtoull(res_info.flags, NULL, 16); + if (errno != 0) { + RTE_LOG(ERR, EAL, + "%s(): bad resource format\n", __func__); + return -1; + } + + return 0; +} + +/* parse the "resource" sysfs file */ +static int +pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) +{ + FILE *f; + char buf[BUFSIZ]; int i; uint64_t phys_addr, end_addr, flags; @@ -220,21 +246,9 @@ pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) "%s(): cannot read resource\n", __func__); goto error; } - - if (rte_strsplit(buf, sizeof(buf), res_info.ptrs, 3, ' ') != 3) { - RTE_LOG(ERR, EAL, - "%s(): bad resource format\n", __func__); + if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, + &end_addr, &flags) < 0) goto error; - } - errno = 0; - phys_addr = strtoull(res_info.phys_addr, NULL, 16); - end_addr = strtoull(res_info.end_addr, NULL, 16); - flags = strtoull(res_info.flags, 
NULL, 16); - if (errno != 0) { - RTE_LOG(ERR, EAL, - "%s(): bad resource format\n", __func__); - goto error; - } if (flags & IORESOURCE_MEM) { dev->mem_resource[i].phys_addr = phys_addr; @@ -306,6 +320,16 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, } dev->id.subsystem_device_id = (uint16_t)tmp; + /* get class_id */ + snprintf(filename, sizeof(filename), "%s/class", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + /* the least 24 bits are valid: class, subclass, program interface */ + dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID; + /* get max_vfs */ dev->max_vfs = 0; snprintf(filename, sizeof(filename), "%s/max_vfs", dirname); @@ -453,7 +477,7 @@ rte_eal_pci_scan(void) uint16_t domain; uint8_t bus, devid, function; - dir = opendir(SYSFS_PCI_DEVICES); + dir = opendir(pci_get_sysfs_path()); if (dir == NULL) { RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", __func__, strerror(errno)); @@ -468,8 +492,8 @@ rte_eal_pci_scan(void) &bus, &devid, &function) != 0) continue; - snprintf(dirname, sizeof(dirname), "%s/%s", SYSFS_PCI_DEVICES, - e->d_name); + snprintf(dirname, sizeof(dirname), "%s/%s", + pci_get_sysfs_path(), e->d_name); if (pci_scan_one(dirname, domain, bus, devid, function) < 0) goto error; } @@ -481,18 +505,6 @@ error: return -1; } -#ifdef RTE_PCI_CONFIG -/* - * It is deprecated, all its configurations have been moved into - * each PMD respectively. - */ -void -pci_config_space_set(__rte_unused struct rte_pci_device *dev) -{ - RTE_LOG(DEBUG, EAL, "Nothing here, as it is deprecated\n"); -} -#endif - /* Read PCI config space. 
*/ int rte_eal_pci_read_config(const struct rte_pci_device *device, void *buf, size_t len, off_t offset) diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h index 7011753d..f72a2548 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h +++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h @@ -36,12 +36,22 @@ #include "eal_vfio.h" +/** IO resource type: */ +#define IORESOURCE_IO 0x00000100 +#define IORESOURCE_MEM 0x00000200 + /* * Helper function to map PCI resources right after hugepages in virtual memory */ extern void *pci_map_addr; void *pci_find_max_end_va(void); +/* parse one line of the "resource" sysfs file (note that the 'line' + * string is modified) + */ +int pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, + uint64_t *end_addr, uint64_t *flags); + int pci_uio_alloc_resource(struct rte_pci_device *dev, struct mapped_pci_resource **uio_res); void pci_uio_free_resource(struct rte_pci_device *dev, diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index 068694dc..1786b754 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -35,6 +35,7 @@ #include <unistd.h> #include <fcntl.h> #include <dirent.h> +#include <inttypes.h> #include <sys/stat.h> #include <sys/mman.h> #include <linux/pci_regs.h> @@ -161,14 +162,14 @@ pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, * or uio:uioX */ snprintf(dirname, sizeof(dirname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio", + "%s/" PCI_PRI_FMT "/uio", pci_get_sysfs_path(), loc->domain, loc->bus, loc->devid, loc->function); dir = opendir(dirname); if (dir == NULL) { /* retry with the parent directory */ snprintf(dirname, sizeof(dirname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT, + "%s/" PCI_PRI_FMT, pci_get_sysfs_path(), loc->domain, loc->bus, loc->devid, loc->function); dir = opendir(dirname); @@ -309,7 +310,7 @@ pci_uio_map_resource_by_index(struct 
rte_pci_device *dev, int res_idx, struct mapped_pci_resource *uio_res, int map_idx) { int fd; - char devname[PATH_MAX]; /* contains the /dev/uioX */ + char devname[PATH_MAX]; void *mapaddr; struct rte_pci_addr *loc; struct pci_map *maps; @@ -319,7 +320,8 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, /* update devname for mmap */ snprintf(devname, sizeof(devname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d", + "%s/" PCI_PRI_FMT "/resource%d", + pci_get_sysfs_path(), loc->domain, loc->bus, loc->devid, loc->function, res_idx); @@ -368,11 +370,11 @@ error: return -1; } +#if defined(RTE_ARCH_X86) int pci_uio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) { -#if defined(RTE_ARCH_X86) char dirname[PATH_MAX]; char filename[PATH_MAX]; int uio_num; @@ -411,81 +413,154 @@ pci_uio_ioport_map(struct rte_pci_device *dev, int bar, RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start); p->base = start; + p->len = 0; return 0; +} #else - RTE_SET_USED(dev); - RTE_SET_USED(bar); - RTE_SET_USED(p); +int +pci_uio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + FILE *f; + char buf[BUFSIZ]; + char filename[PATH_MAX]; + uint64_t phys_addr, end_addr, flags; + int fd, i; + void *addr; + + /* open and read addresses of the corresponding resource in sysfs */ + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource", + pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n", + strerror(errno)); + return -1; + } + for (i = 0; i < bar + 1; i++) { + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n"); + goto error; + } + } + if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, + &end_addr, &flags) < 0) + goto error; + if ((flags & IORESOURCE_IO) == 0) { + RTE_LOG(ERR, EAL, "BAR %d is not an IO 
resource\n", bar); + goto error; + } + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource%d", + pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function, bar); + + /* mmap the pci resource */ + fd = open(filename, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, + strerror(errno)); + goto error; + } + addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + close(fd); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot mmap IO port resource: %s\n", + strerror(errno)); + goto error; + } + + /* strangely, the base address is mmap addr + phys_addr */ + p->base = (uintptr_t)addr + phys_addr; + p->len = end_addr + 1; + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%"PRIx64"\n", p->base); + fclose(f); + + return 0; + +error: + fclose(f); return -1; -#endif } +#endif void pci_uio_ioport_read(struct rte_pci_ioport *p, void *data, size_t len, off_t offset) { -#if defined(RTE_ARCH_X86) uint8_t *d; int size; - unsigned short reg = p->base + offset; + uintptr_t reg = p->base + offset; for (d = data; len > 0; d += size, reg += size, len -= size) { if (len >= 4) { size = 4; +#if defined(RTE_ARCH_X86) *(uint32_t *)d = inl(reg); +#else + *(uint32_t *)d = *(volatile uint32_t *)reg; +#endif } else if (len >= 2) { size = 2; +#if defined(RTE_ARCH_X86) *(uint16_t *)d = inw(reg); +#else + *(uint16_t *)d = *(volatile uint16_t *)reg; +#endif } else { size = 1; +#if defined(RTE_ARCH_X86) *d = inb(reg); - } - } #else - RTE_SET_USED(p); - RTE_SET_USED(data); - RTE_SET_USED(len); - RTE_SET_USED(offset); + *d = *(volatile uint8_t *)reg; #endif + } + } } void pci_uio_ioport_write(struct rte_pci_ioport *p, const void *data, size_t len, off_t offset) { -#if defined(RTE_ARCH_X86) const uint8_t *s; int size; - unsigned short reg = p->base + offset; + uintptr_t reg = p->base + offset; for (s = data; len > 0; s += size, reg += size, len -= size) { if (len >= 4) { size = 4; +#if 
defined(RTE_ARCH_X86) outl_p(*(const uint32_t *)s, reg); +#else + *(volatile uint32_t *)reg = *(const uint32_t *)s; +#endif } else if (len >= 2) { size = 2; +#if defined(RTE_ARCH_X86) outw_p(*(const uint16_t *)s, reg); +#else + *(volatile uint16_t *)reg = *(const uint16_t *)s; +#endif } else { size = 1; +#if defined(RTE_ARCH_X86) outb_p(*s, reg); - } - } #else - RTE_SET_USED(p); - RTE_SET_USED(data); - RTE_SET_USED(len); - RTE_SET_USED(offset); + *(volatile uint8_t *)reg = *s; #endif + } + } } int pci_uio_ioport_unmap(struct rte_pci_ioport *p) { - RTE_SET_USED(p); #if defined(RTE_ARCH_X86) + RTE_SET_USED(p); /* FIXME close intr fd ? */ return 0; #else - return -1; + return munmap((void *)(uintptr_t)p->base, p->len); #endif } diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c index 10266f8f..f91b9242 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -602,7 +602,7 @@ pci_vfio_get_group_no(const char *pci_addr, int *iommu_group_no) /* try to find out IOMMU group for this device */ snprintf(linkname, sizeof(linkname), - SYSFS_PCI_DEVICES "/%s/iommu_group", pci_addr); + "%s/%s/iommu_group", pci_get_sysfs_path(), pci_addr); ret = readlink(linkname, filename, sizeof(filename)); diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c index d9188fde..d54ded88 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c @@ -287,7 +287,10 @@ pci_vfio_mp_sync_thread(void __rte_unused * arg) struct linger l; l.l_onoff = 1; l.l_linger = 60; - setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)); + + if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0) + RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option " + "on listen socket (%s)\n", strerror(errno)); ret = vfio_mp_sync_receive_request(conn_sock); @@ -396,7 +399,7 @@ 
pci_vfio_mp_sync_setup(void) snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pci-vfio-sync"); ret = rte_thread_setname(socket_thread, thread_name); if (ret) - RTE_LOG(ERR, EAL, + RTE_LOG(DEBUG, EAL, "Failed to set thread name for secondary processes!\n"); return 0; diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c index 18bd8e04..9f88530e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_thread.c +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c @@ -197,3 +197,16 @@ int rte_sys_gettid(void) { return (int)syscall(SYS_gettid); } + +int rte_thread_setname(pthread_t id, const char *name) +{ + int ret = -1; +#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) +#if __GLIBC_PREREQ(2, 12) + ret = pthread_setname_np(id, name); +#endif +#endif + RTE_SET_USED(id); + RTE_SET_USED(name); + return ret; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c index f2abb7b6..afa32f5c 100644 --- a/lib/librte_eal/linuxapp/eal/eal_timer.c +++ b/lib/librte_eal/linuxapp/eal/eal_timer.c @@ -222,8 +222,8 @@ rte_eal_hpet_init(int make_default) snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "hpet-msb-inc"); ret = rte_thread_setname(msb_inc_thread_id, thread_name); if (ret != 0) - RTE_LOG(ERR, EAL, - "ERROR: Cannot set HPET timer thread name!\n"); + RTE_LOG(DEBUG, EAL, + "Cannot set HPET timer thread name!\n"); if (make_default) eal_timer_source = EAL_TIMER_HPET; diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c index 495eef9e..0b612bb1 100644 --- a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c @@ -156,13 +156,27 @@ get_xen_memory_size(void) * Based on physical address to caculate MFN in Xen Dom0. 
*/ phys_addr_t -rte_xen_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr) +rte_xen_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr) { - int mfn_id; + int mfn_id, i; uint64_t mfn, mfn_offset; struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; struct rte_memseg *memseg = mcfg->memseg; + /* find the memory segment owning the physical address */ + if (memseg_id == -1) { + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + if ((phy_addr >= memseg[i].phys_addr) && + (phy_addr < memseg[i].phys_addr + + memseg[i].len)) { + memseg_id = i; + break; + } + } + if (memseg_id == -1) + return RTE_BAD_PHYS_ADDR; + } + mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M; /*the MFN is contiguous in 2M */ diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h index 7e5e5984..2acdfd9b 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -113,7 +113,9 @@ struct rte_kni_mbuf { void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); char pad0[10]; uint16_t data_off; /**< Start address of data in segment buffer. */ - char pad1[4]; + char pad1[2]; + uint8_t nb_segs; /**< Number of segments. */ + char pad4[1]; uint64_t ol_flags; /**< Offload features. */ char pad2[4]; uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. 
*/ diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map index 12503efa..05134673 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -154,3 +154,13 @@ DPDK_16.04 { rte_eal_primary_proc_alive; } DPDK_2.2; + +DPDK_16.07 { + global: + + pci_get_sysfs_path; + rte_keepalive_mark_sleep; + rte_keepalive_register_relay_callback; + rte_thread_setname; + +} DPDK_16.04; diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h index c1d45a66..0d781e48 100644 --- a/lib/librte_eal/linuxapp/igb_uio/compat.h +++ b/lib/librte_eal/linuxapp/igb_uio/compat.h @@ -24,6 +24,15 @@ #define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 #endif +/* + * for kernels < 2.6.38 and backported patch that moves MSI-X entry definition + * to pci_regs.h Those kernels has PCI_MSIX_ENTRY_SIZE defined but not + * PCI_MSIX_ENTRY_CTRL_MASKBIT + */ +#ifndef PCI_MSIX_ENTRY_CTRL_MASKBIT +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 +#endif + #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && \ (!(defined(RHEL_RELEASE_CODE) && \ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 9))) diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index 72b26923..45a5720e 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -81,62 +81,10 @@ store_max_vfs(struct device *dev, struct device_attribute *attr, return err ? 
err : count; } -#ifdef RTE_PCI_CONFIG -static ssize_t -show_extended_tag(struct device *dev, struct device_attribute *attr, char *buf) -{ - dev_info(dev, "Deprecated\n"); - - return 0; -} - -static ssize_t -store_extended_tag(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - dev_info(dev, "Deprecated\n"); - - return 0; -} - -static ssize_t -show_max_read_request_size(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - dev_info(dev, "Deprecated\n"); - - return 0; -} - -static ssize_t -store_max_read_request_size(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - dev_info(dev, "Deprecated\n"); - - return 0; -} -#endif - static DEVICE_ATTR(max_vfs, S_IRUGO | S_IWUSR, show_max_vfs, store_max_vfs); -#ifdef RTE_PCI_CONFIG -static DEVICE_ATTR(extended_tag, S_IRUGO | S_IWUSR, show_extended_tag, - store_extended_tag); -static DEVICE_ATTR(max_read_request_size, S_IRUGO | S_IWUSR, - show_max_read_request_size, store_max_read_request_size); -#endif static struct attribute *dev_attrs[] = { &dev_attr_max_vfs.attr, -#ifdef RTE_PCI_CONFIG - &dev_attr_extended_tag.attr, - &dev_attr_max_read_request_size.attr, -#endif NULL, }; diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile index ac99d3f1..8cc6b61c 100644 --- a/lib/librte_eal/linuxapp/kni/Makefile +++ b/lib/librte_eal/linuxapp/kni/Makefile @@ -47,7 +47,7 @@ MODULE_CFLAGS += -Wall -Werror ifeq ($(shell lsb_release -si 2>/dev/null),Ubuntu) MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(shell lsb_release -sr | tr -d .) 
UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \ - | cut -d '"' -f2 | cut -d- -f1,2 | tr .- $(comma)`,1) + | cut -d '"' -f2 | cut -d- -f1,2 | tr .- ,`,1) MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))" endif diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h index cf100b67..647ba3ce 100644 --- a/lib/librte_eal/linuxapp/kni/compat.h +++ b/lib/librte_eal/linuxapp/kni/compat.h @@ -14,16 +14,27 @@ #endif /* < 2.6.39 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33) +#define HAVE_SIMPLIFIED_PERNET_OPERATIONS +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) #define sk_sleep(s) (s)->sk_sleep +#endif -#endif /* < 2.6.35 */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) +#define HAVE_CHANGE_CARRIER_CB +#endif -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) #define HAVE_IOV_ITER_MSGHDR #endif -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) ) +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) #define HAVE_KIOCB_MSG_PARAM -#endif /* < 4.1.0 */ +#define HAVE_REBUILD_HEADER +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) +#define HAVE_TRANS_START_HELPER +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c index df224702..140a2a47 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c @@ -3300,12 +3300,13 @@ s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data) *data = E1000_READ_REG(hw, E1000_MPHY_DATA); /* Disable access to mPHY if it was originally disabled */ - if (locked) + if (locked) { ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, E1000_MPHY_DIS_ACCESS); + } return E1000_SUCCESS; } @@ 
-3365,12 +3366,13 @@ s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data, E1000_WRITE_REG(hw, E1000_MPHY_DATA, data); /* Disable access to mPHY if it was originally disabled */ - if (locked) + if (locked) { ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, E1000_MPHY_DIS_ACCESS); + } return E1000_SUCCESS; } diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c index 017dfe16..c6f4130d 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c @@ -867,12 +867,13 @@ s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { /* Set KX4/KX/KR support according to speed requested */ autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP); - if (speed & IXGBE_LINK_SPEED_10GB_FULL) + if (speed & IXGBE_LINK_SPEED_10GB_FULL) { if (orig_autoc & IXGBE_AUTOC_KX4_SUPP) autoc |= IXGBE_AUTOC_KX4_SUPP; if ((orig_autoc & IXGBE_AUTOC_KR_SUPP) && (hw->phy.smart_speed_active == false)) autoc |= IXGBE_AUTOC_KR_SUPP; + } if (speed & IXGBE_LINK_SPEED_1GB_FULL) autoc |= IXGBE_AUTOC_KX_SUPP; } else if ((pma_pmd_1g == IXGBE_AUTOC_1G_SFI) && diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c index 8c1d2fe3..92fc9fc7 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c @@ -59,8 +59,6 @@ #undef CONFIG_DCA_MODULE char ixgbe_driver_name[] = "ixgbe"; -static const char ixgbe_driver_string[] = - "Intel(R) 10 Gigabit PCI Express Network Driver"; #define DRV_HW_PERF #ifndef CONFIG_IXGBE_NAPI @@ -79,8 +77,6 @@ static const char ixgbe_driver_string[] = #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." 
\ __stringify(BUILD) DRIVERNAPI DRV_HW_PERF FPGA VMDQ_TAG const char ixgbe_driver_version[] = DRV_VERSION; -static const char ixgbe_copyright[] = - "Copyright (c) 1999-2012 Intel Corporation."; /* ixgbe_pci_tbl - PCI Device ID Table * diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c index ae8133f3..59d15ca6 100644 --- a/lib/librte_eal/linuxapp/kni/kni_misc.c +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c @@ -26,6 +26,7 @@ #include <linux/module.h> #include <linux/miscdevice.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> #include <linux/pci.h> #include <linux/kthread.h> #include <linux/rwsem.h> @@ -34,6 +35,8 @@ #include <net/netns/generic.h> #include <exec-env/rte_kni_common.h> + +#include "compat.h" #include "kni_dev.h" MODULE_LICENSE("Dual BSD/GPL"); @@ -104,7 +107,7 @@ struct kni_net { static int __net_init kni_init_net(struct net *net) { -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS struct kni_net *knet = net_generic(net, kni_net_id); #else struct kni_net *knet; @@ -115,7 +118,7 @@ static int __net_init kni_init_net(struct net *net) ret = -ENOMEM; return ret; } -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif /* Clear the bit of device in use */ clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); @@ -123,7 +126,7 @@ static int __net_init kni_init_net(struct net *net) init_rwsem(&knet->kni_list_lock); INIT_LIST_HEAD(&knet->kni_list_head); -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS return 0; #else ret = net_assign_generic(net, kni_net_id, knet); @@ -131,25 +134,25 @@ static int __net_init kni_init_net(struct net *net) kfree(knet); return ret; -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif } static void __net_exit kni_exit_net(struct net *net) { -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 32) +#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS struct kni_net *knet = 
net_generic(net, kni_net_id); kfree(knet); -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif } static struct pernet_operations kni_net_ops = { .init = kni_init_net, .exit = kni_exit_net, -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS .id = &kni_net_id, .size = sizeof(struct kni_net), -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif }; static int __init @@ -164,11 +167,11 @@ kni_init(void) return -EINVAL; } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS rc = register_pernet_subsys(&kni_net_ops); #else rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif if (rc) return -EPERM; @@ -186,11 +189,11 @@ kni_init(void) return 0; out: -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS unregister_pernet_subsys(&kni_net_ops); #else register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif return rc; } @@ -198,11 +201,11 @@ static void __exit kni_exit(void) { misc_deregister(&kni_misc); -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS unregister_pernet_subsys(&kni_net_ops); #else register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif KNI_PRINT("####### DPDK kni module unloaded #######\n"); } @@ -542,6 +545,15 @@ kni_ioctl_create(struct net *net, if (pci) pci_dev_put(pci); + if (kni->lad_dev) + memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN); + else + /* + * Generate random mac address. eth_random_addr() is the newer + * version of generating mac address in linux kernel. 
+ */ + random_ether_addr(net_dev->dev_addr); + ret = register_netdev(net_dev); if (ret) { KNI_ERR("error %i registering device \"%s\"\n", diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c index cfa83398..fc82193a 100644 --- a/lib/librte_eal/linuxapp/kni/kni_net.c +++ b/lib/librte_eal/linuxapp/kni/kni_net.c @@ -38,6 +38,8 @@ #include <exec-env/rte_kni_common.h> #include <kni_fifo.h> + +#include "compat.h" #include "kni_dev.h" #define WD_TIMEOUT 5 /*jiffies */ @@ -69,15 +71,6 @@ kni_net_open(struct net_device *dev) struct rte_kni_request req; struct kni_dev *kni = netdev_priv(dev); - if (kni->lad_dev) - memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN); - else - /* - * Generate random mac address. eth_random_addr() is the newer - * version of generating mac address in linux kernel. - */ - random_ether_addr(dev->dev_addr); - netif_start_queue(dev); memset(&req, 0, sizeof(req)); @@ -156,7 +149,8 @@ kni_net_rx_normal(struct kni_dev *kni) /* Transfer received packets to netif */ for (i = 0; i < num_rx; i++) { kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; - len = kva->data_len; + len = kva->pkt_len; + data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva; @@ -165,22 +159,41 @@ kni_net_rx_normal(struct kni_dev *kni) KNI_ERR("Out of mem, dropping pkts\n"); /* Update statistics */ kni->stats.rx_dropped++; + continue; } - else { - /* Align IP on 16B boundary */ - skb_reserve(skb, 2); + + /* Align IP on 16B boundary */ + skb_reserve(skb, 2); + + if (kva->nb_segs == 1) { memcpy(skb_put(skb, len), data_kva, len); - skb->dev = dev; - skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_UNNECESSARY; + } else { + int nb_segs; + int kva_nb_segs = kva->nb_segs; - /* Call netif interface */ - netif_rx_ni(skb); + for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) { + memcpy(skb_put(skb, kva->data_len), + data_kva, kva->data_len); - /* Update statistics */ - kni->stats.rx_bytes += len; - 
kni->stats.rx_packets++; + if (!kva->next) + break; + + kva = kva->next - kni->mbuf_va + kni->mbuf_kva; + data_kva = kva->buf_addr + kva->data_off + - kni->mbuf_va + kni->mbuf_kva; + } } + + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* Call netif interface */ + netif_rx_ni(skb); + + /* Update statistics */ + kni->stats.rx_bytes += len; + kni->stats.rx_packets++; } /* Burst enqueue mbufs into free_q */ @@ -317,7 +330,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) /* Copy mbufs to sk buffer and then call tx interface */ for (i = 0; i < num; i++) { kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; - len = kva->data_len; + len = kva->pkt_len; data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva; @@ -338,20 +351,39 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) if (skb == NULL) { KNI_ERR("Out of mem, dropping pkts\n"); kni->stats.rx_dropped++; + continue; } - else { - /* Align IP on 16B boundary */ - skb_reserve(skb, 2); + + /* Align IP on 16B boundary */ + skb_reserve(skb, 2); + + if (kva->nb_segs == 1) { memcpy(skb_put(skb, len), data_kva, len); - skb->dev = dev; - skb->ip_summed = CHECKSUM_UNNECESSARY; + } else { + int nb_segs; + int kva_nb_segs = kva->nb_segs; - kni->stats.rx_bytes += len; - kni->stats.rx_packets++; + for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) { + memcpy(skb_put(skb, kva->data_len), + data_kva, kva->data_len); - /* call tx interface */ - kni_net_tx(skb, dev); + if (!kva->next) + break; + + kva = kva->next - kni->mbuf_va + kni->mbuf_kva; + data_kva = kva->buf_addr + kva->data_off + - kni->mbuf_va + kni->mbuf_kva; + } } + + skb->dev = dev; + skb->ip_summed = CHECKSUM_UNNECESSARY; + + kni->stats.rx_bytes += len; + kni->stats.rx_packets++; + + /* call tx interface */ + kni_net_tx(skb, dev); } /* enqueue all the mbufs from rx_q into free_q */ @@ -396,7 +428,12 @@ kni_net_tx(struct sk_buff *skb, struct net_device *dev) struct rte_kni_mbuf *pkt_kva = NULL; struct 
rte_kni_mbuf *pkt_va = NULL; - dev->trans_start = jiffies; /* save the timestamp */ + /* save the timestamp */ +#ifdef HAVE_TRANS_START_HELPER + netif_trans_update(dev); +#else + dev->trans_start = jiffies; +#endif /* Check if the length of skb is less than mbuf size */ if (skb->len > kni->mbuf_size) @@ -604,7 +641,7 @@ kni_net_header(struct sk_buff *skb, struct net_device *dev, /* * Re-fill the eth header */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)) +#ifdef HAVE_REBUILD_HEADER static int kni_net_rebuild_header(struct sk_buff *skb) { @@ -634,7 +671,7 @@ static int kni_net_set_mac(struct net_device *netdev, void *p) return 0; } -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) +#ifdef HAVE_CHANGE_CARRIER_CB static int kni_net_change_carrier(struct net_device *dev, bool new_carrier) { if (new_carrier) @@ -647,7 +684,7 @@ static int kni_net_change_carrier(struct net_device *dev, bool new_carrier) static const struct header_ops kni_net_header_ops = { .create = kni_net_header, -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)) +#ifdef HAVE_REBUILD_HEADER .rebuild = kni_net_rebuild_header, #endif /* < 4.1.0 */ .cache = NULL, /* disable caching */ @@ -664,7 +701,7 @@ static const struct net_device_ops kni_net_netdev_ops = { .ndo_get_stats = kni_net_stats, .ndo_tx_timeout = kni_net_tx_timeout, .ndo_set_mac_address = kni_net_set_mac, -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) +#ifdef HAVE_CHANGE_CARRIER_CB .ndo_change_carrier = kni_net_change_carrier, #endif }; |