Diffstat (limited to 'lib/librte_eal')
55 files changed, 840 insertions, 628 deletions
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index 9054ad61..698fa0a1 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -40,8 +40,6 @@ VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) CFLAGS += -I$(SRCDIR)/include CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include -CFLAGS += -I$(RTE_SDK)/lib/librte_ring -CFLAGS += -I$(RTE_SDK)/lib/librte_mempool CFLAGS += $(WERROR_FLAGS) -O3 LDLIBS += -lexecinfo diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 06bfd4e0..a0c8f8c8 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -605,7 +605,7 @@ rte_eal_init(int argc, char **argv) /* Set thread_name for aid in debugging. */ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "lcore-slave-%d", i); - pthread_set_name_np(lcore_config[i].thread_id, thread_name); + rte_thread_setname(lcore_config[i].thread_id, thread_name); } /* diff --git a/lib/librte_eal/bsdapp/eal/eal_debug.c b/lib/librte_eal/bsdapp/eal/eal_debug.c index 907fbfa7..5fbc17c5 100644 --- a/lib/librte_eal/bsdapp/eal/eal_debug.c +++ b/lib/librte_eal/bsdapp/eal/eal_debug.c @@ -77,9 +77,6 @@ void __rte_panic(const char *funcname, const char *format, ...) { va_list ap; - /* disable history */ - rte_log_set_history(0); - rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname); va_start(ap, format); rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); @@ -98,9 +95,6 @@ rte_exit(int exit_code, const char *format, ...) { va_list ap; - /* disable history */ - rte_log_set_history(0); - if (exit_code != 0) RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n" " Cause: ", exit_code); diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c index 2d16d782..374b68f2 100644 --- a/lib/librte_eal/bsdapp/eal/eal_pci.c +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c @@ -278,6 +278,11 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) /* get subsystem_device id */ dev->id.subsystem_device_id = conf->pc_subdevice; + /* get class id */ + dev->id.class_id = (conf->pc_class << 16) | + (conf->pc_subclass << 8) | + (conf->pc_progif); + /* TODO: get max_vfs */ dev->max_vfs = 0; @@ -422,7 +427,7 @@ int rte_eal_pci_read_config(const struct rte_pci_device *dev, goto error; } - fd = open("/dev/pci", O_RDONLY); + fd = open("/dev/pci", O_RDWR); if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); goto error; @@ -466,7 +471,7 @@ int rte_eal_pci_write_config(const struct rte_pci_device *dev, memcpy(&pi.pi_data, buf, len); - fd = open("/dev/pci", O_RDONLY); + fd = open("/dev/pci", O_RDWR); if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); goto error; diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c index 9a034373..1b8cd8a6 100644 --- a/lib/librte_eal/bsdapp/eal/eal_thread.c +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c @@ -199,3 +199,10 @@ int rte_sys_gettid(void) thr_self(&lwpid); return (int)lwpid; } + +int rte_thread_setname(pthread_t id, const char *name) +{ + /* this BSD function returns no error */ + pthread_set_name_np(id, name); + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map index 58c2951e..1852c4a4 100644 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -151,3 +151,13 @@ DPDK_16.04 { 
rte_eal_primary_proc_alive; } DPDK_2.2; + +DPDK_16.07 { + global: + + pci_get_sysfs_path; + rte_keepalive_mark_sleep; + rte_keepalive_register_relay_callback; + rte_thread_setname; + +} DPDK_16.04; diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c index 2bfe54a1..e403717b 100644 --- a/lib/librte_eal/common/eal_common_devargs.c +++ b/lib/librte_eal/common/eal_common_devargs.c @@ -58,7 +58,7 @@ rte_eal_parse_devargs_str(const char *devargs_str, return -1; *drvname = strdup(devargs_str); - if (drvname == NULL) + if (*drvname == NULL) return -1; /* set the first ',' to '\0' to split name and arguments */ diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c index a4263ba5..2cd41320 100644 --- a/lib/librte_eal/common/eal_common_lcore.c +++ b/lib/librte_eal/common/eal_common_lcore.c @@ -104,7 +104,7 @@ rte_eal_cpu_init(void) RTE_LOG(DEBUG, EAL, "Support maximum %u logical core(s) by configuration.\n", RTE_MAX_LCORE); - RTE_LOG(DEBUG, EAL, "Detected %u lcore(s)\n", config->lcore_count); + RTE_LOG(INFO, EAL, "Detected %u lcore(s)\n", config->lcore_count); return 0; } diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c index 1ae8de70..7916c781 100644 --- a/lib/librte_eal/common/eal_common_log.c +++ b/lib/librte_eal/common/eal_common_log.c @@ -31,54 +31,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <string.h> #include <stdio.h> #include <stdint.h> #include <stdarg.h> -#include <sys/types.h> #include <stdlib.h> -#include <unistd.h> -#include <inttypes.h> -#include <errno.h> -#include <sys/queue.h> #include <rte_log.h> -#include <rte_memory.h> -#include <rte_memzone.h> -#include <rte_launch.h> -#include <rte_common.h> -#include <rte_cycles.h> -#include <rte_eal.h> #include <rte_per_lcore.h> -#include <rte_lcore.h> -#include <rte_atomic.h> -#include <rte_debug.h> -#include <rte_spinlock.h> -#include <rte_branch_prediction.h> -#include <rte_ring.h> -#include <rte_mempool.h> #include "eal_private.h" -#define LOG_ELT_SIZE 2048 - -#define LOG_HISTORY_MP_NAME "log_history" - -STAILQ_HEAD(log_history_list, log_history); - -/** - * The structure of a message log in the log history. 
- */ -struct log_history { - STAILQ_ENTRY(log_history) next; - unsigned size; - char buf[0]; -}; - -static struct rte_mempool *log_history_mp = NULL; -static unsigned log_history_size = 0; -static struct log_history_list log_history; - /* global log structure */ struct rte_logs rte_logs = { .type = ~0, @@ -86,10 +48,7 @@ struct rte_logs rte_logs = { .file = NULL, }; -static rte_spinlock_t log_dump_lock = RTE_SPINLOCK_INITIALIZER; -static rte_spinlock_t log_list_lock = RTE_SPINLOCK_INITIALIZER; static FILE *default_log_stream; -static int history_enabled = 1; /** * This global structure stores some informations about the message @@ -98,66 +57,24 @@ static int history_enabled = 1; struct log_cur_msg { uint32_t loglevel; /**< log level - see rte_log.h */ uint32_t logtype; /**< log type - see rte_log.h */ -} __rte_cache_aligned; -static struct log_cur_msg log_cur_msg[RTE_MAX_LCORE]; /**< per core log */ +}; + /* per core log */ +static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg); /* default logs */ int -rte_log_add_in_history(const char *buf, size_t size) +rte_log_add_in_history(const char *buf __rte_unused, size_t size __rte_unused) { - struct log_history *hist_buf = NULL; - static const unsigned hist_buf_size = LOG_ELT_SIZE - sizeof(*hist_buf); - void *obj; - - if (history_enabled == 0) - return 0; - - rte_spinlock_lock(&log_list_lock); - - /* get a buffer for adding in history */ - if (log_history_size > RTE_LOG_HISTORY) { - hist_buf = STAILQ_FIRST(&log_history); - if (hist_buf) { - STAILQ_REMOVE_HEAD(&log_history, next); - log_history_size--; - } - } - else { - if (rte_mempool_mc_get(log_history_mp, &obj) < 0) - obj = NULL; - hist_buf = obj; - } - - /* no buffer */ - if (hist_buf == NULL) { - rte_spinlock_unlock(&log_list_lock); - return -ENOBUFS; - } - - /* not enough room for msg, buffer go back in mempool */ - if (size >= hist_buf_size) { - rte_mempool_mp_put(log_history_mp, hist_buf); - rte_spinlock_unlock(&log_list_lock); - return -ENOBUFS; - } - - /* add in history */ - memcpy(hist_buf->buf, buf, size); - hist_buf->buf[size] = hist_buf->buf[hist_buf_size-1] = '\0'; - hist_buf->size = size; - STAILQ_INSERT_TAIL(&log_history, hist_buf, next); - log_history_size++; - rte_spinlock_unlock(&log_list_lock); - return 0; } void rte_log_set_history(int enable) { - history_enabled = enable; + if (enable) + RTE_LOG(WARNING, EAL, "The log history is deprecated.\n"); } /* Change the stream that will be used by logging system */ @@ -205,63 +122,19 @@ rte_get_log_type(void) /* get the current loglevel for the message beeing processed */ int rte_log_cur_msg_loglevel(void) { - unsigned lcore_id; - lcore_id = rte_lcore_id(); - if (lcore_id >= RTE_MAX_LCORE) - return rte_get_log_level(); - return log_cur_msg[lcore_id].loglevel; + return RTE_PER_LCORE(log_cur_msg).loglevel; } /* get the current logtype for the message beeing processed */ int rte_log_cur_msg_logtype(void) { - unsigned lcore_id; - lcore_id = rte_lcore_id(); - if (lcore_id >= RTE_MAX_LCORE) - return rte_get_log_type(); - return log_cur_msg[lcore_id].logtype; + return RTE_PER_LCORE(log_cur_msg).logtype; } /* Dump log history to file */ void -rte_log_dump_history(FILE *out) +rte_log_dump_history(FILE *out __rte_unused) { - struct log_history_list tmp_log_history; - struct log_history *hist_buf; - unsigned i; - - /* only one dump at a time */ - rte_spinlock_lock(&log_dump_lock); - - /* save list, and re-init to allow logging during dump */ - rte_spinlock_lock(&log_list_lock); - tmp_log_history = log_history; - 
STAILQ_INIT(&log_history); - log_history_size = 0; - rte_spinlock_unlock(&log_list_lock); - - for (i=0; i<RTE_LOG_HISTORY; i++) { - - /* remove one message from history list */ - hist_buf = STAILQ_FIRST(&tmp_log_history); - - if (hist_buf == NULL) - break; - - STAILQ_REMOVE_HEAD(&tmp_log_history, next); - - /* write on stdout */ - if (fwrite(hist_buf->buf, hist_buf->size, 1, out) == 0) { - rte_mempool_mp_put(log_history_mp, hist_buf); - break; - } - - /* put back message structure in pool */ - rte_mempool_mp_put(log_history_mp, hist_buf); - } - fflush(out); - - rte_spinlock_unlock(&log_dump_lock); } /* @@ -273,17 +146,13 @@ rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) { int ret; FILE *f = rte_logs.file; - unsigned lcore_id; if ((level > rte_logs.level) || !(logtype & rte_logs.type)) return 0; /* save loglevel and logtype in a global per-lcore variable */ - lcore_id = rte_lcore_id(); - if (lcore_id < RTE_MAX_LCORE) { - log_cur_msg[lcore_id].loglevel = level; - log_cur_msg[lcore_id].logtype = logtype; - } + RTE_PER_LCORE(log_cur_msg).loglevel = level; + RTE_PER_LCORE(log_cur_msg).logtype = logtype; ret = vfprintf(f, format, ap); fflush(f); @@ -308,30 +177,17 @@ rte_log(uint32_t level, uint32_t logtype, const char *format, ...) } /* - * called by environment-specific log init function to initialize log - * history + * called by environment-specific log init function */ int rte_eal_common_log_init(FILE *default_log) { - STAILQ_INIT(&log_history); - - /* reserve RTE_LOG_HISTORY*2 elements, so we can dump and - * keep logging during this time */ - log_history_mp = rte_mempool_create(LOG_HISTORY_MP_NAME, RTE_LOG_HISTORY*2, - LOG_ELT_SIZE, 0, 0, - NULL, NULL, - NULL, NULL, - SOCKET_ID_ANY, 0); - - if ((log_history_mp == NULL) && - ((log_history_mp = rte_mempool_lookup(LOG_HISTORY_MP_NAME)) == NULL)){ - RTE_LOG(ERR, EAL, "%s(): cannot create log_history mempool\n", - __func__); - return -1; - } - default_log_stream = default_log; rte_openlog_stream(default_log); + +#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG + RTE_LOG(NOTICE, EAL, "Debug logs available - lower performance\n"); +#endif + return 0; } diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index 711c8457..5d28341f 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -119,6 +119,9 @@ find_heap_max_free_elem(int *s, unsigned align) } } + if (len < MALLOC_ELEM_OVERHEAD + align) + return 0; + return len - MALLOC_ELEM_OVERHEAD - align; } @@ -126,6 +129,7 @@ static const struct rte_memzone * memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, int socket_id, unsigned flags, unsigned align, unsigned bound) { + struct rte_memzone *mz; struct rte_mem_config *mcfg; size_t requested_len; int socket, i; @@ -148,6 +152,13 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, return NULL; } + if (strlen(name) >= sizeof(mz->name) - 1) { + RTE_LOG(DEBUG, EAL, "%s(): memzone <%s>: name too long\n", + __func__, name); + rte_errno = EEXIST; + return NULL; + } + /* if alignment is not a power of two */ if (align && !rte_is_power_of_2(align)) { RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__, @@ -189,8 +200,13 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, if (len == 0) { if (bound != 0) requested_len = bound; - else + else { requested_len = find_heap_max_free_elem(&socket_id, align); + if (requested_len == 0) { + rte_errno = ENOMEM; + return NULL; + } + } } if 
(socket_id == SOCKET_ID_ANY) @@ -223,7 +239,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, const struct malloc_elem *elem = malloc_elem_from_data(mz_addr); /* fill the zone in config */ - struct rte_memzone *mz = get_next_free_memzone(); + mz = get_next_free_memzone(); if (mz == NULL) { RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room " @@ -321,15 +337,19 @@ rte_memzone_free(const struct rte_memzone *mz) idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); idx = idx / sizeof(struct rte_memzone); - addr = mcfg->memzone[idx].addr; #ifdef RTE_LIBRTE_IVSHMEM /* * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot * free it. */ - if (mcfg->memzone[idx].ioremap_addr != 0) - ret = -EINVAL; + if (mcfg->memzone[idx].ioremap_addr != 0) { + rte_rwlock_write_unlock(&mcfg->mlock); + return -EINVAL; + } #endif + + addr = mcfg->memzone[idx].addr; + if (addr == NULL) ret = -EINVAL; else if (mcfg->memzone_cnt == 0) { diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 2b418d52..3efc90f0 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -139,7 +139,11 @@ eal_reset_internal_config(struct internal_config *internal_cfg) internal_cfg->syslog_facility = LOG_DAEMON; /* default value from build option */ +#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG + internal_cfg->log_level = RTE_LOG_INFO; +#else internal_cfg->log_level = RTE_LOG_LEVEL; +#endif internal_cfg->xen_dom0_support = 0; diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c index 40f49229..7248c38b 100644 --- a/lib/librte_eal/common/eal_common_pci.c +++ b/lib/librte_eal/common/eal_common_pci.c @@ -85,6 +85,19 @@ struct pci_driver_list pci_driver_list; struct pci_device_list pci_device_list; +#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" + +const char *pci_get_sysfs_path(void) +{ + const char *path = NULL; + + path = getenv("SYSFS_PCI_DEVICES"); + if (path == NULL) + return SYSFS_PCI_DEVICES; + + return path; +} + static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev) { struct rte_devargs *devargs; @@ -162,23 +175,26 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d if (id_table->subsystem_device_id != dev->id.subsystem_device_id && id_table->subsystem_device_id != PCI_ANY_ID) continue; + if (id_table->class_id != dev->id.class_id && + id_table->class_id != RTE_CLASS_ANY_ID) + continue; struct rte_pci_addr *loc = &dev->addr; - RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", loc->domain, loc->bus, loc->devid, loc->function, dev->numa_node); - RTE_LOG(DEBUG, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->name); - /* no initialization when blacklisted, return without error */ if (dev->devargs != NULL && dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) { - RTE_LOG(DEBUG, EAL, " Device is blacklisted, not initializing\n"); + RTE_LOG(INFO, EAL, " Device is blacklisted, not initializing\n"); return 1; } + RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->name); + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { /* map resources for devices that use igb_uio */ ret = rte_eal_pci_map_device(dev); diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c index f062e81d..367a6816 100644 --- 
a/lib/librte_eal/common/eal_common_pci_uio.c +++ b/lib/librte_eal/common/eal_common_pci_uio.c @@ -53,7 +53,7 @@ EAL_REGISTER_TAILQ(rte_uio_tailq) static int pci_uio_map_secondary(struct rte_pci_device *dev) { - int fd, i; + int fd, i, j; struct mapped_pci_resource *uio_res; struct mapped_pci_res_list *uio_res_list = RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); @@ -85,6 +85,16 @@ pci_uio_map_secondary(struct rte_pci_device *dev) "Cannot mmap device resource file %s to address: %p\n", uio_res->maps[i].path, uio_res->maps[i].addr); + if (mapaddr != MAP_FAILED) { + /* unmap addrs correctly mapped */ + for (j = 0; j < i; j++) + pci_unmap_resource( + uio_res->maps[j].addr, + (size_t)uio_res->maps[j].size); + /* unmap addr wrongly mapped */ + pci_unmap_resource(mapaddr, + (size_t)uio_res->maps[i].size); + } return -1; } } @@ -159,7 +169,8 @@ pci_uio_unmap(struct mapped_pci_resource *uio_res) for (i = 0; i != uio_res->nb_maps; i++) { pci_unmap_resource(uio_res->maps[i].addr, (size_t)uio_res->maps[i].size); - rte_free(uio_res->maps[i].path); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(uio_res->maps[i].path); } } diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 2342fa16..857dc3ea 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -49,9 +49,6 @@ int rte_eal_memzone_init(void); /** * Common log initialization function (private to eal). * - * Called by environment-specific log initialization function to initialize - * log history. - * * @param default_log * The default log stream to be used. * @return diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h index 988125b3..da6c233a 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h @@ -323,12 +323,6 @@ rte_memcpy(void *dst, const void *src, size_t n) return memcpy(dst, src, n); } -static inline void * -rte_memcpy_func(void *dst, const void *src, size_t n) -{ - return memcpy(dst, src, n); -} - #endif /* RTE_ARCH_ARM_NEON_MEMCPY */ #ifdef __cplusplus diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h index 917cdc1b..5db66b63 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h @@ -80,12 +80,6 @@ rte_mov256(uint8_t *dst, const uint8_t *src) #define rte_memcpy(d, s, n) memcpy((d), (s), (n)) -static inline void * -rte_memcpy_func(void *dst, const void *src, size_t n) -{ - return memcpy(dst, src, n); -} - #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h index 9b5b37ef..e606957c 100644 --- a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h +++ b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h @@ -80,12 +80,6 @@ rte_mov256(uint8_t *dst, const uint8_t *src) #define rte_memcpy(d, s, n) memcpy((d), (s), (n)) -static inline void * -rte_memcpy_func(void *dst, const void *src, size_t n) -{ - return memcpy(dst, src, n); -} - #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h index f463ab30..413035e7 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h @@ -363,71 +363,26 @@ rte_mov128(uint8_t 
*dst, const uint8_t *src) } /** - * Copy 256 bytes from one location to another, - * locations should not overlap. - */ -static inline void -rte_mov256(uint8_t *dst, const uint8_t *src) -{ - rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); - rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32); - rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32); - rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32); - rte_mov32((uint8_t *)dst + 4 * 32, (const uint8_t *)src + 4 * 32); - rte_mov32((uint8_t *)dst + 5 * 32, (const uint8_t *)src + 5 * 32); - rte_mov32((uint8_t *)dst + 6 * 32, (const uint8_t *)src + 6 * 32); - rte_mov32((uint8_t *)dst + 7 * 32, (const uint8_t *)src + 7 * 32); -} - -/** - * Copy 64-byte blocks from one location to another, - * locations should not overlap. - */ -static inline void -rte_mov64blocks(uint8_t *dst, const uint8_t *src, size_t n) -{ - __m256i ymm0, ymm1; - - while (n >= 64) { - ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32)); - n -= 64; - ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32)); - src = (const uint8_t *)src + 64; - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0); - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1); - dst = (uint8_t *)dst + 64; - } -} - -/** - * Copy 256-byte blocks from one location to another, + * Copy 128-byte blocks from one location to another, * locations should not overlap. */ static inline void -rte_mov256blocks(uint8_t *dst, const uint8_t *src, size_t n) +rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n) { - __m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7; + __m256i ymm0, ymm1, ymm2, ymm3; - while (n >= 256) { + while (n >= 128) { ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32)); - n -= 256; + n -= 128; ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32)); ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32)); ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32)); - ymm4 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 4 * 32)); - ymm5 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 5 * 32)); - ymm6 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 6 * 32)); - ymm7 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 7 * 32)); - src = (const uint8_t *)src + 256; + src = (const uint8_t *)src + 128; _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0); _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1); _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2); _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3); - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 4 * 32), ymm4); - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 5 * 32), ymm5); - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 6 * 32), ymm6); - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 7 * 32), ymm7); - dst = (uint8_t *)dst + 256; + dst = (uint8_t *)dst + 128; } } @@ -466,51 +421,56 @@ rte_memcpy(void *dst, const void *src, size_t n) } /** - * Fast way when copy size doesn't exceed 512 bytes + * Fast way when copy size doesn't exceed 256 bytes */ if (n <= 32) { rte_mov16((uint8_t *)dst, (const uint8_t *)src); - rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 48) { + rte_mov16((uint8_t *)dst, (const 
uint8_t *)src); + rte_mov16((uint8_t *)dst + 16, (const uint8_t *)src + 16); + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); return ret; } if (n <= 64) { rte_mov32((uint8_t *)dst, (const uint8_t *)src); - rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); return ret; } - if (n <= 512) { - if (n >= 256) { - n -= 256; - rte_mov256((uint8_t *)dst, (const uint8_t *)src); - src = (const uint8_t *)src + 256; - dst = (uint8_t *)dst + 256; - } + if (n <= 256) { if (n >= 128) { n -= 128; rte_mov128((uint8_t *)dst, (const uint8_t *)src); src = (const uint8_t *)src + 128; dst = (uint8_t *)dst + 128; } +COPY_BLOCK_128_BACK31: if (n >= 64) { n -= 64; rte_mov64((uint8_t *)dst, (const uint8_t *)src); src = (const uint8_t *)src + 64; dst = (uint8_t *)dst + 64; } -COPY_BLOCK_64_BACK31: if (n > 32) { rte_mov32((uint8_t *)dst, (const uint8_t *)src); - rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); return ret; } if (n > 0) { - rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); } return ret; } /** - * Make store aligned when copy size exceeds 512 bytes + * Make store aligned when copy size exceeds 256 bytes */ dstofss = (uintptr_t)dst & 0x1F; if (dstofss > 0) { @@ -522,35 +482,19 @@ COPY_BLOCK_64_BACK31: } /** - * Copy 256-byte blocks. - * Use copy block function for better instruction order control, - * which is important when load is unaligned. + * Copy 128-byte blocks */ - rte_mov256blocks((uint8_t *)dst, (const uint8_t *)src, n); + rte_mov128blocks((uint8_t *)dst, (const uint8_t *)src, n); bits = n; - n = n & 255; + n = n & 127; bits -= n; src = (const uint8_t *)src + bits; dst = (uint8_t *)dst + bits; /** - * Copy 64-byte blocks. - * Use copy block function for better instruction order control, - * which is important when load is unaligned. 
- */ - if (n >= 64) { - rte_mov64blocks((uint8_t *)dst, (const uint8_t *)src, n); - bits = n; - n = n & 63; - bits -= n; - src = (const uint8_t *)src + bits; - dst = (uint8_t *)dst + bits; - } - - /** * Copy whatever left */ - goto COPY_BLOCK_64_BACK31; + goto COPY_BLOCK_128_BACK31; } #else /* RTE_MACHINE_CPUFLAG */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h index d9356419..0649f794 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_rtm.h +++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h @@ -50,11 +50,10 @@ void rte_xend(void) asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory"); } -static __attribute__((__always_inline__)) inline -void rte_xabort(const unsigned int status) -{ - asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory"); -} +/* not an inline function to workaround a clang bug with -O0 */ +#define rte_xabort(status) do { \ + asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory"); \ +} while (0) static __attribute__((__always_inline__)) inline int rte_xtest(void) diff --git a/lib/librte_eal/common/include/generic/rte_memcpy.h b/lib/librte_eal/common/include/generic/rte_memcpy.h index 03e84773..afb0afe4 100644 --- a/lib/librte_eal/common/include/generic/rte_memcpy.h +++ b/lib/librte_eal/common/include/generic/rte_memcpy.h @@ -134,11 +134,4 @@ rte_memcpy(void *dst, const void *src, size_t n); #endif /* __DOXYGEN__ */ -/* - * memcpy() function used by rte_memcpy macro - */ -static inline void * -rte_memcpy_func(void *dst, const void *src, size_t n) __attribute__((always_inline)); - - #endif /* _RTE_MEMCPY_H_ */ diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h index 94129fab..cab6fb4c 100644 --- a/lib/librte_eal/common/include/rte_debug.h +++ b/lib/librte_eal/common/include/rte_debug.h @@ -43,6 +43,9 @@ * the implementation is architecture-specific. */ +#include "rte_log.h" +#include "rte_branch_prediction.h" + #ifdef __cplusplus extern "C" { #endif @@ -76,8 +79,13 @@ void rte_dump_registers(void); #define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy") #define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__) +#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG +#define RTE_ASSERT(exp) RTE_VERIFY(exp) +#else +#define RTE_ASSERT(exp) do {} while (0) +#endif #define RTE_VERIFY(exp) do { \ - if (!(exp)) \ + if (unlikely(!(exp))) \ rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \ } while (0) diff --git a/lib/librte_eal/common/include/rte_keepalive.h b/lib/librte_eal/common/include/rte_keepalive.h index 10dac2e0..88ad8e48 100644 --- a/lib/librte_eal/common/include/rte_keepalive.h +++ b/lib/librte_eal/common/include/rte_keepalive.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright 2015 Intel Shannon Ltd. All rights reserved. + * Copyright 2015-2016 Intel Shannon Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -48,17 +48,47 @@ #define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE #endif +enum rte_keepalive_state { + RTE_KA_STATE_UNUSED = 0, + RTE_KA_STATE_ALIVE = 1, + RTE_KA_STATE_MISSING = 4, + RTE_KA_STATE_DEAD = 2, + RTE_KA_STATE_GONE = 3, + RTE_KA_STATE_DOZING = 5, + RTE_KA_STATE_SLEEP = 6 +}; + /** * Keepalive failure callback. * * Receives a data pointer passed to rte_keepalive_create() and the id of the * failed core. 
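The rte_debug.h hunk above adds RTE_ASSERT(), which expands to RTE_VERIFY() only when RTE_LOG_LEVEL is at least RTE_LOG_DEBUG and to a no-op otherwise, while RTE_VERIFY() now wraps its condition in unlikely() and still panics on failure in every build. A minimal usage sketch; the helper and its arguments are hypothetical, not part of this patch:

    #include <rte_debug.h>

    /* Hypothetical ring enqueue helper. */
    static void
    enqueue_one(int *ring, unsigned int size, unsigned int *head, int value)
    {
        /* Always compiled in; panics if the ring is missing. */
        RTE_VERIFY(ring != NULL);
        /* Checked only in debug builds (RTE_LOG_LEVEL >= RTE_LOG_DEBUG). */
        RTE_ASSERT(*head < size);
        ring[*head] = value;
        *head = (*head + 1) % size;
    }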
+ * @param data Data pointer passed to rte_keepalive_create() + * @param id_core ID of the core that has failed */ typedef void (*rte_keepalive_failure_callback_t)( void *data, const int id_core); /** + * Keepalive relay callback. + * + * Receives a data pointer passed to rte_keepalive_register_relay_callback(), + * the id of the core for which state is to be forwarded, and details of the + * current core state. + * @param data Data pointer passed to rte_keepalive_register_relay_callback() + * @param id_core ID of the core for which state is being reported + * @param core_state The current state of the core + * @param Timestamp of when core was last seen alive + */ +typedef void (*rte_keepalive_relay_callback_t)( + void *data, + const int id_core, + enum rte_keepalive_state core_state, + uint64_t last_seen + ); + +/** * Keepalive state structure. * @internal */ @@ -105,4 +135,35 @@ void rte_keepalive_register_core(struct rte_keepalive *keepcfg, void rte_keepalive_mark_alive(struct rte_keepalive *keepcfg); +/** + * Per-core sleep-time indication. + * @param *keepcfg + * Keepalive structure pointer + * + * If CPU idling is enabled, this function needs to be called from within + * the main process loop of the LCore going to sleep, in order to avoid + * the LCore being mis-detected as dead. + */ +void +rte_keepalive_mark_sleep(struct rte_keepalive *keepcfg); + +/** + * Registers a 'live core' callback. + * + * The complement of the 'dead core' callback. This is called when a + * core is known to be alive, and is intended for cases when an app + * needs to know 'liveness' beyond just knowing when a core has died. + * + * @param *keepcfg + * Keepalive structure pointer + * @param callback + * Function called upon detection of a dead core. + * @param data + * Data pointer to be passed to function callback. + */ +void +rte_keepalive_register_relay_callback(struct rte_keepalive *keepcfg, + rte_keepalive_relay_callback_t callback, + void *data); + #endif /* _KEEPALIVE_H_ */ diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h index ac151302..fe7b5865 100644 --- a/lib/librte_eal/common/include/rte_lcore.h +++ b/lib/librte_eal/common/include/rte_lcore.h @@ -250,23 +250,16 @@ void rte_thread_get_affinity(rte_cpuset_t *cpusetp); /** * Set thread names. * - * Macro to wrap `pthread_setname_np()` with a glibc version check. - * Only glibc >= 2.12 supports this feature. + * @note It fails with glibc < 2.12. * - * This macro only used for Linux, BSD does direct libc call. - * BSD libc version of function is `pthread_set_name_np()`. + * @param id + * Thread id. + * @param name + * Thread name to set. + * @return + * On success, return 0; otherwise return a negative value. */ -#if defined(__DOXYGEN__) -#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__) -#endif - -#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) -#if __GLIBC_PREREQ(2, 12) -#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__) -#else -#define rte_thread_setname(...) 0 -#endif -#endif +int rte_thread_setname(pthread_t id, const char *name); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h index 2e47e7f6..b1add04c 100644 --- a/lib/librte_eal/common/include/rte_log.h +++ b/lib/librte_eal/common/include/rte_log.h @@ -42,6 +42,8 @@ * This file provides a log API to RTE applications. 
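The rte_keepalive.h hunks above expose the per-core states, a relay callback type, rte_keepalive_register_relay_callback() and rte_keepalive_mark_sleep(). A sketch of how an application might hook in the relay callback, assuming keepcfg was obtained from rte_keepalive_create(); the handler and function names are illustrative:

    #include <inttypes.h>
    #include <stdio.h>
    #include <rte_keepalive.h>

    /* Illustrative relay handler: log every core state the monitor reports. */
    static void
    relay_core_state(void *data, const int id_core,
            enum rte_keepalive_state core_state, uint64_t last_seen)
    {
        fprintf((FILE *)data, "lcore %d: state %d, last alive at tsc %" PRIu64 "\n",
                id_core, (int)core_state, last_seen);
    }

    static void
    enable_state_relay(struct rte_keepalive *keepcfg)
    {
        /* Forward per-core liveness on every dispatch, not only on death. */
        rte_keepalive_register_relay_callback(keepcfg, relay_core_state, stderr);
    }

A worker that is about to idle should call rte_keepalive_mark_sleep(keepcfg) from its main loop, so the monitor marks it dozing/sleeping instead of missing.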
*/ +#include "rte_common.h" /* for __rte_deprecated macro */ + #ifdef __cplusplus extern "C" { #endif @@ -179,22 +181,27 @@ int rte_log_cur_msg_loglevel(void); int rte_log_cur_msg_logtype(void); /** + * @deprecated * Enable or disable the history (enabled by default) * * @param enable * true to enable, or 0 to disable history. */ +__rte_deprecated void rte_log_set_history(int enable); /** + * @deprecated * Dump the log history to a file * * @param f * A pointer to a file for output */ +__rte_deprecated void rte_log_dump_history(FILE *f); /** + * @deprecated * Add a log message to the history. * * This function can be called from a user-defined log stream. It adds @@ -209,6 +216,7 @@ void rte_log_dump_history(FILE *f); * - 0: Success. * - (-ENOBUFS) if there is no room to store the message. */ +__rte_deprecated int rte_log_add_in_history(const char *buf, size_t size); /** diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index f8dbece0..06611093 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -200,21 +200,22 @@ unsigned rte_memory_get_nrank(void); int rte_xen_dom0_supported(void); /**< Internal use only - phys to virt mapping for xen */ -phys_addr_t rte_xen_mem_phy2mch(uint32_t, const phys_addr_t); +phys_addr_t rte_xen_mem_phy2mch(int32_t, const phys_addr_t); /** * Return the physical address of elt, which is an element of the pool mp. * * @param memseg_id - * The mempool is from which memory segment. + * Identifier of the memory segment owning the physical address. If + * set to -1, find it automatically. * @param phy_addr * physical address of elt. * * @return - * The physical address or error. + * The physical address or RTE_BAD_PHYS_ADDR on error. */ static inline phys_addr_t -rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr) +rte_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr) { if (rte_xen_dom0_supported()) return rte_xen_mem_phy2mch(memseg_id, phy_addr); @@ -250,7 +251,7 @@ static inline int rte_xen_dom0_supported(void) } static inline phys_addr_t -rte_mem_phy2mch(uint32_t memseg_id __rte_unused, const phys_addr_t phy_addr) +rte_mem_phy2mch(int32_t memseg_id __rte_unused, const phys_addr_t phy_addr) { return phy_addr; } diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h index e692094e..fa749626 100644 --- a/lib/librte_eal/common/include/rte_pci.h +++ b/lib/librte_eal/common/include/rte_pci.h @@ -91,7 +91,7 @@ extern struct pci_driver_list pci_driver_list; /**< Global list of PCI drivers. extern struct pci_device_list pci_device_list; /**< Global list of PCI devices. */ /** Pathname of PCI devices directory. */ -#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" +const char *pci_get_sysfs_path(void); /** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ #define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 @@ -105,9 +105,6 @@ extern struct pci_device_list pci_device_list; /**< Global list of PCI devices. /** Nb. of values in PCI resource format. */ #define PCI_RESOURCE_FMT_NVAL 3 -/** IO resource type: memory address space */ -#define IORESOURCE_MEM 0x00000200 - /** * A structure describing a PCI resource. */ @@ -125,6 +122,7 @@ struct rte_pci_resource { * table of these IDs for each device that it supports. */ struct rte_pci_id { + uint32_t class_id; /**< Class ID (class, subclass, pi) or RTE_CLASS_ANY_ID. */ uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. 
*/ uint16_t device_id; /**< Device ID or PCI_ANY_ID. */ uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */ @@ -170,10 +168,12 @@ struct rte_pci_device { /** Any PCI device identifier (vendor, device, ...) */ #define PCI_ANY_ID (0xffff) +#define RTE_CLASS_ANY_ID (0xffffff) #ifdef __cplusplus /** C++ macro used to help building up tables of device IDs */ #define RTE_PCI_DEVICE(vend, dev) \ + RTE_CLASS_ANY_ID, \ (vend), \ (dev), \ PCI_ANY_ID, \ @@ -181,6 +181,7 @@ struct rte_pci_device { #else /** Macro used to help building up tables of device IDs */ #define RTE_PCI_DEVICE(vend, dev) \ + .class_id = RTE_CLASS_ANY_ID, \ .vendor_id = (vend), \ .device_id = (dev), \ .subsystem_vendor_id = PCI_ANY_ID, \ @@ -213,8 +214,6 @@ struct rte_pci_driver { /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ #define RTE_PCI_DRV_NEED_MAPPING 0x0001 -/** Device driver must be registered several times until failure - deprecated */ -#pragma GCC poison RTE_PCI_DRV_MULTIPLE /** Device needs to be unbound even if no module is provided */ #define RTE_PCI_DRV_FORCE_UNBIND 0x0004 /** Device driver supports link state interrupt */ @@ -520,15 +519,17 @@ int rte_eal_pci_write_config(const struct rte_pci_device *device, struct rte_pci_ioport { struct rte_pci_device *dev; uint64_t base; + uint64_t len; /* only filled for memory mapped ports */ }; /** - * Initialises a rte_pci_ioport object for a pci device io resource. + * Initialize a rte_pci_ioport object for a pci device io resource. + * * This object is then used to gain access to those io resources (see below). * * @param dev - * A pointer to a rte_pci_device structure describing the device. - * to use + * A pointer to a rte_pci_device structure describing the device + * to use. * @param bar * Index of the io pci resource we want to access. * @param p @@ -544,6 +545,8 @@ int rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, * * @param p * The rte_pci_ioport object to be uninitialized. + * @return + * 0 on success, negative on error. */ int rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p); @@ -577,20 +580,6 @@ void rte_eal_pci_ioport_read(struct rte_pci_ioport *p, void rte_eal_pci_ioport_write(struct rte_pci_ioport *p, const void *data, size_t len, off_t offset); -#ifdef RTE_PCI_CONFIG -#include <rte_common.h> -/** - * Set special config space registers for performance purpose. - * It is deprecated, as all configurations have been moved into - * each PMDs respectively. - * - * @param dev - * A pointer to a rte_pci_device structure describing the device - * to use - */ -void pci_config_space_set(struct rte_pci_device *dev) __rte_deprecated; -#endif /* RTE_PCI_CONFIG */ - #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h b/lib/librte_eal/common/include/rte_pci_dev_ids.h index cf7b5487..af39fbbd 100644 --- a/lib/librte_eal/common/include/rte_pci_dev_ids.h +++ b/lib/librte_eal/common/include/rte_pci_dev_ids.h @@ -63,11 +63,12 @@ * This file contains a list of the PCI device IDs recognised by DPDK, which * can be used to fill out an array of structures describing the devices. * - * Currently four families of devices are recognised: those supported by the - * IGB driver, by EM driver, those supported by the IXGBE driver, and by virtio - * driver which is a para virtualization driver running in guest virtual machine. 
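The rte_pci.h hunks above add a class_id field (class, subclass, prog-if packed into 24 bits) and RTE_CLASS_ANY_ID, and the RTE_PCI_DEVICE() macro now fills class_id with the wildcard. A hypothetical id table that matches on class code alone, plus one exact vendor/device pair; the table name and the chosen IDs are illustrative:

    #include <rte_pci.h>

    /* Hypothetical id table: the first entry matches every NVMe controller by
     * class code (class 0x01, subclass 0x08, prog-if 0x02 -> 0x010802), the
     * second matches one exact vendor/device pair with the class wildcarded. */
    static const struct rte_pci_id my_pci_id_map[] = {
        {
            .class_id = 0x010802,
            .vendor_id = PCI_ANY_ID,
            .device_id = PCI_ANY_ID,
            .subsystem_vendor_id = PCI_ANY_ID,
            .subsystem_device_id = PCI_ANY_ID,
        },
        { RTE_PCI_DEVICE(0x8086, 0x10fb) }, /* class_id defaults to RTE_CLASS_ANY_ID */
        { .vendor_id = 0 }, /* sentinel */
    };

Note also that the public SYSFS_PCI_DEVICES macro is replaced by pci_get_sysfs_path(), which, per the eal_common_pci.c hunk earlier, honors a SYSFS_PCI_DEVICES environment variable override.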
- * The inclusion of these in an array built using this file depends on the - * definition of + * Currently five families of devices are recognised: those supported by the + * IGB driver, by EM driver, those supported by the IXGBE driver, those + * supported by the BNXT driver, and by virtio driver which is a para + * virtualization driver running in guest virtual machine. The inclusion of + * these in an array built using this file depends on the definition of + * RTE_PCI_DEV_ID_DECL_BNXT * RTE_PCI_DEV_ID_DECL_EM * RTE_PCI_DEV_ID_DECL_IGB * RTE_PCI_DEV_ID_DECL_IGBVF @@ -152,6 +153,10 @@ #define RTE_PCI_DEV_ID_DECL_BNX2XVF(vend, dev) #endif +#ifndef RTE_PCI_DEV_ID_DECL_BNXT +#define RTE_PCI_DEV_ID_DECL_BNXT(vend, dev) +#endif + #ifndef PCI_VENDOR_ID_INTEL /** Vendor ID used by Intel devices */ #define PCI_VENDOR_ID_INTEL 0x8086 @@ -446,12 +451,14 @@ RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP) #define IXGBE_DEV_ID_X550EM_A_KR 0x15C2 #define IXGBE_DEV_ID_X550EM_A_KR_L 0x15C3 #define IXGBE_DEV_ID_X550EM_A_SFP_N 0x15C4 -#define IXGBE_DEV_ID_X550EM_A_1G_T 0x15C6 -#define IXGBE_DEV_ID_X550EM_A_1G_T_L 0x15C7 +#define IXGBE_DEV_ID_X550EM_A_SGMII 0x15C6 +#define IXGBE_DEV_ID_X550EM_A_SGMII_L 0x15C7 #define IXGBE_DEV_ID_X550EM_A_10G_T 0x15C8 #define IXGBE_DEV_ID_X550EM_A_QSFP 0x15CA #define IXGBE_DEV_ID_X550EM_A_QSFP_N 0x15CC #define IXGBE_DEV_ID_X550EM_A_SFP 0x15CE +#define IXGBE_DEV_ID_X550EM_A_1G_T 0x15E4 +#define IXGBE_DEV_ID_X550EM_A_1G_T_L 0x15E5 #define IXGBE_DEV_ID_X550EM_X_KX4 0x15AA #define IXGBE_DEV_ID_X550EM_X_KR 0x15AB @@ -506,12 +513,14 @@ RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T1) RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR) RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR_L) RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L) RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_10G_T) RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP) RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP_N) RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L) RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4) RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR) @@ -532,12 +541,16 @@ RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS) #define I40E_DEV_ID_20G_KR2 0x1587 #define I40E_DEV_ID_20G_KR2_A 0x1588 #define I40E_DEV_ID_10G_BASE_T4 0x1589 +#define I40E_DEV_ID_25G_B 0x158A +#define I40E_DEV_ID_25G_SFP28 0x158B #define I40E_DEV_ID_X722_A0 0x374C #define I40E_DEV_ID_KX_X722 0x37CE #define I40E_DEV_ID_QSFP_X722 0x37CF #define I40E_DEV_ID_SFP_X722 0x37D0 #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 +#define I40E_DEV_ID_SFP_I_X722 0x37D3 +#define I40E_DEV_ID_QSFP_I_X722 0x37D4 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_XL710) RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QEMU) @@ -550,12 +563,16 @@ 
RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T) RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2) RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2_A) RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T4) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_25G_B) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_25G_SFP28) RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_A0) RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_X722) RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_X722) RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_X722) RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_1G_BASE_T_X722) RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T_X722) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_I_X722) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_I_X722) /*************** Physical FM10K devices from fm10k_type.h ***************/ @@ -686,6 +703,30 @@ RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811_MF) RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_MF) #endif +/****************** Broadcom bnxt devices ******************/ + +#define BROADCOM_DEV_ID_57301 0x16c8 +#define BROADCOM_DEV_ID_57302 0x16c9 +#define BROADCOM_DEV_ID_57304_PF 0x16ca +#define BROADCOM_DEV_ID_57304_VF 0x16cb +#define BROADCOM_DEV_ID_57402 0x16d0 +#define BROADCOM_DEV_ID_57404 0x16d1 +#define BROADCOM_DEV_ID_57406_PF 0x16d2 +#define BROADCOM_DEV_ID_57406_VF 0x16d3 +#define BROADCOM_DEV_ID_57406_MF 0x16d4 +#define BROADCOM_DEV_ID_57314 0x16df + +RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57301) +RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57302) +RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57304_PF) +RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57304_VF) +RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57402) +RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57404) +RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57406_PF) +RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57406_VF) +RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57406_MF) +RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57314) + /* * Undef all RTE_PCI_DEV_ID_DECL_* here. */ @@ -702,3 +743,4 @@ RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_MF) #undef RTE_PCI_DEV_ID_DECL_VMXNET3 #undef RTE_PCI_DEV_ID_DECL_FM10K #undef RTE_PCI_DEV_ID_DECL_FM10KVF +#undef RTE_PCI_DEV_ID_DECL_BNXT diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index f8ea6d73..dbe09975 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -60,7 +60,7 @@ extern "C" { /** * Minor version/month number i.e. the mm in yy.mm.z */ -#define RTE_VER_MONTH 4 +#define RTE_VER_MONTH 7 /** * Patch level number i.e. 
the z in yy.mm.z @@ -70,14 +70,14 @@ extern "C" { /** * Extra string to be appended to version number */ -#define RTE_VER_SUFFIX "" +#define RTE_VER_SUFFIX "-rc" /** * Patch release number * 0-15 = release candidates * 16 = release */ -#define RTE_VER_RELEASE 16 +#define RTE_VER_RELEASE 1 /** * Macro to compute a version number usable for comparisons diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c index 23363ec1..9765d1bd 100644 --- a/lib/librte_eal/common/rte_keepalive.c +++ b/lib/librte_eal/common/rte_keepalive.c @@ -42,12 +42,8 @@ struct rte_keepalive { /** Core Liveness. */ - enum rte_keepalive_state { - ALIVE = 1, - MISSING = 0, - DEAD = 2, - GONE = 3 - } __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES]; + enum rte_keepalive_state __rte_cache_aligned state_flags[ + RTE_KEEPALIVE_MAXCORES]; /** Last-seen-alive timestamps */ uint64_t last_alive[RTE_KEEPALIVE_MAXCORES]; @@ -68,6 +64,15 @@ struct rte_keepalive { void *callback_data; uint64_t tsc_initial; uint64_t tsc_mhz; + + /** Core state relay handler. */ + rte_keepalive_relay_callback_t relay_callback; + + /** + * Core state relay handler app data. + * Pointer is passed to live core handler. + */ + void *relay_callback_data; }; static void @@ -92,16 +97,18 @@ rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer, continue; switch (keepcfg->state_flags[idx_core]) { - case ALIVE: /* Alive */ - keepcfg->state_flags[idx_core] = MISSING; + case RTE_KA_STATE_UNUSED: + break; + case RTE_KA_STATE_ALIVE: /* Alive */ + keepcfg->state_flags[idx_core] = RTE_KA_STATE_MISSING; keepcfg->last_alive[idx_core] = rte_rdtsc(); break; - case MISSING: /* MIA */ + case RTE_KA_STATE_MISSING: /* MIA */ print_trace("Core MIA. ", keepcfg, idx_core); - keepcfg->state_flags[idx_core] = DEAD; + keepcfg->state_flags[idx_core] = RTE_KA_STATE_DEAD; break; - case DEAD: /* Dead */ - keepcfg->state_flags[idx_core] = GONE; + case RTE_KA_STATE_DEAD: /* Dead */ + keepcfg->state_flags[idx_core] = RTE_KA_STATE_GONE; print_trace("Core died. 
", keepcfg, idx_core); if (keepcfg->callback) keepcfg->callback( @@ -109,9 +116,22 @@ rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer, idx_core ); break; - case GONE: /* Buried */ + case RTE_KA_STATE_GONE: /* Buried */ + break; + case RTE_KA_STATE_DOZING: /* Core going idle */ + keepcfg->state_flags[idx_core] = RTE_KA_STATE_SLEEP; + keepcfg->last_alive[idx_core] = rte_rdtsc(); + break; + case RTE_KA_STATE_SLEEP: /* Idled core */ break; } + if (keepcfg->relay_callback) + keepcfg->relay_callback( + keepcfg->relay_callback_data, + idx_core, + keepcfg->state_flags[idx_core], + keepcfg->last_alive[idx_core] + ); } } @@ -133,11 +153,19 @@ rte_keepalive_create(rte_keepalive_failure_callback_t callback, return keepcfg; } +void rte_keepalive_register_relay_callback(struct rte_keepalive *keepcfg, + rte_keepalive_relay_callback_t callback, + void *data) +{ + keepcfg->relay_callback = callback; + keepcfg->relay_callback_data = data; +} + void rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core) { if (id_core < RTE_KEEPALIVE_MAXCORES) { - keepcfg->active_cores[id_core] = 1; + keepcfg->active_cores[id_core] = RTE_KA_STATE_ALIVE; keepcfg->last_alive[id_core] = rte_rdtsc(); } } @@ -145,5 +173,11 @@ rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core) void rte_keepalive_mark_alive(struct rte_keepalive *keepcfg) { - keepcfg->state_flags[rte_lcore_id()] = ALIVE; + keepcfg->state_flags[rte_lcore_id()] = RTE_KA_STATE_ALIVE; +} + +void +rte_keepalive_mark_sleep(struct rte_keepalive *keepcfg) +{ + keepcfg->state_flags[rte_lcore_id()] = RTE_KA_STATE_DOZING; } diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index e1093619..30b30f33 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -44,9 +44,12 @@ VPATH += $(RTE_SDK)/lib/librte_eal/common CFLAGS += -I$(SRCDIR)/include CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include +ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y) +# workaround for circular dependency eal -> ivshmem -> ring/mempool -> eal CFLAGS += -I$(RTE_SDK)/lib/librte_ring CFLAGS += -I$(RTE_SDK)/lib/librte_mempool CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem +endif CFLAGS += $(WERROR_FLAGS) -O3 LDLIBS += -ldl diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 8aafd519..543ef869 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -465,24 +465,6 @@ eal_parse_vfio_intr(const char *mode) return -1; } -static inline size_t -eal_get_hugepage_mem_size(void) -{ - uint64_t size = 0; - unsigned i, j; - - for (i = 0; i < internal_config.num_hugepage_sizes; i++) { - struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) { - for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { - size += hpi->hugepage_sz * hpi->num_pages[j]; - } - } - } - - return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX; -} - /* Parse the arguments for --log-level only */ static void eal_log_level_parse(int argc, char **argv) @@ -715,12 +697,8 @@ rte_eal_iopl_init(void) #if defined(RTE_ARCH_X86) if (iopl(3) != 0) return -1; - return 0; -#elif defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) - return 0; /* iopl syscall not supported for ARM/ARM64 */ -#else - return -1; #endif + return 0; } /* Launch threads, called at application init(). 
*/ @@ -766,8 +744,6 @@ rte_eal_init(int argc, char **argv) if (internal_config.memory == 0 && internal_config.force_sockets == 0) { if (internal_config.no_hugetlbfs) internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE; - else - internal_config.memory = eal_get_hugepage_mem_size(); } if (internal_config.vmware_tsc_map == 1) { @@ -863,7 +839,7 @@ rte_eal_init(int argc, char **argv) ret = rte_thread_setname(lcore_config[i].thread_id, thread_name); if (ret != 0) - RTE_LOG(ERR, EAL, + RTE_LOG(DEBUG, EAL, "Cannot set name for lcore thread\n"); } diff --git a/lib/librte_eal/linuxapp/eal/eal_debug.c b/lib/librte_eal/linuxapp/eal/eal_debug.c index 907fbfa7..5fbc17c5 100644 --- a/lib/librte_eal/linuxapp/eal/eal_debug.c +++ b/lib/librte_eal/linuxapp/eal/eal_debug.c @@ -77,9 +77,6 @@ void __rte_panic(const char *funcname, const char *format, ...) { va_list ap; - /* disable history */ - rte_log_set_history(0); - rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname); va_start(ap, format); rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); @@ -98,9 +95,6 @@ rte_exit(int exit_code, const char *format, ...) { va_list ap; - /* disable history */ - rte_log_set_history(0); - if (exit_code != 0) RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n" " Cause: ", exit_code); diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 06b26a9e..47a3b20a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -57,10 +57,8 @@ #include <rte_lcore.h> #include <rte_atomic.h> #include <rte_branch_prediction.h> -#include <rte_ring.h> #include <rte_debug.h> #include <rte_log.h> -#include <rte_mempool.h> #include <rte_pci.h> #include <rte_malloc.h> #include <rte_errno.h> @@ -889,7 +887,7 @@ rte_eal_intr_init(void) "eal-intr-thread"); ret_1 = rte_thread_setname(intr_thread, thread_name); if (ret_1 != 0) - RTE_LOG(ERR, EAL, + RTE_LOG(DEBUG, EAL, "Failed to set thread name for interrupt handling\n"); } diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c index 07aec694..67b3caf2 100644 --- a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c +++ b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c @@ -49,7 +49,6 @@ #include <rte_string_fns.h> #include <rte_errno.h> #include <rte_ring.h> -#include <rte_mempool.h> #include <rte_malloc.h> #include <rte_common.h> #include <rte_ivshmem.h> @@ -184,21 +183,21 @@ overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2) i_end2 = mz2->ioremap_addr + mz2->len; /* check for overlap in virtual addresses */ - if (start1 > start2 && start1 < end2) + if (start1 >= start2 && start1 < end2) result |= VIRT; if (start2 >= start1 && start2 < end1) result |= VIRT; /* check for overlap in physical addresses */ - if (p_start1 > p_start2 && p_start1 < p_end2) + if (p_start1 >= p_start2 && p_start1 < p_end2) result |= PHYS; - if (p_start2 > p_start1 && p_start2 < p_end1) + if (p_start2 >= p_start1 && p_start2 < p_end1) result |= PHYS; /* check for overlap in ioremap addresses */ - if (i_start1 > i_start2 && i_start1 < i_end2) + if (i_start1 >= i_start2 && i_start1 < i_end2) result |= IOREMAP; - if (i_start2 > i_start1 && i_start2 < i_end1) + if (i_start2 >= i_start1 && i_start2 < i_end1) result |= IOREMAP; return result; diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c index 0b133c3e..d3911004 100644 --- a/lib/librte_eal/linuxapp/eal/eal_log.c +++ b/lib/librte_eal/linuxapp/eal/eal_log.c @@ -50,8 
+50,7 @@ #include "eal_private.h" /* - * default log function, used once mempool (hence log history) is - * available + * default log function */ static ssize_t console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) @@ -60,9 +59,6 @@ console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) ssize_t ret; uint32_t loglevel; - /* add this log in history */ - rte_log_add_in_history(buf, size); - /* write on stdout */ ret = fwrite(buf, 1, size, stdout); fflush(stdout); @@ -110,8 +106,7 @@ rte_eal_log_init(const char *id, int facility) /* early logs */ /* - * early log function, used during boot when mempool (hence log - * history) is not available + * early log function, used before rte_eal_log_init */ static ssize_t early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 5b9132c6..5578c254 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -80,6 +80,8 @@ #include <errno.h> #include <sys/ioctl.h> #include <sys/time.h> +#include <signal.h> +#include <setjmp.h> #include <rte_log.h> #include <rte_memory.h> @@ -309,6 +311,22 @@ get_virtual_area(size_t *size, size_t hugepage_sz) return addr; } +static sigjmp_buf huge_jmpenv; + +static void huge_sigbus_handler(int signo __rte_unused) +{ + siglongjmp(huge_jmpenv, 1); +} + +/* Put setjmp into a wrap method to avoid compiling error. Any non-volatile, + * non-static local variable in the stack frame calling sigsetjmp might be + * clobbered by a call to longjmp. + */ +static int huge_wrap_sigsetjmp(void) +{ + return sigsetjmp(huge_jmpenv, 1); +} + /* * Mmap all hugepages of hugepage table: it first open a file in * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the @@ -316,7 +334,7 @@ get_virtual_area(size_t *size, size_t hugepage_sz) * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to * map continguous physical blocks in contiguous virtual blocks. */ -static int +static unsigned map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, int orig) { @@ -394,9 +412,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, /* try to create hugepage file */ fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755); if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, + RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); - return -1; + return i; } /* map the segment, and populate page tables, @@ -404,10 +422,10 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); if (virtaddr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, + RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); close(fd); - return -1; + return i; } if (orig) { @@ -417,12 +435,33 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, hugepg_tbl[i].final_va = virtaddr; } + if (orig) { + /* In linux, hugetlb limitations, like cgroup, are + * enforced at fault time instead of mmap(), even + * with the option of MAP_POPULATE. Kernel will send + * a SIGBUS signal. To avoid to be killed, save stack + * environment here, if SIGBUS happens, we can jump + * back here. 
+ */ + if (huge_wrap_sigsetjmp()) { + RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more " + "hugepages of size %u MB\n", + (unsigned)(hugepage_sz / 0x100000)); + munmap(virtaddr, hugepage_sz); + close(fd); + unlink(hugepg_tbl[i].filepath); + return i; + } + *(int *)virtaddr = 0; + } + + /* set shared flock on the file. */ if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n", + RTE_LOG(DEBUG, EAL, "%s(): Locking file failed:%s \n", __func__, strerror(errno)); close(fd); - return -1; + return i; } close(fd); @@ -430,7 +469,8 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, vma_addr = (char *)vma_addr + hugepage_sz; vma_len -= hugepage_sz; } - return 0; + + return i; } #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS @@ -1036,6 +1076,51 @@ calc_num_pages_per_socket(uint64_t * memory, return total_num_pages; } +static inline size_t +eal_get_hugepage_mem_size(void) +{ + uint64_t size = 0; + unsigned i, j; + + for (i = 0; i < internal_config.num_hugepage_sizes; i++) { + struct hugepage_info *hpi = &internal_config.hugepage_info[i]; + if (hpi->hugedir != NULL) { + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + size += hpi->hugepage_sz * hpi->num_pages[j]; + } + } + } + + return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX; +} + +static struct sigaction huge_action_old; +static int huge_need_recover; + +static void +huge_register_sigbus(void) +{ + sigset_t mask; + struct sigaction action; + + sigemptyset(&mask); + sigaddset(&mask, SIGBUS); + action.sa_flags = 0; + action.sa_mask = mask; + action.sa_handler = huge_sigbus_handler; + + huge_need_recover = !sigaction(SIGBUS, &action, &huge_action_old); +} + +static void +huge_recover_sigbus(void) +{ + if (huge_need_recover) { + sigaction(SIGBUS, &huge_action_old, NULL); + huge_need_recover = 0; + } +} + /* * Prepare physical memory mapping: fill configuration structure with * these infos, return 0 on success. 
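As an aside on the hugepage changes above: the patch converts a fatal SIGBUS (raised when a hugetlb limit such as a cgroup bound is enforced at fault time) into a recoverable shortfall by touching each new mapping inside a sigsetjmp region. A minimal standalone sketch of that pattern, not part of this patch and with illustrative names (wrap_sigsetjmp, probe_page), might look like this:

#include <setjmp.h>
#include <signal.h>
#include <string.h>

static sigjmp_buf jmpenv;

static void sigbus_handler(int signo)
{
	(void)signo;
	siglongjmp(jmpenv, 1);	/* jump back to the sigsetjmp point */
}

/* Keep sigsetjmp in its own small frame so the caller's locals cannot be
 * clobbered by the longjmp, the same reasoning as huge_wrap_sigsetjmp above.
 */
static int wrap_sigsetjmp(void)
{
	return sigsetjmp(jmpenv, 1);
}

/* Return 0 if the page at addr could be written, -1 if SIGBUS was raised. */
static int probe_page(void *addr)
{
	struct sigaction action, old;
	int ret = 0;

	memset(&action, 0, sizeof(action));
	sigemptyset(&action.sa_mask);
	action.sa_handler = sigbus_handler;
	if (sigaction(SIGBUS, &action, &old) != 0)
		return -1;

	if (wrap_sigsetjmp() == 0)
		*(volatile int *)addr = 0;	/* may fault and raise SIGBUS */
	else
		ret = -1;			/* the handler jumped back here */

	sigaction(SIGBUS, &old, NULL);		/* restore the previous handler */
	return ret;
}

map_all_hugepages() applies the same idea per page and, instead of failing outright, now reports how many pages it actually mapped, so rte_eal_hugepage_init() can either continue with the reduced amount or, under RTE_EAL_SINGLE_FILE_SEGMENTS, bail out, as the hunks below show.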
@@ -1122,8 +1207,11 @@ rte_eal_hugepage_init(void) hp_offset = 0; /* where we start the current page size entries */ + huge_register_sigbus(); + /* map all hugepages and sort them */ for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){ + unsigned pages_old, pages_new; struct hugepage_info *hpi; /* @@ -1137,10 +1225,28 @@ rte_eal_hugepage_init(void) continue; /* map all hugepages available */ - if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0){ - RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); + pages_old = hpi->num_pages[0]; + pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1); + if (pages_new < pages_old) { +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + RTE_LOG(ERR, EAL, + "%d not %d hugepages of size %u MB allocated\n", + pages_new, pages_old, + (unsigned)(hpi->hugepage_sz / 0x100000)); goto fail; +#else + RTE_LOG(DEBUG, EAL, + "%d not %d hugepages of size %u MB allocated\n", + pages_new, pages_old, + (unsigned)(hpi->hugepage_sz / 0x100000)); + + int pages = pages_old - pages_new; + + nr_hugepages -= pages; + hpi->num_pages[0] = pages_new; + if (pages_new == 0) + continue; +#endif } /* find physical addresses and sockets for each hugepage */ @@ -1172,8 +1278,9 @@ rte_eal_hugepage_init(void) hp_offset += new_pages_count[i]; #else /* remap all hugepages */ - if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){ - RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", + if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) != + hpi->num_pages[0]) { + RTE_LOG(ERR, EAL, "Failed to remap %u MB pages\n", (unsigned)(hpi->hugepage_sz / 0x100000)); goto fail; } @@ -1187,6 +1294,11 @@ rte_eal_hugepage_init(void) #endif } + huge_recover_sigbus(); + + if (internal_config.memory == 0 && internal_config.force_sockets == 0) + internal_config.memory = eal_get_hugepage_mem_size(); + #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS nr_hugefiles = 0; for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { @@ -1373,6 +1485,7 @@ rte_eal_hugepage_init(void) return 0; fail: + huge_recover_sigbus(); free(tmp_hp); return -1; } @@ -1399,7 +1512,7 @@ int rte_eal_hugepage_attach(void) { const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - const struct hugepage_file *hp = NULL; + struct hugepage_file *hp = NULL; unsigned num_hp = 0; unsigned i, s = 0; /* s used to track the segment number */ off_t size; @@ -1417,7 +1530,7 @@ rte_eal_hugepage_attach(void) if (internal_config.xen_dom0_support) { #ifdef RTE_LIBRTE_XEN_DOM0 if (rte_xen_dom0_memory_attach() < 0) { - RTE_LOG(ERR, EAL,"Failed to attach memory setments of primay " + RTE_LOG(ERR, EAL, "Failed to attach memory segments of primary " "process\n"); return -1; } @@ -1481,7 +1594,7 @@ rte_eal_hugepage_attach(void) size = getFileSize(fd_hugepage); hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0); - if (hp == NULL) { + if (hp == MAP_FAILED) { RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); goto error; } @@ -1545,12 +1658,19 @@ rte_eal_hugepage_attach(void) s++; } /* unmap the hugepage config file, since we are done using it */ - munmap((void *)(uintptr_t)hp, size); + munmap(hp, size); close(fd_zero); close(fd_hugepage); return 0; error: + s = 0; + while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0) { + munmap(mcfg->memseg[s].addr, mcfg->memseg[s].len); + s++; + } + if (hp != NULL && hp != MAP_FAILED) + munmap(hp, size); if (fd_zero >= 0) close(fd_zero); if (fd_hugepage >= 0) diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c index dbf12a84..f9c3efd2 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -66,8 +66,8 @@ pci_unbind_kernel_driver(struct rte_pci_device *dev) /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */ snprintf(filename, sizeof(filename), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind", - loc->domain, loc->bus, loc->devid, loc->function); + "%s/" PCI_PRI_FMT "/driver/unbind", pci_get_sysfs_path(), + loc->domain, loc->bus, loc->devid, loc->function); f = fopen(filename, "w"); if (f == NULL) /* device was not bound */ @@ -190,12 +190,13 @@ pci_find_max_end_va(void) return RTE_PTR_ADD(last->addr, last->len); } -/* parse the "resource" sysfs file */ -static int -pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) +/* parse one line of the "resource" sysfs file (note that the 'line' + * string is modified) + */ +int +pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, + uint64_t *end_addr, uint64_t *flags) { - FILE *f; - char buf[BUFSIZ]; union pci_resource_info { struct { char *phys_addr; @@ -204,6 +205,31 @@ pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) }; char *ptrs[PCI_RESOURCE_FMT_NVAL]; } res_info; + + if (rte_strsplit(line, len, res_info.ptrs, 3, ' ') != 3) { + RTE_LOG(ERR, EAL, + "%s(): bad resource format\n", __func__); + return -1; + } + errno = 0; + *phys_addr = strtoull(res_info.phys_addr, NULL, 16); + *end_addr = strtoull(res_info.end_addr, NULL, 16); + *flags = strtoull(res_info.flags, NULL, 16); + if (errno != 0) { + RTE_LOG(ERR, EAL, + "%s(): bad resource format\n", __func__); + return -1; + } + + return 0; +} + +/* parse the "resource" sysfs file */ +static int +pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) +{ + FILE *f; + char buf[BUFSIZ]; int i; uint64_t phys_addr, end_addr, flags; @@ -220,21 +246,9 @@ pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) "%s(): cannot read resource\n", __func__); goto error; } - - if (rte_strsplit(buf, sizeof(buf), res_info.ptrs, 3, ' ') != 3) { - RTE_LOG(ERR, EAL, - "%s(): bad resource format\n", __func__); + if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, + &end_addr, &flags) < 0) goto error; - } - errno = 0; - phys_addr = strtoull(res_info.phys_addr, NULL, 16); - end_addr = strtoull(res_info.end_addr, NULL, 16); - flags = strtoull(res_info.flags, NULL, 16); - if (errno != 0) { - RTE_LOG(ERR, EAL, - "%s(): bad resource format\n", __func__); - goto error; - } if (flags & IORESOURCE_MEM) { dev->mem_resource[i].phys_addr = phys_addr; @@ -306,6 +320,16 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, } dev->id.subsystem_device_id = (uint16_t)tmp; + /* get class_id */ + snprintf(filename, sizeof(filename), "%s/class", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + /* the least 24 bits are valid: class, subclass, program interface */ + dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID; + /* get max_vfs */ dev->max_vfs = 0; snprintf(filename, sizeof(filename), "%s/max_vfs", dirname); @@ -453,7 +477,7 @@ rte_eal_pci_scan(void) uint16_t domain; uint8_t bus, devid, function; - dir = opendir(SYSFS_PCI_DEVICES); + dir = opendir(pci_get_sysfs_path()); if (dir == NULL) { RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", __func__, strerror(errno)); @@ -468,8 +492,8 @@ rte_eal_pci_scan(void) &bus, &devid, &function) != 0) continue; - snprintf(dirname, sizeof(dirname), 
"%s/%s", SYSFS_PCI_DEVICES, - e->d_name); + snprintf(dirname, sizeof(dirname), "%s/%s", + pci_get_sysfs_path(), e->d_name); if (pci_scan_one(dirname, domain, bus, devid, function) < 0) goto error; } @@ -481,18 +505,6 @@ error: return -1; } -#ifdef RTE_PCI_CONFIG -/* - * It is deprecated, all its configurations have been moved into - * each PMD respectively. - */ -void -pci_config_space_set(__rte_unused struct rte_pci_device *dev) -{ - RTE_LOG(DEBUG, EAL, "Nothing here, as it is deprecated\n"); -} -#endif - /* Read PCI config space. */ int rte_eal_pci_read_config(const struct rte_pci_device *device, void *buf, size_t len, off_t offset) diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h index 7011753d..f72a2548 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h +++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h @@ -36,12 +36,22 @@ #include "eal_vfio.h" +/** IO resource type: */ +#define IORESOURCE_IO 0x00000100 +#define IORESOURCE_MEM 0x00000200 + /* * Helper function to map PCI resources right after hugepages in virtual memory */ extern void *pci_map_addr; void *pci_find_max_end_va(void); +/* parse one line of the "resource" sysfs file (note that the 'line' + * string is modified) + */ +int pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, + uint64_t *end_addr, uint64_t *flags); + int pci_uio_alloc_resource(struct rte_pci_device *dev, struct mapped_pci_resource **uio_res); void pci_uio_free_resource(struct rte_pci_device *dev, diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index 068694dc..1786b754 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -35,6 +35,7 @@ #include <unistd.h> #include <fcntl.h> #include <dirent.h> +#include <inttypes.h> #include <sys/stat.h> #include <sys/mman.h> #include <linux/pci_regs.h> @@ -161,14 +162,14 @@ pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, * or uio:uioX */ snprintf(dirname, sizeof(dirname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio", + "%s/" PCI_PRI_FMT "/uio", pci_get_sysfs_path(), loc->domain, loc->bus, loc->devid, loc->function); dir = opendir(dirname); if (dir == NULL) { /* retry with the parent directory */ snprintf(dirname, sizeof(dirname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT, + "%s/" PCI_PRI_FMT, pci_get_sysfs_path(), loc->domain, loc->bus, loc->devid, loc->function); dir = opendir(dirname); @@ -309,7 +310,7 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, struct mapped_pci_resource *uio_res, int map_idx) { int fd; - char devname[PATH_MAX]; /* contains the /dev/uioX */ + char devname[PATH_MAX]; void *mapaddr; struct rte_pci_addr *loc; struct pci_map *maps; @@ -319,7 +320,8 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, /* update devname for mmap */ snprintf(devname, sizeof(devname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d", + "%s/" PCI_PRI_FMT "/resource%d", + pci_get_sysfs_path(), loc->domain, loc->bus, loc->devid, loc->function, res_idx); @@ -368,11 +370,11 @@ error: return -1; } +#if defined(RTE_ARCH_X86) int pci_uio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) { -#if defined(RTE_ARCH_X86) char dirname[PATH_MAX]; char filename[PATH_MAX]; int uio_num; @@ -411,81 +413,154 @@ pci_uio_ioport_map(struct rte_pci_device *dev, int bar, RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start); p->base = start; + p->len = 0; return 0; +} #else - RTE_SET_USED(dev); 
- RTE_SET_USED(bar); - RTE_SET_USED(p); +int +pci_uio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + FILE *f; + char buf[BUFSIZ]; + char filename[PATH_MAX]; + uint64_t phys_addr, end_addr, flags; + int fd, i; + void *addr; + + /* open and read addresses of the corresponding resource in sysfs */ + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource", + pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n", + strerror(errno)); + return -1; + } + for (i = 0; i < bar + 1; i++) { + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n"); + goto error; + } + } + if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, + &end_addr, &flags) < 0) + goto error; + if ((flags & IORESOURCE_IO) == 0) { + RTE_LOG(ERR, EAL, "BAR %d is not an IO resource\n", bar); + goto error; + } + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource%d", + pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function, bar); + + /* mmap the pci resource */ + fd = open(filename, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, + strerror(errno)); + goto error; + } + addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + close(fd); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot mmap IO port resource: %s\n", + strerror(errno)); + goto error; + } + + /* strangely, the base address is mmap addr + phys_addr */ + p->base = (uintptr_t)addr + phys_addr; + p->len = end_addr + 1; + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%"PRIx64"\n", p->base); + fclose(f); + + return 0; + +error: + fclose(f); return -1; -#endif } +#endif void pci_uio_ioport_read(struct rte_pci_ioport *p, void *data, size_t len, off_t offset) { -#if defined(RTE_ARCH_X86) uint8_t *d; int size; - unsigned short reg = p->base + offset; + uintptr_t reg = p->base + offset; for (d = data; len > 0; d += size, reg += size, len -= size) { if (len >= 4) { size = 4; +#if defined(RTE_ARCH_X86) *(uint32_t *)d = inl(reg); +#else + *(uint32_t *)d = *(volatile uint32_t *)reg; +#endif } else if (len >= 2) { size = 2; +#if defined(RTE_ARCH_X86) *(uint16_t *)d = inw(reg); +#else + *(uint16_t *)d = *(volatile uint16_t *)reg; +#endif } else { size = 1; +#if defined(RTE_ARCH_X86) *d = inb(reg); - } - } #else - RTE_SET_USED(p); - RTE_SET_USED(data); - RTE_SET_USED(len); - RTE_SET_USED(offset); + *d = *(volatile uint8_t *)reg; #endif + } + } } void pci_uio_ioport_write(struct rte_pci_ioport *p, const void *data, size_t len, off_t offset) { -#if defined(RTE_ARCH_X86) const uint8_t *s; int size; - unsigned short reg = p->base + offset; + uintptr_t reg = p->base + offset; for (s = data; len > 0; s += size, reg += size, len -= size) { if (len >= 4) { size = 4; +#if defined(RTE_ARCH_X86) outl_p(*(const uint32_t *)s, reg); +#else + *(volatile uint32_t *)reg = *(const uint32_t *)s; +#endif } else if (len >= 2) { size = 2; +#if defined(RTE_ARCH_X86) outw_p(*(const uint16_t *)s, reg); +#else + *(volatile uint16_t *)reg = *(const uint16_t *)s; +#endif } else { size = 1; +#if defined(RTE_ARCH_X86) outb_p(*s, reg); - } - } #else - RTE_SET_USED(p); - RTE_SET_USED(data); - RTE_SET_USED(len); - RTE_SET_USED(offset); + *(volatile uint8_t *)reg = *s; #endif + } + } } int pci_uio_ioport_unmap(struct rte_pci_ioport *p) { - RTE_SET_USED(p); #if defined(RTE_ARCH_X86) 
+ RTE_SET_USED(p); /* FIXME close intr fd ? */ return 0; #else - return -1; + return munmap((void *)(uintptr_t)p->base, p->len); #endif } diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c index 10266f8f..f91b9242 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -602,7 +602,7 @@ pci_vfio_get_group_no(const char *pci_addr, int *iommu_group_no) /* try to find out IOMMU group for this device */ snprintf(linkname, sizeof(linkname), - SYSFS_PCI_DEVICES "/%s/iommu_group", pci_addr); + "%s/%s/iommu_group", pci_get_sysfs_path(), pci_addr); ret = readlink(linkname, filename, sizeof(filename)); diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c index d9188fde..d54ded88 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c @@ -287,7 +287,10 @@ pci_vfio_mp_sync_thread(void __rte_unused * arg) struct linger l; l.l_onoff = 1; l.l_linger = 60; - setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)); + + if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0) + RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option " + "on listen socket (%s)\n", strerror(errno)); ret = vfio_mp_sync_receive_request(conn_sock); @@ -396,7 +399,7 @@ pci_vfio_mp_sync_setup(void) snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pci-vfio-sync"); ret = rte_thread_setname(socket_thread, thread_name); if (ret) - RTE_LOG(ERR, EAL, + RTE_LOG(DEBUG, EAL, "Failed to set thread name for secondary processes!\n"); return 0; diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c index 18bd8e04..9f88530e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_thread.c +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c @@ -197,3 +197,16 @@ int rte_sys_gettid(void) { return (int)syscall(SYS_gettid); } + +int rte_thread_setname(pthread_t id, const char *name) +{ + int ret = -1; +#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) +#if __GLIBC_PREREQ(2, 12) + ret = pthread_setname_np(id, name); +#endif +#endif + RTE_SET_USED(id); + RTE_SET_USED(name); + return ret; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c index f2abb7b6..afa32f5c 100644 --- a/lib/librte_eal/linuxapp/eal/eal_timer.c +++ b/lib/librte_eal/linuxapp/eal/eal_timer.c @@ -222,8 +222,8 @@ rte_eal_hpet_init(int make_default) snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "hpet-msb-inc"); ret = rte_thread_setname(msb_inc_thread_id, thread_name); if (ret != 0) - RTE_LOG(ERR, EAL, - "ERROR: Cannot set HPET timer thread name!\n"); + RTE_LOG(DEBUG, EAL, + "Cannot set HPET timer thread name!\n"); if (make_default) eal_timer_source = EAL_TIMER_HPET; diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c index 495eef9e..0b612bb1 100644 --- a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c @@ -156,13 +156,27 @@ get_xen_memory_size(void) * Based on physical address to caculate MFN in Xen Dom0. 
*/ phys_addr_t -rte_xen_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr) +rte_xen_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr) { - int mfn_id; + int mfn_id, i; uint64_t mfn, mfn_offset; struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; struct rte_memseg *memseg = mcfg->memseg; + /* find the memory segment owning the physical address */ + if (memseg_id == -1) { + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + if ((phy_addr >= memseg[i].phys_addr) && + (phy_addr < memseg[i].phys_addr + + memseg[i].len)) { + memseg_id = i; + break; + } + } + if (memseg_id == -1) + return RTE_BAD_PHYS_ADDR; + } + mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M; /*the MFN is contiguous in 2M */ diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h index 7e5e5984..2acdfd9b 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -113,7 +113,9 @@ struct rte_kni_mbuf { void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); char pad0[10]; uint16_t data_off; /**< Start address of data in segment buffer. */ - char pad1[4]; + char pad1[2]; + uint8_t nb_segs; /**< Number of segments. */ + char pad4[1]; uint64_t ol_flags; /**< Offload features. */ char pad2[4]; uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */ diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map index 12503efa..05134673 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -154,3 +154,13 @@ DPDK_16.04 { rte_eal_primary_proc_alive; } DPDK_2.2; + +DPDK_16.07 { + global: + + pci_get_sysfs_path; + rte_keepalive_mark_sleep; + rte_keepalive_register_relay_callback; + rte_thread_setname; + +} DPDK_16.04; diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h index c1d45a66..0d781e48 100644 --- a/lib/librte_eal/linuxapp/igb_uio/compat.h +++ b/lib/librte_eal/linuxapp/igb_uio/compat.h @@ -24,6 +24,15 @@ #define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 #endif +/* + * for kernels < 2.6.38 and backported patch that moves MSI-X entry definition + * to pci_regs.h Those kernels has PCI_MSIX_ENTRY_SIZE defined but not + * PCI_MSIX_ENTRY_CTRL_MASKBIT + */ +#ifndef PCI_MSIX_ENTRY_CTRL_MASKBIT +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 +#endif + #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && \ (!(defined(RHEL_RELEASE_CODE) && \ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 9))) diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index 72b26923..45a5720e 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -81,62 +81,10 @@ store_max_vfs(struct device *dev, struct device_attribute *attr, return err ?
err : count; } -#ifdef RTE_PCI_CONFIG -static ssize_t -show_extended_tag(struct device *dev, struct device_attribute *attr, char *buf) -{ - dev_info(dev, "Deprecated\n"); - - return 0; -} - -static ssize_t -store_extended_tag(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - dev_info(dev, "Deprecated\n"); - - return 0; -} - -static ssize_t -show_max_read_request_size(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - dev_info(dev, "Deprecated\n"); - - return 0; -} - -static ssize_t -store_max_read_request_size(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - dev_info(dev, "Deprecated\n"); - - return 0; -} -#endif - static DEVICE_ATTR(max_vfs, S_IRUGO | S_IWUSR, show_max_vfs, store_max_vfs); -#ifdef RTE_PCI_CONFIG -static DEVICE_ATTR(extended_tag, S_IRUGO | S_IWUSR, show_extended_tag, - store_extended_tag); -static DEVICE_ATTR(max_read_request_size, S_IRUGO | S_IWUSR, - show_max_read_request_size, store_max_read_request_size); -#endif static struct attribute *dev_attrs[] = { &dev_attr_max_vfs.attr, -#ifdef RTE_PCI_CONFIG - &dev_attr_extended_tag.attr, - &dev_attr_max_read_request_size.attr, -#endif NULL, }; diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile index ac99d3f1..8cc6b61c 100644 --- a/lib/librte_eal/linuxapp/kni/Makefile +++ b/lib/librte_eal/linuxapp/kni/Makefile @@ -47,7 +47,7 @@ MODULE_CFLAGS += -Wall -Werror ifeq ($(shell lsb_release -si 2>/dev/null),Ubuntu) MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(shell lsb_release -sr | tr -d .) UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \ - | cut -d '"' -f2 | cut -d- -f1,2 | tr .- $(comma)`,1) + | cut -d '"' -f2 | cut -d- -f1,2 | tr .- ,`,1) MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))" endif diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h index cf100b67..647ba3ce 100644 --- a/lib/librte_eal/linuxapp/kni/compat.h +++ b/lib/librte_eal/linuxapp/kni/compat.h @@ -14,16 +14,27 @@ #endif /* < 2.6.39 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33) +#define HAVE_SIMPLIFIED_PERNET_OPERATIONS +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) #define sk_sleep(s) (s)->sk_sleep +#endif -#endif /* < 2.6.35 */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) +#define HAVE_CHANGE_CARRIER_CB +#endif -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) #define HAVE_IOV_ITER_MSGHDR #endif -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) ) +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) #define HAVE_KIOCB_MSG_PARAM -#endif /* < 4.1.0 */ +#define HAVE_REBUILD_HEADER +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) +#define HAVE_TRANS_START_HELPER +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c index df224702..140a2a47 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c @@ -3300,12 +3300,13 @@ s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data) *data = E1000_READ_REG(hw, E1000_MPHY_DATA); /* Disable access to mPHY if it was originally disabled */ - if (locked) + if (locked) { ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, 
E1000_MPHY_DIS_ACCESS); + } return E1000_SUCCESS; } @@ -3365,12 +3366,13 @@ s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data, E1000_WRITE_REG(hw, E1000_MPHY_DATA, data); /* Disable access to mPHY if it was originally disabled */ - if (locked) + if (locked) { ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, E1000_MPHY_DIS_ACCESS); + } return E1000_SUCCESS; } diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c index 017dfe16..c6f4130d 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c @@ -867,12 +867,13 @@ s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { /* Set KX4/KX/KR support according to speed requested */ autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP); - if (speed & IXGBE_LINK_SPEED_10GB_FULL) + if (speed & IXGBE_LINK_SPEED_10GB_FULL) { if (orig_autoc & IXGBE_AUTOC_KX4_SUPP) autoc |= IXGBE_AUTOC_KX4_SUPP; if ((orig_autoc & IXGBE_AUTOC_KR_SUPP) && (hw->phy.smart_speed_active == false)) autoc |= IXGBE_AUTOC_KR_SUPP; + } if (speed & IXGBE_LINK_SPEED_1GB_FULL) autoc |= IXGBE_AUTOC_KX_SUPP; } else if ((pma_pmd_1g == IXGBE_AUTOC_1G_SFI) && diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c index 8c1d2fe3..92fc9fc7 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c @@ -59,8 +59,6 @@ #undef CONFIG_DCA_MODULE char ixgbe_driver_name[] = "ixgbe"; -static const char ixgbe_driver_string[] = - "Intel(R) 10 Gigabit PCI Express Network Driver"; #define DRV_HW_PERF #ifndef CONFIG_IXGBE_NAPI @@ -79,8 +77,6 @@ static const char ixgbe_driver_string[] = #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." 
\ __stringify(BUILD) DRIVERNAPI DRV_HW_PERF FPGA VMDQ_TAG const char ixgbe_driver_version[] = DRV_VERSION; -static const char ixgbe_copyright[] = - "Copyright (c) 1999-2012 Intel Corporation."; /* ixgbe_pci_tbl - PCI Device ID Table * diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c index ae8133f3..59d15ca6 100644 --- a/lib/librte_eal/linuxapp/kni/kni_misc.c +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c @@ -26,6 +26,7 @@ #include <linux/module.h> #include <linux/miscdevice.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> #include <linux/pci.h> #include <linux/kthread.h> #include <linux/rwsem.h> @@ -34,6 +35,8 @@ #include <net/netns/generic.h> #include <exec-env/rte_kni_common.h> + +#include "compat.h" #include "kni_dev.h" MODULE_LICENSE("Dual BSD/GPL"); @@ -104,7 +107,7 @@ struct kni_net { static int __net_init kni_init_net(struct net *net) { -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS struct kni_net *knet = net_generic(net, kni_net_id); #else struct kni_net *knet; @@ -115,7 +118,7 @@ static int __net_init kni_init_net(struct net *net) ret = -ENOMEM; return ret; } -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif /* Clear the bit of device in use */ clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); @@ -123,7 +126,7 @@ static int __net_init kni_init_net(struct net *net) init_rwsem(&knet->kni_list_lock); INIT_LIST_HEAD(&knet->kni_list_head); -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS return 0; #else ret = net_assign_generic(net, kni_net_id, knet); @@ -131,25 +134,25 @@ static int __net_init kni_init_net(struct net *net) kfree(knet); return ret; -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif } static void __net_exit kni_exit_net(struct net *net) { -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 32) +#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS struct kni_net *knet = net_generic(net, kni_net_id); kfree(knet); -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif } static struct pernet_operations kni_net_ops = { .init = kni_init_net, .exit = kni_exit_net, -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS .id = &kni_net_id, .size = sizeof(struct kni_net), -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif }; static int __init @@ -164,11 +167,11 @@ kni_init(void) return -EINVAL; } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS rc = register_pernet_subsys(&kni_net_ops); #else rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif if (rc) return -EPERM; @@ -186,11 +189,11 @@ kni_init(void) return 0; out: -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS unregister_pernet_subsys(&kni_net_ops); #else register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif return rc; } @@ -198,11 +201,11 @@ static void __exit kni_exit(void) { misc_deregister(&kni_misc); -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS unregister_pernet_subsys(&kni_net_ops); #else register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); -#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +#endif KNI_PRINT("####### DPDK kni module unloaded #######\n"); } @@ -542,6 +545,15 @@ 
kni_ioctl_create(struct net *net, if (pci) pci_dev_put(pci); + if (kni->lad_dev) + memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN); + else + /* + * Generate random mac address. eth_random_addr() is the newer + * version of generating mac address in linux kernel. + */ + random_ether_addr(net_dev->dev_addr); + ret = register_netdev(net_dev); if (ret) { KNI_ERR("error %i registering device \"%s\"\n", diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c index cfa83398..fc82193a 100644 --- a/lib/librte_eal/linuxapp/kni/kni_net.c +++ b/lib/librte_eal/linuxapp/kni/kni_net.c @@ -38,6 +38,8 @@ #include <exec-env/rte_kni_common.h> #include <kni_fifo.h> + +#include "compat.h" #include "kni_dev.h" #define WD_TIMEOUT 5 /*jiffies */ @@ -69,15 +71,6 @@ kni_net_open(struct net_device *dev) struct rte_kni_request req; struct kni_dev *kni = netdev_priv(dev); - if (kni->lad_dev) - memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN); - else - /* - * Generate random mac address. eth_random_addr() is the newer - * version of generating mac address in linux kernel. - */ - random_ether_addr(dev->dev_addr); - netif_start_queue(dev); memset(&req, 0, sizeof(req)); @@ -156,7 +149,8 @@ kni_net_rx_normal(struct kni_dev *kni) /* Transfer received packets to netif */ for (i = 0; i < num_rx; i++) { kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; - len = kva->data_len; + len = kva->pkt_len; + data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva; @@ -165,22 +159,41 @@ kni_net_rx_normal(struct kni_dev *kni) KNI_ERR("Out of mem, dropping pkts\n"); /* Update statistics */ kni->stats.rx_dropped++; + continue; } - else { - /* Align IP on 16B boundary */ - skb_reserve(skb, 2); + + /* Align IP on 16B boundary */ + skb_reserve(skb, 2); + + if (kva->nb_segs == 1) { memcpy(skb_put(skb, len), data_kva, len); - skb->dev = dev; - skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_UNNECESSARY; + } else { + int nb_segs; + int kva_nb_segs = kva->nb_segs; - /* Call netif interface */ - netif_rx_ni(skb); + for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) { + memcpy(skb_put(skb, kva->data_len), + data_kva, kva->data_len); - /* Update statistics */ - kni->stats.rx_bytes += len; - kni->stats.rx_packets++; + if (!kva->next) + break; + + kva = kva->next - kni->mbuf_va + kni->mbuf_kva; + data_kva = kva->buf_addr + kva->data_off + - kni->mbuf_va + kni->mbuf_kva; + } } + + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* Call netif interface */ + netif_rx_ni(skb); + + /* Update statistics */ + kni->stats.rx_bytes += len; + kni->stats.rx_packets++; } /* Burst enqueue mbufs into free_q */ @@ -317,7 +330,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) /* Copy mbufs to sk buffer and then call tx interface */ for (i = 0; i < num; i++) { kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; - len = kva->data_len; + len = kva->pkt_len; data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva; @@ -338,20 +351,39 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) if (skb == NULL) { KNI_ERR("Out of mem, dropping pkts\n"); kni->stats.rx_dropped++; + continue; } - else { - /* Align IP on 16B boundary */ - skb_reserve(skb, 2); + + /* Align IP on 16B boundary */ + skb_reserve(skb, 2); + + if (kva->nb_segs == 1) { memcpy(skb_put(skb, len), data_kva, len); - skb->dev = dev; - skb->ip_summed = CHECKSUM_UNNECESSARY; + } else { + int nb_segs; + int kva_nb_segs = kva->nb_segs; - kni->stats.rx_bytes += 
len; - kni->stats.rx_packets++; + for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) { + memcpy(skb_put(skb, kva->data_len), + data_kva, kva->data_len); - /* call tx interface */ - kni_net_tx(skb, dev); + if (!kva->next) + break; + + kva = kva->next - kni->mbuf_va + kni->mbuf_kva; + data_kva = kva->buf_addr + kva->data_off + - kni->mbuf_va + kni->mbuf_kva; + } } + + skb->dev = dev; + skb->ip_summed = CHECKSUM_UNNECESSARY; + + kni->stats.rx_bytes += len; + kni->stats.rx_packets++; + + /* call tx interface */ + kni_net_tx(skb, dev); } /* enqueue all the mbufs from rx_q into free_q */ @@ -396,7 +428,12 @@ kni_net_tx(struct sk_buff *skb, struct net_device *dev) struct rte_kni_mbuf *pkt_kva = NULL; struct rte_kni_mbuf *pkt_va = NULL; - dev->trans_start = jiffies; /* save the timestamp */ + /* save the timestamp */ +#ifdef HAVE_TRANS_START_HELPER + netif_trans_update(dev); +#else + dev->trans_start = jiffies; +#endif /* Check if the length of skb is less than mbuf size */ if (skb->len > kni->mbuf_size) @@ -604,7 +641,7 @@ kni_net_header(struct sk_buff *skb, struct net_device *dev, /* * Re-fill the eth header */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)) +#ifdef HAVE_REBUILD_HEADER static int kni_net_rebuild_header(struct sk_buff *skb) { @@ -634,7 +671,7 @@ static int kni_net_set_mac(struct net_device *netdev, void *p) return 0; } -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) +#ifdef HAVE_CHANGE_CARRIER_CB static int kni_net_change_carrier(struct net_device *dev, bool new_carrier) { if (new_carrier) @@ -647,7 +684,7 @@ static int kni_net_change_carrier(struct net_device *dev, bool new_carrier) static const struct header_ops kni_net_header_ops = { .create = kni_net_header, -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)) +#ifdef HAVE_REBUILD_HEADER .rebuild = kni_net_rebuild_header, #endif /* < 4.1.0 */ .cache = NULL, /* disable caching */ @@ -664,7 +701,7 @@ static const struct net_device_ops kni_net_netdev_ops = { .ndo_get_stats = kni_net_stats, .ndo_tx_timeout = kni_net_tx_timeout, .ndo_set_mac_address = kni_net_set_mac, -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) +#ifdef HAVE_CHANGE_CARRIER_CB .ndo_change_carrier = kni_net_change_carrier, #endif }; |
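One last illustrative sketch, not part of the patch: the kni compat.h and kni_net.c changes above replace scattered LINUX_VERSION_CODE comparisons with feature macros (HAVE_TRANS_START_HELPER, HAVE_REBUILD_HEADER, HAVE_CHANGE_CARRIER_CB, HAVE_SIMPLIFIED_PERNET_OPERATIONS) that are derived once from the kernel version. A minimal example of that style, using the hypothetical helper name example_update_trans_start, could look like this:

#include <linux/version.h>
#include <linux/netdevice.h>
#include <linux/jiffies.h>

/* compat.h style: derive the feature macro from the kernel version once */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
#define HAVE_TRANS_START_HELPER
#endif

/* call sites test the feature macro, not the raw version number */
static inline void example_update_trans_start(struct net_device *dev)
{
#ifdef HAVE_TRANS_START_HELPER
	netif_trans_update(dev);	/* helper available since 4.7 */
#else
	dev->trans_start = jiffies;	/* older kernels update the field directly */
#endif
}

This keeps the version policy in one header, so when kni_net_tx saves the transmit timestamp it only needs the single #ifdef seen in the hunk above.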