Diffstat (limited to 'lib')
264 files changed, 16931 insertions, 11705 deletions
diff --git a/lib/Makefile b/lib/Makefile index 86caba17..dc4e8df7 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -33,6 +33,8 @@ include $(RTE_SDK)/mk/rte.vars.mk DIRS-y += librte_compat DIRS-$(CONFIG_RTE_LIBRTE_EAL) += librte_eal +DIRS-$(CONFIG_RTE_LIBRTE_PCI) += librte_pci +DEPDIRS-librte_pci := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_ring DEPDIRS-librte_ring := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += librte_mempool @@ -42,7 +44,6 @@ DEPDIRS-librte_mbuf := librte_eal librte_mempool DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += librte_timer DEPDIRS-librte_timer := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += librte_cfgfile -DEPDIRS-librte_cfgfile := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline DEPDIRS-librte_cmdline := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether @@ -51,8 +52,12 @@ DEPDIRS-librte_ether += librte_mbuf DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev DEPDIRS-librte_cryptodev := librte_eal librte_mempool librte_ring librte_mbuf DEPDIRS-librte_cryptodev += librte_kvargs +DIRS-$(CONFIG_RTE_LIBRTE_SECURITY) += librte_security +DEPDIRS-librte_security := librte_eal librte_mempool librte_ring librte_mbuf +DEPDIRS-librte_security += librte_ether +DEPDIRS-librte_security += librte_cryptodev DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += librte_eventdev -DEPDIRS-librte_eventdev := librte_eal librte_ring +DEPDIRS-librte_eventdev := librte_eal librte_ring librte_ether librte_hash DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash @@ -63,6 +68,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm DEPDIRS-librte_lpm := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl DEPDIRS-librte_acl := librte_eal +DIRS-$(CONFIG_RTE_LIBRTE_MEMBER) += librte_member +DEPDIRS-librte_member := librte_eal librte_hash DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net DEPDIRS-librte_net := librte_mbuf librte_eal DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag @@ -82,6 +89,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power DEPDIRS-librte_power := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter DEPDIRS-librte_meter := librte_eal +DIRS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += librte_flow_classify +DEPDIRS-librte_flow_classify := librte_net librte_table librte_acl DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched DEPDIRS-librte_sched := librte_eal librte_mempool librte_mbuf librte_net DEPDIRS-librte_sched += librte_timer @@ -108,10 +117,13 @@ DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder DEPDIRS-librte_reorder := librte_eal librte_mempool librte_mbuf DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ether +DIRS-$(CONFIG_RTE_LIBRTE_GSO) += librte_gso +DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ether librte_net +DEPDIRS-librte_gso += librte_mempool ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni endif -DEPDIRS-librte_kni:= librte_eal librte_mempool librte_mbuf librte_ether +DEPDIRS-librte_kni := librte_eal librte_mempool librte_mbuf librte_ether include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile index 59767920..e7e3c91d 100644 --- a/lib/librte_acl/Makefile +++ b/lib/librte_acl/Makefile @@ -36,6 +36,7 @@ LIB = librte_acl.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +LDLIBS += -lrte_eal EXPORT_MAP := rte_acl_version.map diff --git a/lib/librte_acl/rte_acl.c 
b/lib/librte_acl/rte_acl.c index d1f40bef..67f41f3d 100644 --- a/lib/librte_acl/rte_acl.c +++ b/lib/librte_acl/rte_acl.c @@ -120,8 +120,7 @@ rte_acl_set_ctx_classify(struct rte_acl_ctx *ctx, enum rte_acl_classify_alg alg) * if both conditions are met: * at build time compiler supports AVX2 and target cpu supports AVX2. */ -static void __attribute__((constructor)) -rte_acl_init(void) +RTE_INIT(rte_acl_init) { enum rte_acl_classify_alg alg = RTE_ACL_CLASSIFY_DEFAULT; diff --git a/lib/librte_acl/rte_acl_osdep.h b/lib/librte_acl/rte_acl_osdep.h index 9e4af530..ac712bfa 100644 --- a/lib/librte_acl/rte_acl_osdep.h +++ b/lib/librte_acl/rte_acl_osdep.h @@ -66,7 +66,6 @@ #include <rte_prefetch.h> #include <rte_byteorder.h> #include <rte_branch_prediction.h> -#include <rte_memzone.h> #include <rte_malloc.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> diff --git a/lib/librte_bitratestats/Makefile b/lib/librte_bitratestats/Makefile index 58a20ea0..5054b679 100644 --- a/lib/librte_bitratestats/Makefile +++ b/lib/librte_bitratestats/Makefile @@ -35,10 +35,11 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_bitratestats.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 +LDLIBS += -lrte_eal -lrte_metrics -lrte_ethdev EXPORT_MAP := rte_bitratestats_version.map -LIBABIVER := 1 +LIBABIVER := 2 # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_BITRATE) := rte_bitrate.c diff --git a/lib/librte_bitratestats/rte_bitrate.c b/lib/librte_bitratestats/rte_bitrate.c index 3ceb3516..f373697a 100644 --- a/lib/librte_bitratestats/rte_bitrate.c +++ b/lib/librte_bitratestats/rte_bitrate.c @@ -84,7 +84,7 @@ rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data) int rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data, - uint8_t port_id) + uint16_t port_id) { struct rte_stats_bitrate *port_data; struct rte_eth_stats eth_stats; diff --git a/lib/librte_bitratestats/rte_bitrate.h b/lib/librte_bitratestats/rte_bitrate.h index 15fc270a..16467221 100644 --- a/lib/librte_bitratestats/rte_bitrate.h +++ b/lib/librte_bitratestats/rte_bitrate.h @@ -85,7 +85,7 @@ int rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data); * - Negative value on error */ int rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data, - uint8_t port_id); + uint16_t port_id); #ifdef __cplusplus } diff --git a/lib/librte_cfgfile/Makefile b/lib/librte_cfgfile/Makefile index 755ef11f..0bee43e2 100644 --- a/lib/librte_cfgfile/Makefile +++ b/lib/librte_cfgfile/Makefile @@ -38,6 +38,7 @@ LIB = librte_cfgfile.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -I$(SRCDIR)/../librte_eal/common/include EXPORT_MAP := rte_cfgfile_version.map diff --git a/lib/librte_cfgfile/rte_cfgfile.c b/lib/librte_cfgfile/rte_cfgfile.c index b54a523d..eacf93a8 100644 --- a/lib/librte_cfgfile/rte_cfgfile.c +++ b/lib/librte_cfgfile/rte_cfgfile.c @@ -35,21 +35,23 @@ #include <stdlib.h> #include <string.h> #include <ctype.h> +#include <errno.h> #include <rte_common.h> -#include <rte_string_fns.h> #include "rte_cfgfile.h" struct rte_cfgfile_section { char name[CFG_NAME_LEN]; int num_entries; - struct rte_cfgfile_entry *entries[0]; + int allocated_entries; + struct rte_cfgfile_entry *entries; }; struct rte_cfgfile { int flags; int num_sections; - struct rte_cfgfile_section *sections[0]; + int allocated_sections; + struct rte_cfgfile_section *sections; }; /** when we resize a file structure, how many extra entries @@ -105,6 +107,49 @@ _strip(char *str, unsigned len) return newlen; } +static struct rte_cfgfile_section * +_get_section(struct 
rte_cfgfile *cfg, const char *sectionname) +{ + int i; + + for (i = 0; i < cfg->num_sections; i++) { + if (strncmp(cfg->sections[i].name, sectionname, + sizeof(cfg->sections[0].name)) == 0) + return &cfg->sections[i]; + } + return NULL; +} + +static int +_add_entry(struct rte_cfgfile_section *section, const char *entryname, + const char *entryvalue) +{ + /* resize entry structure if we don't have room for more entries */ + if (section->num_entries == section->allocated_entries) { + struct rte_cfgfile_entry *n_entries = realloc( + section->entries, + sizeof(struct rte_cfgfile_entry) * + ((section->allocated_entries) + + CFG_ALLOC_ENTRY_BATCH)); + + if (n_entries == NULL) + return -ENOMEM; + + section->entries = n_entries; + section->allocated_entries += CFG_ALLOC_ENTRY_BATCH; + } + /* fill up entry fields with key name and value */ + struct rte_cfgfile_entry *curr_entry = + &section->entries[section->num_entries]; + + snprintf(curr_entry->name, sizeof(curr_entry->name), "%s", entryname); + snprintf(curr_entry->value, + sizeof(curr_entry->value), "%s", entryvalue); + section->num_entries++; + + return 0; +} + static int rte_cfgfile_check_params(const struct rte_cfgfile_parameters *params) { @@ -144,10 +189,6 @@ struct rte_cfgfile * rte_cfgfile_load_with_params(const char *filename, int flags, const struct rte_cfgfile_parameters *params) { - int allocated_sections = CFG_ALLOC_SECTION_BATCH; - int allocated_entries = 0; - int curr_section = -1; - int curr_entry = -1; char buffer[CFG_NAME_LEN + CFG_VALUE_LEN + 4] = {0}; int lineno = 0; struct rte_cfgfile *cfg = NULL; @@ -159,28 +200,7 @@ rte_cfgfile_load_with_params(const char *filename, int flags, if (f == NULL) return NULL; - cfg = malloc(sizeof(*cfg) + sizeof(cfg->sections[0]) * - allocated_sections); - if (cfg == NULL) - goto error2; - - memset(cfg->sections, 0, sizeof(cfg->sections[0]) * allocated_sections); - - if (flags & CFG_FLAG_GLOBAL_SECTION) { - curr_section = 0; - allocated_entries = CFG_ALLOC_ENTRY_BATCH; - cfg->sections[curr_section] = malloc( - sizeof(*cfg->sections[0]) + - sizeof(cfg->sections[0]->entries[0]) * - allocated_entries); - if (cfg->sections[curr_section] == NULL) { - printf("Error - no memory for global section\n"); - goto error1; - } - - snprintf(cfg->sections[curr_section]->name, - sizeof(cfg->sections[0]->name), "GLOBAL"); - } + cfg = rte_cfgfile_create(flags); while (fgets(buffer, sizeof(buffer), f) != NULL) { char *pos = NULL; @@ -191,13 +211,15 @@ rte_cfgfile_load_with_params(const char *filename, int flags, "Check if line too long\n", lineno); goto error1; } + /* skip parsing if comment character found */ pos = memchr(buffer, params->comment_character, len); - if (pos != NULL) { + if (pos != NULL && (*(pos-1) != '\\')) { *pos = '\0'; len = pos - buffer; } len = _strip(buffer, len); + /* skip lines without useful content */ if (buffer[0] != '[' && memchr(buffer, '=', len) == NULL) continue; @@ -205,151 +227,252 @@ rte_cfgfile_load_with_params(const char *filename, int flags, /* section heading line */ char *end = memchr(buffer, ']', len); if (end == NULL) { - printf("Error line %d - no terminating '['" + printf("Error line %d - no terminating ']'" "character found\n", lineno); goto error1; } *end = '\0'; _strip(&buffer[1], end - &buffer[1]); - /* close off old section and add start new one */ - if (curr_section >= 0) - cfg->sections[curr_section]->num_entries = - curr_entry + 1; - curr_section++; - - /* resize overall struct if we don't have room for more - sections */ - if (curr_section == allocated_sections) { -
allocated_sections += CFG_ALLOC_SECTION_BATCH; - struct rte_cfgfile *n_cfg = realloc(cfg, - sizeof(*cfg) + sizeof(cfg->sections[0]) - * allocated_sections); - if (n_cfg == NULL) { - curr_section--; - printf("Error - no more memory\n"); - goto error1; - } - cfg = n_cfg; - } - - /* allocate space for new section */ - allocated_entries = CFG_ALLOC_ENTRY_BATCH; - curr_entry = -1; - cfg->sections[curr_section] = malloc( - sizeof(*cfg->sections[0]) + - sizeof(cfg->sections[0]->entries[0]) * - allocated_entries); - if (cfg->sections[curr_section] == NULL) { - printf("Error - no more memory\n"); - goto error1; - } - - snprintf(cfg->sections[curr_section]->name, - sizeof(cfg->sections[0]->name), - "%s", &buffer[1]); + rte_cfgfile_add_section(cfg, &buffer[1]); } else { - /* value line */ - if (curr_section < 0) { - printf("Error line %d - value outside of" - "section\n", lineno); + /* key and value line */ + char *split[2] = {NULL}; + + split[0] = buffer; + split[1] = memchr(buffer, '=', len); + if (split[1] == NULL) { + printf("Error line %d - no '='" + "character found\n", lineno); goto error1; } - - struct rte_cfgfile_section *sect = - cfg->sections[curr_section]; - int n; - char *split[2] = {NULL}; - n = rte_strsplit(buffer, sizeof(buffer), split, 2, '='); - if (flags & CFG_FLAG_EMPTY_VALUES) { - if ((n < 1) || (n > 2)) { - printf("Error at line %d - cannot split string, n=%d\n", - lineno, n); - goto error1; - } - } else { - if (n != 2) { - printf("Error at line %d - cannot split string, n=%d\n", - lineno, n); - goto error1; - } + *split[1] = '\0'; + split[1]++; + + _strip(split[0], strlen(split[0])); + _strip(split[1], strlen(split[1])); + char *end = memchr(split[1], '\\', strlen(split[1])); + + while (end != NULL) { + if (*(end+1) == params->comment_character) { + *end = '\0'; + strcat(split[1], end+1); + } else + end++; + end = memchr(end, '\\', strlen(end)); } - curr_entry++; - if (curr_entry == allocated_entries) { - allocated_entries += CFG_ALLOC_ENTRY_BATCH; - struct rte_cfgfile_section *n_sect = realloc( - sect, sizeof(*sect) + - sizeof(sect->entries[0]) * - allocated_entries); - if (n_sect == NULL) { - curr_entry--; - printf("Error - no more memory\n"); - goto error1; - } - sect = cfg->sections[curr_section] = n_sect; + if (!(flags & CFG_FLAG_EMPTY_VALUES) && + (*split[1] == '\0')) { + printf("Error at line %d - cannot use empty " + "values\n", lineno); + goto error1; } - sect->entries[curr_entry] = malloc( - sizeof(*sect->entries[0])); - if (sect->entries[curr_entry] == NULL) { - printf("Error - no more memory\n"); + if (cfg->num_sections == 0) goto error1; - } - struct rte_cfgfile_entry *entry = sect->entries[ - curr_entry]; - snprintf(entry->name, sizeof(entry->name), "%s", - split[0]); - snprintf(entry->value, sizeof(entry->value), "%s", - split[1] ? 
split[1] : ""); - _strip(entry->name, strnlen(entry->name, - sizeof(entry->name))); - _strip(entry->value, strnlen(entry->value, - sizeof(entry->value))); + _add_entry(&cfg->sections[cfg->num_sections - 1], + split[0], split[1]); } } fclose(f); - cfg->flags = flags; - cfg->num_sections = curr_section + 1; - /* curr_section will still be -1 if we have an empty file */ - if (curr_section >= 0) - cfg->sections[curr_section]->num_entries = curr_entry + 1; return cfg; - error1: - cfg->num_sections = curr_section + 1; - if (curr_section >= 0) - cfg->sections[curr_section]->num_entries = curr_entry + 1; rte_cfgfile_close(cfg); -error2: fclose(f); return NULL; } +struct rte_cfgfile * +rte_cfgfile_create(int flags) +{ + int i; + struct rte_cfgfile *cfg = NULL; -int rte_cfgfile_close(struct rte_cfgfile *cfg) + cfg = malloc(sizeof(*cfg)); + + if (cfg == NULL) + return NULL; + + cfg->flags = flags; + cfg->num_sections = 0; + + /* allocate first batch of sections and entries */ + cfg->sections = malloc(sizeof(struct rte_cfgfile_section) * + CFG_ALLOC_SECTION_BATCH); + + if (cfg->sections == NULL) + goto error1; + + cfg->allocated_sections = CFG_ALLOC_SECTION_BATCH; + + for (i = 0; i < CFG_ALLOC_SECTION_BATCH; i++) { + cfg->sections[i].entries = malloc(sizeof( + struct rte_cfgfile_entry) * CFG_ALLOC_ENTRY_BATCH); + + if (cfg->sections[i].entries == NULL) + goto error1; + + cfg->sections[i].num_entries = 0; + cfg->sections[i].allocated_entries = CFG_ALLOC_ENTRY_BATCH; + } + + if (flags & CFG_FLAG_GLOBAL_SECTION) + rte_cfgfile_add_section(cfg, "GLOBAL"); + + return cfg; +error1: + if (cfg->sections != NULL) { + for (i = 0; i < cfg->allocated_sections; i++) { + if (cfg->sections[i].entries != NULL) { + free(cfg->sections[i].entries); + cfg->sections[i].entries = NULL; + } + } + free(cfg->sections); + cfg->sections = NULL; + } + free(cfg); + return NULL; +} + +int +rte_cfgfile_add_section(struct rte_cfgfile *cfg, const char *sectionname) +{ + int i; + + if (cfg == NULL) + return -EINVAL; + + if (sectionname == NULL) + return -EINVAL; + + /* resize overall struct if we don't have room for more sections */ + if (cfg->num_sections == cfg->allocated_sections) { + + struct rte_cfgfile_section *n_sections = + realloc(cfg->sections, + sizeof(struct rte_cfgfile_section) * + ((cfg->allocated_sections) + + CFG_ALLOC_SECTION_BATCH)); + + if (n_sections == NULL) + return -ENOMEM; + + for (i = 0; i < CFG_ALLOC_SECTION_BATCH; i++) { + n_sections[i + cfg->allocated_sections].num_entries = 0; + n_sections[i + + cfg->allocated_sections].allocated_entries = 0; + n_sections[i + cfg->allocated_sections].entries = NULL; + } + cfg->sections = n_sections; + cfg->allocated_sections += CFG_ALLOC_SECTION_BATCH; + } + + snprintf(cfg->sections[cfg->num_sections].name, + sizeof(cfg->sections[0].name), "%s", sectionname); + cfg->sections[cfg->num_sections].num_entries = 0; + cfg->num_sections++; + + return 0; +} + +int rte_cfgfile_add_entry(struct rte_cfgfile *cfg, + const char *sectionname, const char *entryname, + const char *entryvalue) +{ + int ret; + + if ((cfg == NULL) || (sectionname == NULL) || (entryname == NULL) + || (entryvalue == NULL)) + return -EINVAL; + + if (rte_cfgfile_has_entry(cfg, sectionname, entryname) != 0) + return -EEXIST; + + /* search for section pointer by sectionname */ + struct rte_cfgfile_section *curr_section = _get_section(cfg, + sectionname); + if (curr_section == NULL) + return -EINVAL; + + ret = _add_entry(curr_section, entryname, entryvalue); + + return ret; +} + +int rte_cfgfile_set_entry(struct 
rte_cfgfile *cfg, const char *sectionname, + const char *entryname, const char *entryvalue) +{ + int i; + + if ((cfg == NULL) || (sectionname == NULL) || (entryname == NULL)) + return -EINVAL; + + /* search for section pointer by sectionname */ + struct rte_cfgfile_section *curr_section = _get_section(cfg, + sectionname); + if (curr_section == NULL) + return -EINVAL; + + if (entryvalue == NULL) + entryvalue = ""; + + for (i = 0; i < curr_section->num_entries; i++) + if (!strcmp(curr_section->entries[i].name, entryname)) { + snprintf(curr_section->entries[i].value, + sizeof(curr_section->entries[i].value), + "%s", entryvalue); + return 0; + } + printf("Error - entry name doesn't exist\n"); + return -EINVAL; +} + +int rte_cfgfile_save(struct rte_cfgfile *cfg, const char *filename) { int i, j; + if ((cfg == NULL) || (filename == NULL)) + return -EINVAL; + + FILE *f = fopen(filename, "w"); + + if (f == NULL) + return -EINVAL; + + for (i = 0; i < cfg->num_sections; i++) { + fprintf(f, "[%s]\n", cfg->sections[i].name); + + for (j = 0; j < cfg->sections[i].num_entries; j++) { + fprintf(f, "%s=%s\n", + cfg->sections[i].entries[j].name, + cfg->sections[i].entries[j].value); + } + } + return fclose(f); +} + +int rte_cfgfile_close(struct rte_cfgfile *cfg) +{ + int i; + if (cfg == NULL) return -1; - for (i = 0; i < cfg->num_sections; i++) { - if (cfg->sections[i] != NULL) { - if (cfg->sections[i]->num_entries) { - for (j = 0; j < cfg->sections[i]->num_entries; - j++) { - if (cfg->sections[i]->entries[j] != - NULL) - free(cfg->sections[i]-> - entries[j]); - } + if (cfg->sections != NULL) { + for (i = 0; i < cfg->allocated_sections; i++) { + if (cfg->sections[i].entries != NULL) { + free(cfg->sections[i].entries); + cfg->sections[i].entries = NULL; } - free(cfg->sections[i]); } + free(cfg->sections); + cfg->sections = NULL; } free(cfg); + cfg = NULL; return 0; } @@ -361,7 +484,7 @@ size_t length) int i; int num_sections = 0; for (i = 0; i < cfg->num_sections; i++) { - if (strncmp(cfg->sections[i]->name, sectionname, length) == 0) + if (strncmp(cfg->sections[i].name, sectionname, length) == 0) num_sections++; } return num_sections; @@ -375,23 +498,11 @@ rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[], for (i = 0; i < cfg->num_sections && i < max_sections; i++) snprintf(sections[i], CFG_NAME_LEN, "%s", - cfg->sections[i]->name); + cfg->sections[i].name); return i; } -static const struct rte_cfgfile_section * -_get_section(struct rte_cfgfile *cfg, const char *sectionname) -{ - int i; - for (i = 0; i < cfg->num_sections; i++) { - if (strncmp(cfg->sections[i]->name, sectionname, - sizeof(cfg->sections[0]->name)) == 0) - return cfg->sections[i]; - } - return NULL; -} - int rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname) { @@ -408,7 +519,18 @@ rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg, return s->num_entries; } +int +rte_cfgfile_section_num_entries_by_index(struct rte_cfgfile *cfg, + char *sectionname, int index) +{ + if (index < 0 || index >= cfg->num_sections) + return -1; + const struct rte_cfgfile_section *sect = &(cfg->sections[index]); + + snprintf(sectionname, CFG_NAME_LEN, "%s", sect->name); + return sect->num_entries; +} int rte_cfgfile_section_entries(struct rte_cfgfile *cfg, const char *sectionname, struct rte_cfgfile_entry *entries, int max_entries) @@ -418,7 +540,7 @@ rte_cfgfile_section_entries(struct rte_cfgfile *cfg, const char *sectionname, if (sect == NULL) return -1; for (i = 0; i < max_entries && i < sect->num_entries; i++) - entries[i] 
= *sect->entries[i]; + entries[i] = sect->entries[i]; return i; } @@ -432,11 +554,10 @@ rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, int index, if (index < 0 || index >= cfg->num_sections) return -1; - - sect = cfg->sections[index]; + sect = &cfg->sections[index]; snprintf(sectionname, CFG_NAME_LEN, "%s", sect->name); for (i = 0; i < max_entries && i < sect->num_entries; i++) - entries[i] = *sect->entries[i]; + entries[i] = sect->entries[i]; return i; } @@ -449,9 +570,9 @@ rte_cfgfile_get_entry(struct rte_cfgfile *cfg, const char *sectionname, if (sect == NULL) return NULL; for (i = 0; i < sect->num_entries; i++) - if (strncmp(sect->entries[i]->name, entryname, CFG_NAME_LEN) - == 0) - return sect->entries[i]->value; + if (strncmp(sect->entries[i].name, entryname, CFG_NAME_LEN) + == 0) + return sect->entries[i].value; return NULL; } diff --git a/lib/librte_cfgfile/rte_cfgfile.h b/lib/librte_cfgfile/rte_cfgfile.h index fa10d408..17f72757 100644 --- a/lib/librte_cfgfile/rte_cfgfile.h +++ b/lib/librte_cfgfile/rte_cfgfile.h @@ -121,6 +121,82 @@ struct rte_cfgfile *rte_cfgfile_load_with_params(const char *filename, int flags, const struct rte_cfgfile_parameters *params); /** + * Create new cfgfile instance with empty sections and entries + * + * @param flags + * - CFG_FLAG_GLOBAL_SECTION + * Indicates that the file supports key value entries before the first + * defined section. These entries can be accessed in the "GLOBAL" + * section. + * - CFG_FLAG_EMPTY_VALUES + * Indicates that file supports key value entries where the value can + * be zero length (e.g., "key="). + * @return + * Handle to cfgfile instance on success, NULL otherwise + */ +struct rte_cfgfile *rte_cfgfile_create(int flags); + +/** + * Add section in cfgfile instance. + * + * @param cfg + * Pointer to the cfgfile structure. + * @param sectionname + * Section name which will be add to cfgfile. + * @return + * 0 on success, -ENOMEM if can't add section + */ +int +rte_cfgfile_add_section(struct rte_cfgfile *cfg, const char *sectionname); + +/** + * Add entry to specified section in cfgfile instance. + * + * @param cfg + * Pointer to the cfgfile structure. + * @param sectionname + * Given section name to add an entry. + * @param entryname + * Entry name to add. + * @param entryvalue + * Entry value to add. + * @return + * 0 on success, -EEXIST if entry already exist, -EINVAL if bad argument + */ +int rte_cfgfile_add_entry(struct rte_cfgfile *cfg, + const char *sectionname, const char *entryname, + const char *entryvalue); + +/** + * Update value of specified entry name in given section in config file + * + * @param cfg + * Config file + * @param sectionname + * Section name + * @param entryname + * Entry name to look for the value change + * @param entryvalue + * New entry value. 
Can be also an empty string if CFG_FLAG_EMPTY_VALUES = 1 + * @return + * 0 on success, -EINVAL if bad argument + */ +int rte_cfgfile_set_entry(struct rte_cfgfile *cfg, const char *sectionname, + const char *entryname, const char *entryvalue); + +/** + * Save object cfgfile to file on disc + * + * @param cfg + * Config file structure + * @param filename + * File name to save data + * @return + * 0 on success, errno otherwise + */ +int rte_cfgfile_save(struct rte_cfgfile *cfg, const char *filename); + +/** * Get number of sections in config file * * @param cfg @@ -184,6 +260,26 @@ int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg, const char *sectionname); /** +* Get number of entries in given config file section +* +* The index of a section is the same as the index of its name in the +* result of rte_cfgfile_sections. This API can be used when there are +* multiple sections with the same name. +* +* @param cfg +* Config file +* @param sectionname +* Section name +* @param index +* Section index +* @return +* Number of entries in section on success, -1 otherwise +*/ +int rte_cfgfile_section_num_entries_by_index(struct rte_cfgfile *cfg, + char *sectionname, + int index); + +/** * Get section entries as key-value pairs * * If multiple sections have the given name this function operates on the diff --git a/lib/librte_cfgfile/rte_cfgfile_version.map b/lib/librte_cfgfile/rte_cfgfile_version.map index 5fe60f72..cc4a11f6 100644 --- a/lib/librte_cfgfile/rte_cfgfile_version.map +++ b/lib/librte_cfgfile/rte_cfgfile_version.map @@ -27,3 +27,14 @@ DPDK_17.05 { rte_cfgfile_load_with_params; } DPDK_16.04; + +DPDK_17.11 { + global: + + rte_cfgfile_add_entry; + rte_cfgfile_add_section; + rte_cfgfile_create; + rte_cfgfile_save; + rte_cfgfile_set_entry; + +} DPDK_17.05; diff --git a/lib/librte_cmdline/Makefile b/lib/librte_cmdline/Makefile index 644f68e4..2c48e62b 100644 --- a/lib/librte_cmdline/Makefile +++ b/lib/librte_cmdline/Makefile @@ -54,6 +54,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_socket.c SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_portlist.c CFLAGS += -D_GNU_SOURCE +LDLIBS += -lrte_eal # install includes INCS := cmdline.h cmdline_parse.h cmdline_parse_num.h cmdline_parse_ipaddr.h diff --git a/lib/librte_cmdline/cmdline.c b/lib/librte_cmdline/cmdline.c index a9c47be3..d7491651 100644 --- a/lib/librte_cmdline/cmdline.c +++ b/lib/librte_cmdline/cmdline.c @@ -205,7 +205,8 @@ cmdline_printf(const struct cmdline *cl, const char *fmt, ...) 
} if (ret >= BUFSIZ) ret = BUFSIZ - 1; - write(cl->s_out, buf, ret); + ret = write(cl->s_out, buf, ret); + (void)ret; free(buf); #endif } diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c index 56491eac..3e12ee54 100644 --- a/lib/librte_cmdline/cmdline_parse.c +++ b/lib/librte_cmdline/cmdline_parse.c @@ -163,7 +163,7 @@ static int match_inst(cmdline_parse_inst_t *inst, const char *buf, unsigned int nb_match_token, void *resbuf, unsigned resbuf_size) { - cmdline_parse_token_hdr_t * token_p; + cmdline_parse_token_hdr_t *token_p = NULL; unsigned int i=0; int n = 0; struct cmdline_token_hdr token_hdr; diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile index 6ac331bc..8e780b83 100644 --- a/lib/librte_cryptodev/Makefile +++ b/lib/librte_cryptodev/Makefile @@ -34,11 +34,13 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_cryptodev.a # library version -LIBABIVER := 3 +LIBABIVER := 4 # build flags CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_eal -lrte_mempool -lrte_ring -lrte_mbuf +LDLIBS += -lrte_kvargs # library source files SRCS-y += rte_cryptodev.c rte_cryptodev_pmd.c @@ -48,8 +50,6 @@ SYMLINK-y-include += rte_crypto.h SYMLINK-y-include += rte_crypto_sym.h SYMLINK-y-include += rte_cryptodev.h SYMLINK-y-include += rte_cryptodev_pmd.h -SYMLINK-y-include += rte_cryptodev_vdev.h -SYMLINK-y-include += rte_cryptodev_pci.h # versioning export map EXPORT_MAP := rte_cryptodev_version.map diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h index 10fe0804..3d672fe7 100644 --- a/lib/librte_cryptodev/rte_crypto.h +++ b/lib/librte_cryptodev/rte_crypto.h @@ -86,7 +86,8 @@ enum rte_crypto_op_status { */ enum rte_crypto_op_sess_type { RTE_CRYPTO_OP_WITH_SESSION, /**< Session based crypto operation */ - RTE_CRYPTO_OP_SESSIONLESS /**< Session-less crypto operation */ + RTE_CRYPTO_OP_SESSIONLESS, /**< Session-less crypto operation */ + RTE_CRYPTO_OP_SECURITY_SESSION /**< Security session crypto operation */ }; /** @@ -117,7 +118,7 @@ struct rte_crypto_op { struct rte_mempool *mempool; /**< crypto operation mempool which operation is allocated from */ - phys_addr_t phys_addr; + rte_iova_t phys_addr; /**< physical address of crypto operation */ RTE_STD_C11 @@ -144,6 +145,7 @@ __rte_crypto_op_reset(struct rte_crypto_op *op, enum rte_crypto_op_type type) case RTE_CRYPTO_OP_TYPE_SYMMETRIC: __rte_crypto_sym_op_reset(op->sym); break; + case RTE_CRYPTO_OP_TYPE_UNDEFINED: default: break; } diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h index 0ceaa917..c981f0b9 100644 --- a/lib/librte_cryptodev/rte_crypto_sym.h +++ b/lib/librte_cryptodev/rte_crypto_sym.h @@ -160,9 +160,6 @@ struct rte_crypto_cipher_xform { * Cipher key length is in bytes. For AES it can be 128 bits (16 bytes), * 192 bits (24 bytes) or 256 bits (32 bytes). * - * For the CCM mode of operation, the only supported key length is 128 - * bits (16 bytes). - * * For the RTE_CRYPTO_CIPHER_AES_F8 mode of operation, key.length * should be set to the combined length of the encryption key and the * keymask. Since the keymask and the encryption key are the same size, @@ -196,7 +193,9 @@ struct rte_crypto_cipher_xform { * space for the implementation to write in the flags * in the first byte). Note that a full 16 bytes should * be allocated, even though the length field will - * have a value less than this. + * have a value less than this. 
Note that the PMDs may + * modify the memory reserved (the first byte and the + * final padding) * * - For AES-XTS, this is the 128bit tweak, i, from * IEEE Std 1619-2007. @@ -427,7 +426,11 @@ struct rte_crypto_aead_xform { uint16_t digest_length; uint16_t aad_length; - /**< The length of the additional authenticated data (AAD) in bytes. */ + /**< The length of the additional authenticated data (AAD) in bytes. + * For CCM mode, this is the length of the actual AAD, even though + * it is required to reserve 18 bytes before the AAD and padding + * at the end of it, so a multiple of 16 bytes is allocated. + */ }; /** Crypto transformation types */ @@ -505,6 +508,8 @@ struct rte_crypto_sym_op { /**< Handle for the initialised session context */ struct rte_crypto_sym_xform *xform; /**< Session-less API crypto operation parameters */ + struct rte_security_session *sec_session; + /**< Handle for the initialised security session context */ }; RTE_STD_C11 @@ -543,7 +548,7 @@ struct rte_crypto_sym_op { * For GCM (@ref RTE_CRYPTO_AEAD_AES_GCM), for * "digest result" read "authentication tag T". */ - phys_addr_t phys_addr; + rte_iova_t phys_addr; /**< Physical address of digest */ } digest; /**< Digest parameters */ struct { @@ -555,20 +560,19 @@ struct rte_crypto_sym_op { * Specifically for CCM (@ref RTE_CRYPTO_AEAD_AES_CCM), * the caller should setup this field as follows: * - * - the nonce should be written starting at an offset - * of one byte into the array, leaving room for the - * implementation to write in the flags to the first - * byte. - * - * - the additional authentication data itself should + * - the additional authentication data itself should * be written starting at an offset of 18 bytes into - * the array, leaving room for the length encoding in - * the first two bytes of the second block. + * the array, leaving room for the first block (16 bytes) + * and the length encoding in the first two bytes of the + * second block. * * - the array should be big enough to hold the above - * fields, plus any padding to round this up to the - * nearest multiple of the block size (16 bytes). - * Padding will be added by the implementation. + * fields, plus any padding to round this up to the + * nearest multiple of the block size (16 bytes). + * Padding will be added by the implementation. + * + * - Note that PMDs may modify the memory reserved + * (first 18 bytes and the final padding). * * Finally, for GCM (@ref RTE_CRYPTO_AEAD_AES_GCM), the * caller should setup this field as follows: @@ -579,7 +583,7 @@ struct rte_crypto_sym_op { * of the block size (16 bytes). * */ - phys_addr_t phys_addr; /**< physical address */ + rte_iova_t phys_addr; /**< physical address */ } aad; /**< Additional authentication parameters */ } aead; @@ -676,7 +680,7 @@ struct rte_crypto_sym_op { * will overwrite any data at this location. 
* */ - phys_addr_t phys_addr; + rte_iova_t phys_addr; /**< Physical address of digest */ } digest; /**< Digest parameters */ } auth; diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c index 327d7e84..b40c0282 100644 --- a/lib/librte_cryptodev/rte_cryptodev.c +++ b/lib/librte_cryptodev/rte_cryptodev.c @@ -377,12 +377,6 @@ rte_cryptodev_get_feature_name(uint64_t flag) } } -int -rte_cryptodev_create_vdev(const char *name, const char *args) -{ - return rte_vdev_init(name, args); -} - struct rte_cryptodev * rte_cryptodev_pmd_get_dev(uint8_t dev_id) { @@ -488,6 +482,16 @@ rte_cryptodev_devices_get(const char *driver_name, uint8_t *devices, return count; } +void * +rte_cryptodev_get_sec_ctx(uint8_t dev_id) +{ + if (rte_crypto_devices[dev_id].feature_flags & + RTE_CRYPTODEV_FF_SECURITY) + return rte_crypto_devices[dev_id].security_ctx; + + return NULL; +} + int rte_cryptodev_socket_id(uint8_t dev_id) { @@ -583,6 +587,9 @@ rte_cryptodev_pmd_allocate(const char *name, int socket_id) cryptodev->data->socket_id = socket_id; cryptodev->data->dev_started = 0; + /* init user callbacks */ + TAILQ_INIT(&(cryptodev->link_intr_cbs)); + cryptodev->attached = RTE_CRYPTODEV_ATTACHED; cryptodev_globals.nb_devs++; @@ -1271,7 +1278,7 @@ rte_crypto_op_init(struct rte_mempool *mempool, __rte_crypto_op_reset(op, type); - op->phys_addr = rte_mem_virt2phy(_op_data); + op->phys_addr = rte_mem_virt2iova(_op_data); op->mempool = mempool; } @@ -1362,12 +1369,6 @@ TAILQ_HEAD(cryptodev_driver_list, cryptodev_driver); static struct cryptodev_driver_list cryptodev_driver_list = TAILQ_HEAD_INITIALIZER(cryptodev_driver_list); -struct cryptodev_driver { - TAILQ_ENTRY(cryptodev_driver) next; /**< Next in list. */ - const struct rte_driver *driver; - uint8_t id; -}; - int rte_cryptodev_driver_id_get(const char *name) { @@ -1388,6 +1389,17 @@ rte_cryptodev_driver_id_get(const char *name) } const char * +rte_cryptodev_name_get(uint8_t dev_id) +{ + struct rte_cryptodev *dev = rte_cryptodev_pmd_get_dev(dev_id); + + if (dev == NULL) + return NULL; + + return dev->data->name; +} + +const char * rte_cryptodev_driver_name_get(uint8_t driver_id) { struct cryptodev_driver *driver; @@ -1399,15 +1411,13 @@ rte_cryptodev_driver_name_get(uint8_t driver_id) } uint8_t -rte_cryptodev_allocate_driver(const struct rte_driver *drv) +rte_cryptodev_allocate_driver(struct cryptodev_driver *crypto_drv, + const struct rte_driver *drv) { - struct cryptodev_driver *driver; - - driver = malloc(sizeof(*driver)); - driver->driver = drv; - driver->id = nb_drivers; + crypto_drv->driver = drv; + crypto_drv->id = nb_drivers; - TAILQ_INSERT_TAIL(&cryptodev_driver_list, driver, next); + TAILQ_INSERT_TAIL(&cryptodev_driver_list, crypto_drv, next); return nb_drivers++; } diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h index 7ec9c4bc..dade5548 100644 --- a/lib/librte_cryptodev/rte_cryptodev.h +++ b/lib/librte_cryptodev/rte_cryptodev.h @@ -49,7 +49,6 @@ extern "C" { #include "rte_crypto.h" #include "rte_dev.h" #include <rte_common.h> -#include <rte_vdev.h> extern const char **rte_cyptodev_names; @@ -60,10 +59,10 @@ extern const char **rte_cyptodev_names; RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,))) -#define CDEV_PMD_LOG_ERR(dev, ...) \ - RTE_LOG(ERR, CRYPTODEV, \ - RTE_FMT("[%s] %s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ - dev, __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,))) +#define CDEV_LOG_INFO(...) 
\ + RTE_LOG(INFO, CRYPTODEV, \ + RTE_FMT(RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ + RTE_FMT_TAIL(__VA_ARGS__,))) #ifdef RTE_LIBRTE_CRYPTODEV_DEBUG #define CDEV_LOG_DEBUG(...) \ @@ -111,7 +110,7 @@ extern const char **rte_cyptodev_names; * to calculate address from. */ #define rte_crypto_op_ctophys_offset(c, o) \ - (phys_addr_t)((c)->phys_addr + (o)) + (rte_iova_t)((c)->phys_addr + (o)) /** * Crypto parameters range description @@ -351,6 +350,8 @@ rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum, /**< Utilises CPU NEON instructions */ #define RTE_CRYPTODEV_FF_CPU_ARM_CE (1ULL << 11) /**< Utilises ARM CPU Cryptographic Extensions */ +#define RTE_CRYPTODEV_FF_SECURITY (1ULL << 12) +/**< Support Security Protocol Processing */ /** @@ -434,33 +435,29 @@ struct rte_cryptodev_stats { /**< Max length of name of crypto PMD */ /** - * @deprecated - * - * Create a virtual crypto device + * Get the device identifier for the named crypto device. * - * @param name Cryptodev PMD name of device to be created. - * @param args Options arguments for device. + * @param name device name to select the device structure. * * @return - * - On successful creation of the cryptodev the device index is returned, - * which will be between 0 and rte_cryptodev_count(). - * - In the case of a failure, returns -1. + * - Returns crypto device identifier on success. + * - Return -1 on failure to find named crypto device. */ -__rte_deprecated extern int -rte_cryptodev_create_vdev(const char *name, const char *args); +rte_cryptodev_get_dev_id(const char *name); /** - * Get the device identifier for the named crypto device. + * Get the crypto device name given a device identifier. * - * @param name device name to select the device structure. + * @param dev_id + * The identifier of the device * * @return - * - Returns crypto device identifier on success. - * - Return -1 on failure to find named crypto device. + * - Returns crypto device name. + * - Returns NULL if crypto device is not present. */ -extern int -rte_cryptodev_get_dev_id(const char *name); +extern const char * +rte_cryptodev_name_get(uint8_t dev_id); /** * Get the total number of crypto devices that have been successfully @@ -676,6 +673,11 @@ rte_cryptodev_stats_reset(uint8_t dev_id); * @param dev_info A pointer to a structure of type * *rte_cryptodev_info* to be filled with the * contextual information of the device. + * + * @note The capabilities field of dev_info is set to point to the first + * element of an array of struct rte_cryptodev_capabilities. The element after + * the last valid element has it's op field set to + * RTE_CRYPTO_OP_TYPE_UNDEFINED. */ extern void rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info); @@ -756,11 +758,17 @@ struct rte_cryptodev { struct rte_cryptodev_cb_list link_intr_cbs; /**< User application callback for interrupts if present */ + void *security_ctx; + /**< Context for security ops */ + __extension__ uint8_t attached : 1; /**< Flag indicating the device is attached */ } __rte_cache_aligned; +void * +rte_cryptodev_get_sec_ctx(uint8_t dev_id); + /** * * The data part, with no function pointers, associated with each device. @@ -1025,26 +1033,6 @@ int rte_cryptodev_driver_id_get(const char *name); */ const char *rte_cryptodev_driver_name_get(uint8_t driver_id); -/** - * @internal - * Allocate Cryptodev driver. - * - * @param driver - * Pointer to rte_driver. 
- * @return - * The driver type identifier - */ -uint8_t rte_cryptodev_allocate_driver(const struct rte_driver *driver); - - -#define RTE_PMD_REGISTER_CRYPTO_DRIVER(drv, driver_id)\ -RTE_INIT(init_ ##driver_id);\ -static void init_ ##driver_id(void)\ -{\ - driver_id = rte_cryptodev_allocate_driver(&(drv).driver);\ -} - - #ifdef __cplusplus } #endif diff --git a/lib/librte_cryptodev/rte_cryptodev_pci.h b/lib/librte_cryptodev/rte_cryptodev_pci.h deleted file mode 100644 index 67eda96a..00000000 --- a/lib/librte_cryptodev/rte_cryptodev_pci.h +++ /dev/null @@ -1,92 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2017 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_CRYPTODEV_PCI_H_ -#define _RTE_CRYPTODEV_PCI_H_ - -#include <rte_pci.h> -#include "rte_cryptodev.h" - -/** - * Initialisation function of a crypto driver invoked for each matching - * crypto PCI device detected during the PCI probing phase. - * - * @param dev The dev pointer is the address of the *rte_cryptodev* - * structure associated with the matching device and which - * has been [automatically] allocated in the - * *rte_crypto_devices* array. - * - * @return - * - 0: Success, the device is properly initialised by the driver. - * In particular, the driver MUST have set up the *dev_ops* pointer - * of the *dev* structure. - * - <0: Error code of the device initialisation failure. - */ -typedef int (*cryptodev_pci_init_t)(struct rte_cryptodev *dev); - -/** - * Finalisation function of a driver invoked for each matching - * PCI device detected during the PCI closing phase. - * - * @param dev The dev pointer is the address of the *rte_cryptodev* - * structure associated with the matching device and which - * has been [automatically] allocated in the - * *rte_crypto_devices* array. - * - * * @return - * - 0: Success, the device is properly finalised by the driver. - * In particular, the driver MUST free the *dev_ops* pointer - * of the *dev* structure. - * - <0: Error code of the device initialisation failure. 
- */ -typedef int (*cryptodev_pci_uninit_t)(struct rte_cryptodev *dev); - -/** - * @internal - * Wrapper for use by pci drivers as a .probe function to attach to a crypto - * interface. - */ -int -rte_cryptodev_pci_generic_probe(struct rte_pci_device *pci_dev, - size_t private_data_size, - cryptodev_pci_init_t dev_init); - -/** - * @internal - * Wrapper for use by pci drivers as a .remove function to detach a crypto - * interface. - */ -int -rte_cryptodev_pci_generic_remove(struct rte_pci_device *pci_dev, - cryptodev_pci_uninit_t dev_uninit); - -#endif /* _RTE_CRYPTODEV_PCI_H_ */ diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.c b/lib/librte_cryptodev/rte_cryptodev_pmd.c index a57faadc..b4eeb448 100644 --- a/lib/librte_cryptodev/rte_cryptodev_pmd.c +++ b/lib/librte_cryptodev/rte_cryptodev_pmd.c @@ -32,84 +32,48 @@ #include <rte_malloc.h> -#include "rte_cryptodev_vdev.h" -#include "rte_cryptodev_pci.h" #include "rte_cryptodev_pmd.h" /** * Parse name from argument */ static int -rte_cryptodev_vdev_parse_name_arg(const char *key __rte_unused, +rte_cryptodev_pmd_parse_name_arg(const char *key __rte_unused, const char *value, void *extra_args) { - struct rte_crypto_vdev_init_params *params = extra_args; + struct rte_cryptodev_pmd_init_params *params = extra_args; + int n; - if (strlen(value) >= RTE_CRYPTODEV_NAME_MAX_LEN - 1) { - CDEV_LOG_ERR("Invalid name %s, should be less than " - "%u bytes", value, - RTE_CRYPTODEV_NAME_MAX_LEN - 1); - return -1; - } - - strncpy(params->name, value, RTE_CRYPTODEV_NAME_MAX_LEN); + n = snprintf(params->name, RTE_CRYPTODEV_NAME_MAX_LEN, "%s", value); + if (n >= RTE_CRYPTODEV_NAME_MAX_LEN) + return -EINVAL; return 0; } /** - * Parse integer from argument + * Parse unsigned integer from argument */ static int -rte_cryptodev_vdev_parse_integer_arg(const char *key __rte_unused, +rte_cryptodev_pmd_parse_uint_arg(const char *key __rte_unused, const char *value, void *extra_args) { - int *i = extra_args; + int i; + char *end; + errno = 0; - *i = atoi(value); - if (*i < 0) { - CDEV_LOG_ERR("Argument has to be positive."); - return -1; - } + i = strtol(value, &end, 10); + if (*end != 0 || errno != 0 || i < 0) + return -EINVAL; + *((uint32_t *)extra_args) = i; return 0; } -struct rte_cryptodev * -rte_cryptodev_vdev_pmd_init(const char *name, size_t dev_private_size, - int socket_id, struct rte_vdev_device *vdev) -{ - struct rte_cryptodev *cryptodev; - - /* allocate device structure */ - cryptodev = rte_cryptodev_pmd_allocate(name, socket_id); - if (cryptodev == NULL) - return NULL; - - /* allocate private device structure */ - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - cryptodev->data->dev_private = - rte_zmalloc_socket("cryptodev device private", - dev_private_size, - RTE_CACHE_LINE_SIZE, - socket_id); - - if (cryptodev->data->dev_private == NULL) - rte_panic("Cannot allocate memzone for private device" - " data"); - } - - cryptodev->device = &vdev->device; - - /* initialise user call-back tail queue */ - TAILQ_INIT(&(cryptodev->link_intr_cbs)); - - return cryptodev; -} - int -rte_cryptodev_vdev_parse_init_params(struct rte_crypto_vdev_init_params *params, - const char *input_args) +rte_cryptodev_pmd_parse_input_args( + struct rte_cryptodev_pmd_init_params *params, + const char *args) { struct rte_kvargs *kvlist = NULL; int ret = 0; @@ -117,35 +81,36 @@ rte_cryptodev_vdev_parse_init_params(struct rte_crypto_vdev_init_params *params, if (params == NULL) return -EINVAL; - if (input_args) { - kvlist = rte_kvargs_parse(input_args, - 
cryptodev_vdev_valid_params); + if (args) { + kvlist = rte_kvargs_parse(args, cryptodev_pmd_valid_params); if (kvlist == NULL) - return -1; + return -EINVAL; ret = rte_kvargs_process(kvlist, - RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG, - &rte_cryptodev_vdev_parse_integer_arg, - &params->max_nb_queue_pairs); + RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG, + &rte_cryptodev_pmd_parse_uint_arg, + &params->max_nb_queue_pairs); if (ret < 0) goto free_kvlist; ret = rte_kvargs_process(kvlist, - RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG, - &rte_cryptodev_vdev_parse_integer_arg, - &params->max_nb_sessions); + RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG, + &rte_cryptodev_pmd_parse_uint_arg, + &params->max_nb_sessions); if (ret < 0) goto free_kvlist; - ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_SOCKET_ID, - &rte_cryptodev_vdev_parse_integer_arg, - &params->socket_id); + ret = rte_kvargs_process(kvlist, + RTE_CRYPTODEV_PMD_SOCKET_ID_ARG, + &rte_cryptodev_pmd_parse_uint_arg, + &params->socket_id); if (ret < 0) goto free_kvlist; - ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_NAME, - &rte_cryptodev_vdev_parse_name_arg, - params); + ret = rte_kvargs_process(kvlist, + RTE_CRYPTODEV_PMD_NAME_ARG, + &rte_cryptodev_pmd_parse_name_arg, + params); if (ret < 0) goto free_kvlist; } @@ -155,93 +120,80 @@ free_kvlist: return ret; } -int -rte_cryptodev_pci_generic_probe(struct rte_pci_device *pci_dev, - size_t private_data_size, - cryptodev_pci_init_t dev_init) +struct rte_cryptodev * +rte_cryptodev_pmd_create(const char *name, + struct rte_device *device, + struct rte_cryptodev_pmd_init_params *params) { struct rte_cryptodev *cryptodev; - char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN]; + if (params->name[0] != '\0') { + CDEV_LOG_INFO("[%s] User specified device name = %s\n", + device->driver->name, params->name); + name = params->name; + } - int retval; + CDEV_LOG_INFO("[%s] - Creating cryptodev %s\n", + device->driver->name, name); - rte_pci_device_name(&pci_dev->addr, cryptodev_name, - sizeof(cryptodev_name)); + CDEV_LOG_INFO("[%s] - Initialisation parameters - name: %s," + "socket id: %d, max queue pairs: %u, max sessions: %u", + device->driver->name, name, + params->socket_id, params->max_nb_queue_pairs, + params->max_nb_sessions); - cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id()); - if (cryptodev == NULL) - return -ENOMEM; + /* allocate device structure */ + cryptodev = rte_cryptodev_pmd_allocate(name, params->socket_id); + if (cryptodev == NULL) { + CDEV_LOG_ERR("[%s] Failed to allocate crypto device for %s", + device->driver->name, name); + return NULL; + } + /* allocate private device structure */ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { cryptodev->data->dev_private = - rte_zmalloc_socket( - "cryptodev private structure", - private_data_size, + rte_zmalloc_socket("cryptodev device private", + params->private_data_size, RTE_CACHE_LINE_SIZE, - rte_socket_id()); + params->socket_id); + + if (cryptodev->data->dev_private == NULL) { + CDEV_LOG_ERR("[%s] Cannot allocate memory for " + "cryptodev %s private data", + device->driver->name, name); - if (cryptodev->data->dev_private == NULL) - rte_panic("Cannot allocate memzone for private " - "device data"); + rte_cryptodev_pmd_release_device(cryptodev); + return NULL; + } } - cryptodev->device = &pci_dev->device; + cryptodev->device = device; - /* init user callbacks */ + /* initialise user call-back tail queue */ TAILQ_INIT(&(cryptodev->link_intr_cbs)); - /* Invoke PMD device initialization function */ - RTE_FUNC_PTR_OR_ERR_RET(*dev_init, -EINVAL); - retval =
dev_init(cryptodev); - if (retval == 0) - return 0; - - CDEV_LOG_ERR("driver %s: crypto_dev_init(vendor_id=0x%x device_id=0x%x)" - " failed", pci_dev->device.driver->name, - (unsigned int) pci_dev->id.vendor_id, - (unsigned int) pci_dev->id.device_id); - - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - rte_free(cryptodev->data->dev_private); - - /* free crypto device */ - rte_cryptodev_pmd_release_device(cryptodev); - - return -ENXIO; + return cryptodev; } int -rte_cryptodev_pci_generic_remove(struct rte_pci_device *pci_dev, - cryptodev_pci_uninit_t dev_uninit) +rte_cryptodev_pmd_destroy(struct rte_cryptodev *cryptodev) { - struct rte_cryptodev *cryptodev; - char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN]; - int ret; - - if (pci_dev == NULL) - return -EINVAL; - - rte_pci_device_name(&pci_dev->addr, cryptodev_name, - sizeof(cryptodev_name)); - - cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name); - if (cryptodev == NULL) - return -ENODEV; + int retval; - /* Invoke PMD device uninit function */ - if (dev_uninit) { - ret = dev_uninit(cryptodev); - if (ret) - return ret; - } + CDEV_LOG_INFO("[%s] Closing crypto device %s", + cryptodev->device->driver->name, + cryptodev->device->name); /* free crypto device */ - rte_cryptodev_pmd_release_device(cryptodev); + retval = rte_cryptodev_pmd_release_device(cryptodev); + if (retval) + return retval; if (rte_eal_process_type() == RTE_PROC_PRIMARY) rte_free(cryptodev->data->dev_private); + cryptodev->device = NULL; cryptodev->data = NULL; diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h index c983eb21..744405e2 100644 --- a/lib/librte_cryptodev/rte_cryptodev_pmd.h +++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h @@ -56,6 +56,35 @@ extern "C" { #include "rte_crypto.h" #include "rte_cryptodev.h" + +#define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS 8 +#define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_SESSIONS 2048 + +#define RTE_CRYPTODEV_PMD_NAME_ARG ("name") +#define RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG ("max_nb_queue_pairs") +#define RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG ("max_nb_sessions") +#define RTE_CRYPTODEV_PMD_SOCKET_ID_ARG ("socket_id") + + +static const char * const cryptodev_pmd_valid_params[] = { + RTE_CRYPTODEV_PMD_NAME_ARG, + RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG, + RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG, + RTE_CRYPTODEV_PMD_SOCKET_ID_ARG +}; + +/** + * @internal + * Initialisation parameters for crypto devices + */ +struct rte_cryptodev_pmd_init_params { + char name[RTE_CRYPTODEV_NAME_MAX_LEN]; + size_t private_data_size; + int socket_id; + unsigned int max_nb_queue_pairs; + unsigned int max_nb_sessions; +}; + /** Global structure used for maintaining state of allocated crypto devices */ struct rte_cryptodev_global { struct rte_cryptodev *devs; /**< Device information array */ @@ -65,6 +94,13 @@ struct rte_cryptodev_global { uint8_t max_devs; /**< Max number of devices */ }; +/* Cryptodev driver, containing the driver ID */ +struct cryptodev_driver { + TAILQ_ENTRY(cryptodev_driver) next; /**< Next in list. */ + const struct rte_driver *driver; + uint8_t id; +}; + /** pointer to global crypto devices data structure. */ extern struct rte_cryptodev_global *rte_cryptodev_globals; @@ -385,6 +421,63 @@ rte_cryptodev_pmd_allocate(const char *name, int socket_id); extern int rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev); + +/** + * @internal + * + * PMD assist function to parse initialisation arguments for crypto driver + * when creating a new crypto PMD device instance. 
+ * + * PMD driver should set default values for that PMD before calling function, + * these default values will be over-written with successfully parsed values + * from args string. + * + * @param params parsed PMD initialisation parameters + * @param args input argument string to parse + * + * @return + * - 0 on success + * - errno on failure + */ +int +rte_cryptodev_pmd_parse_input_args( + struct rte_cryptodev_pmd_init_params *params, + const char *args); + +/** + * @internal + * + * PMD assist function to provide boiler plate code for crypto driver to create + * and allocate resources for a new crypto PMD device instance. + * + * @param name crypto device name. + * @param device base device instance + * @param params PMD initialisation parameters + * + * @return + * - crypto device instance on success + * - NULL on creation failure + */ +struct rte_cryptodev * +rte_cryptodev_pmd_create(const char *name, + struct rte_device *device, + struct rte_cryptodev_pmd_init_params *params); + +/** + * @internal + * + * PMD assist function to provide boiler plate code for crypto driver to + * destroy and free resources associated with a crypto PMD device instance. + * + * @param cryptodev crypto device handle. + * + * @return + * - 0 on success + * - errno on failure + */ +int +rte_cryptodev_pmd_destroy(struct rte_cryptodev *cryptodev); + /** * Executes all the user application registered callbacks for the specific * device. @@ -405,6 +498,29 @@ void rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev, int rte_cryptodev_pmd_create_dev_name(char *name, const char *dev_name_prefix); +/** + * @internal + * Allocate Cryptodev driver. + * + * @param crypto_drv + * Pointer to cryptodev_driver. + * @param drv + * Pointer to rte_driver. + * + * @return + * The driver type identifier + */ +uint8_t rte_cryptodev_allocate_driver(struct cryptodev_driver *crypto_drv, + const struct rte_driver *drv); + + +#define RTE_PMD_REGISTER_CRYPTO_DRIVER(crypto_drv, drv, driver_id)\ +RTE_INIT(init_ ##driver_id);\ +static void init_ ##driver_id(void)\ +{\ + driver_id = rte_cryptodev_allocate_driver(&crypto_drv, &(drv).driver);\ +} + static inline void * get_session_private_data(const struct rte_cryptodev_sym_session *sess, uint8_t driver_id) { diff --git a/lib/librte_cryptodev/rte_cryptodev_vdev.h b/lib/librte_cryptodev/rte_cryptodev_vdev.h deleted file mode 100644 index 94ab9d33..00000000 --- a/lib/librte_cryptodev/rte_cryptodev_vdev.h +++ /dev/null @@ -1,100 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2017 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_CRYPTODEV_VDEV_H_ -#define _RTE_CRYPTODEV_VDEV_H_ - -#include <rte_vdev.h> -#include <inttypes.h> - -#include "rte_cryptodev.h" - -#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_QUEUE_PAIRS 8 -#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_SESSIONS 2048 - -#define RTE_CRYPTODEV_VDEV_NAME ("name") -#define RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG ("max_nb_queue_pairs") -#define RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG ("max_nb_sessions") -#define RTE_CRYPTODEV_VDEV_SOCKET_ID ("socket_id") - -static const char * const cryptodev_vdev_valid_params[] = { - RTE_CRYPTODEV_VDEV_NAME, - RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG, - RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG, - RTE_CRYPTODEV_VDEV_SOCKET_ID -}; - -/** - * @internal - * Initialisation parameters for virtual crypto devices - */ -struct rte_crypto_vdev_init_params { - unsigned int max_nb_queue_pairs; - unsigned int max_nb_sessions; - uint8_t socket_id; - char name[RTE_CRYPTODEV_NAME_MAX_LEN]; -}; - -/** - * @internal - * Creates a new virtual crypto device and returns the pointer - * to that device. - * - * @param name PMD type name - * @param dev_private_size Size of crypto PMDs private data - * @param socket_id Socket to allocate resources on. - * @param vdev Pointer to virtual device structure. - * - * @return - * - Cryptodev pointer if device is successfully created. - * - NULL if device cannot be created. - */ -struct rte_cryptodev * -rte_cryptodev_vdev_pmd_init(const char *name, size_t dev_private_size, - int socket_id, struct rte_vdev_device *vdev); - -/** - * @internal - * Parse virtual device initialisation parameters input arguments - * - * @params params Initialisation parameters with defaults set. 
- * @params input_args Command line arguments - * - * @return - * 0 on successful parse - * <0 on failure to parse - */ -int -rte_cryptodev_vdev_parse_init_params(struct rte_crypto_vdev_init_params *params, - const char *input_args); - -#endif /* _RTE_CRYPTODEV_VDEV_H_ */ diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map index e9ba88ac..eb47308b 100644 --- a/lib/librte_cryptodev/rte_cryptodev_version.map +++ b/lib/librte_cryptodev/rte_cryptodev_version.map @@ -7,7 +7,6 @@ DPDK_16.04 { rte_cryptodev_close; rte_cryptodev_count; rte_cryptodev_configure; - rte_cryptodev_create_vdev; rte_cryptodev_get_dev_id; rte_cryptodev_get_feature_name; rte_cryptodev_info_get; @@ -68,14 +67,21 @@ DPDK_17.08 { rte_cryptodev_get_aead_algo_enum; rte_cryptodev_get_header_session_size; rte_cryptodev_get_private_session_size; - rte_cryptodev_pci_generic_probe; - rte_cryptodev_pci_generic_remove; rte_cryptodev_sym_capability_check_aead; rte_cryptodev_sym_session_init; rte_cryptodev_sym_session_clear; - rte_cryptodev_vdev_parse_init_params; - rte_cryptodev_vdev_pmd_init; rte_crypto_aead_algorithm_strings; rte_crypto_aead_operation_strings; } DPDK_17.05; + +DPDK_17.11 { + global: + + rte_cryptodev_get_sec_ctx; + rte_cryptodev_name_get; + rte_cryptodev_pmd_create; + rte_cryptodev_pmd_destroy; + rte_cryptodev_pmd_parse_input_args; + +} DPDK_17.08; diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile index b417ee7b..fee00121 100644 --- a/lib/librte_distributor/Makefile +++ b/lib/librte_distributor/Makefile @@ -36,6 +36,7 @@ LIB = librte_distributor.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +LDLIBS += -lrte_eal -lrte_mbuf -lrte_ethdev EXPORT_MAP := rte_distributor_version.map diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c index 20ba9ffb..57ad3397 100644 --- a/lib/librte_distributor/rte_distributor.c +++ b/lib/librte_distributor/rte_distributor.c @@ -432,7 +432,7 @@ rte_distributor_process_v1705(struct rte_distributor *d, next_value = (((int64_t)(uintptr_t)next_mb) << RTE_DISTRIB_FLAG_BITS); /* - * User is advocated to set tag vaue for each + * User is advocated to set tag value for each * mbuf before calling rte_distributor_process. * User defined tags are used to identify flows, * or sessions. diff --git a/lib/librte_distributor/rte_distributor_v20.c b/lib/librte_distributor/rte_distributor_v20.c index b09abecd..9adda52b 100644 --- a/lib/librte_distributor/rte_distributor_v20.c +++ b/lib/librte_distributor/rte_distributor_v20.c @@ -244,7 +244,7 @@ rte_distributor_process_v20(struct rte_distributor_v20 *d, next_value = (((int64_t)(uintptr_t)next_mb) << RTE_DISTRIB_FLAG_BITS); /* - * User is advocated to set tag vaue for each + * User is advocated to set tag value for each * mbuf before calling rte_distributor_process. * User defined tags are used to identify flows, * or sessions. 
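Editorial aside, not part of the patch: the rte_cryptodev_vdev.h helpers deleted above are superseded by the bus-agnostic rte_cryptodev_pmd_parse_input_args()/_create()/_destroy() assists and the RTE_PMD_REGISTER_CRYPTO_DRIVER macro that the DPDK_17.11 node of the version map exports. As a rough illustration of how a virtual crypto PMD's probe/remove path could use them, consider the sketch below; every "mycrypto_" name is invented, the header location and the rte_cryptodev_pmd_init_params field names are assumptions based on this release's rte_cryptodev_pmd.h, and real PMDs fill in a full op table.

#include <errno.h>
#include <rte_lcore.h>
#include <rte_bus_vdev.h>        /* assumed 17.11 location of the vdev bus header */
#include <rte_cryptodev_pmd.h>

/* Illustrative per-device private data and (empty) op table. */
struct mycrypto_private { unsigned int dummy; };
static struct rte_cryptodev_ops mycrypto_ops;

static uint8_t mycrypto_driver_id;   /* filled in by RTE_PMD_REGISTER_CRYPTO_DRIVER */

static int
mycrypto_probe(struct rte_vdev_device *vdev)
{
	/* Defaults set here are overwritten by anything successfully parsed
	 * from the --vdev argument string, as documented above. Field names
	 * are assumed from the 17.11 header. */
	struct rte_cryptodev_pmd_init_params params = {
		.private_data_size = sizeof(struct mycrypto_private),
		.socket_id = rte_socket_id(),
		.max_nb_queue_pairs = 8,
		.max_nb_sessions = 2048,
	};
	struct rte_cryptodev *dev;

	if (rte_cryptodev_pmd_parse_input_args(&params,
			rte_vdev_device_args(vdev)) < 0)
		return -EINVAL;

	dev = rte_cryptodev_pmd_create(rte_vdev_device_name(vdev),
			&vdev->device, &params);
	if (dev == NULL)
		return -ENODEV;

	dev->driver_id = mycrypto_driver_id;
	dev->dev_ops = &mycrypto_ops;
	return 0;
}

static int
mycrypto_remove(struct rte_vdev_device *vdev)
{
	struct rte_cryptodev *dev =
		rte_cryptodev_pmd_get_named_dev(rte_vdev_device_name(vdev));

	return dev != NULL ? rte_cryptodev_pmd_destroy(dev) : -ENODEV;
}

static struct rte_vdev_driver mycrypto_drv = {
	.probe = mycrypto_probe,
	.remove = mycrypto_remove,
};
static struct cryptodev_driver mycrypto_crypto_drv;

RTE_PMD_REGISTER_VDEV(crypto_mycrypto, mycrypto_drv);
RTE_PMD_REGISTER_CRYPTO_DRIVER(mycrypto_crypto_drv, mycrypto_drv,
		mycrypto_driver_id);

The registration macro simply wraps RTE_INIT() around rte_cryptodev_allocate_driver(), so the driver id is assigned at constructor time, before any probe runs.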
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index 005019ed..afa117de 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -46,16 +46,15 @@ LDLIBS += -lexecinfo LDLIBS += -lpthread LDLIBS += -lgcc_s -EXPORT_MAP := rte_eal_version.map +EXPORT_MAP := ../../rte_eal_version.map -LIBABIVER := 5 +LIBABIVER := 6 # specific to bsdapp exec-env SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c -SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_pci.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c @@ -68,9 +67,6 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c -SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_vdev.c -SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci.c -SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci_uio.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_tailqs.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_errno.c @@ -92,6 +88,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c SRCS-$(CONFIG_RTE_ARCH_X86) += rte_spinlock.c +SRCS-y += rte_cycles.c CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST) @@ -107,7 +104,7 @@ CFLAGS_eal_thread.o += -Wno-return-type CFLAGS_eal_hpet.o += -Wno-return-type endif -INC := rte_interrupts.h +INC := # no bsdapp specific headers SYMLINK-$(CONFIG_RTE_EXEC_ENV_BSDAPP)-include/exec-env := \ $(addprefix include/exec-env/,$(INC)) diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 5fa59884..369a682a 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -51,7 +51,6 @@ #include <rte_common.h> #include <rte_debug.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> @@ -66,7 +65,6 @@ #include <rte_cpuflags.h> #include <rte_interrupts.h> #include <rte_bus.h> -#include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> #include <rte_version.h> @@ -112,6 +110,13 @@ struct internal_config internal_config; /* used by rte_rdtsc() */ int rte_cycles_vmware_tsc_map; +/* Return mbuf pool ops name */ +const char * +rte_eal_mbuf_default_mempool_ops(void) +{ + return internal_config.mbuf_pool_ops_name; +} + /* Return a pointer to the configuration structure */ struct rte_config * rte_eal_get_configuration(void) @@ -119,6 +124,12 @@ rte_eal_get_configuration(void) return &rte_config; } +enum rte_iova_mode +rte_eal_iova_mode(void) +{ + return rte_eal_get_configuration()->iova_mode; +} + /* parse a sysfs (or other) file containing one integer value */ int eal_parse_sysfs_value(const char *filename, unsigned long *val) @@ -385,6 +396,9 @@ eal_parse_args(int argc, char **argv) continue; switch (opt) { + case OPT_MBUF_POOL_OPS_NAME_NUM: + internal_config.mbuf_pool_ops_name = optarg; + break; case 'h': eal_usage(prgname); exit(EXIT_SUCCESS); @@ -535,6 +549,29 @@ rte_eal_init(int argc, char **argv) return -1; } + if (eal_plugins_init() < 0) { + rte_eal_init_alert("Cannot init plugins\n"); + 
rte_errno = EINVAL; + rte_atomic32_clear(&run_once); + return -1; + } + + if (eal_option_device_parse()) { + rte_errno = ENODEV; + rte_atomic32_clear(&run_once); + return -1; + } + + if (rte_bus_scan()) { + rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_errno = ENODEV; + rte_atomic32_clear(&run_once); + return -1; + } + + /* autodetect the iova mapping mode (default is iova_pa) */ + rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class(); + if (internal_config.no_hugetlbfs == 0 && internal_config.process_type != RTE_PROC_SECONDARY && eal_hugepage_info_init() < 0) { @@ -603,9 +640,6 @@ rte_eal_init(int argc, char **argv) eal_check_mem_on_local_socket(); - if (eal_plugins_init() < 0) - rte_eal_init_alert("Cannot init plugins\n"); - eal_thread_init_master(rte_config.master_lcore); ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); @@ -614,17 +648,6 @@ rte_eal_init(int argc, char **argv) rte_config.master_lcore, thread_id, cpuset, ret == 0 ? "" : "..."); - if (eal_option_device_parse()) { - rte_errno = ENODEV; - return -1; - } - - if (rte_bus_scan()) { - rte_eal_init_alert("Cannot scan the buses for devices\n"); - rte_errno = ENODEV; - return -1; - } - RTE_LCORE_FOREACH_SLAVE(i) { /* @@ -698,3 +721,60 @@ rte_eal_process_type(void) { return rte_config.process_type; } + +int rte_eal_has_pci(void) +{ + return !internal_config.no_pci; +} + +int rte_eal_create_uio_dev(void) +{ + return internal_config.create_uio_dev; +} + +enum rte_intr_mode +rte_eal_vfio_intr_mode(void) +{ + return RTE_INTR_MODE_NONE; +} + +/* dummy forward declaration. */ +struct vfio_device_info; + +/* dummy prototypes. */ +int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, + int *vfio_dev_fd, struct vfio_device_info *device_info); +int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); +int rte_vfio_enable(const char *modname); +int rte_vfio_is_enabled(const char *modname); +int rte_vfio_noiommu_is_enabled(void); + +int rte_vfio_setup_device(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, + __rte_unused int *vfio_dev_fd, + __rte_unused struct vfio_device_info *device_info) +{ + return -1; +} + +int rte_vfio_release_device(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, + __rte_unused int fd) +{ + return -1; +} + +int rte_vfio_enable(__rte_unused const char *modname) +{ + return -1; +} + +int rte_vfio_is_enabled(__rte_unused const char *modname) +{ + return 0; +} + +int rte_vfio_noiommu_is_enabled(void) +{ + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c index ea2afff4..deba8770 100644 --- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c +++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c @@ -125,3 +125,38 @@ rte_intr_cap_multiple(struct rte_intr_handle *intr_handle) RTE_SET_USED(intr_handle); return 0; } + +int +rte_epoll_wait(int epfd, struct rte_epoll_event *events, + int maxevents, int timeout) +{ + RTE_SET_USED(epfd); + RTE_SET_USED(events); + RTE_SET_USED(maxevents); + RTE_SET_USED(timeout); + + return -ENOTSUP; +} + +int +rte_epoll_ctl(int epfd, int op, int fd, struct rte_epoll_event *event) +{ + RTE_SET_USED(epfd); + RTE_SET_USED(op); + RTE_SET_USED(fd); + RTE_SET_USED(event); + + return -ENOTSUP; +} + +int +rte_intr_tls_epfd(void) +{ + return -ENOTSUP; +} + +void +rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle) +{ + RTE_SET_USED(intr_handle); +} diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c 
b/lib/librte_eal/bsdapp/eal/eal_memory.c index 3614da8d..6ba05857 100644 --- a/lib/librte_eal/bsdapp/eal/eal_memory.c +++ b/lib/librte_eal/bsdapp/eal/eal_memory.c @@ -54,9 +54,14 @@ phys_addr_t rte_mem_virt2phy(const void *virtaddr) { /* XXX not implemented. This function is only used by - * rte_mempool_virt2phy() when hugepages are disabled. */ + * rte_mempool_virt2iova() when hugepages are disabled. */ (void)virtaddr; - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; +} +rte_iova_t +rte_mem_virt2iova(const void *virtaddr) +{ + return rte_mem_virt2phy(virtaddr); } int @@ -73,7 +78,7 @@ rte_eal_hugepage_init(void) /* for debug purposes, hugetlbfs can be disabled */ if (internal_config.no_hugetlbfs) { addr = malloc(internal_config.memory); - mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; + mcfg->memseg[0].iova = (rte_iova_t)(uintptr_t)addr; mcfg->memseg[0].addr = addr; mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; mcfg->memseg[0].len = internal_config.memory; @@ -88,7 +93,7 @@ rte_eal_hugepage_init(void) hpi = &internal_config.hugepage_info[i]; for (j = 0; j < hpi->num_pages[0]; j++) { struct rte_memseg *seg; - uint64_t physaddr; + rte_iova_t physaddr; int error; size_t sysctl_size = sizeof(physaddr); char physaddr_str[64]; @@ -114,7 +119,7 @@ rte_eal_hugepage_init(void) seg = &mcfg->memseg[seg_idx++]; seg->addr = addr; - seg->phys_addr = physaddr; + seg->iova = physaddr; seg->hugepage_sz = hpi->hugepage_sz; seg->len = hpi->hugepage_sz; seg->nchannel = mcfg->nchannel; @@ -192,3 +197,9 @@ error: close(fd_hugepage); return -1; } + +int +rte_eal_using_phys_addrs(void) +{ + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c deleted file mode 100644 index 04eacdcc..00000000 --- a/lib/librte_eal/bsdapp/eal/eal_pci.c +++ /dev/null @@ -1,670 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
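Editorial aside, not part of the patch: the bsdapp EAL hunks above introduce rte_eal_iova_mode(), decided during rte_eal_init() from rte_bus_get_iommu_class() now that the bus scan runs before memory init, plus a getter for the default mbuf pool ops name and the phys_addr-to-iova rename (rte_mem_virt2iova(), RTE_BAD_IOVA). A minimal, hypothetical post-init snippet consuming the new getters could look like this; sizes are arbitrary and rte_malloc_virt2iova() is the renamed malloc helper that appears further down in this patch.

#include <inttypes.h>
#include <stdio.h>
#include <rte_eal.h>
#include <rte_malloc.h>

int
main(int argc, char **argv)
{
	if (rte_eal_init(argc, argv) < 0)
		return -1;

	/* PA stays the default; VA is only selected when the buses report
	 * RTE_IOVA_VA from their get_iommu_class() callbacks. */
	printf("IOVA mode: %s\n",
	       rte_eal_iova_mode() == RTE_IOVA_VA ? "VA" : "PA");

	/* Name set through the new mbuf pool ops EAL option
	 * (OPT_MBUF_POOL_OPS_NAME_NUM above), e.g. "ring_mp_mc". */
	printf("default mbuf pool ops: %s\n",
	       rte_eal_mbuf_default_mempool_ops());

	void *buf = rte_malloc(NULL, 1024, 0);
	if (buf != NULL) {
		/* rte_malloc_virt2iova() replaces rte_malloc_virt2phy(). */
		printf("buf IOVA: 0x%" PRIx64 "\n",
		       (uint64_t)rte_malloc_virt2iova(buf));
		rte_free(buf);
	}
	return 0;
}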
- */ - -#include <ctype.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> -#include <unistd.h> -#include <inttypes.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <dirent.h> -#include <limits.h> -#include <sys/queue.h> -#include <sys/mman.h> -#include <sys/ioctl.h> -#include <sys/pciio.h> -#include <dev/pci/pcireg.h> - -#if defined(RTE_ARCH_X86) -#include <machine/cpufunc.h> -#endif - -#include <rte_interrupts.h> -#include <rte_log.h> -#include <rte_pci.h> -#include <rte_common.h> -#include <rte_launch.h> -#include <rte_memory.h> -#include <rte_memzone.h> -#include <rte_eal.h> -#include <rte_eal_memconfig.h> -#include <rte_per_lcore.h> -#include <rte_lcore.h> -#include <rte_malloc.h> -#include <rte_string_fns.h> -#include <rte_debug.h> -#include <rte_devargs.h> - -#include "eal_filesystem.h" -#include "eal_private.h" - -/** - * @file - * PCI probing under linux - * - * This code is used to simulate a PCI probe by parsing information in - * sysfs. Moreover, when a registered driver matches a device, the - * kernel driver currently using it is unloaded and replaced by - * igb_uio module, which is a very minimal userland driver for Intel - * network card, only providing access to PCI BAR to applications, and - * enabling bus master. - */ - -extern struct rte_pci_bus rte_pci_bus; - -/* Map pci device */ -int -rte_pci_map_device(struct rte_pci_device *dev) -{ - int ret = -1; - - /* try mapping the NIC resources */ - switch (dev->kdrv) { - case RTE_KDRV_NIC_UIO: - /* map resources for devices that use uio */ - ret = pci_uio_map_resource(dev); - break; - default: - RTE_LOG(DEBUG, EAL, - " Not managed by a supported kernel driver, skipped\n"); - ret = 1; - break; - } - - return ret; -} - -/* Unmap pci device */ -void -rte_pci_unmap_device(struct rte_pci_device *dev) -{ - /* try unmapping the NIC resources */ - switch (dev->kdrv) { - case RTE_KDRV_NIC_UIO: - /* unmap resources for devices that use uio */ - pci_uio_unmap_resource(dev); - break; - default: - RTE_LOG(DEBUG, EAL, - " Not managed by a supported kernel driver, skipped\n"); - break; - } -} - -void -pci_uio_free_resource(struct rte_pci_device *dev, - struct mapped_pci_resource *uio_res) -{ - rte_free(uio_res); - - if (dev->intr_handle.fd) { - close(dev->intr_handle.fd); - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - } -} - -int -pci_uio_alloc_resource(struct rte_pci_device *dev, - struct mapped_pci_resource **uio_res) -{ - char devname[PATH_MAX]; /* contains the /dev/uioX */ - struct rte_pci_addr *loc; - - loc = &dev->addr; - - snprintf(devname, sizeof(devname), "/dev/uio@pci:%u:%u:%u", - dev->addr.bus, dev->addr.devid, dev->addr.function); - - if (access(devname, O_RDWR) < 0) { - RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " - "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); - return 1; - } - - /* save fd if in primary process */ - dev->intr_handle.fd = open(devname, O_RDWR); - if (dev->intr_handle.fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - goto error; - } - dev->intr_handle.type = RTE_INTR_HANDLE_UIO; - - /* allocate the mapping details for secondary processes*/ - *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); - if (*uio_res == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot store uio mmap details\n", __func__); - goto error; - } - - snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname); - memcpy(&(*uio_res)->pci_addr, 
&dev->addr, sizeof((*uio_res)->pci_addr)); - - return 0; - -error: - pci_uio_free_resource(dev, *uio_res); - return -1; -} - -int -pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, - struct mapped_pci_resource *uio_res, int map_idx) -{ - int fd; - char *devname; - void *mapaddr; - uint64_t offset; - uint64_t pagesz; - struct pci_map *maps; - - maps = uio_res->maps; - devname = uio_res->path; - pagesz = sysconf(_SC_PAGESIZE); - - /* allocate memory to keep path */ - maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); - if (maps[map_idx].path == NULL) { - RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", - strerror(errno)); - return -1; - } - - /* - * open resource file, to mmap it - */ - fd = open(devname, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - goto error; - } - - /* if matching map is found, then use it */ - offset = res_idx * pagesz; - mapaddr = pci_map_resource(NULL, fd, (off_t)offset, - (size_t)dev->mem_resource[res_idx].len, 0); - close(fd); - if (mapaddr == MAP_FAILED) - goto error; - - maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; - maps[map_idx].size = dev->mem_resource[res_idx].len; - maps[map_idx].addr = mapaddr; - maps[map_idx].offset = offset; - strcpy(maps[map_idx].path, devname); - dev->mem_resource[res_idx].addr = mapaddr; - - return 0; - -error: - rte_free(maps[map_idx].path); - return -1; -} - -static int -pci_scan_one(int dev_pci_fd, struct pci_conf *conf) -{ - struct rte_pci_device *dev; - struct pci_bar_io bar; - unsigned i, max; - - dev = malloc(sizeof(*dev)); - if (dev == NULL) { - return -1; - } - - memset(dev, 0, sizeof(*dev)); - dev->addr.domain = conf->pc_sel.pc_domain; - dev->addr.bus = conf->pc_sel.pc_bus; - dev->addr.devid = conf->pc_sel.pc_dev; - dev->addr.function = conf->pc_sel.pc_func; - - /* get vendor id */ - dev->id.vendor_id = conf->pc_vendor; - - /* get device id */ - dev->id.device_id = conf->pc_device; - - /* get subsystem_vendor id */ - dev->id.subsystem_vendor_id = conf->pc_subvendor; - - /* get subsystem_device id */ - dev->id.subsystem_device_id = conf->pc_subdevice; - - /* get class id */ - dev->id.class_id = (conf->pc_class << 16) | - (conf->pc_subclass << 8) | - (conf->pc_progif); - - /* TODO: get max_vfs */ - dev->max_vfs = 0; - - /* FreeBSD has no NUMA support (yet) */ - dev->device.numa_node = 0; - - pci_name_set(dev); - - /* FreeBSD has only one pass through driver */ - dev->kdrv = RTE_KDRV_NIC_UIO; - - /* parse resources */ - switch (conf->pc_hdr & PCIM_HDRTYPE) { - case PCIM_HDRTYPE_NORMAL: - max = PCIR_MAX_BAR_0; - break; - case PCIM_HDRTYPE_BRIDGE: - max = PCIR_MAX_BAR_1; - break; - case PCIM_HDRTYPE_CARDBUS: - max = PCIR_MAX_BAR_2; - break; - default: - goto skipdev; - } - - for (i = 0; i <= max; i++) { - bar.pbi_sel = conf->pc_sel; - bar.pbi_reg = PCIR_BAR(i); - if (ioctl(dev_pci_fd, PCIOCGETBAR, &bar) < 0) - continue; - - dev->mem_resource[i].len = bar.pbi_length; - if (PCI_BAR_IO(bar.pbi_base)) { - dev->mem_resource[i].addr = (void *)(bar.pbi_base & ~((uint64_t)0xf)); - continue; - } - dev->mem_resource[i].phys_addr = bar.pbi_base & ~((uint64_t)0xf); - } - - /* device is valid, add in list (sorted) */ - if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { - rte_pci_add_device(dev); - } - else { - struct rte_pci_device *dev2 = NULL; - int ret; - - TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { - ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); - if (ret > 0) - continue; - else if (ret < 0) { - 
rte_pci_insert_device(dev2, dev); - } else { /* already registered */ - dev2->kdrv = dev->kdrv; - dev2->max_vfs = dev->max_vfs; - pci_name_set(dev2); - memmove(dev2->mem_resource, - dev->mem_resource, - sizeof(dev->mem_resource)); - free(dev); - } - return 0; - } - rte_pci_add_device(dev); - } - - return 0; - -skipdev: - free(dev); - return 0; -} - -/* - * Scan the content of the PCI bus, and add the devices in the devices - * list. Call pci_scan_one() for each pci entry found. - */ -int -rte_pci_scan(void) -{ - int fd; - unsigned dev_count = 0; - struct pci_conf matches[16]; - struct pci_conf_io conf_io = { - .pat_buf_len = 0, - .num_patterns = 0, - .patterns = NULL, - .match_buf_len = sizeof(matches), - .matches = &matches[0], - }; - - /* for debug purposes, PCI can be disabled */ - if (internal_config.no_pci) - return 0; - - fd = open("/dev/pci", O_RDONLY); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); - goto error; - } - - do { - unsigned i; - if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { - RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", - __func__, strerror(errno)); - goto error; - } - - for (i = 0; i < conf_io.num_matches; i++) - if (pci_scan_one(fd, &matches[i]) < 0) - goto error; - - dev_count += conf_io.num_matches; - } while(conf_io.status == PCI_GETCONF_MORE_DEVS); - - close(fd); - - RTE_LOG(DEBUG, EAL, "PCI scan found %u devices\n", dev_count); - return 0; - -error: - if (fd >= 0) - close(fd); - return -1; -} - -int -pci_update_device(const struct rte_pci_addr *addr) -{ - int fd; - struct pci_conf matches[2]; - struct pci_match_conf match = { - .pc_sel = { - .pc_domain = addr->domain, - .pc_bus = addr->bus, - .pc_dev = addr->devid, - .pc_func = addr->function, - }, - }; - struct pci_conf_io conf_io = { - .pat_buf_len = 0, - .num_patterns = 1, - .patterns = &match, - .match_buf_len = sizeof(matches), - .matches = &matches[0], - }; - - fd = open("/dev/pci", O_RDONLY); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); - goto error; - } - - if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { - RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", - __func__, strerror(errno)); - goto error; - } - - if (conf_io.num_matches != 1) - goto error; - - if (pci_scan_one(fd, &matches[0]) < 0) - goto error; - - close(fd); - - return 0; - -error: - if (fd >= 0) - close(fd); - return -1; -} - -/* Read PCI config space. */ -int rte_pci_read_config(const struct rte_pci_device *dev, - void *buf, size_t len, off_t offset) -{ - int fd = -1; - int size; - struct pci_io pi = { - .pi_sel = { - .pc_domain = dev->addr.domain, - .pc_bus = dev->addr.bus, - .pc_dev = dev->addr.devid, - .pc_func = dev->addr.function, - }, - .pi_reg = offset, - }; - - fd = open("/dev/pci", O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); - goto error; - } - - while (len > 0) { - size = (len >= 4) ? 4 : ((len >= 2) ? 2 : 1); - pi.pi_width = size; - - if (ioctl(fd, PCIOCREAD, &pi) < 0) - goto error; - memcpy(buf, &pi.pi_data, size); - - buf = (char *)buf + size; - pi.pi_reg += size; - len -= size; - } - close(fd); - - return 0; - - error: - if (fd >= 0) - close(fd); - return -1; -} - -/* Write PCI config space. 
*/ -int rte_pci_write_config(const struct rte_pci_device *dev, - const void *buf, size_t len, off_t offset) -{ - int fd = -1; - - struct pci_io pi = { - .pi_sel = { - .pc_domain = dev->addr.domain, - .pc_bus = dev->addr.bus, - .pc_dev = dev->addr.devid, - .pc_func = dev->addr.function, - }, - .pi_reg = offset, - .pi_data = *(const uint32_t *)buf, - .pi_width = len, - }; - - if (len == 3 || len > sizeof(pi.pi_data)) { - RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__); - goto error; - } - - memcpy(&pi.pi_data, buf, len); - - fd = open("/dev/pci", O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); - goto error; - } - - if (ioctl(fd, PCIOCWRITE, &pi) < 0) - goto error; - - close(fd); - return 0; - - error: - if (fd >= 0) - close(fd); - return -1; -} - -int -rte_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - int ret; - - switch (dev->kdrv) { -#if defined(RTE_ARCH_X86) - case RTE_KDRV_NIC_UIO: - if ((uintptr_t) dev->mem_resource[bar].addr <= UINT16_MAX) { - p->base = (uintptr_t)dev->mem_resource[bar].addr; - ret = 0; - } else - ret = -1; - break; -#endif - default: - ret = -1; - break; - } - - if (!ret) - p->dev = dev; - - return ret; -} - -static void -pci_uio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ -#if defined(RTE_ARCH_X86) - uint8_t *d; - int size; - unsigned short reg = p->base + offset; - - for (d = data; len > 0; d += size, reg += size, len -= size) { - if (len >= 4) { - size = 4; - *(uint32_t *)d = inl(reg); - } else if (len >= 2) { - size = 2; - *(uint16_t *)d = inw(reg); - } else { - size = 1; - *d = inb(reg); - } - } -#else - RTE_SET_USED(p); - RTE_SET_USED(data); - RTE_SET_USED(len); - RTE_SET_USED(offset); -#endif -} - -void -rte_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ - switch (p->dev->kdrv) { - case RTE_KDRV_NIC_UIO: - pci_uio_ioport_read(p, data, len, offset); - break; - default: - break; - } -} - -static void -pci_uio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ -#if defined(RTE_ARCH_X86) - const uint8_t *s; - int size; - unsigned short reg = p->base + offset; - - for (s = data; len > 0; s += size, reg += size, len -= size) { - if (len >= 4) { - size = 4; - outl(reg, *(const uint32_t *)s); - } else if (len >= 2) { - size = 2; - outw(reg, *(const uint16_t *)s); - } else { - size = 1; - outb(reg, *s); - } - } -#else - RTE_SET_USED(p); - RTE_SET_USED(data); - RTE_SET_USED(len); - RTE_SET_USED(offset); -#endif -} - -void -rte_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ - switch (p->dev->kdrv) { - case RTE_KDRV_NIC_UIO: - pci_uio_ioport_write(p, data, len, offset); - break; - default: - break; - } -} - -int -rte_pci_ioport_unmap(struct rte_pci_ioport *p) -{ - int ret; - - switch (p->dev->kdrv) { -#if defined(RTE_ARCH_X86) - case RTE_KDRV_NIC_UIO: - ret = 0; - break; -#endif - default: - ret = -1; - break; - } - - return ret; -} diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c index 783d68c5..2a2136a2 100644 --- a/lib/librte_eal/bsdapp/eal/eal_thread.c +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c @@ -46,7 +46,6 @@ #include <rte_launch.h> #include <rte_log.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_per_lcore.h> #include <rte_eal.h> #include <rte_lcore.h> diff --git a/lib/librte_eal/bsdapp/eal/eal_timer.c b/lib/librte_eal/bsdapp/eal/eal_timer.c index 
f12d9bd2..14421943 100644 --- a/lib/librte_eal/bsdapp/eal/eal_timer.c +++ b/lib/librte_eal/bsdapp/eal/eal_timer.c @@ -42,7 +42,6 @@ #include <rte_log.h> #include <rte_cycles.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_debug.h> diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h deleted file mode 100644 index 99a33432..00000000 --- a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h +++ /dev/null @@ -1,107 +0,0 @@ -/*- - * This file is provided under a dual BSD/LGPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GNU LESSER GENERAL PUBLIC LICENSE - * - * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * Contact Information: - * Intel Corporation - * - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef _RTE_DOM0_COMMON_H_ -#define _RTE_DOM0_COMMON_H_ - -#ifdef __KERNEL__ -#include <linux/if.h> -#endif - -#define DOM0_NAME_MAX 256 -#define DOM0_MM_DEV "/dev/dom0_mm" - -#define DOM0_CONTIG_NUM_ORDER 9 /**< 2M order */ -#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. 
*/ -#define DOM0_MEMBLOCK_SIZE 0x200000 /**< Maximum nb. of memory block(2M). */ -#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */ -#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */ - -#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info) -#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *) -#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int) -#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *) - -/** - * A structure used to store memory information. - */ -struct memory_info { - char name[DOM0_NAME_MAX]; - uint64_t size; -}; - -/** - * A structure used to store memory segment information. - */ -struct memseg_info { - uint32_t idx; - uint64_t pfn; - uint64_t size; - uint64_t mfn[DOM0_NUM_MEMBLOCK]; -}; - -/** - * A structure used to store memory block information. - */ -struct memblock_info { - uint8_t exchange_flag; - uint64_t vir_addr; - uint64_t pfn; - uint64_t mfn; -}; -#endif /* _RTE_DOM0_COMMON_H_ */ diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h deleted file mode 100644 index c1995ee1..00000000 --- a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h +++ /dev/null @@ -1,137 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_INTERRUPTS_H_ -#error "don't include this file directly, please include generic <rte_interrupts.h>" -#endif - -#ifndef _RTE_BSDAPP_INTERRUPTS_H_ -#define _RTE_BSDAPP_INTERRUPTS_H_ - -#define RTE_INTR_VEC_ZERO_OFFSET 0 -#define RTE_INTR_VEC_RXTX_OFFSET 1 - -#define RTE_MAX_RXTX_INTR_VEC_ID 32 - -enum rte_intr_handle_type { - RTE_INTR_HANDLE_UNKNOWN = 0, - RTE_INTR_HANDLE_UIO, /**< uio device handle */ - RTE_INTR_HANDLE_ALARM, /**< alarm handle */ - RTE_INTR_HANDLE_MAX -}; - -/** Handle for interrupts. 
*/ -struct rte_intr_handle { - int fd; /**< file descriptor */ - int uio_cfg_fd; /**< UIO config file descriptor */ - enum rte_intr_handle_type type; /**< handle type */ - int max_intr; /**< max interrupt requested */ - uint32_t nb_efd; /**< number of available efds */ - int *intr_vec; /**< intr vector number array */ -}; - -/** - * @param intr_handle - * Pointer to the interrupt handle. - * @param epfd - * Epoll instance fd which the intr vector associated to. - * @param op - * The operation be performed for the vector. - * Operation type of {ADD, DEL}. - * @param vec - * RX intr vector number added to the epoll instance wait list. - * @param data - * User raw data. - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int -rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, - int epfd, int op, unsigned int vec, void *data); - -/** - * It enables the fastpath event fds if it's necessary. - * It creates event fds when multi-vectors allowed, - * otherwise it multiplexes the single event fds. - * - * @param intr_handle - * Pointer to the interrupt handle. - * @param nb_efd - * Number of interrupt vector trying to enable. - * The value 0 is not allowed. - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int -rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd); - -/** - * It disable the fastpath event fds. - * It deletes registered eventfds and closes the open fds. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -void -rte_intr_efd_disable(struct rte_intr_handle *intr_handle); - -/** - * The fastpath interrupt is enabled or not. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -int rte_intr_dp_is_en(struct rte_intr_handle *intr_handle); - -/** - * The interrupt handle instance allows other cause or not. - * Other cause stands for none fastpath interrupt. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -int rte_intr_allow_others(struct rte_intr_handle *intr_handle); - -/** - * The multiple interrupt vector capability of interrupt handle instance. - * It returns zero if no multiple interrupt vector support. - * - * @param intr_handle - * Pointer to the interrupt handle. 
- */ -int -rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); - -#endif /* _RTE_BSDAPP_INTERRUPTS_H_ */ diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map deleted file mode 100644 index aac6fd77..00000000 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ /dev/null @@ -1,239 +0,0 @@ -DPDK_2.0 { - global: - - __rte_panic; - devargs_list; - eal_parse_sysfs_value; - eal_timer_source; - lcore_config; - per_lcore__lcore_id; - per_lcore__rte_errno; - rte_calloc; - rte_calloc_socket; - rte_cpu_check_supported; - rte_cpu_get_flag_enabled; - rte_cycles_vmware_tsc_map; - rte_delay_us; - rte_dump_physmem_layout; - rte_dump_registers; - rte_dump_stack; - rte_dump_tailq; - rte_eal_alarm_cancel; - rte_eal_alarm_set; - rte_eal_devargs_add; - rte_eal_devargs_dump; - rte_eal_devargs_type_count; - rte_eal_get_configuration; - rte_eal_get_lcore_state; - rte_eal_get_physmem_layout; - rte_eal_get_physmem_size; - rte_eal_has_hugepages; - rte_eal_hpet_init; - rte_eal_init; - rte_eal_iopl_init; - rte_eal_lcore_role; - rte_eal_mp_remote_launch; - rte_eal_mp_wait_lcore; - rte_eal_parse_devargs_str; - rte_eal_process_type; - rte_eal_remote_launch; - rte_eal_tailq_lookup; - rte_eal_tailq_register; - rte_eal_wait_lcore; - rte_exit; - rte_free; - rte_get_hpet_cycles; - rte_get_hpet_hz; - rte_get_log_level; - rte_get_log_type; - rte_get_tsc_hz; - rte_hexdump; - rte_intr_callback_register; - rte_intr_callback_unregister; - rte_intr_disable; - rte_intr_enable; - rte_log; - rte_log_cur_msg_loglevel; - rte_log_cur_msg_logtype; - rte_logs; - rte_malloc; - rte_malloc_dump_stats; - rte_malloc_get_socket_stats; - rte_malloc_set_limit; - rte_malloc_socket; - rte_malloc_validate; - rte_malloc_virt2phy; - rte_mem_lock_page; - rte_mem_phy2mch; - rte_mem_virt2phy; - rte_memdump; - rte_memory_get_nchannel; - rte_memory_get_nrank; - rte_memzone_dump; - rte_memzone_lookup; - rte_memzone_reserve; - rte_memzone_reserve_aligned; - rte_memzone_reserve_bounded; - rte_memzone_walk; - rte_openlog_stream; - rte_realloc; - rte_set_application_usage_hook; - rte_set_log_level; - rte_set_log_type; - rte_socket_id; - rte_strerror; - rte_strsplit; - rte_sys_gettid; - rte_thread_get_affinity; - rte_thread_set_affinity; - rte_vlog; - rte_xen_dom0_memory_attach; - rte_xen_dom0_memory_init; - rte_zmalloc; - rte_zmalloc_socket; - - local: *; -}; - -DPDK_2.1 { - global: - - rte_intr_allow_others; - rte_intr_dp_is_en; - rte_intr_efd_disable; - rte_intr_efd_enable; - rte_intr_rx_ctl; - rte_memzone_free; - -} DPDK_2.0; - -DPDK_2.2 { - global: - - rte_intr_cap_multiple; - rte_keepalive_create; - rte_keepalive_dispatch_pings; - rte_keepalive_mark_alive; - rte_keepalive_register_core; - rte_xen_dom0_supported; - -} DPDK_2.1; - -DPDK_16.04 { - global: - - rte_cpu_get_flag_name; - rte_eal_primary_proc_alive; - -} DPDK_2.2; - -DPDK_16.07 { - global: - - pci_get_sysfs_path; - rte_keepalive_mark_sleep; - rte_keepalive_register_relay_callback; - rte_rtm_supported; - rte_thread_setname; - -} DPDK_16.04; - -DPDK_16.11 { - global: - - rte_delay_us_block; - rte_delay_us_callback_register; - rte_eal_dev_attach; - rte_eal_dev_detach; - -} DPDK_16.07; - -DPDK_17.02 { - global: - - rte_bus_dump; - rte_bus_probe; - rte_bus_register; - rte_bus_scan; - rte_bus_unregister; - -} DPDK_16.11; - -DPDK_17.05 { - global: - - rte_cpu_is_supported; - rte_log_dump; - rte_log_register; - rte_log_get_global_level; - rte_log_set_global_level; - rte_log_set_level; - rte_log_set_level_regexp; - rte_pci_detach; - 
rte_pci_dump; - rte_pci_ioport_map; - rte_pci_ioport_read; - rte_pci_ioport_unmap; - rte_pci_ioport_write; - rte_pci_map_device; - rte_pci_probe; - rte_pci_probe_one; - rte_pci_read_config; - rte_pci_register; - rte_pci_scan; - rte_pci_unmap_device; - rte_pci_unregister; - rte_pci_write_config; - rte_vdev_init; - rte_vdev_register; - rte_vdev_uninit; - rte_vdev_unregister; - vfio_get_container_fd; - vfio_get_group_fd; - vfio_get_group_no; - -} DPDK_17.02; - -DPDK_17.08 { - global: - - rte_bus_find; - rte_bus_find_by_device; - rte_bus_find_by_name; - rte_log_get_level; - -} DPDK_17.05; - -EXPERIMENTAL { - global: - - rte_eal_devargs_insert; - rte_eal_devargs_parse; - rte_eal_devargs_remove; - rte_eal_hotplug_add; - rte_eal_hotplug_remove; - rte_service_disable_on_lcore; - rte_service_dump; - rte_service_enable_on_lcore; - rte_service_get_by_id; - rte_service_get_by_name; - rte_service_get_count; - rte_service_get_enabled_on_lcore; - rte_service_is_running; - rte_service_lcore_add; - rte_service_lcore_count; - rte_service_lcore_del; - rte_service_lcore_list; - rte_service_lcore_reset_all; - rte_service_lcore_start; - rte_service_lcore_stop; - rte_service_probe_capability; - rte_service_register; - rte_service_reset; - rte_service_set_stats_enable; - rte_service_start; - rte_service_start_with_defaults; - rte_service_stop; - rte_service_unregister; - -} DPDK_17.08; diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index e8fd67a2..9effd0d4 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -32,16 +32,18 @@ include $(RTE_SDK)/mk/rte.vars.mk INC := rte_branch_prediction.h rte_common.h -INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h -INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h +INC += rte_debug.h rte_eal.h rte_eal_interrupts.h +INC += rte_errno.h rte_launch.h rte_lcore.h +INC += rte_log.h rte_memory.h rte_memzone.h INC += rte_per_lcore.h rte_random.h INC += rte_tailq.h rte_interrupts.h rte_alarm.h INC += rte_string_fns.h rte_version.h INC += rte_eal_memconfig.h rte_malloc_heap.h -INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h +INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h INC += rte_malloc.h rte_keepalive.h rte_time.h INC += rte_service.h rte_service_component.h +INC += rte_bitmap.h rte_vfio.h GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h @@ -49,7 +51,7 @@ GENERIC_INC += rte_vect.h rte_pause.h rte_io.h # defined in mk/arch/$(RTE_ARCH)/rte.vars.mk ARCH_DIR ?= $(RTE_ARCH) -ARCH_INC := $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h)) +ARCH_INC := $(sort $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h))) SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC)) SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include += \ diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c index 5636e9c1..88f1cbe3 100644 --- a/lib/librte_eal/common/arch/arm/rte_cpuflags.c +++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c @@ -137,7 +137,7 @@ rte_cpu_get_features(hwcap_registers_t out) _Elfx_auxv_t auxv; auxv_fd = open("/proc/self/auxv", O_RDONLY); - assert(auxv_fd); + assert(auxv_fd != -1); while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) { if (auxv.a_type == AT_HWCAP) { out[REG_HWCAP] = auxv.a_un.a_val; diff --git 
a/lib/librte_eal/common/arch/arm/rte_cycles.c b/lib/librte_eal/common/arch/arm/rte_cycles.c new file mode 100644 index 00000000..3e31e5be --- /dev/null +++ b/lib/librte_eal/common/arch/arm/rte_cycles.c @@ -0,0 +1,45 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "eal_private.h" + +uint64_t +get_tsc_freq_arch(void) +{ +#if defined RTE_ARCH_ARM64 && !defined RTE_ARM_EAL_RDTSC_USE_PMU + uint64_t freq; + asm volatile("mrs %0, cntfrq_el0" : "=r" (freq)); + return freq; +#else + return 0; +#endif +} diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c index fcf96e04..970a61c5 100644 --- a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c +++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c @@ -108,7 +108,7 @@ rte_cpu_get_features(hwcap_registers_t out) Elf64_auxv_t auxv; auxv_fd = open("/proc/self/auxv", O_RDONLY); - assert(auxv_fd); + assert(auxv_fd != -1); while (read(auxv_fd, &auxv, sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) { if (auxv.a_type == AT_HWCAP) diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cycles.c b/lib/librte_eal/common/arch/ppc_64/rte_cycles.c new file mode 100644 index 00000000..69a9f747 --- /dev/null +++ b/lib/librte_eal/common/arch/ppc_64/rte_cycles.c @@ -0,0 +1,52 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_lcore.h> +#include <rte_log.h> +#include "eal_filesystem.h" +#include "eal_private.h" + +static const char sys_cpu_dir[] = "/sys/devices/system/cpu"; + +uint64_t +get_tsc_freq_arch(void) +{ + unsigned long cpu_hz; + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "%s/cpu%d/cpufreq/cpuinfo_cur_freq", + sys_cpu_dir, rte_get_master_lcore()); + if (eal_parse_sysfs_value(path, &cpu_hz) < 0) + RTE_LOG(WARNING, EAL, "Unable to parse %s\n", path); + + return cpu_hz*1000; +} diff --git a/lib/librte_eal/common/arch/x86/rte_cpuflags.c b/lib/librte_eal/common/arch/x86/rte_cpuflags.c index 01382571..7d4a0fef 100644 --- a/lib/librte_eal/common/arch/x86/rte_cpuflags.c +++ b/lib/librte_eal/common/arch/x86/rte_cpuflags.c @@ -36,6 +36,7 @@ #include <stdio.h> #include <errno.h> #include <stdint.h> +#include <cpuid.h> enum cpu_register_t { RTE_REG_EAX = 0, @@ -156,38 +157,12 @@ const struct feature_entry rte_cpu_feature_table[] = { FEAT_DEF(INVTSC, 0x80000007, 0, RTE_REG_EDX, 8) }; -/* - * Execute CPUID instruction and get contents of a specific register - * - * This function, when compiled with GCC, will generate architecture-neutral - * code, as per GCC manual. - */ -static void -rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out) -{ -#if defined(__i386__) && defined(__PIC__) - /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */ - asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0" - : "=r" (out[RTE_REG_EBX]), - "=a" (out[RTE_REG_EAX]), - "=c" (out[RTE_REG_ECX]), - "=d" (out[RTE_REG_EDX]) - : "a" (leaf), "c" (subleaf)); -#else - asm volatile("cpuid" - : "=a" (out[RTE_REG_EAX]), - "=b" (out[RTE_REG_EBX]), - "=c" (out[RTE_REG_ECX]), - "=d" (out[RTE_REG_EDX]) - : "a" (leaf), "c" (subleaf)); -#endif -} - int rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) { const struct feature_entry *feat; cpuid_registers_t regs; + unsigned int maxleaf; if (feature >= RTE_CPUFLAG_NUMFLAGS) /* Flag does not match anything in the feature tables */ @@ -199,13 +174,14 @@ rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) /* This entry in the table wasn't filled out! 
*/ return -EFAULT; - rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs); - if (((regs[RTE_REG_EAX] ^ feat->leaf) & 0xffff0000) || - regs[RTE_REG_EAX] < feat->leaf) + maxleaf = __get_cpuid_max(feat->leaf & 0x80000000, NULL); + + if (maxleaf < feat->leaf) return 0; - /* get the cpuid leaf containing the desired feature */ - rte_cpu_get_features(feat->leaf, feat->subleaf, regs); + __cpuid_count(feat->leaf, feat->subleaf, + regs[RTE_REG_EAX], regs[RTE_REG_EBX], + regs[RTE_REG_ECX], regs[RTE_REG_EDX]); /* check if the feature is enabled */ return (regs[feat->reg] >> feat->bit) & 1; diff --git a/lib/librte_eal/common/arch/x86/rte_cycles.c b/lib/librte_eal/common/arch/x86/rte_cycles.c new file mode 100644 index 00000000..417850ee --- /dev/null +++ b/lib/librte_eal/common/arch/x86/rte_cycles.c @@ -0,0 +1,152 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <fcntl.h> +#include <unistd.h> +#include <cpuid.h> + +#include <rte_common.h> + +#include "eal_private.h" + +static unsigned int +rte_cpu_get_model(uint32_t fam_mod_step) +{ + uint32_t family, model, ext_model; + + family = (fam_mod_step >> 8) & 0xf; + model = (fam_mod_step >> 4) & 0xf; + + if (family == 6 || family == 15) { + ext_model = (fam_mod_step >> 16) & 0xf; + model += (ext_model << 4); + } + + return model; +} + +static int32_t +rdmsr(int msr, uint64_t *val) +{ +#ifdef RTE_EXEC_ENV_LINUXAPP + int fd; + int ret; + + fd = open("/dev/cpu/0/msr", O_RDONLY); + if (fd < 0) + return fd; + + ret = pread(fd, val, sizeof(uint64_t), msr); + + close(fd); + + return ret; +#else + RTE_SET_USED(msr); + RTE_SET_USED(val); + + return -1; +#endif +} + +static uint32_t +check_model_wsm_nhm(uint8_t model) +{ + switch (model) { + /* Westmere */ + case 0x25: + case 0x2C: + case 0x2F: + /* Nehalem */ + case 0x1E: + case 0x1F: + case 0x1A: + case 0x2E: + return 1; + } + + return 0; +} + +static uint32_t +check_model_gdm_dnv(uint8_t model) +{ + switch (model) { + /* Goldmont */ + case 0x5C: + /* Denverton */ + case 0x5F: + return 1; + } + + return 0; +} + +uint64_t +get_tsc_freq_arch(void) +{ + uint64_t tsc_hz = 0; + uint32_t a, b, c, d, maxleaf; + uint8_t mult, model; + int32_t ret; + + /* + * Time Stamp Counter and Nominal Core Crystal Clock + * Information Leaf + */ + maxleaf = __get_cpuid_max(0, NULL); + + if (maxleaf >= 0x15) { + __cpuid(0x15, a, b, c, d); + + /* EBX : TSC/Crystal ratio, ECX : Crystal Hz */ + if (b && c) + return c * (b / a); + } + + __cpuid(0x1, a, b, c, d); + model = rte_cpu_get_model(a); + + if (check_model_wsm_nhm(model)) + mult = 133; + else if ((c & bit_AVX) || check_model_gdm_dnv(model)) + mult = 100; + else + return 0; + + ret = rdmsr(0xCE, &tsc_hz); + if (ret < 0) + return 0; + + return ((tsc_hz >> 8) & 0xff) * mult * 1E6; +} diff --git a/lib/librte_eal/common/arch/x86/rte_memcpy.c b/lib/librte_eal/common/arch/x86/rte_memcpy.c new file mode 100644 index 00000000..174bef15 --- /dev/null +++ b/lib/librte_eal/common/arch/x86/rte_memcpy.c @@ -0,0 +1,58 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
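Editorial aside, not part of the patch: the new rte_cycles.c files above give each architecture its own get_tsc_freq_arch() backend (cntfrq_el0 on arm64, the cpufreq sysfs value on ppc64, CPUID leaf 0x15 with an MSR 0xCE fallback on x86). Applications are not expected to call it directly; they keep using the public cycle API that this calibration feeds. A small, purely illustrative timing helper under that assumption:

#include <inttypes.h>
#include <stdio.h>
#include <rte_cycles.h>

/* Must run after rte_eal_init(), which calibrates the TSC frequency. */
static void
time_busy_wait(void)
{
	uint64_t start = rte_rdtsc();

	rte_delay_us_block(100);	/* the work being measured: a 100 us busy wait */

	uint64_t cycles = rte_rdtsc() - start;
	double us = (double)cycles * 1E6 / rte_get_tsc_hz();

	printf("elapsed: %" PRIu64 " cycles (~%.1f us)\n", cycles, us);
}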
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_memcpy.h> +#include <rte_cpuflags.h> +#include <rte_log.h> + +void *(*rte_memcpy_ptr)(void *dst, const void *src, size_t n) = NULL; + +RTE_INIT(rte_memcpy_init) +{ +#ifdef CC_SUPPORT_AVX512F + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F)) { + rte_memcpy_ptr = rte_memcpy_avx512f; + RTE_LOG(DEBUG, EAL, "AVX512 memcpy is using!\n"); + return; + } +#endif +#ifdef CC_SUPPORT_AVX2 + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) { + rte_memcpy_ptr = rte_memcpy_avx2; + RTE_LOG(DEBUG, EAL, "AVX2 memcpy is using!\n"); + return; + } +#endif + rte_memcpy_ptr = rte_memcpy_sse; + RTE_LOG(DEBUG, EAL, "Default SSE/AVX memcpy is using!\n"); +} diff --git a/lib/librte_eal/common/arch/x86/rte_spinlock.c b/lib/librte_eal/common/arch/x86/rte_spinlock.c index c383e9f0..1244a90b 100644 --- a/lib/librte_eal/common/arch/x86/rte_spinlock.c +++ b/lib/librte_eal/common/arch/x86/rte_spinlock.c @@ -38,8 +38,7 @@ uint8_t rte_rtm_supported; /* cache the flag to avoid the overhead of the rte_cpu_get_flag_enabled function */ -static void __attribute__((constructor)) -rte_rtm_init(void) +RTE_INIT(rte_rtm_init) { rte_rtm_supported = rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM); } diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c index 08bec2d9..3e022d51 100644 --- a/lib/librte_eal/common/eal_common_bus.c +++ b/lib/librte_eal/common/eal_common_bus.c @@ -35,6 +35,7 @@ #include <sys/queue.h> #include <rte_bus.h> +#include <rte_debug.h> #include "eal_private.h" @@ -73,11 +74,9 @@ rte_bus_scan(void) TAILQ_FOREACH(bus, &rte_bus_list, next) { ret = bus->scan(); - if (ret) { + if (ret) RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n", bus->name); - return ret; - } } return 0; @@ -97,20 +96,16 @@ rte_bus_probe(void) } ret = bus->probe(); - if (ret) { + if (ret) RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n", bus->name); - return ret; - } } if (vbus) { ret = vbus->probe(); - if (ret) { + if (ret) RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n", vbus->name); - return ret; - } } return 0; @@ -152,15 +147,16 @@ struct rte_bus * rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp, const void *data) { - struct rte_bus *bus = NULL; + struct rte_bus *bus; - TAILQ_FOREACH(bus, &rte_bus_list, next) { - if (start && bus == start) { - start = NULL; /* starting point found */ - continue; - } + if (start != NULL) + bus = TAILQ_NEXT(start, next); + else + bus = TAILQ_FIRST(&rte_bus_list); + while (bus != NULL) { if (cmp(bus, data) == 0) break; + bus = TAILQ_NEXT(bus, next); } return bus; } @@ -222,3 +218,26 @@ rte_bus_find_by_device_name(const char *str) c[0] = '\0'; return rte_bus_find(NULL, bus_can_parse, name); } + + +/* + * Get iommu class of devices on the bus. 
+ */ +enum rte_iova_mode +rte_bus_get_iommu_class(void) +{ + int mode = RTE_IOVA_DC; + struct rte_bus *bus; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { + + if (bus->get_iommu_class) + mode |= bus->get_iommu_class(); + } + + if (mode != RTE_IOVA_VA) { + /* Use default IOVA mode */ + mode = RTE_IOVA_PA; + } + return mode; +} diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c index e2512755..dda8f583 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -67,7 +67,6 @@ static int cmp_dev_name(const struct rte_device *dev, const void *_name) int rte_eal_dev_attach(const char *name, const char *devargs) { struct rte_bus *bus; - int ret; if (name == NULL || devargs == NULL) { RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n"); @@ -80,22 +79,13 @@ int rte_eal_dev_attach(const char *name, const char *devargs) name); return -EINVAL; } - if (strcmp(bus->name, "pci") == 0) - return rte_eal_hotplug_add("pci", name, devargs); - if (strcmp(bus->name, "vdev") != 0) { - RTE_LOG(ERR, EAL, "Device attach is only supported for PCI and vdev devices.\n"); - return -ENOTSUP; - } + if (strcmp(bus->name, "pci") == 0 || strcmp(bus->name, "vdev") == 0) + return rte_eal_hotplug_add(bus->name, name, devargs); - /* - * If we haven't found a bus device the user meant to "hotplug" a - * virtual device instead. - */ - ret = rte_vdev_init(name, devargs); - if (ret) - RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", - name); - return ret; + RTE_LOG(ERR, EAL, + "Device attach is only supported for PCI and vdev devices.\n"); + + return -ENOTSUP; } int rte_eal_dev_detach(struct rte_device *dev) diff --git a/lib/librte_eal/common/eal_common_errno.c b/lib/librte_eal/common/eal_common_errno.c index de48d8e4..dc5b7c04 100644 --- a/lib/librte_eal/common/eal_common_errno.c +++ b/lib/librte_eal/common/eal_common_errno.c @@ -46,18 +46,20 @@ RTE_DEFINE_PER_LCORE(int, _rte_errno); const char * rte_strerror(int errnum) { + /* BSD puts a colon in the "unknown error" messages, Linux doesn't */ +#ifdef RTE_EXEC_ENV_BSDAPP + static const char *sep = ":"; +#else + static const char *sep = ""; +#endif #define RETVAL_SZ 256 static RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval); + char *ret = RTE_PER_LCORE(retval); /* since some implementations of strerror_r throw an error * themselves if errnum is too big, we handle that case here */ - if (errnum > RTE_MAX_ERRNO) - snprintf(RTE_PER_LCORE(retval), RETVAL_SZ, -#ifdef RTE_EXEC_ENV_BSDAPP - "Unknown error: %d", errnum); -#else - "Unknown error %d", errnum); -#endif + if (errnum >= RTE_MAX_ERRNO) + snprintf(ret, RETVAL_SZ, "Unknown error%s %d", sep, errnum); else switch (errnum){ case E_RTE_SECONDARY: @@ -65,8 +67,10 @@ rte_strerror(int errnum) case E_RTE_NO_CONFIG: return "Missing rte_config structure"; default: - strerror_r(errnum, RTE_PER_LCORE(retval), RETVAL_SZ); + if (strerror_r(errnum, ret, RETVAL_SZ) != 0) + snprintf(ret, RETVAL_SZ, "Unknown error%s %d", + sep, errnum); } - return RTE_PER_LCORE(retval); + return ret; } diff --git a/lib/librte_eal/common/eal_common_launch.c b/lib/librte_eal/common/eal_common_launch.c index 137c191d..2d5cae9f 100644 --- a/lib/librte_eal/common/eal_common_launch.c +++ b/lib/librte_eal/common/eal_common_launch.c @@ -38,7 +38,6 @@ #include <rte_launch.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_atomic.h> #include <rte_pause.h> diff --git a/lib/librte_eal/common/eal_common_log.c 
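With the eal_common_dev.c change above, rte_eal_dev_attach() forwards both PCI and vdev devices to rte_eal_hotplug_add(), keyed by the bus name, instead of calling rte_vdev_init() directly. Applications can call the hotplug API themselves after rte_eal_init(); a minimal sketch, where the null PMD instance name and the empty devargs string are placeholders:

    #include <rte_dev.h>
    #include <rte_log.h>

    /* Attach a virtual device at run time; the first argument names the bus. */
    static int attach_null_port(void)
    {
        int ret = rte_eal_hotplug_add("vdev", "net_null0", "");

        if (ret < 0)
            RTE_LOG(ERR, USER1, "hotplug add failed: %d\n", ret);
        return ret;
    }
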
b/lib/librte_eal/common/eal_common_log.c index 0e3b9320..be404136 100644 --- a/lib/librte_eal/common/eal_common_log.c +++ b/lib/librte_eal/common/eal_common_log.c @@ -89,14 +89,6 @@ rte_log_set_global_level(uint32_t level) rte_logs.level = (uint32_t)level; } -/* Set global log level */ -/* replaced by rte_log_set_global_level */ -__rte_deprecated void -rte_set_log_level(uint32_t level) -{ - rte_log_set_global_level(level); -} - /* Get global log level */ uint32_t rte_log_get_global_level(void) @@ -104,14 +96,6 @@ rte_log_get_global_level(void) return rte_logs.level; } -/* Get global log level */ -/* replaced by rte_log_get_global_level */ -uint32_t -rte_get_log_level(void) -{ - return rte_log_get_global_level(); -} - int rte_log_get_level(uint32_t type) { @@ -121,30 +105,6 @@ rte_log_get_level(uint32_t type) return rte_logs.dynamic_types[type].loglevel; } -/* Set global log type */ -__rte_deprecated void -rte_set_log_type(uint32_t type, int enable) -{ - if (type < RTE_LOGTYPE_FIRST_EXT_ID) { - if (enable) - rte_logs.type |= 1 << type; - else - rte_logs.type &= ~(1 << type); - } - - if (enable) - rte_log_set_level(type, 0); - else - rte_log_set_level(type, RTE_LOG_DEBUG); -} - -/* Get global log type */ -__rte_deprecated uint32_t -rte_get_log_type(void) -{ - return rte_logs.type; -} - int rte_log_set_level(uint32_t type, uint32_t level) { @@ -289,7 +249,8 @@ static const struct logtype logtype_strings[] = { {RTE_LOGTYPE_USER8, "user8"} }; -RTE_INIT(rte_log_init); +/* Logging should be first initialzer (before drivers and bus) */ +RTE_INIT_PRIO(rte_log_init, 101); static void rte_log_init(void) { diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 996877ef..fc6c44da 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -41,7 +41,6 @@ #include <sys/queue.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> #include <rte_log.h> @@ -96,11 +95,11 @@ rte_dump_physmem_layout(FILE *f) if (mcfg->memseg[i].addr == NULL) break; - fprintf(f, "Segment %u: phys:0x%"PRIx64", len:%zu, " + fprintf(f, "Segment %u: IOVA:0x%"PRIx64", len:%zu, " "virt:%p, socket_id:%"PRId32", " "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", " "nrank:%"PRIx32"\n", i, - mcfg->memseg[i].phys_addr, + mcfg->memseg[i].iova, mcfg->memseg[i].len, mcfg->memseg[i].addr, mcfg->memseg[i].socket_id, diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index 3026e36b..ea072a25 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -251,7 +251,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, mcfg->memzone_cnt++; snprintf(mz->name, sizeof(mz->name), "%s", name); - mz->phys_addr = rte_malloc_virt2phy(mz_addr); + mz->iova = rte_malloc_virt2iova(mz_addr); mz->addr = mz_addr; mz->len = (requested_len == 0 ? 
elem->size : requested_len); mz->hugepage_sz = elem->ms->hugepage_sz; @@ -391,10 +391,10 @@ rte_memzone_dump(FILE *f) for (i=0; i<RTE_MAX_MEMZONE; i++) { if (mcfg->memzone[i].addr == NULL) break; - fprintf(f, "Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx" + fprintf(f, "Zone %u: name:<%s>, IO:0x%"PRIx64", len:0x%zx" ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i, mcfg->memzone[i].name, - mcfg->memzone[i].phys_addr, + mcfg->memzone[i].iova, mcfg->memzone[i].len, mcfg->memzone[i].addr, mcfg->memzone[i].socket_id, diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 1da185e5..996a0342 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -85,6 +85,7 @@ eal_long_options[] = { {OPT_LCORES, 1, NULL, OPT_LCORES_NUM }, {OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM }, {OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM }, + {OPT_MBUF_POOL_OPS_NAME, 1, NULL, OPT_MBUF_POOL_OPS_NAME_NUM}, {OPT_NO_HPET, 0, NULL, OPT_NO_HPET_NUM }, {OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM }, {OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM }, @@ -97,7 +98,6 @@ eal_long_options[] = { {OPT_VDEV, 1, NULL, OPT_VDEV_NUM }, {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM }, {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM }, - {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM }, {0, 0, NULL, 0 } }; @@ -208,8 +208,6 @@ eal_reset_internal_config(struct internal_config *internal_cfg) internal_cfg->syslog_facility = LOG_DAEMON; - internal_cfg->xen_dom0_support = 0; - /* if set to NONE, interrupt mode is determined automatically */ internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE; @@ -220,6 +218,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg) #endif internal_cfg->vmware_tsc_map = 0; internal_cfg->create_uio_dev = 0; + internal_cfg->mbuf_pool_ops_name = RTE_MBUF_DEFAULT_MEMPOOL_OPS; } static int @@ -279,12 +278,13 @@ int eal_plugins_init(void) { struct shared_driver *solib = NULL; + struct stat sb; - if (*default_solib_dir != '\0') + if (*default_solib_dir != '\0' && stat(default_solib_dir, &sb) == 0 && + S_ISDIR(sb.st_mode)) eal_plugin_add(default_solib_dir); TAILQ_FOREACH(solib, &solib_list, next) { - struct stat sb; if (stat(solib->name, &sb) == 0 && S_ISDIR(sb.st_mode)) { if (eal_plugindir_init(solib->name) == -1) { @@ -1279,6 +1279,7 @@ eal_common_usage(void) " '@' can be omitted if cpus and lcores have the same value\n" " -s SERVICE COREMASK Hexadecimal bitmask of cores to be used as service cores\n" " --"OPT_MASTER_LCORE" ID Core ID that is used as master\n" + " --"OPT_MBUF_POOL_OPS_NAME" Pool ops name for mbuf to use\n" " -n CHANNELS Number of memory channels\n" " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n" " -r RANKS Force number of memory ranks (don't detect)\n" diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c deleted file mode 100644 index 52fd38cd..00000000 --- a/lib/librte_eal/common/eal_common_pci.c +++ /dev/null @@ -1,580 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * Copyright 2013-2014 6WIND S.A. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
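The memzone hunks above replace the phys_addr field with iova, filled in from rte_malloc_virt2iova(), and the dump output now labels the value as an IO address. Code that used mz->phys_addr for DMA programming reads mz->iova instead; a minimal sketch, assuming EAL is already initialized and using a placeholder zone name:

    #include <inttypes.h>
    #include <stdio.h>
    #include <rte_memory.h>
    #include <rte_memzone.h>

    /* Reserve 4 KB and print the virtual address together with the IO
     * address a device would use (the physical address in IOVA-as-PA mode). */
    static void show_memzone_iova(void)
    {
        const struct rte_memzone *mz;

        mz = rte_memzone_reserve("example_mz", 4096, SOCKET_ID_ANY, 0);
        if (mz == NULL)
            return;

        printf("virt=%p iova=0x%" PRIx64 "\n", mz->addr, (uint64_t)mz->iova);
    }
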
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <string.h> -#include <inttypes.h> -#include <stdint.h> -#include <stdlib.h> -#include <stdio.h> -#include <sys/queue.h> -#include <sys/mman.h> - -#include <rte_errno.h> -#include <rte_interrupts.h> -#include <rte_log.h> -#include <rte_bus.h> -#include <rte_pci.h> -#include <rte_per_lcore.h> -#include <rte_memory.h> -#include <rte_memzone.h> -#include <rte_eal.h> -#include <rte_string_fns.h> -#include <rte_common.h> -#include <rte_devargs.h> - -#include "eal_private.h" - -extern struct rte_pci_bus rte_pci_bus; - -#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" - -const char *pci_get_sysfs_path(void) -{ - const char *path = NULL; - - path = getenv("SYSFS_PCI_DEVICES"); - if (path == NULL) - return SYSFS_PCI_DEVICES; - - return path; -} - -static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev) -{ - struct rte_devargs *devargs; - struct rte_pci_addr addr; - struct rte_bus *pbus; - - pbus = rte_bus_find_by_name("pci"); - TAILQ_FOREACH(devargs, &devargs_list, next) { - if (devargs->bus != pbus) - continue; - devargs->bus->parse(devargs->name, &addr); - if (!rte_eal_compare_pci_addr(&dev->addr, &addr)) - return devargs; - } - return NULL; -} - -void -pci_name_set(struct rte_pci_device *dev) -{ - struct rte_devargs *devargs; - - /* Each device has its internal, canonical name set. */ - rte_pci_device_name(&dev->addr, - dev->name, sizeof(dev->name)); - devargs = pci_devargs_lookup(dev); - dev->device.devargs = devargs; - /* In blacklist mode, if the device is not blacklisted, no - * rte_devargs exists for it. - */ - if (devargs != NULL) - /* If an rte_devargs exists, the generic rte_device uses the - * given name as its namea - */ - dev->device.name = dev->device.devargs->name; - else - /* Otherwise, it uses the internal, canonical form. 
*/ - dev->device.name = dev->name; -} - -/* map a particular resource from a file */ -void * -pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, - int additional_flags) -{ - void *mapaddr; - - /* Map the PCI memory resource of device */ - mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, - MAP_SHARED | additional_flags, fd, offset); - if (mapaddr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", - __func__, fd, requested_addr, - (unsigned long)size, (unsigned long)offset, - strerror(errno), mapaddr); - } else - RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); - - return mapaddr; -} - -/* unmap a particular resource */ -void -pci_unmap_resource(void *requested_addr, size_t size) -{ - if (requested_addr == NULL) - return; - - /* Unmap the PCI memory resource of device */ - if (munmap(requested_addr, size)) { - RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n", - __func__, requested_addr, (unsigned long)size, - strerror(errno)); - } else - RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n", - requested_addr); -} - -/* - * Match the PCI Driver and Device using the ID Table - * - * @param pci_drv - * PCI driver from which ID table would be extracted - * @param pci_dev - * PCI device to match against the driver - * @return - * 1 for successful match - * 0 for unsuccessful match - */ -static int -rte_pci_match(const struct rte_pci_driver *pci_drv, - const struct rte_pci_device *pci_dev) -{ - const struct rte_pci_id *id_table; - - for (id_table = pci_drv->id_table; id_table->vendor_id != 0; - id_table++) { - /* check if device's identifiers match the driver's ones */ - if (id_table->vendor_id != pci_dev->id.vendor_id && - id_table->vendor_id != PCI_ANY_ID) - continue; - if (id_table->device_id != pci_dev->id.device_id && - id_table->device_id != PCI_ANY_ID) - continue; - if (id_table->subsystem_vendor_id != - pci_dev->id.subsystem_vendor_id && - id_table->subsystem_vendor_id != PCI_ANY_ID) - continue; - if (id_table->subsystem_device_id != - pci_dev->id.subsystem_device_id && - id_table->subsystem_device_id != PCI_ANY_ID) - continue; - if (id_table->class_id != pci_dev->id.class_id && - id_table->class_id != RTE_CLASS_ANY_ID) - continue; - - return 1; - } - - return 0; -} - -/* - * If vendor/device ID match, call the probe() function of the - * driver. 
- */ -static int -rte_pci_probe_one_driver(struct rte_pci_driver *dr, - struct rte_pci_device *dev) -{ - int ret; - struct rte_pci_addr *loc; - - if ((dr == NULL) || (dev == NULL)) - return -EINVAL; - - loc = &dev->addr; - - /* The device is not blacklisted; Check if driver supports it */ - if (!rte_pci_match(dr, dev)) - /* Match of device and driver failed */ - return 1; - - RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", - loc->domain, loc->bus, loc->devid, loc->function, - dev->device.numa_node); - - /* no initialization when blacklisted, return without error */ - if (dev->device.devargs != NULL && - dev->device.devargs->policy == - RTE_DEV_BLACKLISTED) { - RTE_LOG(INFO, EAL, " Device is blacklisted, not" - " initializing\n"); - return 1; - } - - if (dev->device.numa_node < 0) { - RTE_LOG(WARNING, EAL, " Invalid NUMA socket, default to 0\n"); - dev->device.numa_node = 0; - } - - RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->driver.name); - - if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { - /* map resources for devices that use igb_uio */ - ret = rte_pci_map_device(dev); - if (ret != 0) - return ret; - } - - /* reference driver structure */ - dev->driver = dr; - dev->device.driver = &dr->driver; - - /* call the driver probe() function */ - ret = dr->probe(dr, dev); - if (ret) { - dev->driver = NULL; - dev->device.driver = NULL; - if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) && - /* Don't unmap if device is unsupported and - * driver needs mapped resources. - */ - !(ret > 0 && - (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES))) - rte_pci_unmap_device(dev); - } - - return ret; -} - -/* - * If vendor/device ID match, call the remove() function of the - * driver. - */ -static int -rte_pci_detach_dev(struct rte_pci_device *dev) -{ - struct rte_pci_addr *loc; - struct rte_pci_driver *dr; - - if (dev == NULL) - return -EINVAL; - - dr = dev->driver; - loc = &dev->addr; - - RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", - loc->domain, loc->bus, loc->devid, - loc->function, dev->device.numa_node); - - RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->driver.name); - - if (dr->remove && (dr->remove(dev) < 0)) - return -1; /* negative value is an error */ - - /* clear driver structure */ - dev->driver = NULL; - - if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) - /* unmap resources for devices that use igb_uio */ - rte_pci_unmap_device(dev); - - return 0; -} - -/* - * If vendor/device ID match, call the probe() function of all - * registered driver for the given device. Return -1 if initialization - * failed, return 1 if no driver is found for this device. - */ -static int -pci_probe_all_drivers(struct rte_pci_device *dev) -{ - struct rte_pci_driver *dr = NULL; - int rc = 0; - - if (dev == NULL) - return -1; - - /* Check if a driver is already loaded */ - if (dev->driver != NULL) - return 0; - - FOREACH_DRIVER_ON_PCIBUS(dr) { - rc = rte_pci_probe_one_driver(dr, dev); - if (rc < 0) - /* negative value is an error */ - return -1; - if (rc > 0) - /* positive value means driver doesn't support it */ - continue; - return 0; - } - return 1; -} - -/* - * Find the pci device specified by pci address, then invoke probe function of - * the driver of the device. 
- */ -int -rte_pci_probe_one(const struct rte_pci_addr *addr) -{ - struct rte_pci_device *dev = NULL; - - int ret = 0; - - if (addr == NULL) - return -1; - - /* update current pci device in global list, kernel bindings might have - * changed since last time we looked at it. - */ - if (pci_update_device(addr) < 0) - goto err_return; - - FOREACH_DEVICE_ON_PCIBUS(dev) { - if (rte_eal_compare_pci_addr(&dev->addr, addr)) - continue; - - ret = pci_probe_all_drivers(dev); - if (ret) - goto err_return; - return 0; - } - return -1; - -err_return: - RTE_LOG(WARNING, EAL, - "Requested device " PCI_PRI_FMT " cannot be used\n", - addr->domain, addr->bus, addr->devid, addr->function); - return -1; -} - -/* - * Detach device specified by its pci address. - */ -int -rte_pci_detach(const struct rte_pci_addr *addr) -{ - struct rte_pci_device *dev = NULL; - int ret = 0; - - if (addr == NULL) - return -1; - - FOREACH_DEVICE_ON_PCIBUS(dev) { - if (rte_eal_compare_pci_addr(&dev->addr, addr)) - continue; - - ret = rte_pci_detach_dev(dev); - if (ret < 0) - /* negative value is an error */ - goto err_return; - if (ret > 0) - /* positive value means driver doesn't support it */ - continue; - - rte_pci_remove_device(dev); - free(dev); - return 0; - } - return -1; - -err_return: - RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT - " cannot be used\n", dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - return -1; -} - -/* - * Scan the content of the PCI bus, and call the probe() function for - * all registered drivers that have a matching entry in its id_table - * for discovered devices. - */ -int -rte_pci_probe(void) -{ - struct rte_pci_device *dev = NULL; - size_t probed = 0, failed = 0; - struct rte_devargs *devargs; - int probe_all = 0; - int ret = 0; - - if (rte_pci_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST) - probe_all = 1; - - FOREACH_DEVICE_ON_PCIBUS(dev) { - probed++; - - devargs = dev->device.devargs; - /* probe all or only whitelisted devices */ - if (probe_all) - ret = pci_probe_all_drivers(dev); - else if (devargs != NULL && - devargs->policy == RTE_DEV_WHITELISTED) - ret = pci_probe_all_drivers(dev); - if (ret < 0) { - RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT - " cannot be used\n", dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - rte_errno = errno; - failed++; - ret = 0; - } - } - - return (probed && probed == failed) ? 
-1 : 0; -} - -/* dump one device */ -static int -pci_dump_one_device(FILE *f, struct rte_pci_device *dev) -{ - int i; - - fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id, - dev->id.device_id); - - for (i = 0; i != sizeof(dev->mem_resource) / - sizeof(dev->mem_resource[0]); i++) { - fprintf(f, " %16.16"PRIx64" %16.16"PRIx64"\n", - dev->mem_resource[i].phys_addr, - dev->mem_resource[i].len); - } - return 0; -} - -/* dump devices on the bus */ -void -rte_pci_dump(FILE *f) -{ - struct rte_pci_device *dev = NULL; - - FOREACH_DEVICE_ON_PCIBUS(dev) { - pci_dump_one_device(f, dev); - } -} - -static int -pci_parse(const char *name, void *addr) -{ - struct rte_pci_addr *out = addr; - struct rte_pci_addr pci_addr; - bool parse; - - parse = (eal_parse_pci_BDF(name, &pci_addr) == 0 || - eal_parse_pci_DomBDF(name, &pci_addr) == 0); - if (parse && addr != NULL) - *out = pci_addr; - return parse == false; -} - -/* register a driver */ -void -rte_pci_register(struct rte_pci_driver *driver) -{ - TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next); - driver->bus = &rte_pci_bus; -} - -/* unregister a driver */ -void -rte_pci_unregister(struct rte_pci_driver *driver) -{ - TAILQ_REMOVE(&rte_pci_bus.driver_list, driver, next); - driver->bus = NULL; -} - -/* Add a device to PCI bus */ -void -rte_pci_add_device(struct rte_pci_device *pci_dev) -{ - TAILQ_INSERT_TAIL(&rte_pci_bus.device_list, pci_dev, next); -} - -/* Insert a device into a predefined position in PCI bus */ -void -rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, - struct rte_pci_device *new_pci_dev) -{ - TAILQ_INSERT_BEFORE(exist_pci_dev, new_pci_dev, next); -} - -/* Remove a device from PCI bus */ -void -rte_pci_remove_device(struct rte_pci_device *pci_dev) -{ - TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next); -} - -static struct rte_device * -pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, - const void *data) -{ - struct rte_pci_device *dev; - - FOREACH_DEVICE_ON_PCIBUS(dev) { - if (start && &dev->device == start) { - start = NULL; /* starting point found */ - continue; - } - if (cmp(&dev->device, data) == 0) - return &dev->device; - } - - return NULL; -} - -static int -pci_plug(struct rte_device *dev) -{ - return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev)); -} - -static int -pci_unplug(struct rte_device *dev) -{ - struct rte_pci_device *pdev; - int ret; - - pdev = RTE_DEV_TO_PCI(dev); - ret = rte_pci_detach_dev(pdev); - rte_pci_remove_device(pdev); - free(pdev); - return ret; -} - -struct rte_pci_bus rte_pci_bus = { - .bus = { - .scan = rte_pci_scan, - .probe = rte_pci_probe, - .find_device = pci_find_device, - .plug = pci_plug, - .unplug = pci_unplug, - .parse = pci_parse, - }, - .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), - .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), -}; - -RTE_REGISTER_BUS(pci, rte_pci_bus.bus); diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c deleted file mode 100644 index 367a6816..00000000 --- a/lib/librte_eal/common/eal_common_pci_uio.c +++ /dev/null @@ -1,233 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <fcntl.h> -#include <string.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mman.h> - -#include <rte_eal.h> -#include <rte_tailq.h> -#include <rte_log.h> -#include <rte_malloc.h> - -#include "eal_private.h" - -static struct rte_tailq_elem rte_uio_tailq = { - .name = "UIO_RESOURCE_LIST", -}; -EAL_REGISTER_TAILQ(rte_uio_tailq) - -static int -pci_uio_map_secondary(struct rte_pci_device *dev) -{ - int fd, i, j; - struct mapped_pci_resource *uio_res; - struct mapped_pci_res_list *uio_res_list = - RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); - - TAILQ_FOREACH(uio_res, uio_res_list, next) { - - /* skip this element if it doesn't match our PCI address */ - if (rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr)) - continue; - - for (i = 0; i != uio_res->nb_maps; i++) { - /* - * open devname, to mmap it - */ - fd = open(uio_res->maps[i].path, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - uio_res->maps[i].path, strerror(errno)); - return -1; - } - - void *mapaddr = pci_map_resource(uio_res->maps[i].addr, - fd, (off_t)uio_res->maps[i].offset, - (size_t)uio_res->maps[i].size, 0); - /* fd is not needed in slave process, close it */ - close(fd); - if (mapaddr != uio_res->maps[i].addr) { - RTE_LOG(ERR, EAL, - "Cannot mmap device resource file %s to address: %p\n", - uio_res->maps[i].path, - uio_res->maps[i].addr); - if (mapaddr != MAP_FAILED) { - /* unmap addrs correctly mapped */ - for (j = 0; j < i; j++) - pci_unmap_resource( - uio_res->maps[j].addr, - (size_t)uio_res->maps[j].size); - /* unmap addr wrongly mapped */ - pci_unmap_resource(mapaddr, - (size_t)uio_res->maps[i].size); - } - return -1; - } - } - return 0; - } - - RTE_LOG(ERR, EAL, "Cannot find resource for device\n"); - return 1; -} - -/* map the PCI resource of a PCI device in virtual memory */ -int -pci_uio_map_resource(struct rte_pci_device *dev) -{ - int i, map_idx = 0, ret; - uint64_t phaddr; - struct mapped_pci_resource *uio_res = NULL; - struct 
mapped_pci_res_list *uio_res_list = - RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); - - dev->intr_handle.fd = -1; - dev->intr_handle.uio_cfg_fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - - /* secondary processes - use already recorded details */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return pci_uio_map_secondary(dev); - - /* allocate uio resource */ - ret = pci_uio_alloc_resource(dev, &uio_res); - if (ret) - return ret; - - /* Map all BARs */ - for (i = 0; i != PCI_MAX_RESOURCE; i++) { - /* skip empty BAR */ - phaddr = dev->mem_resource[i].phys_addr; - if (phaddr == 0) - continue; - - ret = pci_uio_map_resource_by_index(dev, i, - uio_res, map_idx); - if (ret) - goto error; - - map_idx++; - } - - uio_res->nb_maps = map_idx; - - TAILQ_INSERT_TAIL(uio_res_list, uio_res, next); - - return 0; -error: - for (i = 0; i < map_idx; i++) { - pci_unmap_resource(uio_res->maps[i].addr, - (size_t)uio_res->maps[i].size); - rte_free(uio_res->maps[i].path); - } - pci_uio_free_resource(dev, uio_res); - return -1; -} - -static void -pci_uio_unmap(struct mapped_pci_resource *uio_res) -{ - int i; - - if (uio_res == NULL) - return; - - for (i = 0; i != uio_res->nb_maps; i++) { - pci_unmap_resource(uio_res->maps[i].addr, - (size_t)uio_res->maps[i].size); - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - rte_free(uio_res->maps[i].path); - } -} - -static struct mapped_pci_resource * -pci_uio_find_resource(struct rte_pci_device *dev) -{ - struct mapped_pci_resource *uio_res; - struct mapped_pci_res_list *uio_res_list = - RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); - - if (dev == NULL) - return NULL; - - TAILQ_FOREACH(uio_res, uio_res_list, next) { - - /* skip this element if it doesn't match our PCI address */ - if (!rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr)) - return uio_res; - } - return NULL; -} - -/* unmap the PCI resource of a PCI device in virtual memory */ -void -pci_uio_unmap_resource(struct rte_pci_device *dev) -{ - struct mapped_pci_resource *uio_res; - struct mapped_pci_res_list *uio_res_list = - RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); - - if (dev == NULL) - return; - - /* find an entry for the device */ - uio_res = pci_uio_find_resource(dev); - if (uio_res == NULL) - return; - - /* secondary processes - just free maps */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return pci_uio_unmap(uio_res); - - TAILQ_REMOVE(uio_res_list, uio_res, next); - - /* unmap all resources */ - pci_uio_unmap(uio_res); - - /* free uio resource */ - rte_free(uio_res); - - /* close fd if in primary process */ - close(dev->intr_handle.fd); - if (dev->intr_handle.uio_cfg_fd >= 0) { - close(dev->intr_handle.uio_cfg_fd); - dev->intr_handle.uio_cfg_fd = -1; - } - - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; -} diff --git a/lib/librte_eal/common/eal_common_tailqs.c b/lib/librte_eal/common/eal_common_tailqs.c index 55955f9e..6ae09fdb 100644 --- a/lib/librte_eal/common/eal_common_tailqs.c +++ b/lib/librte_eal/common/eal_common_tailqs.c @@ -40,7 +40,6 @@ #include <inttypes.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c index 2405e93f..55e96963 100644 --- a/lib/librte_eal/common/eal_common_thread.c +++ b/lib/librte_eal/common/eal_common_thread.c @@ -53,6 +53,20 @@ unsigned rte_socket_id(void) return RTE_PER_LCORE(_socket_id); } +int 
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + + if (lcore_id >= RTE_MAX_LCORE) + return -EINVAL; + + if (cfg->lcore_role[lcore_id] == role) + return 0; + + return -EINVAL; +} + int eal_cpuset_socket_id(rte_cpuset_t *cpusetp) { unsigned cpu = 0; diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c index ed0b16d0..568ae2fd 100644 --- a/lib/librte_eal/common/eal_common_timer.c +++ b/lib/librte_eal/common/eal_common_timer.c @@ -80,8 +80,11 @@ estimate_tsc_freq(void) void set_tsc_freq(void) { - uint64_t freq = get_tsc_freq(); + uint64_t freq; + freq = get_tsc_freq_arch(); + if (!freq) + freq = get_tsc_freq(); if (!freq) freq = estimate_tsc_freq(); @@ -94,8 +97,7 @@ void rte_delay_us_callback_register(void (*userfunc)(unsigned int)) rte_delay_us = userfunc; } -static void __attribute__((constructor)) -rte_timer_init(void) +RTE_INIT(rte_timer_init) { /* set rte_delay_us_block as a delay function */ rte_delay_us_callback_register(rte_delay_us_block); diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c deleted file mode 100644 index f7e547a6..00000000 --- a/lib/librte_eal/common/eal_common_vdev.c +++ /dev/null @@ -1,342 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2016 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <string.h> -#include <inttypes.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <stdbool.h> -#include <sys/queue.h> - -#include <rte_eal.h> -#include <rte_dev.h> -#include <rte_bus.h> -#include <rte_vdev.h> -#include <rte_common.h> -#include <rte_devargs.h> -#include <rte_memory.h> -#include <rte_errno.h> - -/* Forward declare to access virtual bus name */ -static struct rte_bus rte_vdev_bus; - -/** Double linked list of virtual device drivers. 
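rte_lcore_has_role(), added to eal_common_thread.c above, returns 0 when the lcore currently holds the requested role and -EINVAL otherwise, so callers compare the result against 0 rather than treating it as a boolean. A short usage sketch, assuming EAL is initialized:

    #include <rte_lcore.h>

    /* Count the lcores acting as plain RTE worker cores (skipping, for
     * example, service cores); note the 0-on-match return convention. */
    static unsigned int count_rte_lcores(void)
    {
        unsigned int lcore_id, count = 0;

        RTE_LCORE_FOREACH(lcore_id) {
            if (rte_lcore_has_role(lcore_id, ROLE_RTE) == 0)
                count++;
        }
        return count;
    }
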
*/ -TAILQ_HEAD(vdev_device_list, rte_vdev_device); - -static struct vdev_device_list vdev_device_list = - TAILQ_HEAD_INITIALIZER(vdev_device_list); -struct vdev_driver_list vdev_driver_list = - TAILQ_HEAD_INITIALIZER(vdev_driver_list); - -/* register a driver */ -void -rte_vdev_register(struct rte_vdev_driver *driver) -{ - TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next); -} - -/* unregister a driver */ -void -rte_vdev_unregister(struct rte_vdev_driver *driver) -{ - TAILQ_REMOVE(&vdev_driver_list, driver, next); -} - -static int -vdev_parse(const char *name, void *addr) -{ - struct rte_vdev_driver **out = addr; - struct rte_vdev_driver *driver = NULL; - - TAILQ_FOREACH(driver, &vdev_driver_list, next) { - if (strncmp(driver->driver.name, name, - strlen(driver->driver.name)) == 0) - break; - if (driver->driver.alias && - strncmp(driver->driver.alias, name, - strlen(driver->driver.alias)) == 0) - break; - } - if (driver != NULL && - addr != NULL) - *out = driver; - return driver == NULL; -} - -static int -vdev_probe_all_drivers(struct rte_vdev_device *dev) -{ - const char *name; - struct rte_vdev_driver *driver; - int ret; - - name = rte_vdev_device_name(dev); - - RTE_LOG(DEBUG, EAL, "Search driver %s to probe device %s\n", name, - rte_vdev_device_name(dev)); - - if (vdev_parse(name, &driver)) - return -1; - dev->device.driver = &driver->driver; - ret = driver->probe(dev); - if (ret) - dev->device.driver = NULL; - return ret; -} - -static struct rte_vdev_device * -find_vdev(const char *name) -{ - struct rte_vdev_device *dev; - - if (!name) - return NULL; - - TAILQ_FOREACH(dev, &vdev_device_list, next) { - const char *devname = rte_vdev_device_name(dev); - if (!strncmp(devname, name, strlen(name))) - return dev; - } - - return NULL; -} - -static struct rte_devargs * -alloc_devargs(const char *name, const char *args) -{ - struct rte_devargs *devargs; - int ret; - - devargs = calloc(1, sizeof(*devargs)); - if (!devargs) - return NULL; - - devargs->bus = &rte_vdev_bus; - if (args) - devargs->args = strdup(args); - else - devargs->args = strdup(""); - - ret = snprintf(devargs->name, sizeof(devargs->name), "%s", name); - if (ret < 0 || ret >= (int)sizeof(devargs->name)) { - free(devargs->args); - free(devargs); - return NULL; - } - - return devargs; -} - -int -rte_vdev_init(const char *name, const char *args) -{ - struct rte_vdev_device *dev; - struct rte_devargs *devargs; - int ret; - - if (name == NULL) - return -EINVAL; - - dev = find_vdev(name); - if (dev) - return -EEXIST; - - devargs = alloc_devargs(name, args); - if (!devargs) - return -ENOMEM; - - dev = calloc(1, sizeof(*dev)); - if (!dev) { - ret = -ENOMEM; - goto fail; - } - - dev->device.devargs = devargs; - dev->device.numa_node = SOCKET_ID_ANY; - dev->device.name = devargs->name; - - ret = vdev_probe_all_drivers(dev); - if (ret) { - if (ret > 0) - RTE_LOG(ERR, EAL, "no driver found for %s\n", name); - goto fail; - } - - TAILQ_INSERT_TAIL(&devargs_list, devargs, next); - - TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); - return 0; - -fail: - free(devargs->args); - free(devargs); - free(dev); - return ret; -} - -static int -vdev_remove_driver(struct rte_vdev_device *dev) -{ - const char *name = rte_vdev_device_name(dev); - const struct rte_vdev_driver *driver; - - if (!dev->device.driver) { - RTE_LOG(DEBUG, EAL, "no driver attach to device %s\n", name); - return 1; - } - - driver = container_of(dev->device.driver, const struct rte_vdev_driver, - driver); - return driver->remove(dev); -} - -int -rte_vdev_uninit(const char *name) -{ - 
struct rte_vdev_device *dev; - struct rte_devargs *devargs; - int ret; - - if (name == NULL) - return -EINVAL; - - dev = find_vdev(name); - if (!dev) - return -ENOENT; - - devargs = dev->device.devargs; - - ret = vdev_remove_driver(dev); - if (ret) - return ret; - - TAILQ_REMOVE(&vdev_device_list, dev, next); - - TAILQ_REMOVE(&devargs_list, devargs, next); - - free(devargs->args); - free(devargs); - free(dev); - return 0; -} - -static int -vdev_scan(void) -{ - struct rte_vdev_device *dev; - struct rte_devargs *devargs; - - /* for virtual devices we scan the devargs_list populated via cmdline */ - TAILQ_FOREACH(devargs, &devargs_list, next) { - - if (devargs->bus != &rte_vdev_bus) - continue; - - dev = find_vdev(devargs->name); - if (dev) - continue; - - dev = calloc(1, sizeof(*dev)); - if (!dev) - return -1; - - dev->device.devargs = devargs; - dev->device.numa_node = SOCKET_ID_ANY; - dev->device.name = devargs->name; - - TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); - } - - return 0; -} - -static int -vdev_probe(void) -{ - struct rte_vdev_device *dev; - - /* call the init function for each virtual device */ - TAILQ_FOREACH(dev, &vdev_device_list, next) { - - if (dev->device.driver) - continue; - - if (vdev_probe_all_drivers(dev)) { - RTE_LOG(ERR, EAL, "failed to initialize %s device\n", - rte_vdev_device_name(dev)); - return -1; - } - } - - return 0; -} - -static struct rte_device * -vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, - const void *data) -{ - struct rte_vdev_device *dev; - - TAILQ_FOREACH(dev, &vdev_device_list, next) { - if (start && &dev->device == start) { - start = NULL; - continue; - } - if (cmp(&dev->device, data) == 0) - return &dev->device; - } - return NULL; -} - -static int -vdev_plug(struct rte_device *dev) -{ - return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev)); -} - -static int -vdev_unplug(struct rte_device *dev) -{ - return rte_vdev_uninit(dev->name); -} - -static struct rte_bus rte_vdev_bus = { - .scan = vdev_scan, - .probe = vdev_probe, - .find_device = vdev_find_device, - .plug = vdev_plug, - .unplug = vdev_unplug, - .parse = vdev_parse, -}; - -RTE_REGISTER_BUS(vdev, rte_vdev_bus); diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 7b7e8c88..fa6ccbec 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -65,7 +65,6 @@ struct internal_config { volatile unsigned force_nrank; /**< force number of ranks */ volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */ unsigned hugepage_unlink; /**< true to unlink backing files */ - volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/ volatile unsigned no_pci; /**< true to disable PCI */ volatile unsigned no_hpet; /**< true to disable HPET */ volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping @@ -82,7 +81,7 @@ struct internal_config { volatile enum rte_intr_mode vfio_intr_mode; const char *hugefile_prefix; /**< the base filename of hugetlbfs files */ const char *hugepage_dir; /**< specific hugetlbfs directory to use */ - + const char *mbuf_pool_ops_name; /**< mbuf pool ops name */ unsigned num_hugepage_sizes; /**< how many sizes on this system */ struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES]; }; diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index 439a2610..30e6bb41 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -61,6 +61,8 @@ enum { OPT_LOG_LEVEL_NUM, 
#define OPT_MASTER_LCORE "master-lcore" OPT_MASTER_LCORE_NUM, +#define OPT_MBUF_POOL_OPS_NAME "mbuf-pool-ops-name" + OPT_MBUF_POOL_OPS_NAME_NUM, #define OPT_PROC_TYPE "proc-type" OPT_PROC_TYPE_NUM, #define OPT_NO_HPET "no-hpet" @@ -81,8 +83,6 @@ enum { OPT_VFIO_INTR_NUM, #define OPT_VMWARE_TSC_MAP "vmware-tsc-map" OPT_VMWARE_TSC_MAP_NUM, -#define OPT_XEN_DOM0 "xen-dom0" - OPT_XEN_DOM0_NUM, OPT_LONG_MAX_NUM }; diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 597d82e4..462226f1 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -35,8 +35,8 @@ #define _EAL_PRIVATE_H_ #include <stdbool.h> +#include <stdint.h> #include <stdio.h> -#include <rte_pci.h> /** * Initialize the memzone subsystem (private to eal). @@ -109,137 +109,6 @@ int rte_eal_timer_init(void); */ int rte_eal_log_init(const char *id, int facility); -struct rte_pci_driver; -struct rte_pci_device; - -/** - * Find the name of a PCI device. - */ -void pci_name_set(struct rte_pci_device *dev); - -/** - * Add a PCI device to the PCI Bus (append to PCI Device list). This function - * also updates the bus references of the PCI Device (and the generic device - * object embedded within. - * - * @param pci_dev - * PCI device to add - * @return void - */ -void rte_pci_add_device(struct rte_pci_device *pci_dev); - -/** - * Insert a PCI device in the PCI Bus at a particular location in the device - * list. It also updates the PCI Bus reference of the new devices to be - * inserted. - * - * @param exist_pci_dev - * Existing PCI device in PCI Bus - * @param new_pci_dev - * PCI device to be added before exist_pci_dev - * @return void - */ -void rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, - struct rte_pci_device *new_pci_dev); - -/** - * Remove a PCI device from the PCI Bus. This sets to NULL the bus references - * in the PCI device object as well as the generic device object. - * - * @param pci_device - * PCI device to be removed from PCI Bus - * @return void - */ -void rte_pci_remove_device(struct rte_pci_device *pci_device); - -/** - * Update a pci device object by asking the kernel for the latest information. - * - * This function is private to EAL. - * - * @param addr - * The PCI Bus-Device-Function address to look for - * @return - * - 0 on success. - * - negative on error. - */ -int pci_update_device(const struct rte_pci_addr *addr); - -/** - * Unbind kernel driver for this device - * - * This function is private to EAL. - * - * @return - * 0 on success, negative on error - */ -int pci_unbind_kernel_driver(struct rte_pci_device *dev); - -/** - * Map the PCI resource of a PCI device in virtual memory - * - * This function is private to EAL. - * - * @return - * 0 on success, negative on error - */ -int pci_uio_map_resource(struct rte_pci_device *dev); - -/** - * Unmap the PCI resource of a PCI device - * - * This function is private to EAL. - */ -void pci_uio_unmap_resource(struct rte_pci_device *dev); - -/** - * Allocate uio resource for PCI device - * - * This function is private to EAL. - * - * @param dev - * PCI device to allocate uio resource - * @param uio_res - * Pointer to uio resource. - * If the function returns 0, the pointer will be filled. - * @return - * 0 on success, negative on error - */ -int pci_uio_alloc_resource(struct rte_pci_device *dev, - struct mapped_pci_resource **uio_res); - -/** - * Free uio resource for PCI device - * - * This function is private to EAL. 
- * - * @param dev - * PCI device to free uio resource - * @param uio_res - * Pointer to uio resource. - */ -void pci_uio_free_resource(struct rte_pci_device *dev, - struct mapped_pci_resource *uio_res); - -/** - * Map device memory to uio resource - * - * This function is private to EAL. - * - * @param dev - * PCI device that has memory information. - * @param res_idx - * Memory resource index of the PCI device. - * @param uio_res - * uio resource that will keep mapping information. - * @param map_idx - * Mapping information index of the uio resource. - * @return - * 0 on success, negative on error - */ -int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, - struct mapped_pci_resource *uio_res, int map_idx); - /** * Init tail queues for non-EAL library structures. This is to allow * the rings, mempools, etc. lists to be shared among multiple processes @@ -315,6 +184,17 @@ void set_tsc_freq(void); uint64_t get_tsc_freq(void); /** + * Get TSC frequency if the architecture supports. + * + * This function is private to the EAL. + * + * @return + * The number of TSC cycles in one second. + * Returns zero if the architecture support is not available. + */ +uint64_t get_tsc_freq_arch(void); + +/** * Prepare physical memory mapping * i.e. hugepages on Linux and * contigmem on BSD. @@ -333,17 +213,6 @@ int rte_eal_hugepage_init(void); int rte_eal_hugepage_attach(void); /** - * Returns true if the system is able to obtain - * physical addresses. Return false if using DMA - * addresses through an IOMMU. - * - * Drivers based on uio will not load unless physical - * addresses are obtainable. It is only possible to get - * physical addresses when running as a privileged user. - */ -bool rte_eal_using_phys_addrs(void); - -/** * Find a bus capable of identifying a device. * * @param str diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h index 782350d1..aa887a97 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h @@ -136,7 +136,7 @@ vgetq_lane_p64(poly64x2_t x, const int lane) #endif /* - * If (0 <= index <= 15), then call the ASIMD ext intruction on the + * If (0 <= index <= 15), then call the ASIMD ext instruction on the * 128 bit regs v0 and v1 with the appropriate index. * * Else returns a zero vector. diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h index 2e04c759..fb3abf18 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h @@ -81,7 +81,7 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) : "memory" ); /* no-clobber list */ #else asm volatile ( - "mov %%ebx, %%edi\n" + "xchgl %%ebx, %%edi;\n" MPLOCKED "cmpxchg8b (%[dst]);" "setz %[res];" diff --git a/lib/librte_sched/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h index 010d752c..010d752c 100644 --- a/lib/librte_sched/rte_bitmap.h +++ b/lib/librte_eal/common/include/rte_bitmap.h diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h index c79368d3..6fb08341 100644 --- a/lib/librte_eal/common/include/rte_bus.h +++ b/lib/librte_eal/common/include/rte_bus.h @@ -55,6 +55,21 @@ extern "C" { /** Double linked list of buses */ TAILQ_HEAD(rte_bus_list, rte_bus); + +/** + * IOVA mapping mode. + * + * IOVA mapping mode is iommu programming mode of a device. 
+ * That device (for example: IOMMU backed DMA device) based + * on rte_iova_mode will generate physical or virtual address. + * + */ +enum rte_iova_mode { + RTE_IOVA_DC = 0, /* Don't care mode */ + RTE_IOVA_PA = (1 << 0), /* DMA using physical address */ + RTE_IOVA_VA = (1 << 1) /* DMA using virtual address */ +}; + /** * Bus specific scan for devices attached on the bus. * For each bus object, the scan would be responsible for finding devices and @@ -168,6 +183,20 @@ struct rte_bus_conf { enum rte_bus_scan_mode scan_mode; /**< Scan policy. */ }; + +/** + * Get common iommu class of the all the devices on the bus. The bus may + * check that those devices are attached to iommu driver. + * If no devices are attached to the bus. The bus may return with don't care + * (_DC) value. + * Otherwise, The bus will return appropriate _pa or _va iova mode. + * + * @return + * enum rte_iova_mode value. + */ +typedef enum rte_iova_mode (*rte_bus_get_iommu_class_t)(void); + + /** * A structure describing a generic bus. */ @@ -181,6 +210,7 @@ struct rte_bus { rte_bus_unplug_t unplug; /**< Remove single device from driver */ rte_bus_parse_t parse; /**< Parse a device name */ struct rte_bus_conf conf; /**< Bus configuration */ + rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */ }; /** @@ -280,12 +310,22 @@ struct rte_bus *rte_bus_find_by_device(const struct rte_device *dev); */ struct rte_bus *rte_bus_find_by_name(const char *busname); + +/** + * Get the common iommu class of devices bound on to buses available in the + * system. The default mode is PA. + * + * @return + * enum rte_iova_mode value. + */ +enum rte_iova_mode rte_bus_get_iommu_class(void); + /** * Helper for Bus registration. * The constructor has higher priority than PMD constructors. */ #define RTE_REGISTER_BUS(nm, bus) \ -RTE_INIT_PRIO(businitfn_ ##nm, 101); \ +RTE_INIT_PRIO(businitfn_ ##nm, 110); \ static void businitfn_ ##nm(void) \ {\ (bus).name = RTE_STR(nm);\ diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index 1afc66e3..de853e16 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -109,6 +109,29 @@ typedef uint16_t unaligned_uint16_t; #define RTE_SET_USED(x) (void)(x) /** + * Run function before main() with low priority. + * + * The constructor will be run after prioritized constructors. + * + * @param func + * Constructor function. + */ +#define RTE_INIT(func) \ +static void __attribute__((constructor, used)) func(void) + +/** + * Run function before main() with high priority. + * + * @param func + * Constructor function. + * @param prio + * Priority number must be above 100. + * Lowest number is the first to run. + */ +#define RTE_INIT_PRIO(func, prio) \ +static void __attribute__((constructor(prio), used)) func(void) + +/** * Force a function to be inlined */ #define __rte_always_inline inline __attribute__((always_inline)) diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h index cab6fb4c..79b67b3e 100644 --- a/lib/librte_eal/common/include/rte_debug.h +++ b/lib/librte_eal/common/include/rte_debug.h @@ -79,7 +79,7 @@ void rte_dump_registers(void); #define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy") #define rte_panic_(func, format, ...) 
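The rte_bus.h additions above give every bus an optional get_iommu_class callback and declare rte_bus_get_iommu_class(), whose implementation (earlier, in eal_common_bus.c) ORs the per-bus answers together and falls back to RTE_IOVA_PA unless the combined result is exactly RTE_IOVA_VA. A hedged sketch of how a bus driver might wire up the callback; the device probe is a stub, not the real PCI bus logic:

    #include <rte_bus.h>

    /* Stub: a real bus would walk its device list and check whether each
     * device is bound to an IOMMU-backed (vfio-style) kernel driver. */
    static int all_devices_iommu_backed(void) { return 0; }

    /* Report VA only when the IOMMU can translate for every device on the
     * bus; otherwise report don't-care and let EAL default to PA. */
    static enum rte_iova_mode
    mybus_get_iommu_class(void)
    {
        return all_devices_iommu_backed() ? RTE_IOVA_VA : RTE_IOVA_DC;
    }

    static struct rte_bus mybus = {
        /* .scan, .probe, .find_device, ... omitted */
        .get_iommu_class = mybus_get_iommu_class,
    };
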
__rte_panic(func, format "%.0s", __VA_ARGS__) -#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG +#ifdef RTE_ENABLE_ASSERT #define RTE_ASSERT(exp) RTE_VERIFY(exp) #else #define RTE_ASSERT(exp) do {} while (0) diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index 5386d3a2..9342e0cb 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -49,7 +49,6 @@ extern "C" { #include <stdio.h> #include <sys/queue.h> -#include <rte_config.h> #include <rte_log.h> __attribute__((format(printf, 2, 0))) @@ -152,7 +151,11 @@ struct rte_driver { const char *alias; /**< Driver alias. */ }; -#define RTE_DEV_NAME_MAX_LEN (32) +/* + * Internal identifier length + * Sufficiently large to allow for UUID or PCI address + */ +#define RTE_DEV_NAME_MAX_LEN 64 /** * A structure describing a generic device. @@ -166,28 +169,6 @@ struct rte_device { }; /** - * Initialize a driver specified by name. - * - * @param name - * The pointer to a driver name to be initialized. - * @param args - * The pointer to arguments used by driver initialization. - * @return - * 0 on success, negative on error - */ -int rte_vdev_init(const char *name, const char *args); - -/** - * Uninitalize a driver specified by name. - * - * @param name - * The pointer to a driver name to be initialized. - * @return - * 0 on success, negative on error - */ -int rte_vdev_uninit(const char *name); - -/** * Attach a device to a registered driver. * * @param name @@ -312,4 +293,4 @@ __attribute__((used)) = str } #endif -#endif /* _RTE_VDEV_H_ */ +#endif /* _RTE_DEV_H_ */ diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h index 0e7363d7..09b66819 100644 --- a/lib/librte_eal/common/include/rte_eal.h +++ b/lib/librte_eal/common/include/rte_eal.h @@ -44,7 +44,9 @@ #include <sched.h> #include <rte_per_lcore.h> -#include <rte_config.h> +#include <rte_bus.h> + +#include <rte_pci_dev_feature_defs.h> #ifdef __cplusplus extern "C" { @@ -87,6 +89,9 @@ struct rte_config { /** Primary or secondary configuration */ enum rte_proc_type_t process_type; + /** PA or VA mapping mode */ + enum rte_iova_mode iova_mode; + /** * Pointer to memory configuration, which may be shared across multiple * DPDK instances @@ -264,6 +269,32 @@ rte_set_application_usage_hook(rte_usage_hook_t usage_func); int rte_eal_has_hugepages(void); /** + * Whether EAL is using PCI bus. + * Disabled by --no-pci option. + * + * @return + * Nonzero if the PCI bus is enabled. + */ +int rte_eal_has_pci(void); + +/** + * Whether the EAL was asked to create UIO device. + * + * @return + * Nonzero if true. + */ +int rte_eal_create_uio_dev(void); + +/** + * The user-configured vfio interrupt mode. + * + * @return + * Interrupt mode configured with the command line, + * RTE_INTR_MODE_NONE by default. + */ +enum rte_intr_mode rte_eal_vfio_intr_mode(void); + +/** * A wrap API for syscall gettid. * * @return @@ -287,11 +318,22 @@ static inline int rte_gettid(void) return RTE_PER_LCORE(_thread_id); } -#define RTE_INIT(func) \ -static void __attribute__((constructor, used)) func(void) +/** + * Get the iova mode + * + * @return + * enum rte_iova_mode value. + */ +enum rte_iova_mode rte_eal_iova_mode(void); -#define RTE_INIT_PRIO(func, prio) \ -static void __attribute__((constructor(prio), used)) func(void) +/** + * Get default pool ops name for mbuf + * + * @return + * returns default pool ops name. 
+ */ +const char * +rte_eal_mbuf_default_mempool_ops(void); #ifdef __cplusplus } diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/common/include/rte_eal_interrupts.h index 6daffebf..031f78cc 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_eal_interrupts.h @@ -35,15 +35,26 @@ #error "don't include this file directly, please include generic <rte_interrupts.h>" #endif -#ifndef _RTE_LINUXAPP_INTERRUPTS_H_ -#define _RTE_LINUXAPP_INTERRUPTS_H_ +/** + * @file rte_eal_interrupts.h + * @internal + * + * Contains function prototypes exposed by the EAL for interrupt handling by + * drivers and other DPDK internal consumers. + */ + +#ifndef _RTE_EAL_INTERRUPTS_H_ +#define _RTE_EAL_INTERRUPTS_H_ #define RTE_MAX_RXTX_INTR_VEC_ID 32 #define RTE_INTR_VEC_ZERO_OFFSET 0 #define RTE_INTR_VEC_RXTX_OFFSET 1 +/** + * The interrupt source type, e.g. UIO, VFIO, ALARM etc. + */ enum rte_intr_handle_type { - RTE_INTR_HANDLE_UNKNOWN = 0, + RTE_INTR_HANDLE_UNKNOWN = 0, /**< generic unknown handle */ RTE_INTR_HANDLE_UIO, /**< uio device handle */ RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */ RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */ @@ -52,7 +63,7 @@ enum rte_intr_handle_type { RTE_INTR_HANDLE_ALARM, /**< alarm handle */ RTE_INTR_HANDLE_EXT, /**< external handler */ RTE_INTR_HANDLE_VDEV, /**< virtual device */ - RTE_INTR_HANDLE_MAX + RTE_INTR_HANDLE_MAX /**< count of elements */ }; #define RTE_INTR_EVENT_ADD 1UL @@ -86,13 +97,13 @@ struct rte_intr_handle { RTE_STD_C11 union { int vfio_dev_fd; /**< VFIO device file descriptor */ - int uio_cfg_fd; /**< UIO config file descriptor - for uio_pci_generic */ + int uio_cfg_fd; /**< UIO cfg file desc for uio_pci_generic */ }; int fd; /**< interrupt event file descriptor */ enum rte_intr_handle_type type; /**< handle type */ uint32_t max_intr; /**< max interrupt requested */ uint32_t nb_efd; /**< number of available efd(event fd) */ + uint8_t efd_counter_size; /**< size of efd counter, used for vdev */ int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */ struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vector epoll event */ @@ -236,4 +247,4 @@ rte_intr_allow_others(struct rte_intr_handle *intr_handle); int rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); -#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */ +#endif /* _RTE_EAL_INTERRUPTS_H_ */ diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h index 5d06ed79..43177c7a 100644 --- a/lib/librte_eal/common/include/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -53,7 +53,7 @@ struct rte_intr_handle; /** Function to be registered for the specific interrupt */ typedef void (*rte_intr_callback_fn)(void *cb_arg); -#include <exec-env/rte_interrupts.h> +#include "rte_eal_interrupts.h" /** * It registers the callback for the specific interrupt. Multiple diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h index 50e0d0fe..c89e6bab 100644 --- a/lib/librte_eal/common/include/rte_lcore.h +++ b/lib/librte_eal/common/include/rte_lcore.h @@ -262,6 +262,20 @@ void rte_thread_get_affinity(rte_cpuset_t *cpusetp); */ int rte_thread_setname(pthread_t id, const char *name); +/** + * Test if the core supplied has a specific role + * + * @param lcore_id + * The identifier of the lcore, which MUST be between 0 and + * RTE_MAX_LCORE-1. 
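rte_eal_mbuf_default_mempool_ops(), declared above, returns the mempool ops name given with the new --mbuf-pool-ops-name EAL option, or RTE_MBUF_DEFAULT_MEMPOOL_OPS when the option is absent. An application creating its own packet pool can honor that choice; a minimal sketch, assuming the rte_pktmbuf_pool_create_by_ops() helper introduced alongside this option, with placeholder pool sizing:

    #include <rte_eal.h>
    #include <rte_lcore.h>
    #include <rte_mbuf.h>

    /* Build a pktmbuf pool with whichever mempool ops (ring, stack,
     * hardware pool manager, ...) the user selected on the command line. */
    static struct rte_mempool *
    create_pktmbuf_pool(void)
    {
        const char *ops = rte_eal_mbuf_default_mempool_ops();

        return rte_pktmbuf_pool_create_by_ops("mbuf_pool", 8192, 256, 0,
                                              RTE_MBUF_DEFAULT_BUF_SIZE,
                                              rte_socket_id(), ops);
    }
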
+ * @param role + * The role to be checked against. + * @return + * On success, return 0; otherwise return a negative value. + */ +int +rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h index ec8dba79..16564d41 100644 --- a/lib/librte_eal/common/include/rte_log.h +++ b/lib/librte_eal/common/include/rte_log.h @@ -87,6 +87,7 @@ extern struct rte_logs rte_logs; #define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */ #define RTE_LOGTYPE_EFD 18 /**< Log related to EFD. */ #define RTE_LOGTYPE_EVENTDEV 19 /**< Log related to eventdev. */ +#define RTE_LOGTYPE_GSO 20 /**< Log related to GSO. */ /* these log types can be used in an application */ #define RTE_LOGTYPE_USER1 24 /**< User-defined log type 1. */ @@ -138,12 +139,6 @@ int rte_openlog_stream(FILE *f); void rte_log_set_global_level(uint32_t level); /** - * Deprecated, replaced by rte_log_set_global_level(). - */ -__rte_deprecated -void rte_set_log_level(uint32_t level); - -/** * Get the global log level. * * @return @@ -152,29 +147,6 @@ void rte_set_log_level(uint32_t level); uint32_t rte_log_get_global_level(void); /** - * Deprecated, replaced by rte_log_get_global_level(). - */ -__rte_deprecated -uint32_t rte_get_log_level(void); - -/** - * Enable or disable the log type. - * - * @param type - * Log type, for example, RTE_LOGTYPE_EAL. - * @param enable - * True for enable; false for disable. - */ -__rte_deprecated -void rte_set_log_type(uint32_t type, int enable); - -/** - * Get the global log type. - */ -__rte_deprecated -uint32_t rte_get_log_type(void); - -/** * Get the log level for a given type. * * @param logtype diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h index 3d37f79b..5d4c11a7 100644 --- a/lib/librte_eal/common/include/rte_malloc.h +++ b/lib/librte_eal/common/include/rte_malloc.h @@ -323,17 +323,24 @@ int rte_malloc_set_limit(const char *type, size_t max); /** - * Return the physical address of a virtual address obtained through + * Return the IO address of a virtual address obtained through * rte_malloc * * @param addr * Address obtained from a previous rte_malloc call * @return - * RTE_BAD_PHYS_ADDR on error - * otherwise return physical address of the buffer + * RTE_BAD_IOVA on error + * otherwise return an address suitable for IO */ -phys_addr_t -rte_malloc_virt2phy(const void *addr); +rte_iova_t +rte_malloc_virt2iova(const void *addr); + +__rte_deprecated +static inline phys_addr_t +rte_malloc_virt2phy(const void *addr) +{ + return rte_malloc_virt2iova(addr); +} #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index 4aa5d1f7..14aacea5 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -44,12 +44,6 @@ #include <stddef.h> #include <stdio.h> -#include <rte_config.h> - -#ifdef RTE_EXEC_ENV_LINUXAPP -#include <exec-env/rte_dom0_common.h> -#endif - #ifdef __cplusplus extern "C" { #endif @@ -98,14 +92,27 @@ enum rte_page_sizes { */ #define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE) -typedef uint64_t phys_addr_t; /**< Physical address definition. */ +typedef uint64_t phys_addr_t; /**< Physical address. */ #define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1) +/** + * IO virtual address type. 
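A minimal sketch of the rte_malloc_virt2iova() replacement documented above, checking the RTE_BAD_IOVA error value; the buffer type string "example_buf" is a placeholder, and rte_malloc()/rte_free() are the pre-existing allocator entry points:

#include <stddef.h>
#include <rte_malloc.h>
#include <rte_memory.h>

/* Sketch: allocate a buffer and obtain the IO address to hand to a device. */
static int get_buffer_iova(size_t len, void **vaddr, rte_iova_t *iova)
{
	void *buf = rte_malloc("example_buf", len, 0);

	if (buf == NULL)
		return -1;

	*iova = rte_malloc_virt2iova(buf);	/* was rte_malloc_virt2phy() */
	if (*iova == RTE_BAD_IOVA) {
		rte_free(buf);
		return -1;
	}
	*vaddr = buf;
	return 0;
}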
+ * When the physical addressing mode (IOVA as PA) is in use, + * the translation from an IO virtual address (IOVA) to a physical address + * is a direct mapping, i.e. the same value. + * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation. + */ +typedef uint64_t rte_iova_t; +#define RTE_BAD_IOVA ((rte_iova_t)-1) /** * Physical memory segment descriptor. */ struct rte_memseg { - phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 + union { + phys_addr_t phys_addr; /**< deprecated - Start physical address. */ + rte_iova_t iova; /**< Start IO address. */ + }; RTE_STD_C11 union { void *addr; /**< Start virtual address. */ @@ -116,10 +123,6 @@ struct rte_memseg { int32_t socket_id; /**< NUMA socket ID. */ uint32_t nchannel; /**< Number of channels. */ uint32_t nrank; /**< Number of ranks. */ -#ifdef RTE_LIBRTE_XEN_DOM0 - /**< store segment MFNs */ - uint64_t mfn[DOM0_NUM_MEMBLOCK]; -#endif } __rte_packed; /** @@ -140,11 +143,21 @@ int rte_mem_lock_page(const void *virt); * @param virt * The virtual address. * @return - * The physical address or RTE_BAD_PHYS_ADDR on error. + * The physical address or RTE_BAD_IOVA on error. */ phys_addr_t rte_mem_virt2phy(const void *virt); /** + * Get IO virtual address of any mapped virtual address in the current process. + * + * @param virt + * The virtual address. + * @return + * The IO address or RTE_BAD_IOVA on error. + */ +rte_iova_t rte_mem_virt2iova(const void *virt); + +/** * Get the layout of the available physical memory. * * It can be useful for an application to have the full physical @@ -195,68 +208,16 @@ unsigned rte_memory_get_nchannel(void); */ unsigned rte_memory_get_nrank(void); -#ifdef RTE_LIBRTE_XEN_DOM0 - -/**< Internal use only - should DOM0 memory mapping be used */ -int rte_xen_dom0_supported(void); - -/**< Internal use only - phys to virt mapping for xen */ -phys_addr_t rte_xen_mem_phy2mch(int32_t, const phys_addr_t); - /** - * Return the physical address of elt, which is an element of the pool mp. - * - * @param memseg_id - * Identifier of the memory segment owning the physical address. If - * set to -1, find it automatically. - * @param phy_addr - * physical address of elt. - * - * @return - * The physical address or RTE_BAD_PHYS_ADDR on error. - */ -static inline phys_addr_t -rte_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr) -{ - if (rte_xen_dom0_supported()) - return rte_xen_mem_phy2mch(memseg_id, phy_addr); - else - return phy_addr; -} - -/** - * Memory init for supporting application running on Xen domain0. - * - * @param void + * Drivers based on uio will not load unless physical + * addresses are obtainable. It is only possible to get + * physical addresses when running as a privileged user. * * @return - * 0: successfully - * negative: error + * 1 if the system is able to obtain physical addresses. + * 0 if using DMA addresses through an IOMMU. */ -int rte_xen_dom0_memory_init(void); - -/** - * Attach to memory setments of primary process on Xen domain0. 
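To illustrate the new iova member of struct rte_memseg, a sketch that walks the segment layout; rte_eal_get_physmem_layout() and RTE_MAX_MEMSEG are assumed to be the pre-existing layout accessor and build-time segment limit, neither of which appears in this hunk:

#include <inttypes.h>
#include <stdio.h>
#include <rte_memory.h>

/* Sketch: print the IO address of each populated memory segment. */
static void dump_memseg_iova(void)
{
	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
	unsigned int i;

	for (i = 0; i < RTE_MAX_MEMSEG && ms[i].addr != NULL; i++)
		printf("seg %u: va=%p iova=0x%" PRIx64 "\n",
			i, ms[i].addr, (uint64_t)ms[i].iova);
}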
- * - * @param void - * - * @return - * 0: successfully - * negative: error - */ -int rte_xen_dom0_memory_attach(void); -#else -static inline int rte_xen_dom0_supported(void) -{ - return 0; -} - -static inline phys_addr_t -rte_mem_phy2mch(int32_t memseg_id __rte_unused, const phys_addr_t phy_addr) -{ - return phy_addr; -} -#endif +int rte_eal_using_phys_addrs(void); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h index 1d0827f4..6f0ba182 100644 --- a/lib/librte_eal/common/include/rte_memzone.h +++ b/lib/librte_eal/common/include/rte_memzone.h @@ -78,7 +78,11 @@ struct rte_memzone { #define RTE_MEMZONE_NAMESIZE 32 /**< Maximum length of memory zone name.*/ char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */ - phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 + union { + phys_addr_t phys_addr; /**< deprecated - Start physical address. */ + rte_iova_t iova; /**< Start IO address. */ + }; RTE_STD_C11 union { void *addr; /**< Start virtual address. */ diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h deleted file mode 100644 index 8b123391..00000000 --- a/lib/librte_eal/common/include/rte_pci.h +++ /dev/null @@ -1,598 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. - * Copyright 2013-2014 6WIND S.A. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_PCI_H_ -#define _RTE_PCI_H_ - -/** - * @file - * - * RTE PCI Interface - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include <stdio.h> -#include <stdlib.h> -#include <limits.h> -#include <errno.h> -#include <sys/queue.h> -#include <stdint.h> -#include <inttypes.h> - -#include <rte_debug.h> -#include <rte_interrupts.h> -#include <rte_dev.h> -#include <rte_bus.h> - -/** Pathname of PCI devices directory. 
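A sketch of a driver-style check built on rte_eal_using_phys_addrs() together with the new iova field of struct rte_memzone; rte_memzone_reserve() is the pre-existing reservation call, and the zone name and flags are placeholders:

#include <stdio.h>
#include <rte_memzone.h>
#include <rte_memory.h>

/* Sketch: reserve a memzone and return its IO address for DMA setup. */
static rte_iova_t memzone_io_addr(const char *name, size_t len, int socket_id)
{
	const struct rte_memzone *mz;

	/* Under an IOMMU (IOVA as VA) the value below is not a physical address. */
	if (!rte_eal_using_phys_addrs())
		printf("physical addresses unavailable, using IOMMU DMA addresses\n");

	mz = rte_memzone_reserve(name, len, socket_id, 0);
	if (mz == NULL)
		return RTE_BAD_IOVA;

	return mz->iova;	/* previously mz->phys_addr */
}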
*/ -const char *pci_get_sysfs_path(void); - -/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ -#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 -#define PCI_PRI_STR_SIZE sizeof("XXXXXXXX:XX:XX.X") - -/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */ -#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 - -/** Nb. of values in PCI device identifier format string. */ -#define PCI_FMT_NVAL 4 - -/** Nb. of values in PCI resource format. */ -#define PCI_RESOURCE_FMT_NVAL 3 - -/** Maximum number of PCI resources. */ -#define PCI_MAX_RESOURCE 6 - -/* Forward declarations */ -struct rte_pci_device; -struct rte_pci_driver; - -/** List of PCI devices */ -TAILQ_HEAD(rte_pci_device_list, rte_pci_device); -/** List of PCI drivers */ -TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver); - -/* PCI Bus iterators */ -#define FOREACH_DEVICE_ON_PCIBUS(p) \ - TAILQ_FOREACH(p, &(rte_pci_bus.device_list), next) - -#define FOREACH_DRIVER_ON_PCIBUS(p) \ - TAILQ_FOREACH(p, &(rte_pci_bus.driver_list), next) - -/** - * A structure describing an ID for a PCI driver. Each driver provides a - * table of these IDs for each device that it supports. - */ -struct rte_pci_id { - uint32_t class_id; /**< Class ID (class, subclass, pi) or RTE_CLASS_ANY_ID. */ - uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */ - uint16_t device_id; /**< Device ID or PCI_ANY_ID. */ - uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */ - uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */ -}; - -/** - * A structure describing the location of a PCI device. - */ -struct rte_pci_addr { - uint32_t domain; /**< Device domain */ - uint8_t bus; /**< Device bus */ - uint8_t devid; /**< Device ID */ - uint8_t function; /**< Device function. */ -}; - -struct rte_devargs; - -/** - * A structure describing a PCI device. - */ -struct rte_pci_device { - TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */ - struct rte_device device; /**< Inherit core device */ - struct rte_pci_addr addr; /**< PCI location. */ - struct rte_pci_id id; /**< PCI ID. */ - struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE]; - /**< PCI Memory Resource */ - struct rte_intr_handle intr_handle; /**< Interrupt handle */ - struct rte_pci_driver *driver; /**< Associated driver */ - uint16_t max_vfs; /**< sriov enable if not zero */ - enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ - char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */ -}; - -/** - * @internal - * Helper macro for drivers that need to convert to struct rte_pci_device. - */ -#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device) - -/** Any PCI device identifier (vendor, device, ...) */ -#define PCI_ANY_ID (0xffff) -#define RTE_CLASS_ANY_ID (0xffffff) - -#ifdef __cplusplus -/** C++ macro used to help building up tables of device IDs */ -#define RTE_PCI_DEVICE(vend, dev) \ - RTE_CLASS_ANY_ID, \ - (vend), \ - (dev), \ - PCI_ANY_ID, \ - PCI_ANY_ID -#else -/** Macro used to help building up tables of device IDs */ -#define RTE_PCI_DEVICE(vend, dev) \ - .class_id = RTE_CLASS_ANY_ID, \ - .vendor_id = (vend), \ - .device_id = (dev), \ - .subsystem_vendor_id = PCI_ANY_ID, \ - .subsystem_device_id = PCI_ANY_ID -#endif - -/** - * Initialisation function for the driver called during PCI probing. - */ -typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *); - -/** - * Uninitialisation function for the driver called during hotplugging. 
- */ -typedef int (pci_remove_t)(struct rte_pci_device *); - -/** - * A structure describing a PCI driver. - */ -struct rte_pci_driver { - TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ - struct rte_driver driver; /**< Inherit core driver. */ - struct rte_pci_bus *bus; /**< PCI bus reference. */ - pci_probe_t *probe; /**< Device Probe function. */ - pci_remove_t *remove; /**< Device Remove function. */ - const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ - uint32_t drv_flags; /**< Flags contolling handling of device. */ -}; - -/** - * Structure describing the PCI bus - */ -struct rte_pci_bus { - struct rte_bus bus; /**< Inherit the generic class */ - struct rte_pci_device_list device_list; /**< List of PCI devices */ - struct rte_pci_driver_list driver_list; /**< List of PCI drivers */ -}; - -/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ -#define RTE_PCI_DRV_NEED_MAPPING 0x0001 -/** Device driver supports link state interrupt */ -#define RTE_PCI_DRV_INTR_LSC 0x0008 -/** Device driver supports device removal interrupt */ -#define RTE_PCI_DRV_INTR_RMV 0x0010 -/** Device driver needs to keep mapped resources if unsupported dev detected */ -#define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020 - -/** - * A structure describing a PCI mapping. - */ -struct pci_map { - void *addr; - char *path; - uint64_t offset; - uint64_t size; - uint64_t phaddr; -}; - -/** - * A structure describing a mapped PCI resource. - * For multi-process we need to reproduce all PCI mappings in secondary - * processes, so save them in a tailq. - */ -struct mapped_pci_resource { - TAILQ_ENTRY(mapped_pci_resource) next; - - struct rte_pci_addr pci_addr; - char path[PATH_MAX]; - int nb_maps; - struct pci_map maps[PCI_MAX_RESOURCE]; -}; - -/** mapped pci device list */ -TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); - -/**< Internal use only - Macro used by pci addr parsing functions **/ -#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \ -do { \ - unsigned long val; \ - char *end; \ - errno = 0; \ - val = strtoul((in), &end, 16); \ - if (errno != 0 || end[0] != (dlm) || val > (lim)) \ - return -EINVAL; \ - (fd) = (typeof (fd))val; \ - (in) = end + 1; \ -} while(0) - -/** - * Utility function to produce a PCI Bus-Device-Function value - * given a string representation. Assumes that the BDF is provided without - * a domain prefix (i.e. domain returned is always 0) - * - * @param input - * The input string to be parsed. Should have the format XX:XX.X - * @param dev_addr - * The PCI Bus-Device-Function address to be returned. Domain will always be - * returned as 0 - * @return - * 0 on success, negative on error. - */ -static inline int -eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr) -{ - dev_addr->domain = 0; - GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':'); - GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.'); - GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0); - return 0; -} - -/** - * Utility function to produce a PCI Bus-Device-Function value - * given a string representation. Assumes that the BDF is provided including - * a domain prefix. - * - * @param input - * The input string to be parsed. Should have the format XXXX:XX:XX.X - * @param dev_addr - * The PCI Bus-Device-Function address to be returned - * @return - * 0 on success, negative on error. 
- */ -static inline int -eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr) -{ - GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':'); - GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':'); - GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.'); - GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0); - return 0; -} -#undef GET_PCIADDR_FIELD - -/** - * Utility function to write a pci device name, this device name can later be - * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_* - * BDF helpers. - * - * @param addr - * The PCI Bus-Device-Function address - * @param output - * The output buffer string - * @param size - * The output buffer size - */ -static inline void -rte_pci_device_name(const struct rte_pci_addr *addr, - char *output, size_t size) -{ - RTE_VERIFY(size >= PCI_PRI_STR_SIZE); - RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT, - addr->domain, addr->bus, - addr->devid, addr->function) >= 0); -} - -/* Compare two PCI device addresses. */ -/** - * Utility function to compare two PCI device addresses. - * - * @param addr - * The PCI Bus-Device-Function address to compare - * @param addr2 - * The PCI Bus-Device-Function address to compare - * @return - * 0 on equal PCI address. - * Positive on addr is greater than addr2. - * Negative on addr is less than addr2, or error. - */ -static inline int -rte_eal_compare_pci_addr(const struct rte_pci_addr *addr, - const struct rte_pci_addr *addr2) -{ - uint64_t dev_addr, dev_addr2; - - if ((addr == NULL) || (addr2 == NULL)) - return -1; - - dev_addr = ((uint64_t)addr->domain << 24) | - (addr->bus << 16) | (addr->devid << 8) | addr->function; - dev_addr2 = ((uint64_t)addr2->domain << 24) | - (addr2->bus << 16) | (addr2->devid << 8) | addr2->function; - - if (dev_addr > dev_addr2) - return 1; - else if (dev_addr < dev_addr2) - return -1; - else - return 0; -} - -/** - * Scan the content of the PCI bus, and the devices in the devices - * list - * - * @return - * 0 on success, negative on error - */ -int rte_pci_scan(void); - -/** - * Probe the PCI bus - * - * @return - * - 0 on success. - * - !0 on error. - */ -int -rte_pci_probe(void); - -/** - * Map the PCI device resources in user space virtual memory address - * - * Note that driver should not call this function when flag - * RTE_PCI_DRV_NEED_MAPPING is set, as EAL will do that for - * you when it's on. - * - * @param dev - * A pointer to a rte_pci_device structure describing the device - * to use - * - * @return - * 0 on success, negative on error and positive if no driver - * is found for the device. - */ -int rte_pci_map_device(struct rte_pci_device *dev); - -/** - * Unmap this device - * - * @param dev - * A pointer to a rte_pci_device structure describing the device - * to use - */ -void rte_pci_unmap_device(struct rte_pci_device *dev); - -/** - * @internal - * Map a particular resource from a file. - * - * @param requested_addr - * The starting address for the new mapping range. - * @param fd - * The file descriptor. - * @param offset - * The offset for the mapping range. - * @param size - * The size for the mapping range. - * @param additional_flags - * The additional flags for the mapping range. - * @return - * - On success, the function returns a pointer to the mapped area. - * - On error, the value MAP_FAILED is returned. - */ -void *pci_map_resource(void *requested_addr, int fd, off_t offset, - size_t size, int additional_flags); - -/** - * @internal - * Unmap a particular resource. 
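The BDF helpers above are removed from EAL as part of this patch; assuming they remain available from the relocated PCI header under the same names, a sketch of parsing a domain-qualified address and formatting it back:

#include <stdio.h>
#include <rte_pci.h>

/* Sketch: round-trip a PCI address string such as "0000:00:01.0". */
static int print_pci_addr(const char *bdf)
{
	struct rte_pci_addr addr;
	char name[PCI_PRI_STR_SIZE];

	if (eal_parse_pci_DomBDF(bdf, &addr) != 0)
		return -1;

	rte_pci_device_name(&addr, name, sizeof(name));
	printf("parsed: %s\n", name);
	return 0;
}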
- * - * @param requested_addr - * The address for the unmapping range. - * @param size - * The size for the unmapping range. - */ -void pci_unmap_resource(void *requested_addr, size_t size); - -/** - * Probe the single PCI device. - * - * Scan the content of the PCI bus, and find the pci device specified by pci - * address, then call the probe() function for registered driver that has a - * matching entry in its id_table for discovered device. - * - * @param addr - * The PCI Bus-Device-Function address to probe. - * @return - * - 0 on success. - * - Negative on error. - */ -int rte_pci_probe_one(const struct rte_pci_addr *addr); - -/** - * Close the single PCI device. - * - * Scan the content of the PCI bus, and find the pci device specified by pci - * address, then call the remove() function for registered driver that has a - * matching entry in its id_table for discovered device. - * - * @param addr - * The PCI Bus-Device-Function address to close. - * @return - * - 0 on success. - * - Negative on error. - */ -int rte_pci_detach(const struct rte_pci_addr *addr); - -/** - * Dump the content of the PCI bus. - * - * @param f - * A pointer to a file for output - */ -void rte_pci_dump(FILE *f); - -/** - * Register a PCI driver. - * - * @param driver - * A pointer to a rte_pci_driver structure describing the driver - * to be registered. - */ -void rte_pci_register(struct rte_pci_driver *driver); - -/** Helper for PCI device registration from driver (eth, crypto) instance */ -#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \ -RTE_INIT(pciinitfn_ ##nm); \ -static void pciinitfn_ ##nm(void) \ -{\ - (pci_drv).driver.name = RTE_STR(nm);\ - rte_pci_register(&pci_drv); \ -} \ -RTE_PMD_EXPORT_NAME(nm, __COUNTER__) - -/** - * Unregister a PCI driver. - * - * @param driver - * A pointer to a rte_pci_driver structure describing the driver - * to be unregistered. - */ -void rte_pci_unregister(struct rte_pci_driver *driver); - -/** - * Read PCI config space. - * - * @param device - * A pointer to a rte_pci_device structure describing the device - * to use - * @param buf - * A data buffer where the bytes should be read into - * @param len - * The length of the data buffer. - * @param offset - * The offset into PCI config space - */ -int rte_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset); - -/** - * Write PCI config space. - * - * @param device - * A pointer to a rte_pci_device structure describing the device - * to use - * @param buf - * A data buffer containing the bytes should be written - * @param len - * The length of the data buffer. - * @param offset - * The offset into PCI config space - */ -int rte_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset); - -/** - * A structure used to access io resources for a pci device. - * rte_pci_ioport is arch, os, driver specific, and should not be used outside - * of pci ioport api. - */ -struct rte_pci_ioport { - struct rte_pci_device *dev; - uint64_t base; - uint64_t len; /* only filled for memory mapped ports */ -}; - -/** - * Initialize a rte_pci_ioport object for a pci device io resource. - * - * This object is then used to gain access to those io resources (see below). - * - * @param dev - * A pointer to a rte_pci_device structure describing the device - * to use. - * @param bar - * Index of the io pci resource we want to access. - * @param p - * The rte_pci_ioport object to be initialized. - * @return - * 0 on success, negative on error. 
- */ -int rte_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p); - -/** - * Release any resources used in a rte_pci_ioport object. - * - * @param p - * The rte_pci_ioport object to be uninitialized. - * @return - * 0 on success, negative on error. - */ -int rte_pci_ioport_unmap(struct rte_pci_ioport *p); - -/** - * Read from a io pci resource. - * - * @param p - * The rte_pci_ioport object from which we want to read. - * @param data - * A data buffer where the bytes should be read into - * @param len - * The length of the data buffer. - * @param offset - * The offset into the pci io resource. - */ -void rte_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset); - -/** - * Write to a io pci resource. - * - * @param p - * The rte_pci_ioport object to which we want to write. - * @param data - * A data buffer where the bytes should be read into - * @param len - * The length of the data buffer. - * @param offset - * The offset into the pci io resource. - */ -void rte_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset); - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_PCI_H_ */ diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h index 7c6f7383..92724406 100644 --- a/lib/librte_eal/common/include/rte_service.h +++ b/lib/librte_eal/common/include/rte_service.h @@ -61,9 +61,6 @@ extern "C" { #include <rte_lcore.h> -/* forward declaration only. Definition in rte_service_private.h */ -struct rte_service_spec; - #define RTE_SERVICE_NAME_MAX 32 /* Capabilities of a service. @@ -89,40 +86,32 @@ struct rte_service_spec; */ uint32_t rte_service_get_count(void); - /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Return the specification of a service by integer id. + * Return the id of a service by name. * - * This function provides the specification of a service. This can be used by - * the application to understand what the service represents. The service - * must not be modified by the application directly, only passed to the various - * rte_service_* functions. - * - * @param id The integer id of the service to retrieve - * @retval non-zero A valid pointer to the service_spec - * @retval NULL Invalid *id* provided. - */ -struct rte_service_spec *rte_service_get_by_id(uint32_t id); - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice + * This function provides the id of the service using the service name as + * lookup key. The service id is to be passed to other functions in the + * rte_service_* API. * - * Return the specification of a service by name. - * - * This function provides the specification of a service using the service name - * as lookup key. This can be used by the application to understand what the - * service represents. The service must not be modified by the application - * directly, only passed to the various rte_service_* functions. + * Example usage: + * @code + * uint32_t service_id; + * int32_t ret = rte_service_get_by_name("service_X", &service_id); + * if (ret) { + * // handle error + * } + * @endcode * * @param name The name of the service to retrieve - * @retval non-zero A valid pointer to the service_spec - * @retval NULL Invalid *name* provided. + * @param[out] service_id A pointer to a uint32_t, to be filled in with the id. + * @retval 0 Success. The service id is provided in *service_id*. 
+ * @retval -EINVAL Null *service_id* pointer provided + * @retval -ENODEV No such service registered */ -struct rte_service_spec *rte_service_get_by_name(const char *name); +int32_t rte_service_get_by_name(const char *name, uint32_t *service_id); /** * @warning @@ -133,7 +122,7 @@ struct rte_service_spec *rte_service_get_by_name(const char *name); * @return A pointer to the name of the service. The returned pointer remains * in ownership of the service, and the application must not free it. */ -const char *rte_service_get_name(const struct rte_service_spec *service); +const char *rte_service_get_name(uint32_t id); /** * @warning @@ -146,17 +135,16 @@ const char *rte_service_get_name(const struct rte_service_spec *service); * @retval 1 Capability supported by this service instance * @retval 0 Capability not supported by this service instance */ -int32_t rte_service_probe_capability(const struct rte_service_spec *service, - uint32_t capability); +int32_t rte_service_probe_capability(uint32_t id, uint32_t capability); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Enable a core to run a service. + * Map or unmap a lcore to a service. * - * Each core can be added or removed from running specific services. This - * functions adds *lcore* to the set of cores that will run *service*. + * Each core can be added or removed from running a specific service. This + * function enables or disables *lcore* to run *service_id*. * * If multiple cores are enabled on a service, an atomic is used to ensure that * only one cores runs the service at a time. The exception to this is when @@ -164,82 +152,120 @@ int32_t rte_service_probe_capability(const struct rte_service_spec *service, * called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set, * the service function can be run on multiple threads at the same time. * - * @retval 0 lcore added successfully + * @param service_id the service to apply the lcore to + * @param lcore The lcore that will be mapped to service + * @param enable Zero to unmap or disable the core, non-zero to enable + * + * @retval 0 lcore map updated successfully * @retval -EINVAL An invalid service or lcore was provided. */ -int32_t rte_service_enable_on_lcore(struct rte_service_spec *service, - uint32_t lcore); +int32_t rte_service_map_lcore_set(uint32_t service_id, uint32_t lcore, + uint32_t enable); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Disable a core to run a service. + * Retrieve the mapping of an lcore to a service. * - * Each core can be added or removed from running specific services. This - * functions removes *lcore* to the set of cores that will run *service*. + * @param service_id the service to apply the lcore to + * @param lcore The lcore that will be mapped to service * - * @retval 0 Lcore removed successfully + * @retval 1 lcore is mapped to service + * @retval 0 lcore is not mapped to service * @retval -EINVAL An invalid service or lcore was provided. */ -int32_t rte_service_disable_on_lcore(struct rte_service_spec *service, - uint32_t lcore); +int32_t rte_service_map_lcore_get(uint32_t service_id, uint32_t lcore); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Return if an lcore is enabled for the service. + * Set the runstate of the service. * - * This function allows the application to query if *lcore* is currently set to - * run *service*. + * Each service is either running or stopped. 
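Tying the lookup and mapping calls above together, a minimal sketch that resolves a service by name, maps one lcore to it and sets its runstate; the service name "my_service" and lcore 1 are placeholders:

#include <stdint.h>
#include <rte_service.h>

/* Sketch: enable lcore 1 to run the named service and mark it running. */
static int start_service_on_lcore1(void)
{
	uint32_t id;

	if (rte_service_get_by_name("my_service", &id) != 0)
		return -1;

	if (rte_service_map_lcore_set(id, 1, 1) != 0)
		return -1;

	return rte_service_runstate_set(id, 1);
}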
Setting a non-zero runstate + * enables the service to run, while setting runstate zero disables it. * - * @retval 1 Lcore enabled on this lcore - * @retval 0 Lcore disabled on this lcore - * @retval -EINVAL An invalid service or lcore was provided. + * @param id The id of the service + * @param runstate The run state to apply to the service + * + * @retval 0 The service was successfully started + * @retval -EINVAL Invalid service id */ -int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service, - uint32_t lcore); - +int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Enable *service* to run. - * - * This function switches on a service during runtime. - * @retval 0 The service was successfully started + * Get the runstate for the service with *id*. See *rte_service_runstate_set* + * for details of runstates. A service can call this function to ensure that + * the application has indicated that it will receive CPU cycles. Either a + * service-core is mapped (default case), or the application has explicitly + * disabled the check that a service-cores is mapped to the service and takes + * responsibility to run the service manually using the available function + * *rte_service_run_iter_on_app_lcore* to do so. + * + * @retval 1 Service is running + * @retval 0 Service is stopped + * @retval -EINVAL Invalid service id */ -int32_t rte_service_start(struct rte_service_spec *service); +int32_t rte_service_runstate_get(uint32_t id); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Disable *service*. + * Enable or disable the check for a service-core being mapped to the service. + * An application can disable the check when takes the responsibility to run a + * service itself using *rte_service_run_iter_on_app_lcore*. + * + * @param id The id of the service to set the check on + * @param enable When zero, the check is disabled. Non-zero enables the check. * - * Switch off a service, so it is not run until it is *rte_service_start* is - * called on it. - * @retval 0 Service successfully switched off + * @retval 0 Success + * @retval -EINVAL Invalid service ID */ -int32_t rte_service_stop(struct rte_service_spec *service); +int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enable); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Returns if *service* is currently running. - * - * This function returns true if the service has been started using - * *rte_service_start*, AND a service core is mapped to the service. This - * function can be used to ensure that the service will be run. - * - * @retval 1 Service is currently running, and has a service lcore mapped - * @retval 0 Service is currently stopped, or no service lcore is mapped - * @retval -EINVAL Invalid service pointer provided + * This function runs a service callback from a non-service lcore. + * + * This function is designed to enable gradual porting to service cores, and + * to enable unit tests to verify a service behaves as expected. + * + * When called, this function ensures that the service identified by *id* is + * safe to run on this lcore. Multi-thread safe services are invoked even if + * other cores are simultaneously running them as they are multi-thread safe. 
+ * + * Multi-thread unsafe services are handled depending on the variable + * *serialize_multithread_unsafe*: + * - When set, the function will check if a service is already being invoked + * on another lcore, refusing to run it and returning -EBUSY. + * - When zero, the application takes responsibility to ensure that the service + * indicated by *id* is not going to be invoked by another lcore. This setting + * avoids atomic operations, so is likely to be more performant. + * + * @param id The ID of the service to run + * @param serialize_multithread_unsafe This parameter indicates to the service + * cores library if it is required to use atomics to serialize access + * to mult-thread unsafe services. As there is an overhead in using + * atomics, applications can choose to enable or disable this feature + * + * Note that any thread calling this function MUST be a DPDK EAL thread, as + * the *rte_lcore_id* function is used to access internal data structures. + * + * @retval 0 Service was run on the calling thread successfully + * @retval -EBUSY Another lcore is executing the service, and it is not a + * multi-thread safe service, so the service was not run on this lcore + * @retval -ENOEXEC Service is not in a run-able state + * @retval -EINVAL Invalid service id */ -int32_t rte_service_is_running(const struct rte_service_spec *service); +int32_t rte_service_run_iter_on_app_lcore(uint32_t id, + uint32_t serialize_multithread_unsafe); /** * @warning @@ -341,13 +367,12 @@ int32_t rte_service_lcore_reset_all(void); * Enable or disable statistics collection for *service*. * * This function enables per core, per-service cycle count collection. - * @param service The service to enable statistics gathering on. + * @param id The service to enable statistics gathering on. * @param enable Zero to disable statistics, non-zero to enable. * @retval 0 Success * @retval -EINVAL Invalid service pointer passed */ -int32_t rte_service_set_stats_enable(struct rte_service_spec *service, - int32_t enable); +int32_t rte_service_set_stats_enable(uint32_t id, int32_t enable); /** * @warning @@ -374,10 +399,26 @@ int32_t rte_service_lcore_list(uint32_t array[], uint32_t n); * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Dumps any information available about the service. If service is NULL, - * dumps info for all services. + * Get the numer of services running on the supplied lcore. + * + * @param lcore Id of the service core. + * @retval >=0 Number of services registered to this core. + * @retval -EINVAL Invalid lcore provided + * @retval -ENOTSUP The provided lcore is not a service core. + */ +int32_t rte_service_lcore_count_services(uint32_t lcore); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Dumps any information available about the service. When id is UINT32_MAX, + * this function dumps info for all services. 
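A sketch of the application-lcore execution path just described: the mapped-core check is disabled, the service runstate is set, and a single iteration is run from the calling EAL thread with serialization enabled for multi-thread unsafe services:

#include <stdint.h>
#include <rte_service.h>

/* Sketch: run one iteration of service *id* on the current (EAL) lcore. */
static int32_t run_service_once_here(uint32_t id)
{
	/* Application takes responsibility for running the service itself. */
	if (rte_service_set_runstate_mapped_check(id, 0) != 0)
		return -1;

	if (rte_service_runstate_set(id, 1) != 0)
		return -1;

	/* Serialize in case the service is not RTE_SERVICE_CAP_MT_SAFE. */
	return rte_service_run_iter_on_app_lcore(id, 1);
}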
+ * + * @retval 0 Statistics have been successfully dumped + * @retval -EINVAL Invalid service id provided */ -int32_t rte_service_dump(FILE *f, struct rte_service_spec *service); +int32_t rte_service_dump(FILE *f, uint32_t id); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_service_component.h b/lib/librte_eal/common/include/rte_service_component.h index 7a946a1e..ac965cb4 100644 --- a/lib/librte_eal/common/include/rte_service_component.h +++ b/lib/librte_eal/common/include/rte_service_component.h @@ -85,21 +85,30 @@ struct rte_service_spec { * * For example the eventdev SW PMD requires CPU cycles to perform its * scheduling. This can be achieved by registering it as a service, and the - * application can then assign CPU resources to it using - * *rte_service_set_coremask*. + * application can then assign CPU resources to that service. + * + * Note that when a service component registers itself, it is not permitted to + * add or remove service-core threads, or modify lcore-to-service mappings. The + * only API that may be called by the service-component is + * *rte_service_component_runstate_set*, which indicates that the service + * component is ready to be executed. * * @param spec The specification of the service to register + * @param[out] service_id A pointer to a uint32_t, which will be filled in + * during registration of the service. It is set to the integers + * service number given to the service. This parameter may be NULL. * @retval 0 Successfully registered the service. * -EINVAL Attempted to register an invalid service (eg, no callback * set) */ -int32_t rte_service_register(const struct rte_service_spec *spec); +int32_t rte_service_component_register(const struct rte_service_spec *spec, + uint32_t *service_id); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Unregister a service. + * Unregister a service component. * * The service being removed must be stopped before calling this function. * @@ -107,7 +116,7 @@ int32_t rte_service_register(const struct rte_service_spec *spec); * @retval -EBUSY The service is currently running, stop the service before * calling unregister. No action has been taken. */ -int32_t rte_service_unregister(struct rte_service_spec *service); +int32_t rte_service_component_unregister(uint32_t id); /** * @warning @@ -131,6 +140,23 @@ int32_t rte_service_start_with_defaults(void); * @warning * @b EXPERIMENTAL: this API may change without prior notice * + * Set the backend runstate of a component. + * + * This function allows services to be registered at startup, but not yet + * enabled to run by default. When the service has been configured (via the + * usual method; eg rte_eventdev_configure, the service can mark itself as + * ready to run. The differentiation between backend runstate and + * service_runstate is that the backend runstate is set by the service + * component while the service runstate is reserved for application usage. + * + * @retval 0 Success + */ +int32_t rte_service_component_runstate_set(uint32_t id, uint32_t runstate); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * * Initialize the service library. * * In order to use the service library, it must be initialized. 
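For the component-side API above, a sketch of how a PMD might register its service and mark the backend runstate once configured; the spec fields used here (name, callback, callback_userdata) match those referenced by the implementation later in this patch, and the callback body is a placeholder:

#include <stdint.h>
#include <rte_service_component.h>

/* Placeholder service body: one iteration of the component's work. */
static int32_t
my_service_cb(void *userdata)
{
	(void)userdata;
	return 0;
}

/* Sketch: register the component and allow it to be executed. */
static int register_my_service(void *userdata, uint32_t *out_id)
{
	struct rte_service_spec spec = {
		.name = "my_pmd_service",
		.callback = my_service_cb,
		.callback_userdata = userdata,
	};

	if (rte_service_component_register(&spec, out_id) != 0)
		return -1;

	/* Component is fully configured; mark its backend ready to run. */
	return rte_service_component_runstate_set(*out_id, 1);
}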
EAL initializes diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h deleted file mode 100644 index 29f5a523..00000000 --- a/lib/librte_eal/common/include/rte_vdev.h +++ /dev/null @@ -1,131 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2016 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef RTE_VDEV_H -#define RTE_VDEV_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/queue.h> -#include <rte_dev.h> -#include <rte_devargs.h> - -struct rte_vdev_device { - TAILQ_ENTRY(rte_vdev_device) next; /**< Next attached vdev */ - struct rte_device device; /**< Inherit core device */ -}; - -/** - * @internal - * Helper macro for drivers that need to convert to struct rte_vdev_device. - */ -#define RTE_DEV_TO_VDEV(ptr) \ - container_of(ptr, struct rte_vdev_device, device) - -static inline const char * -rte_vdev_device_name(const struct rte_vdev_device *dev) -{ - if (dev && dev->device.name) - return dev->device.name; - return NULL; -} - -static inline const char * -rte_vdev_device_args(const struct rte_vdev_device *dev) -{ - if (dev && dev->device.devargs) - return dev->device.devargs->args; - return ""; -} - -/** Double linked list of virtual device drivers. */ -TAILQ_HEAD(vdev_driver_list, rte_vdev_driver); - -/** - * Probe function called for each virtual device driver once. - */ -typedef int (rte_vdev_probe_t)(struct rte_vdev_device *dev); - -/** - * Remove function called for each virtual device driver once. - */ -typedef int (rte_vdev_remove_t)(struct rte_vdev_device *dev); - -/** - * A virtual device driver abstraction. - */ -struct rte_vdev_driver { - TAILQ_ENTRY(rte_vdev_driver) next; /**< Next in list. */ - struct rte_driver driver; /**< Inherited general driver. */ - rte_vdev_probe_t *probe; /**< Virtual device probe function. */ - rte_vdev_remove_t *remove; /**< Virtual device remove function. */ -}; - -/** - * Register a virtual device driver. 
- * - * @param driver - * A pointer to a rte_vdev_driver structure describing the driver - * to be registered. - */ -void rte_vdev_register(struct rte_vdev_driver *driver); - -/** - * Unregister a virtual device driver. - * - * @param driver - * A pointer to a rte_vdev_driver structure describing the driver - * to be unregistered. - */ -void rte_vdev_unregister(struct rte_vdev_driver *driver); - -#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\ -RTE_INIT(vdrvinitfn_ ##vdrv);\ -static const char *vdrvinit_ ## nm ## _alias;\ -static void vdrvinitfn_ ##vdrv(void)\ -{\ - (vdrv).driver.name = RTE_STR(nm);\ - (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\ - rte_vdev_register(&vdrv);\ -} \ -RTE_PMD_EXPORT_NAME(nm, __COUNTER__) - -#define RTE_PMD_REGISTER_ALIAS(nm, alias)\ -static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index a69a7075..d08cf48a 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -61,7 +61,7 @@ extern "C" { /** * Minor version/month number i.e. the mm in yy.mm.z */ -#define RTE_VER_MONTH 8 +#define RTE_VER_MONTH 11 /** * Patch level number i.e. the z in yy.mm.z @@ -71,14 +71,14 @@ extern "C" { /** * Extra string to be appended to version number */ -#define RTE_VER_SUFFIX "" +#define RTE_VER_SUFFIX "-rc" /** * Patch release number * 0-15 = release candidates * 16 = release */ -#define RTE_VER_RELEASE 16 +#define RTE_VER_RELEASE 3 /** * Macro to compute a version number usable for comparisons diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h new file mode 100644 index 00000000..a69c4ff6 --- /dev/null +++ b/lib/librte_eal/common/include/rte_vfio.h @@ -0,0 +1,153 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 6WIND S.A. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
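Given the version bump above, a compile-time guard is one way applications can track the API changes in this release; RTE_VERSION and RTE_VERSION_NUM are assumed to be the comparison macros from this header, which are only referenced, not shown, in the excerpt:

#include <rte_version.h>

/* Sketch: select the IOVA-aware API names only on 17.11 or newer. */
#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 0)
#define HAVE_RTE_IOVA_API 1
#else
#define HAVE_RTE_IOVA_API 0
#endif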
+ */ + +#ifndef _RTE_VFIO_H_ +#define _RTE_VFIO_H_ + +/* + * determine if VFIO is present on the system + */ +#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO) +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) +#define VFIO_PRESENT +#endif /* kernel version >= 3.6.0 */ +#endif /* RTE_EAL_VFIO */ + +#ifdef VFIO_PRESENT + +#include <linux/vfio.h> + +#define VFIO_DIR "/dev/vfio" +#define VFIO_CONTAINER_PATH "/dev/vfio/vfio" +#define VFIO_GROUP_FMT "/dev/vfio/%u" +#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" +#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) +#define VFIO_GET_REGION_IDX(x) (x >> 40) +#define VFIO_NOIOMMU_MODE \ + "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode" + +/** + * Setup vfio_cfg for the device identified by its address. + * It discovers the configured I/O MMU groups or sets a new one for the device. + * If a new groups is assigned, the DMA mapping is performed. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param sysfs_base + * sysfs path prefix. + * + * @param dev_addr + * device location. + * + * @param vfio_dev_fd + * VFIO fd. + * + * @param device_info + * Device information. + * + * @return + * 0 on success. + * <0 on failure. + * >1 if the device cannot be managed this way. + */ +int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, + int *vfio_dev_fd, struct vfio_device_info *device_info); + +/** + * Release a device mapped to a VFIO-managed I/O MMU group. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param sysfs_base + * sysfs path prefix. + * + * @param dev_addr + * device location. + * + * @param fd + * VFIO fd. + * + * @return + * 0 on success. + * <0 on failure. + */ +int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); + +/** + * Enable a VFIO-related kmod. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param modname + * kernel module name. + * + * @return + * 0 on success. + * <0 on failure. + */ +int rte_vfio_enable(const char *modname); + +/** + * Check whether a VFIO-related kmod is enabled. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param modname + * kernel module name. + * + * @return + * !0 if true. + * 0 otherwise. + */ +int rte_vfio_is_enabled(const char *modname); + +/** + * Whether VFIO NOIOMMU mode is enabled. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @return + * !0 if true. + * 0 otherwise. 
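A sketch of probing VFIO availability with the helpers declared above; the kernel module name "vfio" passed to rte_vfio_is_enabled() is an assumption, and the whole check compiles away on platforms without VFIO_PRESENT:

#include <stdio.h>
#include <rte_vfio.h>

/* Sketch: report whether VFIO can be used, and warn about no-IOMMU mode. */
static int vfio_usable(void)
{
#ifdef VFIO_PRESENT
	if (!rte_vfio_is_enabled("vfio"))
		return 0;
	if (rte_vfio_noiommu_is_enabled())
		printf("warning: VFIO is in unsafe no-IOMMU mode\n");
	return 1;
#else
	return 0;
#endif
}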
+ */ +int rte_vfio_noiommu_is_enabled(void); + +#endif /* VFIO_PRESENT */ + +#endif /* _RTE_VFIO_H_ */ diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c index 15076905..889dffd2 100644 --- a/lib/librte_eal/common/malloc_elem.c +++ b/lib/librte_eal/common/malloc_elem.c @@ -275,14 +275,14 @@ malloc_elem_free(struct malloc_elem *elem) return -1; rte_spinlock_lock(&(elem->heap->lock)); - size_t sz = elem->size - sizeof(*elem); + size_t sz = elem->size - sizeof(*elem) - MALLOC_ELEM_TRAILER_LEN; uint8_t *ptr = (uint8_t *)&elem[1]; struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size); if (next->state == ELEM_FREE){ /* remove from free list, join to this one */ elem_free_list_remove(next); join_elem(elem, next); - sz += sizeof(*elem); + sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN); } /* check if previous element is free, if so join with it and return, @@ -291,8 +291,8 @@ malloc_elem_free(struct malloc_elem *elem) if (elem->prev != NULL && elem->prev->state == ELEM_FREE) { elem_free_list_remove(elem->prev); join_elem(elem->prev, elem); - sz += sizeof(*elem); - ptr -= sizeof(*elem); + sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN); + ptr -= (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN); elem = elem->prev; } malloc_elem_free_list_insert(elem); diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h index f04b2d1e..ce39129d 100644 --- a/lib/librte_eal/common/malloc_elem.h +++ b/lib/librte_eal/common/malloc_elem.h @@ -53,13 +53,13 @@ struct malloc_elem { volatile enum elem_state state; uint32_t pad; size_t size; -#ifdef RTE_LIBRTE_MALLOC_DEBUG +#ifdef RTE_MALLOC_DEBUG uint64_t header_cookie; /* Cookie marking start of data */ /* trailer cookie at start + size */ #endif } __rte_cache_aligned; -#ifndef RTE_LIBRTE_MALLOC_DEBUG +#ifndef RTE_MALLOC_DEBUG static const unsigned MALLOC_ELEM_TRAILER_LEN = 0; /* dummy function - just check if pointer is non-null */ diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index 5c0627bf..fe2278bc 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -246,15 +246,22 @@ rte_malloc_set_limit(__rte_unused const char *type, } /* - * Return the physical address of a virtual address obtained through rte_malloc + * Return the IO address of a virtual address obtained through rte_malloc */ -phys_addr_t -rte_malloc_virt2phy(const void *addr) +rte_iova_t +rte_malloc_virt2iova(const void *addr) { + rte_iova_t iova; const struct malloc_elem *elem = malloc_elem_from_data(addr); if (elem == NULL) - return RTE_BAD_PHYS_ADDR; - if (elem->ms->phys_addr == RTE_BAD_PHYS_ADDR) - return RTE_BAD_PHYS_ADDR; - return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr); + return RTE_BAD_IOVA; + if (elem->ms->iova == RTE_BAD_IOVA) + return RTE_BAD_IOVA; + + if (rte_eal_iova_mode() == RTE_IOVA_VA) + iova = (uintptr_t)addr; + else + iova = elem->ms->iova + + RTE_PTR_DIFF(addr, elem->ms->addr); + return iova; } diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c index 7efb76dc..09b758c9 100644 --- a/lib/librte_eal/common/rte_service.c +++ b/lib/librte_eal/common/rte_service.c @@ -54,6 +54,7 @@ #define SERVICE_F_REGISTERED (1 << 0) #define SERVICE_F_STATS_ENABLED (1 << 1) +#define SERVICE_F_START_CHECK (1 << 2) /* runstates for services and lcores, denoting if they are active or not */ #define RUNSTATE_STOPPED 0 @@ -71,11 +72,12 @@ struct rte_service_spec_impl { rte_atomic32_t execute_lock; /* API 
set/get-able variables */ - int32_t runstate; + int8_t app_runstate; + int8_t comp_runstate; uint8_t internal_flags; /* per service statistics */ - uint32_t num_mapped_cores; + rte_atomic32_t num_mapped_cores; uint64_t calls; uint64_t cycles_spent; } __rte_cache_aligned; @@ -144,6 +146,13 @@ service_valid(uint32_t id) return !!(rte_services[id].internal_flags & SERVICE_F_REGISTERED); } +/* validate ID and retrieve service pointer, or return error value */ +#define SERVICE_VALID_GET_OR_ERR_RET(id, service, retval) do { \ + if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id)) \ + return retval; \ + service = &rte_services[id]; \ +} while (0) + /* returns 1 if statistics should be colleced for service * Returns 0 if statistics should not be collected for service */ @@ -156,21 +165,31 @@ service_stats_enabled(struct rte_service_spec_impl *impl) static inline int service_mt_safe(struct rte_service_spec_impl *s) { - return s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE; + return !!(s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE); } -int32_t rte_service_set_stats_enable(struct rte_service_spec *service, - int32_t enabled) +int32_t rte_service_set_stats_enable(uint32_t id, int32_t enabled) { - struct rte_service_spec_impl *impl = - (struct rte_service_spec_impl *)service; - if (!impl) - return -EINVAL; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, 0); if (enabled) - impl->internal_flags |= SERVICE_F_STATS_ENABLED; + s->internal_flags |= SERVICE_F_STATS_ENABLED; else - impl->internal_flags &= ~(SERVICE_F_STATS_ENABLED); + s->internal_flags &= ~(SERVICE_F_STATS_ENABLED); + + return 0; +} + +int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, 0); + + if (enabled) + s->internal_flags |= SERVICE_F_START_CHECK; + else + s->internal_flags &= ~(SERVICE_F_START_CHECK); return 0; } @@ -181,58 +200,42 @@ rte_service_get_count(void) return rte_service_count; } -struct rte_service_spec * -rte_service_get_by_id(uint32_t id) +int32_t rte_service_get_by_name(const char *name, uint32_t *service_id) { - struct rte_service_spec *service = NULL; - if (id < rte_service_count) - service = (struct rte_service_spec *)&rte_services[id]; - - return service; -} + if (!service_id) + return -EINVAL; -struct rte_service_spec *rte_service_get_by_name(const char *name) -{ - struct rte_service_spec *service = NULL; int i; for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { if (service_valid(i) && strcmp(name, rte_services[i].spec.name) == 0) { - service = (struct rte_service_spec *)&rte_services[i]; - break; + *service_id = i; + return 0; } } - return service; + return -ENODEV; } const char * -rte_service_get_name(const struct rte_service_spec *service) +rte_service_get_name(uint32_t id) { - return service->name; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, 0); + return s->spec.name; } int32_t -rte_service_probe_capability(const struct rte_service_spec *service, - uint32_t capability) +rte_service_probe_capability(uint32_t id, uint32_t capability) { - return service->capabilities & capability; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + return !!(s->spec.capabilities & capability); } int32_t -rte_service_is_running(const struct rte_service_spec *spec) -{ - const struct rte_service_spec_impl *impl = - (const struct rte_service_spec_impl *)spec; - if (!impl) - return -EINVAL; - - return (impl->runstate == RUNSTATE_RUNNING) && - 
(impl->num_mapped_cores > 0); -} - -int32_t -rte_service_register(const struct rte_service_spec *spec) +rte_service_component_register(const struct rte_service_spec *spec, + uint32_t *id_ptr) { uint32_t i; int32_t free_slot = -1; @@ -252,68 +255,161 @@ rte_service_register(const struct rte_service_spec *spec) struct rte_service_spec_impl *s = &rte_services[free_slot]; s->spec = *spec; - s->internal_flags |= SERVICE_F_REGISTERED; + s->internal_flags |= SERVICE_F_REGISTERED | SERVICE_F_START_CHECK; rte_smp_wmb(); rte_service_count++; + if (id_ptr) + *id_ptr = free_slot; + return 0; } int32_t -rte_service_unregister(struct rte_service_spec *spec) +rte_service_component_unregister(uint32_t id) { - struct rte_service_spec_impl *s = NULL; - struct rte_service_spec_impl *spec_impl = - (struct rte_service_spec_impl *)spec; - uint32_t i; - uint32_t service_id; - for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { - if (&rte_services[i] == spec_impl) { - s = spec_impl; - service_id = i; - break; - } - } - - if (!s) - return -EINVAL; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); rte_service_count--; rte_smp_wmb(); s->internal_flags &= ~(SERVICE_F_REGISTERED); + /* clear the run-bit in all cores */ for (i = 0; i < RTE_MAX_LCORE; i++) - lcore_states[i].service_mask &= ~(UINT64_C(1) << service_id); + lcore_states[i].service_mask &= ~(UINT64_C(1) << id); - memset(&rte_services[service_id], 0, - sizeof(struct rte_service_spec_impl)); + memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl)); return 0; } int32_t -rte_service_start(struct rte_service_spec *service) +rte_service_component_runstate_set(uint32_t id, uint32_t runstate) { - struct rte_service_spec_impl *s = - (struct rte_service_spec_impl *)service; - s->runstate = RUNSTATE_RUNNING; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + if (runstate) + s->comp_runstate = RUNSTATE_RUNNING; + else + s->comp_runstate = RUNSTATE_STOPPED; + rte_smp_wmb(); return 0; } int32_t -rte_service_stop(struct rte_service_spec *service) +rte_service_runstate_set(uint32_t id, uint32_t runstate) { - struct rte_service_spec_impl *s = - (struct rte_service_spec_impl *)service; - s->runstate = RUNSTATE_STOPPED; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + if (runstate) + s->app_runstate = RUNSTATE_RUNNING; + else + s->app_runstate = RUNSTATE_STOPPED; + rte_smp_wmb(); return 0; } +int32_t +rte_service_runstate_get(uint32_t id) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + rte_smp_rmb(); + + int check_disabled = !(s->internal_flags & SERVICE_F_START_CHECK); + int lcore_mapped = (rte_atomic32_read(&s->num_mapped_cores) > 0); + + return (s->app_runstate == RUNSTATE_RUNNING) && + (s->comp_runstate == RUNSTATE_RUNNING) && + (check_disabled | lcore_mapped); +} + +static inline void +rte_service_runner_do_callback(struct rte_service_spec_impl *s, + struct core_state *cs, uint32_t service_idx) +{ + void *userdata = s->spec.callback_userdata; + + if (service_stats_enabled(s)) { + uint64_t start = rte_rdtsc(); + s->spec.callback(userdata); + uint64_t end = rte_rdtsc(); + s->cycles_spent += end - start; + cs->calls_per_service[service_idx]++; + s->calls++; + } else + s->spec.callback(userdata); +} + + +static inline int32_t +service_run(uint32_t i, struct core_state *cs, uint64_t service_mask) +{ + if (!service_valid(i)) + return -EINVAL; + struct rte_service_spec_impl *s = &rte_services[i]; + if (s->comp_runstate != 
RUNSTATE_RUNNING || + s->app_runstate != RUNSTATE_RUNNING || + !(service_mask & (UINT64_C(1) << i))) + return -ENOEXEC; + + /* check do we need cmpset, if MT safe or <= 1 core + * mapped, atomic ops are not required. + */ + const int use_atomics = (service_mt_safe(s) == 0) && + (rte_atomic32_read(&s->num_mapped_cores) > 1); + if (use_atomics) { + if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1)) + return -EBUSY; + + rte_service_runner_do_callback(s, cs, i); + rte_atomic32_clear(&s->execute_lock); + } else + rte_service_runner_do_callback(s, cs, i); + + return 0; +} + +int32_t rte_service_run_iter_on_app_lcore(uint32_t id, + uint32_t serialize_mt_unsafe) +{ + /* run service on calling core, using all-ones as the service mask */ + if (!service_valid(id)) + return -EINVAL; + + struct core_state *cs = &lcore_states[rte_lcore_id()]; + struct rte_service_spec_impl *s = &rte_services[id]; + + /* Atomically add this core to the mapped cores first, then examine if + * we can run the service. This avoids a race condition between + * checking the value, and atomically adding to the mapped count. + */ + if (serialize_mt_unsafe) + rte_atomic32_inc(&s->num_mapped_cores); + + if (service_mt_safe(s) == 0 && + rte_atomic32_read(&s->num_mapped_cores) > 1) { + if (serialize_mt_unsafe) + rte_atomic32_dec(&s->num_mapped_cores); + return -EBUSY; + } + + int ret = service_run(id, cs, UINT64_MAX); + + if (serialize_mt_unsafe) + rte_atomic32_dec(&s->num_mapped_cores); + + return ret; +} + static int32_t rte_service_runner_func(void *arg) { @@ -324,35 +420,10 @@ rte_service_runner_func(void *arg) while (lcore_states[lcore].runstate == RUNSTATE_RUNNING) { const uint64_t service_mask = cs->service_mask; - for (i = 0; i < rte_service_count; i++) { - struct rte_service_spec_impl *s = &rte_services[i]; - if (s->runstate != RUNSTATE_RUNNING || - !(service_mask & (UINT64_C(1) << i))) - continue; - /* check do we need cmpset, if MT safe or <= 1 core - * mapped, atomic ops are not required. 
- */ - const int need_cmpset = !((service_mt_safe(s) == 0) && - (s->num_mapped_cores > 1)); - uint32_t *lock = (uint32_t *)&s->execute_lock; - - if (need_cmpset || rte_atomic32_cmpset(lock, 0, 1)) { - void *userdata = s->spec.callback_userdata; - - if (service_stats_enabled(s)) { - uint64_t start = rte_rdtsc(); - s->spec.callback(userdata); - uint64_t end = rte_rdtsc(); - s->cycles_spent += end - start; - cs->calls_per_service[i]++; - s->calls++; - } else - s->spec.callback(userdata); - - if (need_cmpset) - rte_atomic32_clear(&s->execute_lock); - } + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + /* return value ignored as no change to code flow */ + service_run(i, cs, service_mask); } rte_smp_rmb(); @@ -397,6 +468,19 @@ rte_service_lcore_list(uint32_t array[], uint32_t n) } int32_t +rte_service_lcore_count_services(uint32_t lcore) +{ + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + + struct core_state *cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -ENOTSUP; + + return __builtin_popcountll(cs->service_mask); +} + +int32_t rte_service_start_with_defaults(void) { /* create a default mapping from cores to services, then start the @@ -407,7 +491,7 @@ rte_service_start_with_defaults(void) uint32_t count = rte_service_get_count(); int32_t lcore_iter = 0; - uint32_t ids[RTE_MAX_LCORE]; + uint32_t ids[RTE_MAX_LCORE] = {0}; int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE); if (lcore_count == 0) @@ -417,16 +501,12 @@ rte_service_start_with_defaults(void) rte_service_lcore_start(ids[i]); for (i = 0; i < count; i++) { - struct rte_service_spec *s = rte_service_get_by_id(i); - if (!s) - return -EINVAL; - /* do 1:1 core mapping here, with each service getting * assigned a single core by default. Adding multiple services * should multiplex to a single core, or 1:1 if there are the * same amount of services as service-cores */ - ret = rte_service_enable_on_lcore(s, ids[lcore_iter]); + ret = rte_service_map_lcore_set(i, ids[lcore_iter], 1); if (ret) return -ENODEV; @@ -434,7 +514,7 @@ rte_service_start_with_defaults(void) if (lcore_iter >= lcore_count) lcore_iter = 0; - ret = rte_service_start(s); + ret = rte_service_runstate_set(i, 1); if (ret) return -ENOEXEC; } @@ -467,43 +547,40 @@ service_update(struct rte_service_spec *service, uint32_t lcore, if (set) { if (*set) { lcore_states[lcore].service_mask |= sid_mask; - rte_services[sid].num_mapped_cores++; + rte_atomic32_inc(&rte_services[sid].num_mapped_cores); } else { lcore_states[lcore].service_mask &= ~(sid_mask); - rte_services[sid].num_mapped_cores--; + rte_atomic32_dec(&rte_services[sid].num_mapped_cores); } } if (enabled) - *enabled = (lcore_states[lcore].service_mask & (sid_mask)); + *enabled = !!(lcore_states[lcore].service_mask & (sid_mask)); rte_smp_wmb(); return 0; } -int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service, - uint32_t lcore) -{ - uint32_t enabled; - int ret = service_update(service, lcore, 0, &enabled); - if (ret == 0) - return enabled; - return -EINVAL; -} - int32_t -rte_service_enable_on_lcore(struct rte_service_spec *service, uint32_t lcore) +rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled) { - uint32_t on = 1; - return service_update(service, lcore, &on, 0); + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + uint32_t on = enabled > 0; + return service_update(&s->spec, lcore, &on, 0); } int32_t -rte_service_disable_on_lcore(struct rte_service_spec *service, uint32_t lcore) +rte_service_map_lcore_get(uint32_t id, 
uint32_t lcore) { - uint32_t off = 0; - return service_update(service, lcore, &off, 0); + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + uint32_t enabled; + int ret = service_update(&s->spec, lcore, 0, &enabled); + if (ret == 0) + return enabled; + return ret; } int32_t rte_service_lcore_reset_all(void) @@ -516,7 +593,7 @@ int32_t rte_service_lcore_reset_all(void) lcore_states[i].runstate = RUNSTATE_STOPPED; } for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) - rte_services[i].num_mapped_cores = 0; + rte_atomic32_set(&rte_services[i].num_mapped_cores, 0); rte_smp_wmb(); @@ -552,7 +629,8 @@ rte_service_lcore_add(uint32_t lcore) lcore_states[lcore].runstate = RUNSTATE_STOPPED; rte_smp_wmb(); - return 0; + + return rte_eal_wait_lcore(lcore); } int32_t @@ -607,12 +685,12 @@ rte_service_lcore_stop(uint32_t lcore) return -EALREADY; uint32_t i; + uint64_t service_mask = lcore_states[lcore].service_mask; for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { - int32_t enabled = - lcore_states[i].service_mask & (UINT64_C(1) << i); - int32_t service_running = rte_services[i].runstate != - RUNSTATE_STOPPED; - int32_t only_core = rte_services[i].num_mapped_cores == 1; + int32_t enabled = service_mask & (UINT64_C(1) << i); + int32_t service_running = rte_service_runstate_get(i); + int32_t only_core = (1 == + rte_atomic32_read(&rte_services[i].num_mapped_cores)); /* if the core is mapped, and the service is running, and this * is the only core that is mapped, the service would cease to @@ -667,28 +745,34 @@ service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset) fprintf(f, "\n"); } -int32_t rte_service_dump(FILE *f, struct rte_service_spec *service) +int32_t rte_service_dump(FILE *f, uint32_t id) { uint32_t i; + int print_one = (id != UINT32_MAX); uint64_t total_cycles = 0; - for (i = 0; i < rte_service_count; i++) { + + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { if (!service_valid(i)) continue; total_cycles += rte_services[i].cycles_spent; } - if (service) { - struct rte_service_spec_impl *s = - (struct rte_service_spec_impl *)service; + /* print only the specified service */ + if (print_one) { + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); fprintf(f, "Service %s Summary\n", s->spec.name); uint32_t reset = 0; rte_service_dump_one(f, s, total_cycles, reset); return 0; } + /* print all services, as UINT32_MAX was passed as id */ fprintf(f, "Services Summary\n"); - for (i = 0; i < rte_service_count; i++) { + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if (!service_valid(i)) + continue; uint32_t reset = 1; rte_service_dump_one(f, &rte_services[i], total_cycles, reset); } @@ -698,7 +782,7 @@ int32_t rte_service_dump(FILE *f, struct rte_service_spec *service) if (lcore_config[i].core_role != ROLE_SERVICE) continue; - uint32_t reset = 0; + uint32_t reset = 1; service_dump_calls_per_lcore(f, i, reset); } diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile index 4794696b..2ebdf313 100644 --- a/lib/librte_eal/linuxapp/Makefile +++ b/lib/librte_eal/linuxapp/Makefile @@ -35,7 +35,5 @@ DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio DIRS-$(CONFIG_RTE_KNI_KMOD) += kni DEPDIRS-kni := eal -DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0 -DEPDIRS-xen_dom0 := eal include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 90bca4d6..5a7b8b2a 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ 
b/lib/librte_eal/linuxapp/eal/Makefile @@ -34,10 +34,10 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_eal.a ARCH_DIR ?= $(RTE_ARCH) -EXPORT_MAP := rte_eal_version.map +EXPORT_MAP := ../../rte_eal_version.map VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) -LIBABIVER := 5 +LIBABIVER := 6 VPATH += $(RTE_SDK)/lib/librte_eal/common @@ -58,16 +58,10 @@ endif SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c -ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y) -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_xen_memory.c -endif SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio_mp_sync.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_uio.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c @@ -80,9 +74,6 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_vdev.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci_uio.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_tailqs.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_errno.c @@ -104,6 +95,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c SRCS-$(CONFIG_RTE_ARCH_X86) += rte_spinlock.c +SRCS-y += rte_cycles.c CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST) @@ -116,13 +108,11 @@ CFLAGS_eal_thread.o := -D_GNU_SOURCE CFLAGS_eal_log.o := -D_GNU_SOURCE CFLAGS_eal_common_log.o := -D_GNU_SOURCE CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE -CFLAGS_eal_pci.o := -D_GNU_SOURCE -CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE -CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE CFLAGS_eal_common_options.o := -D_GNU_SOURCE CFLAGS_eal_common_thread.o := -D_GNU_SOURCE CFLAGS_eal_common_lcore.o := -D_GNU_SOURCE +CFLAGS_rte_cycles.o := -D_GNU_SOURCE # workaround for a gcc bug with noreturn attribute # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 @@ -130,7 +120,7 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) CFLAGS_eal_thread.o += -Wno-return-type endif -INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h +INC := rte_kni_common.h SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \ $(addprefix include/exec-env/,$(INC)) diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 48f12f44..229eec9f 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -56,7 +56,6 @@ #include <rte_common.h> #include <rte_debug.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> @@ -71,12 +70,12 @@ #include <rte_cpuflags.h> #include <rte_interrupts.h> #include <rte_bus.h> -#include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> #include <rte_version.h> #include <rte_atomic.h> #include 
<malloc_heap.h> +#include <rte_vfio.h> #include "eal_private.h" #include "eal_thread.h" @@ -121,6 +120,13 @@ struct internal_config internal_config; /* used by rte_rdtsc() */ int rte_cycles_vmware_tsc_map; +/* Return mbuf pool ops name */ +const char * +rte_eal_mbuf_default_mempool_ops(void) +{ + return internal_config.mbuf_pool_ops_name; +} + /* Return a pointer to the configuration structure */ struct rte_config * rte_eal_get_configuration(void) @@ -128,6 +134,12 @@ rte_eal_get_configuration(void) return &rte_config; } +enum rte_iova_mode +rte_eal_iova_mode(void) +{ + return rte_eal_get_configuration()->iova_mode; +} + /* parse a sysfs (or other) file containing one integer value */ int eal_parse_sysfs_value(const char *filename, unsigned long *val) @@ -354,7 +366,6 @@ eal_usage(const char *prgname) " --"OPT_BASE_VIRTADDR" Base virtual address\n" " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" - " --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n" "\n"); /* Allow the application to print its usage message too if hook is set */ if ( rte_application_usage_hook ) { @@ -555,25 +566,12 @@ eal_parse_args(int argc, char **argv) eal_usage(prgname); exit(EXIT_SUCCESS); - /* long options */ - case OPT_XEN_DOM0_NUM: -#ifdef RTE_LIBRTE_XEN_DOM0 - internal_config.xen_dom0_support = 1; -#else - RTE_LOG(ERR, EAL, "Can't support DPDK app " - "running on Dom0, please configure" - " RTE_LIBRTE_XEN_DOM0=y\n"); - ret = -1; - goto out; -#endif - break; - case OPT_HUGE_DIR_NUM: - internal_config.hugepage_dir = optarg; + internal_config.hugepage_dir = strdup(optarg); break; case OPT_FILE_PREFIX_NUM: - internal_config.hugefile_prefix = optarg; + internal_config.hugefile_prefix = strdup(optarg); break; case OPT_SOCKET_MEM_NUM: @@ -610,6 +608,10 @@ eal_parse_args(int argc, char **argv) internal_config.create_uio_dev = 1; break; + case OPT_MBUF_POOL_OPS_NAME_NUM: + internal_config.mbuf_pool_ops_name = optarg; + break; + default: if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { RTE_LOG(ERR, EAL, "Option %c is not supported " @@ -641,15 +643,6 @@ eal_parse_args(int argc, char **argv) goto out; } - /* --xen-dom0 doesn't make sense with --socket-mem */ - if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) { - RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified " - "together with --"OPT_XEN_DOM0"\n"); - eal_usage(prgname); - ret = -1; - goto out; - } - if (optind >= 0) argv[optind-1] = prgname; ret = optind-1; @@ -716,10 +709,9 @@ static int rte_eal_vfio_setup(void) { int vfio_enabled = 0; - if (!internal_config.no_pci) { - pci_vfio_enable(); - vfio_enabled |= pci_vfio_is_enabled(); - } + if (rte_vfio_enable("vfio")) + return -1; + vfio_enabled = rte_vfio_is_enabled("vfio"); if (vfio_enabled) { @@ -792,9 +784,40 @@ rte_eal_init(int argc, char **argv) return -1; } + if (eal_plugins_init() < 0) { + rte_eal_init_alert("Cannot init plugins\n"); + rte_errno = EINVAL; + rte_atomic32_clear(&run_once); + return -1; + } + + if (eal_option_device_parse()) { + rte_errno = ENODEV; + rte_atomic32_clear(&run_once); + return -1; + } + + if (rte_bus_scan()) { + rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_errno = ENODEV; + rte_atomic32_clear(&run_once); + return -1; + } + + /* autodetect the iova mapping mode (default is iova_pa) */ + rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class(); + + /* Workaround for KNI which requires physical address to work */ + if 
(rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA && + rte_eal_check_module("rte_kni") == 1) { + rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA; + RTE_LOG(WARNING, EAL, + "Some devices want IOVA as VA but PA will be used because.. " + "KNI module inserted\n"); + } + if (internal_config.no_hugetlbfs == 0 && internal_config.process_type != RTE_PROC_SECONDARY && - internal_config.xen_dom0_support == 0 && eal_hugepage_info_init() < 0) { rte_eal_init_alert("Cannot get hugepage information."); rte_errno = EACCES; @@ -873,9 +896,6 @@ rte_eal_init(int argc, char **argv) eal_check_mem_on_local_socket(); - if (eal_plugins_init() < 0) - rte_eal_init_alert("Cannot init plugins\n"); - eal_thread_init_master(rte_config.master_lcore); ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); @@ -889,17 +909,6 @@ rte_eal_init(int argc, char **argv) return -1; } - if (eal_option_device_parse()) { - rte_errno = ENODEV; - return -1; - } - - if (rte_bus_scan()) { - rte_eal_init_alert("Cannot scan the buses for devices\n"); - rte_errno = ENODEV; - return -1; - } - RTE_LCORE_FOREACH_SLAVE(i) { /* @@ -983,6 +992,22 @@ int rte_eal_has_hugepages(void) return ! internal_config.no_hugetlbfs; } +int rte_eal_has_pci(void) +{ + return !internal_config.no_pci; +} + +int rte_eal_create_uio_dev(void) +{ + return internal_config.create_uio_dev; +} + +enum rte_intr_mode +rte_eal_vfio_intr_mode(void) +{ + return internal_config.vfio_intr_mode; +} + int rte_eal_check_module(const char *module_name) { diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c index fbae4613..8e4a775b 100644 --- a/lib/librte_eal/linuxapp/eal/eal_alarm.c +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c @@ -40,7 +40,6 @@ #include <sys/timerfd.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_interrupts.h> #include <rte_alarm.h> #include <rte_common.h> diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 7a21e8f6..86e174fc 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -46,7 +46,6 @@ #include <sys/queue.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_launch.h> #include <rte_per_lcore.h> diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 3e9ac41e..1c20693d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -51,7 +51,6 @@ #include <rte_common.h> #include <rte_interrupts.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_per_lcore.h> @@ -60,7 +59,6 @@ #include <rte_branch_prediction.h> #include <rte_debug.h> #include <rte_log.h> -#include <rte_pci.h> #include <rte_malloc.h> #include <rte_errno.h> #include <rte_spinlock.h> @@ -914,7 +912,7 @@ static void eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) { union rte_intr_read_buffer buf; - int bytes_read = 1; + int bytes_read = 0; int nbytes; switch (intr_handle->type) { @@ -930,11 +928,9 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) break; #endif case RTE_INTR_HANDLE_VDEV: - /* for vdev, fd points to: - * a. eventfd which does not need to read out; - * b. datapath fd which needs PMD to read out. 
- */ - return; + bytes_read = intr_handle->efd_counter_size; + /* For vdev, number of bytes to read is set by driver */ + break; case RTE_INTR_HANDLE_EXT: return; default: @@ -947,6 +943,8 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) * read out to clear the ready-to-be-read flag * for epoll_wait. */ + if (bytes_read == 0) + return; do { nbytes = read(fd, &buf, bytes_read); if (nbytes < 0) { @@ -1206,7 +1204,12 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) intr_handle->nb_efd = n; intr_handle->max_intr = NB_OTHER_INTR + n; } else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) { - /* do nothing, and let vdev driver to initialize this struct */ + /* only check, initialization would be done in vdev driver.*/ + if (intr_handle->efd_counter_size > + sizeof(union rte_intr_read_buffer)) { + RTE_LOG(ERR, EAL, "the efd_counter_size is oversized"); + return -EINVAL; + } } else { intr_handle->efds[0] = intr_handle->fd; intr_handle->nb_efd = RTE_MIN(nb_efd, 1U); diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c index e3a50aa3..c088bd9b 100644 --- a/lib/librte_eal/linuxapp/eal/eal_log.c +++ b/lib/librte_eal/linuxapp/eal/eal_log.c @@ -39,7 +39,6 @@ #include <sys/queue.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_launch.h> #include <rte_per_lcore.h> diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 52791282..a54b822a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -59,7 +59,6 @@ #include <rte_log.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> @@ -75,13 +74,6 @@ #define PFN_MASK_SIZE 8 -#ifdef RTE_LIBRTE_XEN_DOM0 -int rte_xen_dom0_supported(void) -{ - return internal_config.xen_dom0_support; -} -#endif - /** * @file * Huge page mapping under linux @@ -106,10 +98,6 @@ test_phys_addrs_available(void) uint64_t tmp; phys_addr_t physaddr; - /* For dom0, phys addresses can always be available */ - if (rte_xen_dom0_supported()) - return; - if (!rte_eal_has_hugepages()) { RTE_LOG(ERR, EAL, "Started without hugepages support, physical addresses not available\n"); @@ -119,10 +107,11 @@ test_phys_addrs_available(void) physaddr = rte_mem_virt2phy(&tmp); if (physaddr == RTE_BAD_PHYS_ADDR) { - RTE_LOG(ERR, EAL, - "Cannot obtain physical addresses: %s. " - "Only vfio will function.\n", - strerror(errno)); + if (rte_eal_iova_mode() == RTE_IOVA_PA) + RTE_LOG(ERR, EAL, + "Cannot obtain physical addresses: %s. 
" + "Only vfio will function.\n", + strerror(errno)); phys_addrs_available = false; } } @@ -139,32 +128,9 @@ rte_mem_virt2phy(const void *virtaddr) int page_size; off_t offset; - /* when using dom0, /proc/self/pagemap always returns 0, check in - * dpdk memory by browsing the memsegs */ - if (rte_xen_dom0_supported()) { - struct rte_mem_config *mcfg; - struct rte_memseg *memseg; - unsigned i; - - mcfg = rte_eal_get_configuration()->mem_config; - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - memseg = &mcfg->memseg[i]; - if (memseg->addr == NULL) - break; - if (virtaddr > memseg->addr && - virtaddr < RTE_PTR_ADD(memseg->addr, - memseg->len)) { - return memseg->phys_addr + - RTE_PTR_DIFF(virtaddr, memseg->addr); - } - } - - return RTE_BAD_PHYS_ADDR; - } - /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ if (!phys_addrs_available) - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; /* standard page size */ page_size = getpagesize(); @@ -173,7 +139,7 @@ rte_mem_virt2phy(const void *virtaddr) if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", __func__, strerror(errno)); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } virt_pfn = (unsigned long)virtaddr / page_size; @@ -182,7 +148,7 @@ rte_mem_virt2phy(const void *virtaddr) RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", __func__, strerror(errno)); close(fd); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } retval = read(fd, &page, PFN_MASK_SIZE); @@ -190,12 +156,12 @@ rte_mem_virt2phy(const void *virtaddr) if (retval < 0) { RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", __func__, strerror(errno)); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } else if (retval != PFN_MASK_SIZE) { RTE_LOG(ERR, EAL, "%s(): read %d bytes from /proc/self/pagemap " "but expected %d:\n", __func__, retval, PFN_MASK_SIZE); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } /* @@ -203,7 +169,7 @@ rte_mem_virt2phy(const void *virtaddr) * pagemap.txt in linux Documentation) */ if ((page & 0x7fffffffffffffULL) == 0) - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; physaddr = ((page & 0x7fffffffffffffULL) * page_size) + ((unsigned long)virtaddr % page_size); @@ -211,6 +177,14 @@ rte_mem_virt2phy(const void *virtaddr) return physaddr; } +rte_iova_t +rte_mem_virt2iova(const void *virtaddr) +{ + if (rte_eal_iova_mode() == RTE_IOVA_VA) + return (uintptr_t)virtaddr; + return rte_mem_virt2phy(virtaddr); +} + /* * For each hugepage in hugepg_tbl, fill the physaddr value. We find * it by browsing the /proc/self/pagemap special file. @@ -716,6 +690,8 @@ create_shared_memory(const char *filename, const size_t mem_size) } retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); close(fd); + if (retval == MAP_FAILED) + return NULL; return retval; } @@ -1059,7 +1035,10 @@ rte_eal_hugepage_init(void) strerror(errno)); return -1; } - mcfg->memseg[0].phys_addr = RTE_BAD_PHYS_ADDR; + if (rte_eal_iova_mode() == RTE_IOVA_VA) + mcfg->memseg[0].iova = (uintptr_t)addr; + else + mcfg->memseg[0].iova = RTE_BAD_IOVA; mcfg->memseg[0].addr = addr; mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; mcfg->memseg[0].len = internal_config.memory; @@ -1067,17 +1046,6 @@ rte_eal_hugepage_init(void) return 0; } -/* check if app runs on Xen Dom0 */ - if (internal_config.xen_dom0_support) { -#ifdef RTE_LIBRTE_XEN_DOM0 - /* use dom0_mm kernel driver to init memory */ - if (rte_xen_dom0_memory_init() < 0) - return -1; - else - return 0; -#endif - } - /* calculate total number of hugepages available. 
at this point we haven't * yet started sorting them so they all are on socket 0 */ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { @@ -1319,7 +1287,7 @@ rte_eal_hugepage_init(void) if (j == RTE_MAX_MEMSEG) break; - mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].iova = hugepage[i].physaddr; mcfg->memseg[j].addr = hugepage[i].final_va; mcfg->memseg[j].len = hugepage[i].size; mcfg->memseg[j].socket_id = hugepage[i].socket_id; @@ -1330,7 +1298,7 @@ rte_eal_hugepage_init(void) #ifdef RTE_ARCH_PPC_64 /* Use the phy and virt address of the last page as segment * address for IBM Power architecture */ - mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].iova = hugepage[i].physaddr; mcfg->memseg[j].addr = hugepage[i].final_va; #endif mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz; @@ -1400,17 +1368,6 @@ rte_eal_hugepage_attach(void) test_phys_addrs_available(); - if (internal_config.xen_dom0_support) { -#ifdef RTE_LIBRTE_XEN_DOM0 - if (rte_xen_dom0_memory_attach() < 0) { - RTE_LOG(ERR, EAL, "Failed to attach memory segments of primary " - "process\n"); - return -1; - } - return 0; -#endif - } - fd_zero = open("/dev/zero", O_RDONLY); if (fd_zero < 0) { RTE_LOG(ERR, EAL, "Could not open /dev/zero\n"); @@ -1542,7 +1499,7 @@ error: return -1; } -bool +int rte_eal_using_phys_addrs(void) { return phys_addrs_available; diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c deleted file mode 100644 index 8951ce74..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ /dev/null @@ -1,722 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <string.h> -#include <dirent.h> - -#include <rte_log.h> -#include <rte_bus.h> -#include <rte_pci.h> -#include <rte_eal_memconfig.h> -#include <rte_malloc.h> -#include <rte_devargs.h> -#include <rte_memcpy.h> - -#include "eal_filesystem.h" -#include "eal_private.h" -#include "eal_pci_init.h" - -/** - * @file - * PCI probing under linux - * - * This code is used to simulate a PCI probe by parsing information in sysfs. - * When a registered device matches a driver, it is then initialized with - * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). - */ - -extern struct rte_pci_bus rte_pci_bus; - -static int -pci_get_kernel_driver_by_path(const char *filename, char *dri_name) -{ - int count; - char path[PATH_MAX]; - char *name; - - if (!filename || !dri_name) - return -1; - - count = readlink(filename, path, PATH_MAX); - if (count >= PATH_MAX) - return -1; - - /* For device does not have a driver */ - if (count < 0) - return 1; - - path[count] = '\0'; - - name = strrchr(path, '/'); - if (name) { - strncpy(dri_name, name + 1, strlen(name + 1) + 1); - return 0; - } - - return -1; -} - -/* Map pci device */ -int -rte_pci_map_device(struct rte_pci_device *dev) -{ - int ret = -1; - - /* try mapping the NIC resources using VFIO if it exists */ - switch (dev->kdrv) { - case RTE_KDRV_VFIO: -#ifdef VFIO_PRESENT - if (pci_vfio_is_enabled()) - ret = pci_vfio_map_resource(dev); -#endif - break; - case RTE_KDRV_IGB_UIO: - case RTE_KDRV_UIO_GENERIC: - if (rte_eal_using_phys_addrs()) { - /* map resources for devices that use uio */ - ret = pci_uio_map_resource(dev); - } - break; - default: - RTE_LOG(DEBUG, EAL, - " Not managed by a supported kernel driver, skipped\n"); - ret = 1; - break; - } - - return ret; -} - -/* Unmap pci device */ -void -rte_pci_unmap_device(struct rte_pci_device *dev) -{ - /* try unmapping the NIC resources using VFIO if it exists */ - switch (dev->kdrv) { - case RTE_KDRV_VFIO: -#ifdef VFIO_PRESENT - if (pci_vfio_is_enabled()) - pci_vfio_unmap_resource(dev); -#endif - break; - case RTE_KDRV_IGB_UIO: - case RTE_KDRV_UIO_GENERIC: - /* unmap resources for devices that use uio */ - pci_uio_unmap_resource(dev); - break; - default: - RTE_LOG(DEBUG, EAL, - " Not managed by a supported kernel driver, skipped\n"); - break; - } -} - -void * -pci_find_max_end_va(void) -{ - const struct rte_memseg *seg = rte_eal_get_physmem_layout(); - const struct rte_memseg *last = seg; - unsigned i = 0; - - for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) { - if (seg->addr == NULL) - break; - - if (seg->addr > last->addr) - last = seg; - - } - return RTE_PTR_ADD(last->addr, last->len); -} - -/* parse one line of the "resource" sysfs file (note that the 'line' - * string is modified) - */ -int -pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, - uint64_t *end_addr, uint64_t *flags) -{ - union pci_resource_info { - struct { - char *phys_addr; - char *end_addr; - char *flags; - }; - char *ptrs[PCI_RESOURCE_FMT_NVAL]; - } res_info; - - if (rte_strsplit(line, len, res_info.ptrs, 3, ' ') != 3) { - RTE_LOG(ERR, EAL, - "%s(): bad resource format\n", __func__); - return -1; - } - errno = 0; - *phys_addr = strtoull(res_info.phys_addr, NULL, 16); - *end_addr = strtoull(res_info.end_addr, NULL, 16); - *flags = strtoull(res_info.flags, NULL, 16); - if (errno != 0) { - RTE_LOG(ERR, EAL, - "%s(): bad resource format\n", __func__); - return -1; - } - - return 0; -} - -/* parse the "resource" sysfs file */ -static int -pci_parse_sysfs_resource(const char *filename, 
struct rte_pci_device *dev) -{ - FILE *f; - char buf[BUFSIZ]; - int i; - uint64_t phys_addr, end_addr, flags; - - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n"); - return -1; - } - - for (i = 0; i<PCI_MAX_RESOURCE; i++) { - - if (fgets(buf, sizeof(buf), f) == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot read resource\n", __func__); - goto error; - } - if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, - &end_addr, &flags) < 0) - goto error; - - if (flags & IORESOURCE_MEM) { - dev->mem_resource[i].phys_addr = phys_addr; - dev->mem_resource[i].len = end_addr - phys_addr + 1; - /* not mapped for now */ - dev->mem_resource[i].addr = NULL; - } - } - fclose(f); - return 0; - -error: - fclose(f); - return -1; -} - -/* Scan one pci sysfs entry, and fill the devices list from it. */ -static int -pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) -{ - char filename[PATH_MAX]; - unsigned long tmp; - struct rte_pci_device *dev; - char driver[PATH_MAX]; - int ret; - - dev = malloc(sizeof(*dev)); - if (dev == NULL) - return -1; - - memset(dev, 0, sizeof(*dev)); - dev->addr = *addr; - - /* get vendor id */ - snprintf(filename, sizeof(filename), "%s/vendor", dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.vendor_id = (uint16_t)tmp; - - /* get device id */ - snprintf(filename, sizeof(filename), "%s/device", dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.device_id = (uint16_t)tmp; - - /* get subsystem_vendor id */ - snprintf(filename, sizeof(filename), "%s/subsystem_vendor", - dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.subsystem_vendor_id = (uint16_t)tmp; - - /* get subsystem_device id */ - snprintf(filename, sizeof(filename), "%s/subsystem_device", - dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.subsystem_device_id = (uint16_t)tmp; - - /* get class_id */ - snprintf(filename, sizeof(filename), "%s/class", - dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - /* the least 24 bits are valid: class, subclass, program interface */ - dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID; - - /* get max_vfs */ - dev->max_vfs = 0; - snprintf(filename, sizeof(filename), "%s/max_vfs", dirname); - if (!access(filename, F_OK) && - eal_parse_sysfs_value(filename, &tmp) == 0) - dev->max_vfs = (uint16_t)tmp; - else { - /* for non igb_uio driver, need kernel version >= 3.8 */ - snprintf(filename, sizeof(filename), - "%s/sriov_numvfs", dirname); - if (!access(filename, F_OK) && - eal_parse_sysfs_value(filename, &tmp) == 0) - dev->max_vfs = (uint16_t)tmp; - } - - /* get numa node, default to 0 if not present */ - snprintf(filename, sizeof(filename), "%s/numa_node", - dirname); - - if (access(filename, F_OK) != -1) { - if (eal_parse_sysfs_value(filename, &tmp) == 0) - dev->device.numa_node = tmp; - else - dev->device.numa_node = -1; - } else { - dev->device.numa_node = 0; - } - - pci_name_set(dev); - - /* parse resources */ - snprintf(filename, sizeof(filename), "%s/resource", dirname); - if (pci_parse_sysfs_resource(filename, dev) < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__); - free(dev); - return -1; - } - - /* parse driver */ - snprintf(filename, sizeof(filename), "%s/driver", dirname); - ret = pci_get_kernel_driver_by_path(filename, driver); - if (ret < 0) { - 
RTE_LOG(ERR, EAL, "Fail to get kernel driver\n"); - free(dev); - return -1; - } - - if (!ret) { - if (!strcmp(driver, "vfio-pci")) - dev->kdrv = RTE_KDRV_VFIO; - else if (!strcmp(driver, "igb_uio")) - dev->kdrv = RTE_KDRV_IGB_UIO; - else if (!strcmp(driver, "uio_pci_generic")) - dev->kdrv = RTE_KDRV_UIO_GENERIC; - else - dev->kdrv = RTE_KDRV_UNKNOWN; - } else - dev->kdrv = RTE_KDRV_NONE; - - /* device is valid, add in list (sorted) */ - if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { - rte_pci_add_device(dev); - } else { - struct rte_pci_device *dev2; - int ret; - - TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { - ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); - if (ret > 0) - continue; - - if (ret < 0) { - rte_pci_insert_device(dev2, dev); - } else { /* already registered */ - dev2->kdrv = dev->kdrv; - dev2->max_vfs = dev->max_vfs; - pci_name_set(dev2); - memmove(dev2->mem_resource, dev->mem_resource, - sizeof(dev->mem_resource)); - free(dev); - } - return 0; - } - - rte_pci_add_device(dev); - } - - return 0; -} - -int -pci_update_device(const struct rte_pci_addr *addr) -{ - char filename[PATH_MAX]; - - snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT, - pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, - addr->function); - - return pci_scan_one(filename, addr); -} - -/* - * split up a pci address into its constituent parts. - */ -static int -parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) -{ - /* first split on ':' */ - union splitaddr { - struct { - char *domain; - char *bus; - char *devid; - char *function; - }; - char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */ - } splitaddr; - - char *buf_copy = strndup(buf, bufsize); - if (buf_copy == NULL) - return -1; - - if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':') - != PCI_FMT_NVAL - 1) - goto error; - /* final split is on '.' between devid and function */ - splitaddr.function = strchr(splitaddr.devid,'.'); - if (splitaddr.function == NULL) - goto error; - *splitaddr.function++ = '\0'; - - /* now convert to int values */ - errno = 0; - addr->domain = strtoul(splitaddr.domain, NULL, 16); - addr->bus = strtoul(splitaddr.bus, NULL, 16); - addr->devid = strtoul(splitaddr.devid, NULL, 16); - addr->function = strtoul(splitaddr.function, NULL, 10); - if (errno != 0) - goto error; - - free(buf_copy); /* free the copy made with strdup */ - return 0; -error: - free(buf_copy); - return -1; -} - -/* - * Scan the content of the PCI bus, and the devices in the devices - * list - */ -int -rte_pci_scan(void) -{ - struct dirent *e; - DIR *dir; - char dirname[PATH_MAX]; - struct rte_pci_addr addr; - - /* for debug purposes, PCI can be disabled */ - if (internal_config.no_pci) - return 0; - - dir = opendir(pci_get_sysfs_path()); - if (dir == NULL) { - RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", - __func__, strerror(errno)); - return -1; - } - - while ((e = readdir(dir)) != NULL) { - if (e->d_name[0] == '.') - continue; - - if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0) - continue; - - snprintf(dirname, sizeof(dirname), "%s/%s", - pci_get_sysfs_path(), e->d_name); - - if (pci_scan_one(dirname, &addr) < 0) - goto error; - } - closedir(dir); - return 0; - -error: - closedir(dir); - return -1; -} - -/* Read PCI config space. 
*/ -int rte_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &device->intr_handle; - - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: - return pci_uio_read_config(intr_handle, buf, len, offset); - -#ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: - return pci_vfio_read_config(intr_handle, buf, len, offset); -#endif - default: - RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); - return -1; - } -} - -/* Write PCI config space. */ -int rte_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &device->intr_handle; - - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: - return pci_uio_write_config(intr_handle, buf, len, offset); - -#ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: - return pci_vfio_write_config(intr_handle, buf, len, offset); -#endif - default: - RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); - return -1; - } -} - -#if defined(RTE_ARCH_X86) -static int -pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, - struct rte_pci_ioport *p) -{ - uint16_t start, end; - FILE *fp; - char *line = NULL; - char pci_id[16]; - int found = 0; - size_t linesz; - - snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT, - dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - - fp = fopen("/proc/ioports", "r"); - if (fp == NULL) { - RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__); - return -1; - } - - while (getdelim(&line, &linesz, '\n', fp) > 0) { - char *ptr = line; - char *left; - int n; - - n = strcspn(ptr, ":"); - ptr[n] = 0; - left = &ptr[n + 1]; - - while (*left && isspace(*left)) - left++; - - if (!strncmp(left, pci_id, strlen(pci_id))) { - found = 1; - - while (*ptr && isspace(*ptr)) - ptr++; - - sscanf(ptr, "%04hx-%04hx", &start, &end); - - break; - } - } - - free(line); - fclose(fp); - - if (!found) - return -1; - - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - p->base = start; - RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start); - - return 0; -} -#endif - -int -rte_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - int ret = -1; - - switch (dev->kdrv) { -#ifdef VFIO_PRESENT - case RTE_KDRV_VFIO: - if (pci_vfio_is_enabled()) - ret = pci_vfio_ioport_map(dev, bar, p); - break; -#endif - case RTE_KDRV_IGB_UIO: - ret = pci_uio_ioport_map(dev, bar, p); - break; - case RTE_KDRV_UIO_GENERIC: -#if defined(RTE_ARCH_X86) - ret = pci_ioport_map(dev, bar, p); -#else - ret = pci_uio_ioport_map(dev, bar, p); -#endif - break; - case RTE_KDRV_NONE: -#if defined(RTE_ARCH_X86) - ret = pci_ioport_map(dev, bar, p); -#endif - break; - default: - break; - } - - if (!ret) - p->dev = dev; - - return ret; -} - -void -rte_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ - switch (p->dev->kdrv) { -#ifdef VFIO_PRESENT - case RTE_KDRV_VFIO: - pci_vfio_ioport_read(p, data, len, offset); - break; -#endif - case RTE_KDRV_IGB_UIO: - pci_uio_ioport_read(p, data, len, offset); - break; - case RTE_KDRV_UIO_GENERIC: - pci_uio_ioport_read(p, data, len, offset); - break; - case RTE_KDRV_NONE: -#if defined(RTE_ARCH_X86) - pci_uio_ioport_read(p, 
data, len, offset); -#endif - break; - default: - break; - } -} - -void -rte_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ - switch (p->dev->kdrv) { -#ifdef VFIO_PRESENT - case RTE_KDRV_VFIO: - pci_vfio_ioport_write(p, data, len, offset); - break; -#endif - case RTE_KDRV_IGB_UIO: - pci_uio_ioport_write(p, data, len, offset); - break; - case RTE_KDRV_UIO_GENERIC: - pci_uio_ioport_write(p, data, len, offset); - break; - case RTE_KDRV_NONE: -#if defined(RTE_ARCH_X86) - pci_uio_ioport_write(p, data, len, offset); -#endif - break; - default: - break; - } -} - -int -rte_pci_ioport_unmap(struct rte_pci_ioport *p) -{ - int ret = -1; - - switch (p->dev->kdrv) { -#ifdef VFIO_PRESENT - case RTE_KDRV_VFIO: - if (pci_vfio_is_enabled()) - ret = pci_vfio_ioport_unmap(p); - break; -#endif - case RTE_KDRV_IGB_UIO: - ret = pci_uio_ioport_unmap(p); - break; - case RTE_KDRV_UIO_GENERIC: -#if defined(RTE_ARCH_X86) - ret = 0; -#else - ret = pci_uio_ioport_unmap(p); -#endif - break; - case RTE_KDRV_NONE: -#if defined(RTE_ARCH_X86) - ret = 0; -#endif - break; - default: - break; - } - - return ret; -} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h deleted file mode 100644 index ae2980d6..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h +++ /dev/null @@ -1,97 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef EAL_PCI_INIT_H_ -#define EAL_PCI_INIT_H_ - -#include "eal_vfio.h" - -/** IO resource type: */ -#define IORESOURCE_IO 0x00000100 -#define IORESOURCE_MEM 0x00000200 - -/* - * Helper function to map PCI resources right after hugepages in virtual memory - */ -extern void *pci_map_addr; -void *pci_find_max_end_va(void); - -/* parse one line of the "resource" sysfs file (note that the 'line' - * string is modified) - */ -int pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, - uint64_t *end_addr, uint64_t *flags); - -int pci_uio_alloc_resource(struct rte_pci_device *dev, - struct mapped_pci_resource **uio_res); -void pci_uio_free_resource(struct rte_pci_device *dev, - struct mapped_pci_resource *uio_res); -int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, - struct mapped_pci_resource *uio_res, int map_idx); - -int pci_uio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offs); -int pci_uio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offs); - -int pci_uio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p); -void pci_uio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset); -void pci_uio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset); -int pci_uio_ioport_unmap(struct rte_pci_ioport *p); - -#ifdef VFIO_PRESENT - -/* access config space */ -int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offs); -int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offs); - -int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p); -void pci_vfio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset); -void pci_vfio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset); -int pci_vfio_ioport_unmap(struct rte_pci_ioport *p); - -/* map/unmap VFIO resource prototype */ -int pci_vfio_map_resource(struct rte_pci_device *dev); -int pci_vfio_unmap_resource(struct rte_pci_device *dev); - -#endif - -#endif /* EAL_PCI_INIT_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c deleted file mode 100644 index fa10329f..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ /dev/null @@ -1,567 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <string.h> -#include <unistd.h> -#include <fcntl.h> -#include <dirent.h> -#include <inttypes.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <sys/sysmacros.h> -#include <linux/pci_regs.h> - -#if defined(RTE_ARCH_X86) -#include <sys/io.h> -#endif - -#include <rte_log.h> -#include <rte_pci.h> -#include <rte_eal_memconfig.h> -#include <rte_common.h> -#include <rte_malloc.h> - -#include "eal_filesystem.h" -#include "eal_pci_init.h" - -void *pci_map_addr = NULL; - -#define OFF_MAX ((uint64_t)(off_t)-1) - -int -pci_uio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offset) -{ - return pread(intr_handle->uio_cfg_fd, buf, len, offset); -} - -int -pci_uio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offset) -{ - return pwrite(intr_handle->uio_cfg_fd, buf, len, offset); -} - -static int -pci_uio_set_bus_master(int dev_fd) -{ - uint16_t reg; - int ret; - - ret = pread(dev_fd, ®, sizeof(reg), PCI_COMMAND); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, - "Cannot read command from PCI config space!\n"); - return -1; - } - - /* return if bus mastering is already on */ - if (reg & PCI_COMMAND_MASTER) - return 0; - - reg |= PCI_COMMAND_MASTER; - - ret = pwrite(dev_fd, ®, sizeof(reg), PCI_COMMAND); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, - "Cannot write command to PCI config space!\n"); - return -1; - } - - return 0; -} - -static int -pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num) -{ - FILE *f; - char filename[PATH_MAX]; - int ret; - unsigned major, minor; - dev_t dev; - - /* get the name of the sysfs file that contains the major and minor - * of the uio device and read its content */ - snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path); - - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n", - __func__); - return -1; - } - - ret = fscanf(f, "%u:%u", &major, &minor); - if (ret != 2) { - RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n", - __func__); - fclose(f); - return -1; - } - fclose(f); - - /* create the char device "mknod /dev/uioX c major minor" */ - snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); - dev = makedev(major, minor); - ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev); - if (ret != 0) { - RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n", - __func__, strerror(errno)); - return -1; - } - - return ret; -} - -/* - * Return the uioX char device used for a pci device. On success, return - * the UIO number and fill dstbuf string with the path of the device in - * sysfs. On error, return a negative value. In this case dstbuf is - * invalid. 
- */ -static int -pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, - unsigned int buflen, int create) -{ - struct rte_pci_addr *loc = &dev->addr; - unsigned int uio_num; - struct dirent *e; - DIR *dir; - char dirname[PATH_MAX]; - - /* depending on kernel version, uio can be located in uio/uioX - * or uio:uioX */ - - snprintf(dirname, sizeof(dirname), - "%s/" PCI_PRI_FMT "/uio", pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, loc->function); - - dir = opendir(dirname); - if (dir == NULL) { - /* retry with the parent directory */ - snprintf(dirname, sizeof(dirname), - "%s/" PCI_PRI_FMT, pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, loc->function); - dir = opendir(dirname); - - if (dir == NULL) { - RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname); - return -1; - } - } - - /* take the first file starting with "uio" */ - while ((e = readdir(dir)) != NULL) { - /* format could be uio%d ...*/ - int shortprefix_len = sizeof("uio") - 1; - /* ... or uio:uio%d */ - int longprefix_len = sizeof("uio:uio") - 1; - char *endptr; - - if (strncmp(e->d_name, "uio", 3) != 0) - continue; - - /* first try uio%d */ - errno = 0; - uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10); - if (errno == 0 && endptr != (e->d_name + shortprefix_len)) { - snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num); - break; - } - - /* then try uio:uio%d */ - errno = 0; - uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10); - if (errno == 0 && endptr != (e->d_name + longprefix_len)) { - snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num); - break; - } - } - closedir(dir); - - /* No uio resource found */ - if (e == NULL) - return -1; - - /* create uio device if we've been asked to */ - if (internal_config.create_uio_dev && create && - pci_mknod_uio_dev(dstbuf, uio_num) < 0) - RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num); - - return uio_num; -} - -void -pci_uio_free_resource(struct rte_pci_device *dev, - struct mapped_pci_resource *uio_res) -{ - rte_free(uio_res); - - if (dev->intr_handle.uio_cfg_fd >= 0) { - close(dev->intr_handle.uio_cfg_fd); - dev->intr_handle.uio_cfg_fd = -1; - } - if (dev->intr_handle.fd >= 0) { - close(dev->intr_handle.fd); - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - } -} - -int -pci_uio_alloc_resource(struct rte_pci_device *dev, - struct mapped_pci_resource **uio_res) -{ - char dirname[PATH_MAX]; - char cfgname[PATH_MAX]; - char devname[PATH_MAX]; /* contains the /dev/uioX */ - int uio_num; - struct rte_pci_addr *loc; - - loc = &dev->addr; - - /* find uio resource */ - uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1); - if (uio_num < 0) { - RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " - "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); - return 1; - } - snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); - - /* save fd if in primary process */ - dev->intr_handle.fd = open(devname, O_RDWR); - if (dev->intr_handle.fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - goto error; - } - - snprintf(cfgname, sizeof(cfgname), - "/sys/class/uio/uio%u/device/config", uio_num); - dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR); - if (dev->intr_handle.uio_cfg_fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - cfgname, strerror(errno)); - goto error; - } - - if (dev->kdrv == RTE_KDRV_IGB_UIO) - dev->intr_handle.type = RTE_INTR_HANDLE_UIO; - else { - dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX; - - /* set bus 
master that is not done by uio_pci_generic */ - if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) { - RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); - goto error; - } - } - - /* allocate the mapping details for secondary processes*/ - *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); - if (*uio_res == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot store uio mmap details\n", __func__); - goto error; - } - - snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname); - memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); - - return 0; - -error: - pci_uio_free_resource(dev, *uio_res); - return -1; -} - -int -pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, - struct mapped_pci_resource *uio_res, int map_idx) -{ - int fd; - char devname[PATH_MAX]; - void *mapaddr; - struct rte_pci_addr *loc; - struct pci_map *maps; - - loc = &dev->addr; - maps = uio_res->maps; - - /* update devname for mmap */ - snprintf(devname, sizeof(devname), - "%s/" PCI_PRI_FMT "/resource%d", - pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, - loc->function, res_idx); - - /* allocate memory to keep path */ - maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); - if (maps[map_idx].path == NULL) { - RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", - strerror(errno)); - return -1; - } - - /* - * open resource file, to mmap it - */ - fd = open(devname, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - goto error; - } - - /* try mapping somewhere close to the end of hugepages */ - if (pci_map_addr == NULL) - pci_map_addr = pci_find_max_end_va(); - - mapaddr = pci_map_resource(pci_map_addr, fd, 0, - (size_t)dev->mem_resource[res_idx].len, 0); - close(fd); - if (mapaddr == MAP_FAILED) - goto error; - - pci_map_addr = RTE_PTR_ADD(mapaddr, - (size_t)dev->mem_resource[res_idx].len); - - maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; - maps[map_idx].size = dev->mem_resource[res_idx].len; - maps[map_idx].addr = mapaddr; - maps[map_idx].offset = 0; - strcpy(maps[map_idx].path, devname); - dev->mem_resource[res_idx].addr = mapaddr; - - return 0; - -error: - rte_free(maps[map_idx].path); - return -1; -} - -#if defined(RTE_ARCH_X86) -int -pci_uio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - char dirname[PATH_MAX]; - char filename[PATH_MAX]; - int uio_num; - unsigned long start; - - uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0); - if (uio_num < 0) - return -1; - - /* get portio start */ - snprintf(filename, sizeof(filename), - "%s/portio/port%d/start", dirname, bar); - if (eal_parse_sysfs_value(filename, &start) < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n", - __func__); - return -1; - } - /* ensure we don't get anything funny here, read/write will cast to - * uin16_t */ - if (start > UINT16_MAX) - return -1; - - /* FIXME only for primary process ? 
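pci_uio_map_resource_by_index() above obtains a BAR mapping by mmap()ing the device's sysfs resourceN file. A standalone sketch of that access path, with an illustrative PCI address and BAR size (the real size would be parsed from the "resource" file or from rte_pci_device):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        /* illustrative PCI address; resource0 corresponds to BAR 0 */
        const char *path = "/sys/bus/pci/devices/0000:00:04.0/resource0";
        size_t bar_len = 0x1000;      /* illustrative; normally read from sysfs */
        int fd = open(path, O_RDWR);
        volatile uint32_t *bar;

        if (fd < 0)
            return 1;

        bar = mmap(NULL, bar_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        close(fd);                    /* the mapping stays valid after close */
        if (bar == MAP_FAILED)
            return 1;

        printf("first register: 0x%08x\n", bar[0]);
        munmap((void *)bar, bar_len);
        return 0;
    }
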
*/ - if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) { - - snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); - dev->intr_handle.fd = open(filename, O_RDWR); - if (dev->intr_handle.fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - filename, strerror(errno)); - return -1; - } - dev->intr_handle.type = RTE_INTR_HANDLE_UIO; - } - - RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start); - - p->base = start; - p->len = 0; - return 0; -} -#else -int -pci_uio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - FILE *f; - char buf[BUFSIZ]; - char filename[PATH_MAX]; - uint64_t phys_addr, end_addr, flags; - int fd, i; - void *addr; - - /* open and read addresses of the corresponding resource in sysfs */ - snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource", - pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n", - strerror(errno)); - return -1; - } - for (i = 0; i < bar + 1; i++) { - if (fgets(buf, sizeof(buf), f) == NULL) { - RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n"); - goto error; - } - } - if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, - &end_addr, &flags) < 0) - goto error; - if ((flags & IORESOURCE_IO) == 0) { - RTE_LOG(ERR, EAL, "BAR %d is not an IO resource\n", bar); - goto error; - } - snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource%d", - pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function, bar); - - /* mmap the pci resource */ - fd = open(filename, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, - strerror(errno)); - goto error; - } - addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - close(fd); - if (addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Cannot mmap IO port resource: %s\n", - strerror(errno)); - goto error; - } - - /* strangely, the base address is mmap addr + phys_addr */ - p->base = (uintptr_t)addr + phys_addr; - p->len = end_addr + 1; - RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%"PRIx64"\n", p->base); - fclose(f); - - return 0; - -error: - fclose(f); - return -1; -} -#endif - -void -pci_uio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ - uint8_t *d; - int size; - uintptr_t reg = p->base + offset; - - for (d = data; len > 0; d += size, reg += size, len -= size) { - if (len >= 4) { - size = 4; -#if defined(RTE_ARCH_X86) - *(uint32_t *)d = inl(reg); -#else - *(uint32_t *)d = *(volatile uint32_t *)reg; -#endif - } else if (len >= 2) { - size = 2; -#if defined(RTE_ARCH_X86) - *(uint16_t *)d = inw(reg); -#else - *(uint16_t *)d = *(volatile uint16_t *)reg; -#endif - } else { - size = 1; -#if defined(RTE_ARCH_X86) - *d = inb(reg); -#else - *d = *(volatile uint8_t *)reg; -#endif - } - } -} - -void -pci_uio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ - const uint8_t *s; - int size; - uintptr_t reg = p->base + offset; - - for (s = data; len > 0; s += size, reg += size, len -= size) { - if (len >= 4) { - size = 4; -#if defined(RTE_ARCH_X86) - outl_p(*(const uint32_t *)s, reg); -#else - *(volatile uint32_t *)reg = *(const uint32_t *)s; -#endif - } else if (len >= 2) { - size = 2; -#if defined(RTE_ARCH_X86) - outw_p(*(const uint16_t *)s, reg); -#else - *(volatile uint16_t *)reg = *(const uint16_t *)s; -#endif - } else { - size = 1; -#if defined(RTE_ARCH_X86) 
- outb_p(*s, reg); -#else - *(volatile uint8_t *)reg = *s; -#endif - } - } -} - -int -pci_uio_ioport_unmap(struct rte_pci_ioport *p) -{ -#if defined(RTE_ARCH_X86) - RTE_SET_USED(p); - /* FIXME close intr fd ? */ - return 0; -#else - return munmap((void *)(uintptr_t)p->base, p->len); -#endif -} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c deleted file mode 100644 index aa9d96ed..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ /dev/null @@ -1,674 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <string.h> -#include <fcntl.h> -#include <linux/pci_regs.h> -#include <sys/eventfd.h> -#include <sys/socket.h> -#include <sys/ioctl.h> -#include <sys/mman.h> -#include <stdbool.h> - -#include <rte_log.h> -#include <rte_pci.h> -#include <rte_eal_memconfig.h> -#include <rte_malloc.h> - -#include "eal_filesystem.h" -#include "eal_pci_init.h" -#include "eal_vfio.h" -#include "eal_private.h" - -/** - * @file - * PCI probing under linux (VFIO version) - * - * This code tries to determine if the PCI device is bound to VFIO driver, - * and initialize it (map BARs, set up interrupts) if that's the case. - * - * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". 
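The removed pci_uio_ioport_read()/write() helpers above walk the buffer in 4-, 2- and 1-byte chunks so that any length is serviced with natural-width accesses (x86 uses inl/inw/inb, other architectures plain volatile loads and stores). A minimal MMIO-flavoured sketch of the same chunking loop, with a local buffer standing in for a mapped BAR:

    #include <stdint.h>
    #include <stdio.h>

    /* copy 'len' bytes from an MMIO-style region using the widest access that fits */
    static void reg_read(uintptr_t base, void *data, size_t len, size_t offset)
    {
        uint8_t *d = data;
        uintptr_t reg = base + offset;
        size_t size;

        for (; len > 0; d += size, reg += size, len -= size) {
            if (len >= 4) {
                size = 4;
                *(uint32_t *)d = *(volatile uint32_t *)reg;
            } else if (len >= 2) {
                size = 2;
                *(uint16_t *)d = *(volatile uint16_t *)reg;
            } else {
                size = 1;
                *d = *(volatile uint8_t *)reg;
            }
        }
    }

    int main(void)
    {
        /* aligned buffer standing in for a BAR mapping in this sketch */
        uint32_t fake_bar[4] = { 0x04030201, 0x00070605, 0, 0 };
        uint32_t out_buf[2] = { 0 };
        uint8_t *out = (uint8_t *)out_buf;

        reg_read((uintptr_t)fake_bar, out, 7, 0);   /* 4 + 2 + 1 byte accesses */
        printf("out[6] = 0x%02x\n", out[6]);
        return 0;
    }
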
- */ - -#ifdef VFIO_PRESENT - -#define PAGE_SIZE (sysconf(_SC_PAGESIZE)) -#define PAGE_MASK (~(PAGE_SIZE - 1)) - -static struct rte_tailq_elem rte_vfio_tailq = { - .name = "VFIO_RESOURCE_LIST", -}; -EAL_REGISTER_TAILQ(rte_vfio_tailq) - -int -pci_vfio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offs) -{ - return pread64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); -} - -int -pci_vfio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offs) -{ - return pwrite64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); -} - -/* get PCI BAR number where MSI-X interrupts are */ -static int -pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset, - uint32_t *msix_table_size) -{ - int ret; - uint32_t reg; - uint16_t flags; - uint8_t cap_id, cap_offset; - - /* read PCI capability pointer from config space */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_CAPABILITY_LIST); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " - "config space!\n"); - return -1; - } - - /* we need first byte */ - cap_offset = reg & 0xFF; - - while (cap_offset) { - - /* read PCI capability ID */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI " - "config space!\n"); - return -1; - } - - /* we need first byte */ - cap_id = reg & 0xFF; - - /* if we haven't reached MSI-X, check next capability */ - if (cap_id != PCI_CAP_ID_MSIX) { - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " - "config space!\n"); - return -1; - } - - /* we need second byte */ - cap_offset = (reg & 0xFF00) >> 8; - - continue; - } - /* else, read table offset */ - else { - /* table offset resides in the next 4 bytes */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 4); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config " - "space!\n"); - return -1; - } - - ret = pread64(fd, &flags, sizeof(flags), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 2); - if (ret != sizeof(flags)) { - RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " - "space!\n"); - return -1; - } - - *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR; - *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; - *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE)); - - return 0; - } - } - return 0; -} - -/* set PCI bus mastering */ -static int -pci_vfio_set_bus_master(int dev_fd, bool op) -{ - uint16_t reg; - int ret; - - ret = pread64(dev_fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); - return -1; - } - - if (op) - /* set the master bit */ - reg |= PCI_COMMAND_MASTER; - else - reg &= ~(PCI_COMMAND_MASTER); - - ret = pwrite64(dev_fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); - - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); - return -1; - } - - return 0; -} - -/* set up 
interrupt support (but not enable interrupts) */ -static int -pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) -{ - int i, ret, intr_idx; - - /* default to invalid index */ - intr_idx = VFIO_PCI_NUM_IRQS; - - /* get interrupt type from internal config (MSI-X by default, can be - * overridden from the command line - */ - switch (internal_config.vfio_intr_mode) { - case RTE_INTR_MODE_MSIX: - intr_idx = VFIO_PCI_MSIX_IRQ_INDEX; - break; - case RTE_INTR_MODE_MSI: - intr_idx = VFIO_PCI_MSI_IRQ_INDEX; - break; - case RTE_INTR_MODE_LEGACY: - intr_idx = VFIO_PCI_INTX_IRQ_INDEX; - break; - /* don't do anything if we want to automatically determine interrupt type */ - case RTE_INTR_MODE_NONE: - break; - default: - RTE_LOG(ERR, EAL, " unknown default interrupt type!\n"); - return -1; - } - - /* start from MSI-X interrupt type */ - for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { - struct vfio_irq_info irq = { .argsz = sizeof(irq) }; - int fd = -1; - - /* skip interrupt modes we don't want */ - if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE && - i != intr_idx) - continue; - - irq.index = i; - - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); - if (ret < 0) { - RTE_LOG(ERR, EAL, " cannot get IRQ info, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; - } - - /* if this vector cannot be used with eventfd, fail if we explicitly - * specified interrupt type, otherwise continue */ - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { - if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE) { - RTE_LOG(ERR, EAL, - " interrupt vector does not support eventfd!\n"); - return -1; - } else - continue; - } - - /* set up an eventfd for interrupts */ - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - RTE_LOG(ERR, EAL, " cannot set up eventfd, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; - } - - dev->intr_handle.fd = fd; - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - - switch (i) { - case VFIO_PCI_MSIX_IRQ_INDEX: - internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; - break; - case VFIO_PCI_MSI_IRQ_INDEX: - internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; - break; - case VFIO_PCI_INTX_IRQ_INDEX: - internal_config.vfio_intr_mode = RTE_INTR_MODE_LEGACY; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; - break; - default: - RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); - return -1; - } - - return 0; - } - - /* if we're here, we haven't found a suitable interrupt vector */ - return -1; -} - -/* - * map the PCI resources of a PCI device in virtual memory (VFIO version). 
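pci_vfio_get_msix_bar() above walks the capability linked list in PCI config space: it starts at the byte stored at PCI_CAPABILITY_LIST, follows each capability's "next" pointer, and stops at PCI_CAP_ID_MSIX, whose table BIR and offset sit 4 bytes into the capability. A standalone sketch of the same walk against a sysfs config fd rather than the VFIO config region (illustrative device path, and assuming a reasonably recent linux/pci_regs.h for the PCI_MSIX_* constants):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <linux/pci_regs.h>   /* PCI_CAPABILITY_LIST, PCI_CAP_ID_MSIX, PCI_MSIX_* */

    /* return the config-space offset of the MSI-X capability, or 0 if absent */
    static int find_msix_cap(int cfg_fd)
    {
        uint8_t pos = 0, id = 0;

        if (pread(cfg_fd, &pos, 1, PCI_CAPABILITY_LIST) != 1)
            return 0;

        while (pos) {
            if (pread(cfg_fd, &id, 1, pos) != 1)
                return 0;
            if (id == PCI_CAP_ID_MSIX)
                return pos;
            /* byte 1 of every capability is the offset of the next one */
            if (pread(cfg_fd, &pos, 1, pos + 1) != 1)
                return 0;
        }
        return 0;
    }

    int main(void)
    {
        /* illustrative device path */
        int fd = open("/sys/bus/pci/devices/0000:00:04.0/config", O_RDONLY);
        int cap;

        if (fd < 0)
            return 1;
        cap = find_msix_cap(fd);
        if (cap) {
            uint32_t table;   /* BIR in the low 3 bits, table offset in the rest */
            if (pread(fd, &table, sizeof(table), cap + PCI_MSIX_TABLE) == sizeof(table))
                printf("MSI-X cap at 0x%02x, table BAR %u, offset 0x%x\n",
                       cap, table & PCI_MSIX_TABLE_BIR,
                       table & PCI_MSIX_TABLE_OFFSET);
        }
        close(fd);
        return 0;
    }
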
- * primary and secondary processes follow almost exactly the same path - */ -int -pci_vfio_map_resource(struct rte_pci_device *dev) -{ - struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; - char pci_addr[PATH_MAX] = {0}; - int vfio_dev_fd; - struct rte_pci_addr *loc = &dev->addr; - int i, ret, msix_bar; - struct mapped_pci_resource *vfio_res = NULL; - struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); - - struct pci_map *maps; - uint32_t msix_table_offset = 0; - uint32_t msix_table_size = 0; - uint32_t ioport_bar; - - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - - /* store PCI address string */ - snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, - loc->domain, loc->bus, loc->devid, loc->function); - - if ((ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr, - &vfio_dev_fd, &device_info))) - return ret; - - /* get MSI-X BAR, if any (we have to know where it is because we can't - * easily mmap it when using VFIO) */ - msix_bar = -1; - ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar, - &msix_table_offset, &msix_table_size); - if (ret < 0) { - RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); - close(vfio_dev_fd); - return -1; - } - - /* if we're in a primary process, allocate vfio_res and get region info */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0); - if (vfio_res == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot store uio mmap details\n", __func__); - close(vfio_dev_fd); - return -1; - } - memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr)); - - /* get number of registers (up to BAR5) */ - vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions, - VFIO_PCI_BAR5_REGION_INDEX + 1); - } else { - /* if we're in a secondary process, just find our tailq entry */ - TAILQ_FOREACH(vfio_res, vfio_res_list, next) { - if (rte_eal_compare_pci_addr(&vfio_res->pci_addr, - &dev->addr)) - continue; - break; - } - /* if we haven't found our tailq entry, something's wrong */ - if (vfio_res == NULL) { - RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", - pci_addr); - close(vfio_dev_fd); - return -1; - } - } - - /* map BARs */ - maps = vfio_res->maps; - - for (i = 0; i < (int) vfio_res->nb_maps; i++) { - struct vfio_region_info reg = { .argsz = sizeof(reg) }; - void *bar_addr; - struct memreg { - unsigned long offset, size; - } memreg[2] = {}; - - reg.index = i; - - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®); - - if (ret) { - RTE_LOG(ERR, EAL, " %s cannot get device region info " - "error %i (%s)\n", pci_addr, errno, strerror(errno)); - close(vfio_dev_fd); - if (internal_config.process_type == RTE_PROC_PRIMARY) - rte_free(vfio_res); - return -1; - } - - /* chk for io port region */ - ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) - + PCI_BASE_ADDRESS_0 + i*4); - - if (ret != sizeof(ioport_bar)) { - RTE_LOG(ERR, EAL, - "Cannot read command (%x) from config space!\n", - PCI_BASE_ADDRESS_0 + i*4); - return -1; - } - - if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) { - RTE_LOG(INFO, EAL, - "Ignore mapping IO port bar(%d) addr: %x\n", - i, ioport_bar); - continue; - } - - /* skip non-mmapable BARs */ - if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) - continue; - - if (i == msix_bar) { - /* - * VFIO will not let us map the MSI-X table, - * but we can map around it. 
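Mapping around the MSI-X table works by page-aligning the table boundaries and mmap()ing the BAR as up to two pieces, one before the excluded pages and one after them. The arithmetic, on made-up numbers:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t page_sz = 4096, page_mask = ~(page_sz - 1);

        /* illustrative BAR: 64 KiB long, MSI-X table at offset 0x2010 with 16 entries */
        uint64_t bar_size    = 0x10000;
        uint64_t table_start = 0x2010;
        uint64_t table_end   = table_start + 16 * 16;   /* 16 bytes per MSI-X entry */

        /* widen the excluded range to whole pages, as the mapping loop does */
        table_end   = (table_end + ~page_mask) & page_mask;   /* rounds up to 0x3000   */
        table_start &= page_mask;                              /* rounds down to 0x2000 */

        /* piece 0 covers [0, table_start), piece 1 covers [table_end, bar_size) */
        printf("mmap 0x0-0x%" PRIx64 " and 0x%" PRIx64 "-0x%" PRIx64
               ", leave the table pages out\n",
               table_start, table_end, bar_size);
        return 0;
    }

If the rounded hole ends up covering the whole region, the BAR simply is not mapped, which is the "Skipping BAR" case in the loop below.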
- */ - uint32_t table_start = msix_table_offset; - uint32_t table_end = table_start + msix_table_size; - table_end = (table_end + ~PAGE_MASK) & PAGE_MASK; - table_start &= PAGE_MASK; - - if (table_start == 0 && table_end >= reg.size) { - /* Cannot map this BAR */ - RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i); - continue; - } else { - memreg[0].offset = reg.offset; - memreg[0].size = table_start; - memreg[1].offset = reg.offset + table_end; - memreg[1].size = reg.size - table_end; - - RTE_LOG(DEBUG, EAL, - "Trying to map BAR %d that contains the MSI-X " - "table. Trying offsets: " - "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i, - memreg[0].offset, memreg[0].size, - memreg[1].offset, memreg[1].size); - } - } else { - memreg[0].offset = reg.offset; - memreg[0].size = reg.size; - } - - /* try to figure out an address */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - /* try mapping somewhere close to the end of hugepages */ - if (pci_map_addr == NULL) - pci_map_addr = pci_find_max_end_va(); - - bar_addr = pci_map_addr; - pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); - } else { - bar_addr = maps[i].addr; - } - - /* reserve the address using an inaccessible mapping */ - bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE | - MAP_ANONYMOUS, -1, 0); - if (bar_addr != MAP_FAILED) { - void *map_addr = NULL; - if (memreg[0].size) { - /* actual map of first part */ - map_addr = pci_map_resource(bar_addr, vfio_dev_fd, - memreg[0].offset, - memreg[0].size, - MAP_FIXED); - } - - /* if there's a second part, try to map it */ - if (map_addr != MAP_FAILED - && memreg[1].offset && memreg[1].size) { - void *second_addr = RTE_PTR_ADD(bar_addr, - memreg[1].offset - - (uintptr_t)reg.offset); - map_addr = pci_map_resource(second_addr, - vfio_dev_fd, memreg[1].offset, - memreg[1].size, - MAP_FIXED); - } - - if (map_addr == MAP_FAILED || !map_addr) { - munmap(bar_addr, reg.size); - bar_addr = MAP_FAILED; - } - } - - if (bar_addr == MAP_FAILED || - (internal_config.process_type == RTE_PROC_SECONDARY && - bar_addr != maps[i].addr)) { - RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, - strerror(errno)); - close(vfio_dev_fd); - if (internal_config.process_type == RTE_PROC_PRIMARY) - rte_free(vfio_res); - return -1; - } - - maps[i].addr = bar_addr; - maps[i].offset = reg.offset; - maps[i].size = reg.size; - maps[i].path = NULL; /* vfio doesn't have per-resource paths */ - dev->mem_resource[i].addr = bar_addr; - } - - /* if secondary process, do not set up interrupts */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) { - RTE_LOG(ERR, EAL, " %s error setting up interrupts!\n", pci_addr); - close(vfio_dev_fd); - rte_free(vfio_res); - return -1; - } - - /* set bus mastering for the device */ - if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { - RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr); - close(vfio_dev_fd); - rte_free(vfio_res); - return -1; - } - - /* Reset the device */ - ioctl(vfio_dev_fd, VFIO_DEVICE_RESET); - } - - if (internal_config.process_type == RTE_PROC_PRIMARY) - TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); - - return 0; -} - -int -pci_vfio_unmap_resource(struct rte_pci_device *dev) -{ - char pci_addr[PATH_MAX] = {0}; - struct rte_pci_addr *loc = &dev->addr; - int i, ret; - struct mapped_pci_resource *vfio_res = NULL; - struct mapped_pci_res_list *vfio_res_list; - - struct pci_map *maps; - - /* store PCI address string */ - snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, - 
loc->domain, loc->bus, loc->devid, loc->function); - - - if (close(dev->intr_handle.fd) < 0) { - RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n", - pci_addr); - return -1; - } - - if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) { - RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n", - pci_addr); - return -1; - } - - ret = vfio_release_device(pci_get_sysfs_path(), pci_addr, - dev->intr_handle.vfio_dev_fd); - if (ret < 0) { - RTE_LOG(ERR, EAL, - "%s(): cannot release device\n", __func__); - return ret; - } - - vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); - /* Get vfio_res */ - TAILQ_FOREACH(vfio_res, vfio_res_list, next) { - if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) - continue; - break; - } - /* if we haven't found our tailq entry, something's wrong */ - if (vfio_res == NULL) { - RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", - pci_addr); - return -1; - } - - /* unmap BARs */ - maps = vfio_res->maps; - - RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n", - pci_addr); - for (i = 0; i < (int) vfio_res->nb_maps; i++) { - - /* - * We do not need to be aware of MSI-X table BAR mappings as - * when mapping. Just using current maps array is enough - */ - if (maps[i].addr) { - RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n", - pci_addr, maps[i].addr); - pci_unmap_resource(maps[i].addr, maps[i].size); - } - } - - TAILQ_REMOVE(vfio_res_list, vfio_res, next); - - return 0; -} - -int -pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - if (bar < VFIO_PCI_BAR0_REGION_INDEX || - bar > VFIO_PCI_BAR5_REGION_INDEX) { - RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar); - return -1; - } - - p->dev = dev; - p->base = VFIO_GET_REGION_ADDR(bar); - return 0; -} - -void -pci_vfio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; - - if (pread64(intr_handle->vfio_dev_fd, data, - len, p->base + offset) <= 0) - RTE_LOG(ERR, EAL, - "Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n", - VFIO_GET_REGION_IDX(p->base), (int)offset); -} - -void -pci_vfio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; - - if (pwrite64(intr_handle->vfio_dev_fd, data, - len, p->base + offset) <= 0) - RTE_LOG(ERR, EAL, - "Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n", - VFIO_GET_REGION_IDX(p->base), (int)offset); -} - -int -pci_vfio_ioport_unmap(struct rte_pci_ioport *p) -{ - RTE_SET_USED(p); - return -1; -} - -int -pci_vfio_enable(void) -{ - return vfio_enable("vfio_pci"); -} - -int -pci_vfio_is_enabled(void) -{ - return vfio_is_enabled("vfio_pci"); -} -#endif diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c index 6481eeea..e9a579e4 100644 --- a/lib/librte_eal/linuxapp/eal/eal_thread.c +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c @@ -46,7 +46,6 @@ #include <rte_launch.h> #include <rte_log.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_per_lcore.h> #include <rte_eal.h> #include <rte_lcore.h> diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c index afa32f5c..24349dab 100644 --- a/lib/librte_eal/linuxapp/eal/eal_timer.c +++ b/lib/librte_eal/linuxapp/eal/eal_timer.c @@ -49,7 +49,6 @@ #include <rte_cycles.h> #include <rte_lcore.h> 
#include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_debug.h> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 946df7e3..58f0123e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -39,6 +39,7 @@ #include <rte_log.h> #include <rte_memory.h> #include <rte_eal_memconfig.h> +#include <rte_vfio.h> #include "eal_filesystem.h" #include "eal_vfio.h" @@ -68,8 +69,8 @@ vfio_get_group_fd(int iommu_group_no) { int i; int vfio_group_fd; - int group_idx = -1; char filename[PATH_MAX]; + struct vfio_group *cur_grp; /* check if we already have the group descriptor open */ for (i = 0; i < VFIO_MAX_GROUPS; i++) @@ -85,12 +86,12 @@ vfio_get_group_fd(int iommu_group_no) /* Now lets get an index for the new group */ for (i = 0; i < VFIO_MAX_GROUPS; i++) if (vfio_cfg.vfio_groups[i].group_no == -1) { - group_idx = i; + cur_grp = &vfio_cfg.vfio_groups[i]; break; } /* This should not happen */ - if (group_idx == -1) { + if (i == VFIO_MAX_GROUPS) { RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); return -1; } @@ -123,8 +124,8 @@ vfio_get_group_fd(int iommu_group_no) /* noiommu group found */ } - vfio_cfg.vfio_groups[group_idx].group_no = iommu_group_no; - vfio_cfg.vfio_groups[group_idx].fd = vfio_group_fd; + cur_grp->group_no = iommu_group_no; + cur_grp->fd = vfio_group_fd; vfio_cfg.vfio_active_groups++; return vfio_group_fd; } @@ -157,9 +158,12 @@ vfio_get_group_fd(int iommu_group_no) return 0; case SOCKET_OK: vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd); - /* if we got the fd, return it */ + /* if we got the fd, store it and return it */ if (vfio_group_fd > 0) { close(socket_fd); + cur_grp->group_no = iommu_group_no; + cur_grp->fd = vfio_group_fd; + vfio_cfg.vfio_active_groups++; return vfio_group_fd; } /* fall-through on error */ @@ -280,7 +284,7 @@ clear_group(int vfio_group_fd) } int -vfio_setup_device(const char *sysfs_base, const char *dev_addr, +rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, int *vfio_dev_fd, struct vfio_device_info *device_info) { struct vfio_group_status group_status = { @@ -412,7 +416,7 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr, } int -vfio_release_device(const char *sysfs_base, const char *dev_addr, +rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int vfio_dev_fd) { struct vfio_group_status group_status = { @@ -474,7 +478,7 @@ vfio_release_device(const char *sysfs_base, const char *dev_addr, } int -vfio_enable(const char *modname) +rte_vfio_enable(const char *modname) { /* initialize group list */ int i; @@ -489,7 +493,7 @@ vfio_enable(const char *modname) /* inform the user that we are probing for VFIO */ RTE_LOG(INFO, EAL, "Probing VFIO support...\n"); - /* check if vfio-pci module is loaded */ + /* check if vfio module is loaded */ vfio_available = rte_eal_check_module(modname); /* return error directly */ @@ -519,7 +523,7 @@ vfio_enable(const char *modname) } int -vfio_is_enabled(const char *modname) +rte_vfio_is_enabled(const char *modname) { const int mod_available = rte_eal_check_module(modname); return vfio_cfg.vfio_enabled && mod_available; @@ -706,7 +710,10 @@ vfio_type1_dma_map(int vfio_container_fd) dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); dma_map.vaddr = ms[i].addr_64; dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; + if (rte_eal_iova_mode() == RTE_IOVA_VA) + dma_map.iova = dma_map.vaddr; + else + dma_map.iova = ms[i].iova; dma_map.flags = 
VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); @@ -759,10 +766,19 @@ vfio_spapr_dma_map(int vfio_container_fd) return -1; } - /* calculate window size based on number of hugepages configured */ - create.window_size = rte_eal_get_physmem_size(); + /* create DMA window from 0 to max(phys_addr + len) */ + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + if (ms[i].addr == NULL) + break; + + create.window_size = RTE_MAX(create.window_size, + ms[i].iova + ms[i].len); + } + + /* sPAPR requires window size to be a power of 2 */ + create.window_size = rte_align64pow2(create.window_size); create.page_shift = __builtin_ctzll(ms->hugepage_sz); - create.levels = 2; + create.levels = 1; ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create); if (ret) { @@ -771,6 +787,11 @@ vfio_spapr_dma_map(int vfio_container_fd) return -1; } + if (create.start_addr != 0) { + RTE_LOG(ERR, EAL, " DMA window start address != 0\n"); + return -1; + } + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ for (i = 0; i < RTE_MAX_MEMSEG; i++) { struct vfio_iommu_type1_dma_map dma_map; @@ -792,7 +813,10 @@ vfio_spapr_dma_map(int vfio_container_fd) dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); dma_map.vaddr = ms[i].addr_64; dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; + if (rte_eal_iova_mode() == RTE_IOVA_VA) + dma_map.iova = dma_map.vaddr; + else + dma_map.iova = ms[i].iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; @@ -816,4 +840,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) return 0; } +int +rte_vfio_noiommu_is_enabled(void) +{ + int fd, ret, cnt __rte_unused; + char c; + + ret = -1; + fd = open(VFIO_NOIOMMU_MODE, O_RDONLY); + if (fd < 0) + return -1; + + cnt = read(fd, &c, 1); + if (c == 'Y') + ret = 1; + + close(fd); + return ret; +} + #endif diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 5ff63e5d..ba7892b7 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -37,20 +37,18 @@ /* * determine if VFIO is present on the system */ -#ifdef RTE_EAL_VFIO +#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO) #include <linux/version.h> #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) -#include <linux/vfio.h> - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) -#define RTE_PCI_MSIX_TABLE_BIR 0x7 -#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8 -#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff +#define VFIO_PRESENT #else -#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR -#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET -#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE -#endif +#pragma message("VFIO configured but not supported by this kernel, disabling.") +#endif /* kernel version >= 3.6.0 */ +#endif /* RTE_EAL_VFIO */ + +#ifdef VFIO_PRESENT + +#include <linux/vfio.h> #define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU @@ -144,13 +142,6 @@ struct vfio_config { struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; }; -#define VFIO_DIR "/dev/vfio" -#define VFIO_CONTAINER_PATH "/dev/vfio/vfio" -#define VFIO_GROUP_FMT "/dev/vfio/%u" -#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" -#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) -#define VFIO_GET_REGION_IDX(x) (x >> 40) - /* DMA mapping function prototype. * Takes VFIO container fd as a parameter. * Returns 0 on success, -1 on error. 
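The type1 and sPAPR hunks above now choose the IOVA per memory segment: the virtual address itself when the EAL runs in RTE_IOVA_VA mode, the segment's iova (physical/IO address) otherwise. A trimmed-down sketch of the ioctl those loops issue; it is only a fragment and assumes container_fd is an already-configured /dev/vfio/vfio container and that the segment is hugepage-backed:

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    /* map one memory segment for DMA; in VA mode the IOVA is simply the vaddr */
    int dma_map_segment(int container_fd, void *addr, uint64_t len,
                        int iova_is_va, uint64_t iova_pa)
    {
        struct vfio_iommu_type1_dma_map dma_map;

        memset(&dma_map, 0, sizeof(dma_map));
        dma_map.argsz = sizeof(dma_map);
        dma_map.vaddr = (uintptr_t)addr;
        dma_map.size  = len;
        dma_map.iova  = iova_is_va ? dma_map.vaddr : iova_pa;
        dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;

        return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
    }

Using the vaddr as IOVA is what lets drivers skip virtual-to-physical translation entirely when an IOMMU is present.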
@@ -190,24 +181,6 @@ vfio_get_group_fd(int iommu_group_no); int clear_group(int vfio_group_fd); -/** - * Setup vfio_cfg for the device identified by its address. It discovers - * the configured I/O MMU groups or sets a new one for the device. If a new - * groups is assigned, the DMA mapping is performed. - * Returns 0 on success, a negative value on failure and a positive value in - * case the given device cannot be managed this way. - */ -int vfio_setup_device(const char *sysfs_base, const char *dev_addr, - int *vfio_dev_fd, struct vfio_device_info *device_info); - -int vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); - -int vfio_enable(const char *modname); -int vfio_is_enabled(const char *modname); - -int pci_vfio_enable(void); -int pci_vfio_is_enabled(void); - int vfio_mp_sync_setup(void); #define SOCKET_REQ_CONTAINER 0x100 @@ -217,8 +190,6 @@ int vfio_mp_sync_setup(void); #define SOCKET_NO_FD 0x1 #define SOCKET_ERR 0xFF -#define VFIO_PRESENT -#endif /* kernel version */ -#endif /* RTE_EAL_VFIO */ +#endif /* VFIO_PRESENT */ #endif /* EAL_VFIO_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c index 7e8095cb..b53ed7eb 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c @@ -49,12 +49,12 @@ #endif #include <rte_log.h> -#include <rte_pci.h> #include <rte_eal_memconfig.h> #include <rte_malloc.h> +#include <rte_vfio.h> #include "eal_filesystem.h" -#include "eal_pci_init.h" +#include "eal_vfio.h" #include "eal_thread.h" /** @@ -301,7 +301,8 @@ vfio_mp_sync_thread(void __rte_unused * arg) vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); else vfio_mp_sync_send_fd(conn_sock, fd); - close(fd); + if (fd >= 0) + close(fd); break; case SOCKET_REQ_GROUP: /* wait for group number */ diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c deleted file mode 100644 index 19db1cb5..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c +++ /dev/null @@ -1,381 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <errno.h> -#include <stdarg.h> -#include <stdlib.h> -#include <stdio.h> -#include <stdint.h> -#include <inttypes.h> -#include <string.h> -#include <sys/mman.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> -#include <sys/file.h> -#include <unistd.h> -#include <limits.h> -#include <sys/ioctl.h> -#include <sys/time.h> - -#include <rte_log.h> -#include <rte_memory.h> -#include <rte_memzone.h> -#include <rte_launch.h> -#include <rte_eal.h> -#include <rte_eal_memconfig.h> -#include <rte_per_lcore.h> -#include <rte_lcore.h> -#include <rte_common.h> -#include <rte_string_fns.h> - -#include "eal_private.h" -#include "eal_internal_cfg.h" -#include "eal_filesystem.h" -#include <exec-env/rte_dom0_common.h> - -#define PAGE_SIZE RTE_PGSIZE_4K -#define DEFAUL_DOM0_NAME "dom0-mem" - -static int xen_fd = -1; -static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB"; - -/* - * Try to mmap *size bytes in /dev/zero. If it is successful, return the - * pointer to the mmap'd area and keep *size unmodified. Else, retry - * with a smaller zone: decrease *size by mem_size until it reaches - * 0. In this case, return NULL. Note: this function returns an address - * which is a multiple of mem_size size. - */ -static void * -xen_get_virtual_area(size_t *size, size_t mem_size) -{ - void *addr; - int fd; - long aligned_addr; - - RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zu bytes\n", *size); - - fd = open("/dev/zero", O_RDONLY); - if (fd < 0){ - RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n"); - return NULL; - } - do { - addr = mmap(NULL, (*size) + mem_size, PROT_READ, - MAP_PRIVATE, fd, 0); - if (addr == MAP_FAILED) - *size -= mem_size; - } while (addr == MAP_FAILED && *size > 0); - - if (addr == MAP_FAILED) { - close(fd); - RTE_LOG(ERR, EAL, "Cannot get a virtual area\n"); - return NULL; - } - - munmap(addr, (*size) + mem_size); - close(fd); - - /* align addr to a mem_size boundary */ - aligned_addr = (uintptr_t)addr; - aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size); - addr = (void *)(aligned_addr); - - RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n", - addr, *size); - - return addr; -} - -/** - * Get memory size configuration from /sys/devices/virtual/misc/dom0_mm - * /memsize-mB/memsize file, and the size unit is mB. 
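xen_get_virtual_area() above reserves a region whose start is a multiple of mem_size by over-mapping size + mem_size bytes, unmapping, and rounding the returned address up. A minimal sketch of that alignment trick, using an anonymous mapping instead of /dev/zero:

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/mman.h>

    /* return an address aligned to 'align' where 'size' bytes are likely mappable */
    static void *probe_aligned_va(size_t size, size_t align)
    {
        void *addr = mmap(NULL, size + align, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        uintptr_t aligned;

        if (addr == MAP_FAILED)
            return NULL;

        /* give the range back; only the aligned hint is kept */
        munmap(addr, size + align);

        aligned = ((uintptr_t)addr + align - 1) & ~(align - 1);
        return (void *)aligned;
    }

    int main(void)
    {
        void *hint = probe_aligned_va(8 << 20, 2 << 20);   /* 8 MiB, 2 MiB aligned */
        printf("2 MiB-aligned hint: %p\n", hint);
        return 0;
    }
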
- */ -static int -get_xen_memory_size(void) -{ - char path[PATH_MAX]; - unsigned long mem_size = 0; - static const char *file_name; - - file_name = "memsize"; - snprintf(path, sizeof(path), "%s/%s", - sys_dir_path, file_name); - - if (eal_parse_sysfs_value(path, &mem_size) < 0) - return -1; - - if (mem_size == 0) - rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not" - " configured.\n",sys_dir_path, file_name); - if (mem_size % 2) - rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be" - " even number.\n",sys_dir_path, file_name); - - if (mem_size > DOM0_CONFIG_MEMSIZE) - rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger" - " than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE); - - return mem_size; -} - -/** - * Based on physical address to caculate MFN in Xen Dom0. - */ -phys_addr_t -rte_xen_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr) -{ - int mfn_id, i; - uint64_t mfn, mfn_offset; - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct rte_memseg *memseg = mcfg->memseg; - - /* find the memory segment owning the physical address */ - if (memseg_id == -1) { - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - if ((phy_addr >= memseg[i].phys_addr) && - (phy_addr < memseg[i].phys_addr + - memseg[i].len)) { - memseg_id = i; - break; - } - } - if (memseg_id == -1) - return RTE_BAD_PHYS_ADDR; - } - - mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M; - - /*the MFN is contiguous in 2M */ - mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) % - RTE_PGSIZE_2M / PAGE_SIZE; - mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id]; - - /** return mechine address */ - return mfn * PAGE_SIZE + phy_addr % PAGE_SIZE; -} - -int -rte_xen_dom0_memory_init(void) -{ - void *vir_addr, *vma_addr = NULL; - int err, ret = 0; - uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0; - size_t vma_len = 0; - struct memory_info meminfo; - struct memseg_info seginfo[RTE_MAX_MEMSEG]; - int flags, page_size = getpagesize(); - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct rte_memseg *memseg = mcfg->memseg; - uint64_t total_mem = internal_config.memory; - - memset(seginfo, 0, sizeof(seginfo)); - memset(&meminfo, 0, sizeof(struct memory_info)); - - mem_size = get_xen_memory_size(); - requested = (unsigned) (total_mem / 0x100000); - if (requested > mem_size) - /* if we didn't satisfy total memory requirements */ - rte_exit(EXIT_FAILURE,"Not enough memory available! 
Requested: %uMB," - " available: %uMB\n", requested, mem_size); - else if (total_mem != 0) - mem_size = requested; - - /* Check FD and open once */ - if (xen_fd < 0) { - xen_fd = open(DOM0_MM_DEV, O_RDWR); - if (xen_fd < 0) { - RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV); - return -1; - } - } - - meminfo.size = mem_size; - - /* construct memory mangement name for Dom0 */ - snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s", - internal_config.hugefile_prefix, DEFAUL_DOM0_NAME); - - /* Notify kernel driver to allocate memory */ - ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo); - if (ret < 0) { - RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n"); - err = -EIO; - goto fail; - } - - /* Get number of memory segment from driver */ - ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg); - if (ret < 0) { - RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n"); - err = -EIO; - goto fail; - } - - if(num_memseg > RTE_MAX_MEMSEG){ - RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater" - " than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG); - err = -EIO; - goto fail; - } - - /* get all memory segements information */ - ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo); - if (ret < 0) { - RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n"); - err = -EIO; - goto fail; - } - - /* map all memory segments to contiguous user space */ - for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++) - { - vma_len = seginfo[memseg_idx].size; - - /** - * get the biggest virtual memory area up to vma_len. If it fails, - * vma_addr is NULL, so let the kernel provide the address. - */ - vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M); - if (vma_addr == NULL) { - flags = MAP_SHARED; - vma_len = RTE_PGSIZE_2M; - } else - flags = MAP_SHARED | MAP_FIXED; - - seginfo[memseg_idx].size = vma_len; - vir_addr = mmap(vma_addr, seginfo[memseg_idx].size, - PROT_READ|PROT_WRITE, flags, xen_fd, - memseg_idx * page_size); - if (vir_addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n", - DOM0_MM_DEV); - err = -EIO; - goto fail; - } - - memseg[memseg_idx].addr = vir_addr; - memseg[memseg_idx].phys_addr = page_size * - seginfo[memseg_idx].pfn ; - memseg[memseg_idx].len = seginfo[memseg_idx].size; - for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++) - memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i]; - - /* MFNs are continuous in 2M, so assume that page size is 2M */ - memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M; - - memseg[memseg_idx].nchannel = mcfg->nchannel; - memseg[memseg_idx].nrank = mcfg->nrank; - - /* NUMA is not suppoted in Xen Dom0, so only set socket 0*/ - memseg[memseg_idx].socket_id = 0; - } - - return 0; -fail: - if (xen_fd > 0) { - close(xen_fd); - xen_fd = -1; - } - return err; -} - -/* - * This creates the memory mappings in the secondary process to match that of - * the server process. 
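The rte_xen_mem_phy2mch() helper removed above rebuilds a machine address from the per-segment MFN table: one MFN entry is stored per 2 MB block, MFNs are contiguous within a block, so the translation is base MFN of the block plus the 4 KB page index inside it, plus the byte offset inside the page. On made-up numbers:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t pg_4k = 4096, pg_2m = 2 << 20;

        /* illustrative segment: phys base 0x40000000, first 2 MB block has MFN 0x80000 */
        uint64_t seg_phys = 0x40000000, mfn_of_block0 = 0x80000;
        uint64_t phys = 0x40001234;                     /* address to translate */

        uint64_t off        = phys - seg_phys;
        uint64_t block      = off / pg_2m;              /* which 2 MB block: 0 here  */
        uint64_t page_in_bl = (off % pg_2m) / pg_4k;    /* 4 KB page in the block: 1 */
        uint64_t mfn        = mfn_of_block0 + page_in_bl;
        uint64_t mach       = mfn * pg_4k + phys % pg_4k;   /* 0x80001234 */

        printf("block %" PRIu64 ", mfn 0x%" PRIx64 ", machine 0x%" PRIx64 "\n",
               block, mfn, mach);
        return 0;
    }
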
It goes through each memory segment in the DPDK runtime - * configuration, mapping them in order to form a contiguous block in the - * virtual memory space - */ -int -rte_xen_dom0_memory_attach(void) -{ - const struct rte_mem_config *mcfg; - unsigned s = 0; /* s used to track the segment number */ - int xen_fd = -1; - int ret = -1; - void *vir_addr; - char name[DOM0_NAME_MAX] = {0}; - int page_size = getpagesize(); - - mcfg = rte_eal_get_configuration()->mem_config; - - /* Check FD and open once */ - if (xen_fd < 0) { - xen_fd = open(DOM0_MM_DEV, O_RDWR); - if (xen_fd < 0) { - RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV); - goto error; - } - } - - /* construct memory mangement name for Dom0 */ - snprintf(name, DOM0_NAME_MAX, "%s-%s", - internal_config.hugefile_prefix, DEFAUL_DOM0_NAME); - /* attach to memory segments of primary process */ - ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name); - if (ret) { - RTE_LOG(ERR, EAL,"attach memory segments fail.\n"); - goto error; - } - - /* map all segments into memory to make sure we get the addrs */ - for (s = 0; s < RTE_MAX_MEMSEG; ++s) { - - /* - * the first memory segment with len==0 is the one that - * follows the last valid segment. - */ - if (mcfg->memseg[s].len == 0) - break; - - vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, - PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd, - s * page_size); - if (vir_addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " - "in %s to requested address [%p]\n", - (unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV, - mcfg->memseg[s].addr); - goto error; - } - } - return 0; - -error: - if (xen_fd >= 0) { - close(xen_fd); - xen_fd = -1; - } - return -1; -} diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h deleted file mode 100644 index d9707780..00000000 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h +++ /dev/null @@ -1,108 +0,0 @@ -/*- - * This file is provided under a dual BSD/LGPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GNU LESSER GENERAL PUBLIC LICENSE - * - * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * Contact Information: - * Intel Corporation - * - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef _RTE_DOM0_COMMON_H_ -#define _RTE_DOM0_COMMON_H_ - -#ifdef __KERNEL__ -#include <linux/if.h> -#endif - -#define DOM0_NAME_MAX 256 -#define DOM0_MM_DEV "/dev/dom0_mm" - -#define DOM0_CONTIG_NUM_ORDER 9 /**< order of 2M */ -#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */ -#define DOM0_MEMBLOCK_SIZE 0x200000 /**< size of memory block(2M). */ -#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */ -#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */ - -#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info) -#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *) -#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int) -#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *) - -/** - * A structure used to store memory information. - */ -struct memory_info { - char name[DOM0_NAME_MAX]; - uint64_t size; -}; - -/** - * A structure used to store memory segment information. - */ -struct memseg_info { - uint32_t idx; - uint64_t pfn; - uint64_t size; - uint64_t mfn[DOM0_NUM_MEMBLOCK]; -}; - -/** - * A structure used to store memory block information. 
- */ -struct memblock_info { - uint8_t exchange_flag; - uint8_t used; - uint64_t vir_addr; - uint64_t pfn; - uint64_t mfn; -}; -#endif /* _RTE_DOM0_COMMON_H_ */ diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h index b800a53c..ce456d4b 100644 --- a/lib/librte_eal/linuxapp/igb_uio/compat.h +++ b/lib/librte_eal/linuxapp/igb_uio/compat.h @@ -16,12 +16,9 @@ #endif #ifndef PCI_MSIX_ENTRY_SIZE -#define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_LOWER_ADDR 0 -#define PCI_MSIX_ENTRY_UPPER_ADDR 4 -#define PCI_MSIX_ENTRY_DATA 8 -#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 +#define PCI_MSIX_ENTRY_SIZE 16 +#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 #endif /* @@ -124,6 +121,14 @@ static bool pci_check_and_mask_intx(struct pci_dev *pdev) #endif /* < 3.3.0 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) -#define HAVE_PCI_ENABLE_MSIX +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) +#define HAVE_ALLOC_IRQ_VECTORS 1 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) +#define HAVE_MSI_LIST_IN_GENERIC_DEVICE 1 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) +#define HAVE_PCI_MSI_MASK_IRQ 1 #endif diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index 07a19a31..a3a98c17 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -29,13 +29,11 @@ #include <linux/pci.h> #include <linux/uio_driver.h> #include <linux/io.h> +#include <linux/irq.h> #include <linux/msi.h> #include <linux/version.h> #include <linux/slab.h> -#ifdef CONFIG_XEN_DOM0 -#include <xen/xen.h> -#endif #include <rte_pci_dev_features.h> #include "compat.h" @@ -51,7 +49,6 @@ struct rte_uio_pci_dev { static char *intr_mode; static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; - /* sriov sysfs */ static ssize_t show_max_vfs(struct device *dev, struct device_attribute *attr, @@ -91,14 +88,16 @@ static struct attribute *dev_attrs[] = { static const struct attribute_group dev_attr_grp = { .attrs = dev_attrs, }; + +#ifndef HAVE_PCI_MSI_MASK_IRQ /* * It masks the msix on/off of generating MSI-X messages. */ static void -igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state) +igbuio_msix_mask_irq(struct msi_desc *desc, s32 state) { u32 mask_bits = desc->masked; - unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + unsigned int offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; if (state != 0) @@ -113,6 +112,52 @@ igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state) } } +/* + * It masks the msi on/off of generating MSI messages. 
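igbuio_msix_mask_irq() above masks a single MSI-X vector by toggling the mask bit in that vector's control word, which sits at entry_nr * 16 + 12 inside the MSI-X table (the layout is fixed by the PCI spec; the constants mirror the ones kept in compat.h, though the macro names below are local to this sketch). A tiny arithmetic sketch:

    #include <stdint.h>
    #include <stdio.h>

    #define MSIX_ENTRY_SIZE        16   /* each table entry: addr lo/hi, data, vector ctrl */
    #define MSIX_ENTRY_VECTOR_CTRL 12   /* control word is the last dword of the entry     */
    #define MSIX_CTRL_MASKBIT      1    /* bit 0 of the control word masks the vector      */

    int main(void)
    {
        unsigned int entry_nr = 3;      /* illustrative vector number */
        unsigned int offset = entry_nr * MSIX_ENTRY_SIZE + MSIX_ENTRY_VECTOR_CTRL;
        uint32_t ctrl = 0;              /* pretend value read from the table */

        uint32_t masked   = ctrl | MSIX_CTRL_MASKBIT;
        uint32_t unmasked = ctrl & ~MSIX_CTRL_MASKBIT;

        printf("entry %u: ctrl word at table offset 0x%x, masked=0x%x, unmasked=0x%x\n",
               entry_nr, offset, masked, unmasked);
        return 0;
    }

On kernels that provide pci_msi_mask_irq()/pci_msi_unmask_irq() the driver delegates to those helpers instead, which is what the HAVE_PCI_MSI_MASK_IRQ gate added above selects.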
+ */ +static void +igbuio_msi_mask_irq(struct pci_dev *pdev, struct msi_desc *desc, int32_t state) +{ + u32 mask_bits = desc->masked; + u32 offset = desc->irq - pdev->irq; + u32 mask = 1 << offset; + + if (!desc->msi_attrib.maskbit) + return; + + if (state != 0) + mask_bits &= ~mask; + else + mask_bits |= mask; + + if (mask_bits != desc->masked) { + pci_write_config_dword(pdev, desc->mask_pos, mask_bits); + desc->masked = mask_bits; + } +} + +static void +igbuio_mask_irq(struct pci_dev *pdev, enum rte_intr_mode mode, s32 irq_state) +{ + struct msi_desc *desc; + struct list_head *msi_list; + +#ifdef HAVE_MSI_LIST_IN_GENERIC_DEVICE + msi_list = &pdev->dev.msi_list; +#else + msi_list = &pdev->msi_list; +#endif + + if (mode == RTE_INTR_MODE_MSIX) { + list_for_each_entry(desc, msi_list, list) + igbuio_msix_mask_irq(desc, irq_state); + } else if (mode == RTE_INTR_MODE_MSI) { + list_for_each_entry(desc, msi_list, list) + igbuio_msi_mask_irq(pdev, desc, irq_state); + } +} +#endif + /** * This is the irqcontrol callback to be registered to uio_info. * It can be used to disable/enable interrupt from user space processes. @@ -132,21 +177,26 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) struct rte_uio_pci_dev *udev = info->priv; struct pci_dev *pdev = udev->pdev; - pci_cfg_access_lock(pdev); - if (udev->mode == RTE_INTR_MODE_LEGACY) - pci_intx(pdev, !!irq_state); +#ifdef HAVE_PCI_MSI_MASK_IRQ + struct irq_data *irq = irq_get_irq_data(udev->info.irq); +#endif - else if (udev->mode == RTE_INTR_MODE_MSIX) { - struct msi_desc *desc; + pci_cfg_access_lock(pdev); -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)) - list_for_each_entry(desc, &pdev->msi_list, list) - igbuio_msix_mask_irq(desc, irq_state); + if (udev->mode == RTE_INTR_MODE_MSIX || udev->mode == RTE_INTR_MODE_MSI) { +#ifdef HAVE_PCI_MSI_MASK_IRQ + if (irq_state == 1) + pci_msi_unmask_irq(irq); + else + pci_msi_mask_irq(irq); #else - list_for_each_entry(desc, &pdev->dev.msi_list, list) - igbuio_msix_mask_irq(desc, irq_state); + igbuio_mask_irq(pdev, udev->mode, irq_state); #endif } + + if (udev->mode == RTE_INTR_MODE_LEGACY) + pci_intx(pdev, !!irq_state); + pci_cfg_access_unlock(pdev); return 0; @@ -157,19 +207,125 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) * If yes, disable it here and will be enable later. 
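igbuio_pci_irqhandler() and igbuio_pci_irqcontrol() are the kernel half of the UIO interrupt contract: uio_event_notify() wakes readers of /dev/uioX, and a 4-byte write from userspace lands in irqcontrol() to re-enable (1) or mask (0) the interrupt. The userspace half, as a minimal sketch (the device node name is illustrative):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/dev/uio0", O_RDWR);   /* illustrative uio node */
        uint32_t enable = 1, count;

        if (fd < 0)
            return 1;

        for (;;) {
            /* ask irqcontrol() to (re-)enable the interrupt before waiting */
            if (write(fd, &enable, sizeof(enable)) != sizeof(enable))
                break;

            /* blocks until the driver calls uio_event_notify(); returns total count */
            if (read(fd, &count, sizeof(count)) != sizeof(count))
                break;

            printf("interrupt, event count = %u\n", count);
            /* a real driver would service the device here */
        }
        close(fd);
        return 0;
    }
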
*/ static irqreturn_t -igbuio_pci_irqhandler(int irq, struct uio_info *info) +igbuio_pci_irqhandler(int irq, void *dev_id) { - struct rte_uio_pci_dev *udev = info->priv; + struct rte_uio_pci_dev *udev = (struct rte_uio_pci_dev *)dev_id; + struct uio_info *info = &udev->info; /* Legacy mode need to mask in hardware */ if (udev->mode == RTE_INTR_MODE_LEGACY && !pci_check_and_mask_intx(udev->pdev)) return IRQ_NONE; + uio_event_notify(info); + /* Message signal mode, no share IRQ and automasked */ return IRQ_HANDLED; } +static int +igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev) +{ + int err = 0; +#ifndef HAVE_ALLOC_IRQ_VECTORS + struct msix_entry msix_entry; +#endif + + switch (igbuio_intr_mode_preferred) { + case RTE_INTR_MODE_MSIX: + /* Only 1 msi-x vector needed */ +#ifndef HAVE_ALLOC_IRQ_VECTORS + msix_entry.entry = 0; + if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) { + dev_dbg(&udev->pdev->dev, "using MSI-X"); + udev->info.irq_flags = IRQF_NO_THREAD; + udev->info.irq = msix_entry.vector; + udev->mode = RTE_INTR_MODE_MSIX; + break; + } +#else + if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) { + dev_dbg(&udev->pdev->dev, "using MSI-X"); + udev->info.irq_flags = IRQF_NO_THREAD; + udev->info.irq = pci_irq_vector(udev->pdev, 0); + udev->mode = RTE_INTR_MODE_MSIX; + break; + } +#endif + + /* fall back to MSI */ + case RTE_INTR_MODE_MSI: +#ifndef HAVE_ALLOC_IRQ_VECTORS + if (pci_enable_msi(udev->pdev) == 0) { + dev_dbg(&udev->pdev->dev, "using MSI"); + udev->info.irq_flags = IRQF_NO_THREAD; + udev->info.irq = udev->pdev->irq; + udev->mode = RTE_INTR_MODE_MSI; + break; + } +#else + if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) { + dev_dbg(&udev->pdev->dev, "using MSI"); + udev->info.irq_flags = IRQF_NO_THREAD; + udev->info.irq = pci_irq_vector(udev->pdev, 0); + udev->mode = RTE_INTR_MODE_MSI; + break; + } +#endif + /* fall back to INTX */ + case RTE_INTR_MODE_LEGACY: + if (pci_intx_mask_supported(udev->pdev)) { + dev_dbg(&udev->pdev->dev, "using INTX"); + udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD; + udev->info.irq = udev->pdev->irq; + udev->mode = RTE_INTR_MODE_LEGACY; + break; + } + dev_notice(&udev->pdev->dev, "PCI INTX mask not supported\n"); + /* fall back to no IRQ */ + case RTE_INTR_MODE_NONE: + udev->mode = RTE_INTR_MODE_NONE; + udev->info.irq = UIO_IRQ_NONE; + break; + + default: + dev_err(&udev->pdev->dev, "invalid IRQ mode %u", + igbuio_intr_mode_preferred); + udev->info.irq = UIO_IRQ_NONE; + err = -EINVAL; + } + + if (udev->info.irq != UIO_IRQ_NONE) + err = request_irq(udev->info.irq, igbuio_pci_irqhandler, + udev->info.irq_flags, udev->info.name, + udev); + dev_info(&udev->pdev->dev, "uio device registered with irq %lx\n", + udev->info.irq); + + return err; +} + +static void +igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev) +{ + if (udev->info.irq) { + free_irq(udev->info.irq, udev); + udev->info.irq = 0; + } + +#ifndef HAVE_ALLOC_IRQ_VECTORS + if (udev->mode == RTE_INTR_MODE_MSIX) + pci_disable_msix(udev->pdev); + if (udev->mode == RTE_INTR_MODE_MSI) + pci_disable_msi(udev->pdev); +#else + if (udev->mode == RTE_INTR_MODE_MSIX || + udev->mode == RTE_INTR_MODE_MSI) + pci_free_irq_vectors(udev->pdev); +#endif +} + + /** * This gets called while opening uio device file. 
*/ @@ -178,12 +334,17 @@ igbuio_pci_open(struct uio_info *info, struct inode *inode) { struct rte_uio_pci_dev *udev = info->priv; struct pci_dev *dev = udev->pdev; - - pci_reset_function(dev); + int err; /* set bus master, which was cleared by the reset function */ pci_set_master(dev); + /* enable interrupts */ + err = igbuio_pci_enable_interrupts(udev); + if (err) { + dev_err(&dev->dev, "Enable interrupt fails\n"); + return err; + } return 0; } @@ -193,60 +354,15 @@ igbuio_pci_release(struct uio_info *info, struct inode *inode) struct rte_uio_pci_dev *udev = info->priv; struct pci_dev *dev = udev->pdev; + /* disable interrupts */ + igbuio_pci_disable_interrupts(udev); + /* stop the device from further DMA */ pci_clear_master(dev); - pci_reset_function(dev); - return 0; } -#ifdef CONFIG_XEN_DOM0 -static int -igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma) -{ - int idx; - - idx = (int)vma->vm_pgoff; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -#ifdef HAVE_PTE_MASK_PAGE_IOMAP - vma->vm_page_prot.pgprot |= _PAGE_IOMAP; -#endif - - return remap_pfn_range(vma, - vma->vm_start, - info->mem[idx].addr >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); -} - -/** - * This is uio device mmap method which will use igbuio mmap for Xen - * Dom0 environment. - */ -static int -igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma) -{ - int idx; - - if (vma->vm_pgoff >= MAX_UIO_MAPS) - return -EINVAL; - - if (info->mem[vma->vm_pgoff].size == 0) - return -EINVAL; - - idx = (int)vma->vm_pgoff; - switch (info->mem[idx].memtype) { - case UIO_MEM_PHYS: - return igbuio_dom0_mmap_phys(info, vma); - case UIO_MEM_LOGICAL: - case UIO_MEM_VIRTUAL: - default: - return -EINVAL; - } -} -#endif - /* Remap pci resources described by bar #pci_bar in uio resource n. 
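 *
 * Each resource set up this way is later reachable from user space with
 * the usual UIO mmap() convention, where the offset encodes the resource
 * index n; a sketch (length and index illustrative):
 *
 *   void *bar = mmap(NULL, bar_len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *                    uio_fd, n * getpagesize());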
*/ static int igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info, @@ -356,9 +472,6 @@ static int igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct rte_uio_pci_dev *udev; -#ifdef HAVE_PCI_ENABLE_MSIX - struct msix_entry msix_entry; -#endif dma_addr_t map_dma_addr; void *map_addr; int err; @@ -401,61 +514,12 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) /* fill uio infos */ udev->info.name = "igb_uio"; udev->info.version = "0.1"; - udev->info.handler = igbuio_pci_irqhandler; udev->info.irqcontrol = igbuio_pci_irqcontrol; udev->info.open = igbuio_pci_open; udev->info.release = igbuio_pci_release; -#ifdef CONFIG_XEN_DOM0 - /* check if the driver run on Xen Dom0 */ - if (xen_initial_domain()) - udev->info.mmap = igbuio_dom0_pci_mmap; -#endif udev->info.priv = udev; udev->pdev = dev; - switch (igbuio_intr_mode_preferred) { - case RTE_INTR_MODE_MSIX: - /* Only 1 msi-x vector needed */ -#ifdef HAVE_PCI_ENABLE_MSIX - msix_entry.entry = 0; - if (pci_enable_msix(dev, &msix_entry, 1) == 0) { - dev_dbg(&dev->dev, "using MSI-X"); - udev->info.irq_flags = IRQF_NO_THREAD; - udev->info.irq = msix_entry.vector; - udev->mode = RTE_INTR_MODE_MSIX; - break; - } -#else - if (pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_MSIX) == 1) { - dev_dbg(&dev->dev, "using MSI-X"); - udev->info.irq = pci_irq_vector(dev, 0); - udev->mode = RTE_INTR_MODE_MSIX; - break; - } -#endif - /* fall back to INTX */ - case RTE_INTR_MODE_LEGACY: - if (pci_intx_mask_supported(dev)) { - dev_dbg(&dev->dev, "using INTX"); - udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD; - udev->info.irq = dev->irq; - udev->mode = RTE_INTR_MODE_LEGACY; - break; - } - dev_notice(&dev->dev, "PCI INTX mask not supported\n"); - /* fall back to no IRQ */ - case RTE_INTR_MODE_NONE: - udev->mode = RTE_INTR_MODE_NONE; - udev->info.irq = 0; - break; - - default: - dev_err(&dev->dev, "invalid IRQ mode %u", - igbuio_intr_mode_preferred); - err = -EINVAL; - goto fail_release_iomem; - } - err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp); if (err != 0) goto fail_release_iomem; @@ -467,9 +531,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) pci_set_drvdata(dev, udev); - dev_info(&dev->dev, "uio device registered with irq %lx\n", - udev->info.irq); - /* * Doing a harmless dma mapping for attaching the device to * the iommu identity mapping if kernel boots with iommu=pt. 
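Note that with the interrupt plumbing moved out of probe(), the mode is still
selected through the existing intr_mode module parameter, which the hunks
below extend with an "msi" value alongside "msix" and "legacy"; loading the
driver with, e.g., "modprobe igb_uio intr_mode=msi" (exact load command
depends on how the module is installed locally) exercises the new MSI path.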
@@ -497,8 +558,6 @@ fail_remove_group: sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); fail_release_iomem: igbuio_pci_release_iomem(&udev->info); - if (udev->mode == RTE_INTR_MODE_MSIX) - pci_disable_msix(udev->pdev); pci_disable_device(dev); fail_free: kfree(udev); @@ -514,8 +573,6 @@ igbuio_pci_remove(struct pci_dev *dev) sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); uio_unregister_device(&udev->info); igbuio_pci_release_iomem(&udev->info); - if (udev->mode == RTE_INTR_MODE_MSIX) - pci_disable_msix(dev); pci_disable_device(dev); pci_set_drvdata(dev, NULL); kfree(udev); @@ -532,6 +589,9 @@ igbuio_config_intr_mode(char *intr_str) if (!strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME)) { igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; pr_info("Use MSIX interrupt\n"); + } else if (!strcmp(intr_str, RTE_INTR_MODE_MSI_NAME)) { + igbuio_intr_mode_preferred = RTE_INTR_MODE_MSI; + pr_info("Use MSI interrupt\n"); } else if (!strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME)) { igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY; pr_info("Use legacy interrupt\n"); @@ -575,6 +635,7 @@ module_param(intr_mode, charp, S_IRUGO); MODULE_PARM_DESC(intr_mode, "igb_uio interrupt mode (default=msix):\n" " " RTE_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n" +" " RTE_INTR_MODE_MSI_NAME " Use MSI interrupt\n" " " RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n" "\n"); diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h index 6a1587b4..3f8c0bc8 100644 --- a/lib/librte_eal/linuxapp/kni/compat.h +++ b/lib/librte_eal/linuxapp/kni/compat.h @@ -8,6 +8,34 @@ #define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) #endif +/* SuSE version macro is the same as Linux kernel version */ +#ifndef SLE_VERSION +#define SLE_VERSION(a, b, c) KERNEL_VERSION(a, b, c) +#endif +#ifdef CONFIG_SUSE_KERNEL +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57)) +/* SLES12SP3 is at least 4.4.57+ based */ +#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0) +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 28)) +/* SLES12 is at least 3.12.28+ based */ +#define SLE_VERSION_CODE SLE_VERSION(12, 0, 0) +#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 61)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0))) +/* SLES11 SP3 is at least 3.0.61+ based */ +#define SLE_VERSION_CODE SLE_VERSION(11, 3, 0) +#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)) +/* SLES11 SP1 is 2.6.32 based */ +#define SLE_VERSION_CODE SLE_VERSION(11, 1, 0) +#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 27)) +/* SLES11 GA is 2.6.27 based */ +#define SLE_VERSION_CODE SLE_VERSION(11, 0, 0) +#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */ +#endif /* CONFIG_SUSE_KERNEL */ +#ifndef SLE_VERSION_CODE +#define SLE_VERSION_CODE 0 +#endif /* SLE_VERSION_CODE */ + + #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ (!(defined(RHEL_RELEASE_CODE) && \ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) @@ -55,7 +83,8 @@ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) || \ (defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4)) + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4)) || \ + (SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(12, 3, 0)) #define HAVE_TRANS_START_HELPER #endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h index e0a03542..e38a7561 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h @@ -697,22 +697,22 @@ struct _kc_ethtool_pauseparam { #define 
SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c) #endif #ifdef CONFIG_SUSE_KERNEL -#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) ) -/* SLES11 GA is 2.6.27 based */ -#define SLE_VERSION_CODE SLE_VERSION(11,0,0) -#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) ) -/* SLES11 SP1 is 2.6.32 based */ -#define SLE_VERSION_CODE SLE_VERSION(11,1,0) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57)) +/* SLES12SP3 is at least 4.4.57+ based */ +#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0) +#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) ) +/* SLES12 is at least 3.12.28+ based */ +#define SLE_VERSION_CODE SLE_VERSION(12,0,0) #elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,61)) && \ (LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0))) /* SLES11 SP3 is at least 3.0.61+ based */ #define SLE_VERSION_CODE SLE_VERSION(11,3,0) -#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) ) -/* SLES12 is at least 3.12.28+ based */ -#define SLE_VERSION_CODE SLE_VERSION(12,0,0) -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57)) -/* SLES12SP3 is at least 4.4.57+ based */ -#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0) +#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) ) +/* SLES11 SP1 is 2.6.32 based */ +#define SLE_VERSION_CODE SLE_VERSION(11,1,0) +#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) ) +/* SLES11 GA is 2.6.27 based */ +#define SLE_VERSION_CODE SLE_VERSION(11,0,0) #endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */ #endif /* CONFIG_SUSE_KERNEL */ #ifndef SLE_VERSION_CODE diff --git a/lib/librte_eal/linuxapp/xen_dom0/compat.h b/lib/librte_eal/linuxapp/xen_dom0/compat.h deleted file mode 100644 index e6eb97f2..00000000 --- a/lib/librte_eal/linuxapp/xen_dom0/compat.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Minimal wrappers to allow compiling xen_dom0 on older kernels. - */ - -#ifndef RHEL_RELEASE_VERSION -#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ - (!(defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) - -#define kstrtoul strict_strtoul - -#endif /* < 2.6.39 */ diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h deleted file mode 100644 index 9d5ffb22..00000000 --- a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h +++ /dev/null @@ -1,107 +0,0 @@ -/*- - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. - * - * Contact Information: - * Intel Corporation - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ -#ifndef _DOM0_MM_DEV_H_ -#define _DOM0_MM_DEV_H_ - -#include <linux/wait.h> -#include <linux/mutex.h> -#include <linux/sched.h> -#include <linux/spinlock.h> -#include <exec-env/rte_dom0_common.h> - -#define NUM_MEM_CTX 256 /**< Maximum number of memory context*/ -#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum times of allowing exchange fail .*/ -#define MAX_MEMBLOCK_SIZE (2 * DOM0_MEMBLOCK_SIZE) -#define MAX_NUM_ORDER (DOM0_CONTIG_NUM_ORDER + 1) -#define SIZE_PER_BLOCK 2 /**< Size of memory block (2MB).*/ - -/** - * A structure describing the private information for a dom0 device. - */ -struct dom0_mm_dev { - struct miscdevice miscdev; - uint8_t fail_times; - uint32_t used_memsize; - uint32_t num_mem_ctx; - uint32_t config_memsize; - uint32_t num_bigblock; - struct dom0_mm_data *mm_data[NUM_MEM_CTX]; - struct mutex data_lock; -}; - -struct dom0_mm_data{ - uint32_t refcnt; - uint32_t num_memseg; /**< Number of memory segment. */ - uint32_t mem_size; /**< Size of requesting memory. */ - - char name[DOM0_NAME_MAX]; - - /** Store global memory block IDs used by an instance */ - uint32_t block_num[DOM0_NUM_MEMBLOCK]; - - /** Store memory block information.*/ - struct memblock_info block_info[DOM0_NUM_MEMBLOCK]; - - /** Store memory segment information.*/ - struct memseg_info seg_info[DOM0_NUM_MEMSEG]; -}; - -#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args) -#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args) -#endif diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c deleted file mode 100644 index 79630bad..00000000 --- a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c +++ /dev/null @@ -1,780 +0,0 @@ -/*- - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. - * - * Contact Information: - * Intel Corporation - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include <linux/module.h> -#include <linux/miscdevice.h> -#include <linux/fs.h> -#include <linux/device.h> -#include <linux/errno.h> -#include <linux/vmalloc.h> -#include <linux/mm.h> -#include <linux/version.h> - -#include <xen/xen.h> -#include <xen/page.h> -#include <xen/xen-ops.h> -#include <xen/interface/memory.h> - -#include <exec-env/rte_dom0_common.h> - -#include "compat.h" -#include "dom0_mm_dev.h" - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Intel Corporation"); -MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0"); - -static struct dom0_mm_dev dom0_dev; -static struct kobject *dom0_kobj = NULL; - -static struct memblock_info *rsv_mm_info; - -/* Default configuration for reserved memory size(2048 MB). 
*/ -static uint32_t rsv_memsize = 2048; - -static int dom0_open(struct inode *inode, struct file *file); -static int dom0_release(struct inode *inode, struct file *file); -static int dom0_ioctl(struct file *file, unsigned int ioctl_num, - unsigned long ioctl_param); -static int dom0_mmap(struct file *file, struct vm_area_struct *vma); -static int dom0_memory_free(uint32_t size); -static int dom0_memory_release(struct dom0_mm_data *mm_data); - -static const struct file_operations data_fops = { - .owner = THIS_MODULE, - .open = dom0_open, - .release = dom0_release, - .mmap = dom0_mmap, - .unlocked_ioctl = (void *)dom0_ioctl, -}; - -static ssize_t -show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf) -{ - return snprintf(buf, 10, "%u\n", dom0_dev.used_memsize); -} - -static ssize_t -show_memsize(struct device *dev, struct device_attribute *attr, char *buf) -{ - return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize); -} - -static ssize_t -store_memsize(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - int err = 0; - unsigned long mem_size; - - if (0 != kstrtoul(buf, 0, &mem_size)) - return -EINVAL; - - mutex_lock(&dom0_dev.data_lock); - if (0 == mem_size) { - err = -EINVAL; - goto fail; - } else if (mem_size > (rsv_memsize - dom0_dev.used_memsize)) { - XEN_ERR("configure memory size fail\n"); - err = -EINVAL; - goto fail; - } else - dom0_dev.config_memsize = mem_size; - -fail: - mutex_unlock(&dom0_dev.data_lock); - return err ? err : count; -} - -static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize); -static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL); - -static struct attribute *dev_attrs[] = { - &dev_attr_memsize.attr, - &dev_attr_memsize_rsvd.attr, - NULL, -}; - -/* the memory size unit is MB */ -static const struct attribute_group dev_attr_grp = { - .name = "memsize-mB", - .attrs = dev_attrs, -}; - - -static void -sort_viraddr(struct memblock_info *mb, int cnt) -{ - int i,j; - uint64_t tmp_pfn; - uint64_t tmp_viraddr; - - /*sort virtual address and pfn */ - for(i = 0; i < cnt; i ++) { - for(j = cnt - 1; j > i; j--) { - if(mb[j].pfn < mb[j - 1].pfn) { - tmp_pfn = mb[j - 1].pfn; - mb[j - 1].pfn = mb[j].pfn; - mb[j].pfn = tmp_pfn; - - tmp_viraddr = mb[j - 1].vir_addr; - mb[j - 1].vir_addr = mb[j].vir_addr; - mb[j].vir_addr = tmp_viraddr; - } - } - } -} - -static int -dom0_find_memdata(const char * mem_name) -{ - unsigned i; - int idx = -1; - for(i = 0; i< NUM_MEM_CTX; i++) { - if(dom0_dev.mm_data[i] == NULL) - continue; - if (!strncmp(dom0_dev.mm_data[i]->name, mem_name, - sizeof(char) * DOM0_NAME_MAX)) { - idx = i; - break; - } - } - - return idx; -} - -static int -dom0_find_mempos(void) -{ - unsigned i; - int idx = -1; - - for(i = 0; i< NUM_MEM_CTX; i++) { - if(dom0_dev.mm_data[i] == NULL){ - idx = i; - break; - } - } - - return idx; -} - -static int -dom0_memory_release(struct dom0_mm_data *mm_data) -{ - int idx; - uint32_t num_block, block_id; - - /* each memory block is 2M */ - num_block = mm_data->mem_size / SIZE_PER_BLOCK; - if (num_block == 0) - return -EINVAL; - - /* reset global memory data */ - idx = dom0_find_memdata(mm_data->name); - if (idx >= 0) { - dom0_dev.used_memsize -= mm_data->mem_size; - dom0_dev.mm_data[idx] = NULL; - dom0_dev.num_mem_ctx--; - } - - /* reset these memory blocks status as free */ - for (idx = 0; idx < num_block; idx++) { - block_id = mm_data->block_num[idx]; - rsv_mm_info[block_id].used = 0; - } - - memset(mm_data, 0, sizeof(struct dom0_mm_data)); - 
vfree(mm_data); - return 0; -} - -static int -dom0_memory_free(uint32_t rsv_size) -{ - uint64_t vstart, vaddr; - uint32_t i, num_block, size; - - if (!xen_pv_domain()) - return -1; - - /* each memory block is 2M */ - num_block = rsv_size / SIZE_PER_BLOCK; - if (num_block == 0) - return -EINVAL; - - /* free all memory blocks of size of 4M and destroy contiguous region */ - for (i = 0; i < dom0_dev.num_bigblock * 2; i += 2) { - vstart = rsv_mm_info[i].vir_addr; - if (vstart) { - #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) - if (rsv_mm_info[i].exchange_flag) - xen_destroy_contiguous_region(vstart, - DOM0_CONTIG_NUM_ORDER); - if (rsv_mm_info[i + 1].exchange_flag) - xen_destroy_contiguous_region(vstart + - DOM0_MEMBLOCK_SIZE, - DOM0_CONTIG_NUM_ORDER); - #else - if (rsv_mm_info[i].exchange_flag) - xen_destroy_contiguous_region(rsv_mm_info[i].pfn - * PAGE_SIZE, - DOM0_CONTIG_NUM_ORDER); - if (rsv_mm_info[i + 1].exchange_flag) - xen_destroy_contiguous_region(rsv_mm_info[i].pfn - * PAGE_SIZE + DOM0_MEMBLOCK_SIZE, - DOM0_CONTIG_NUM_ORDER); - #endif - - size = DOM0_MEMBLOCK_SIZE * 2; - vaddr = vstart; - while (size > 0) { - ClearPageReserved(virt_to_page(vaddr)); - vaddr += PAGE_SIZE; - size -= PAGE_SIZE; - } - free_pages(vstart, MAX_NUM_ORDER); - } - } - - /* free all memory blocks size of 2M and destroy contiguous region */ - for (; i < num_block; i++) { - vstart = rsv_mm_info[i].vir_addr; - if (vstart) { - if (rsv_mm_info[i].exchange_flag) - xen_destroy_contiguous_region(vstart, - DOM0_CONTIG_NUM_ORDER); - - size = DOM0_MEMBLOCK_SIZE; - vaddr = vstart; - while (size > 0) { - ClearPageReserved(virt_to_page(vaddr)); - vaddr += PAGE_SIZE; - size -= PAGE_SIZE; - } - free_pages(vstart, DOM0_CONTIG_NUM_ORDER); - } - } - - memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block); - vfree(rsv_mm_info); - rsv_mm_info = NULL; - - return 0; -} - -static void -find_free_memory(uint32_t count, struct dom0_mm_data *mm_data) -{ - uint32_t i = 0; - uint32_t j = 0; - - while ((i < count) && (j < rsv_memsize / SIZE_PER_BLOCK)) { - if (rsv_mm_info[j].used == 0) { - mm_data->block_info[i].pfn = rsv_mm_info[j].pfn; - mm_data->block_info[i].vir_addr = - rsv_mm_info[j].vir_addr; - mm_data->block_info[i].mfn = rsv_mm_info[j].mfn; - mm_data->block_info[i].exchange_flag = - rsv_mm_info[j].exchange_flag; - mm_data->block_num[i] = j; - rsv_mm_info[j].used = 1; - i++; - } - j++; - } -} - -/** - * Find all memory segments in which physical addresses are contiguous. 
- */ -static void -find_memseg(int count, struct dom0_mm_data * mm_data) -{ - int i = 0; - int j, k, idx = 0; - uint64_t zone_len, pfn, num_block; - - while(i < count) { - if (mm_data->block_info[i].exchange_flag == 0) { - i++; - continue; - } - k = 0; - pfn = mm_data->block_info[i].pfn; - mm_data->seg_info[idx].pfn = pfn; - mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn; - - for (j = i + 1; j < count; j++) { - - /* ignore exchange fail memory block */ - if (mm_data->block_info[j].exchange_flag == 0) - break; - - if (mm_data->block_info[j].pfn != - (mm_data->block_info[j - 1].pfn + - DOM0_MEMBLOCK_SIZE / PAGE_SIZE)) - break; - ++k; - mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn; - } - - num_block = j - i; - zone_len = num_block * DOM0_MEMBLOCK_SIZE; - mm_data->seg_info[idx].size = zone_len; - - XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len); - i = i+ num_block; - idx++; - if (idx == DOM0_NUM_MEMSEG) - break; - } - mm_data->num_memseg = idx; -} - -static int -dom0_memory_reserve(uint32_t rsv_size) -{ - uint64_t pfn, vstart, vaddr; - uint32_t i, num_block, size, allocated_size = 0; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) - dma_addr_t dma_handle; -#endif - - /* 2M as memory block */ - num_block = rsv_size / SIZE_PER_BLOCK; - - rsv_mm_info = vmalloc(sizeof(struct memblock_info) * num_block); - if (!rsv_mm_info) { - XEN_ERR("Unable to allocate device memory information\n"); - return -ENOMEM; - } - memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block); - - /* try alloc size of 4M once */ - for (i = 0; i < num_block; i += 2) { - vstart = (unsigned long) - __get_free_pages(GFP_ATOMIC, MAX_NUM_ORDER); - if (vstart == 0) - break; - - dom0_dev.num_bigblock = i / 2 + 1; - allocated_size = SIZE_PER_BLOCK * (i + 2); - - /* size of 4M */ - size = DOM0_MEMBLOCK_SIZE * 2; - - vaddr = vstart; - while (size > 0) { - SetPageReserved(virt_to_page(vaddr)); - vaddr += PAGE_SIZE; - size -= PAGE_SIZE; - } - - pfn = virt_to_pfn(vstart); - rsv_mm_info[i].pfn = pfn; - rsv_mm_info[i].vir_addr = vstart; - rsv_mm_info[i + 1].pfn = - pfn + DOM0_MEMBLOCK_SIZE / PAGE_SIZE; - rsv_mm_info[i + 1].vir_addr = - vstart + DOM0_MEMBLOCK_SIZE; - } - - /*if it failed to alloc 4M, and continue to alloc 2M once */ - for (; i < num_block; i++) { - vstart = (unsigned long) - __get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER); - if (vstart == 0) { - XEN_ERR("allocate memory fail.\n"); - dom0_memory_free(allocated_size); - return -ENOMEM; - } - - allocated_size += SIZE_PER_BLOCK; - - size = DOM0_MEMBLOCK_SIZE; - vaddr = vstart; - while (size > 0) { - SetPageReserved(virt_to_page(vaddr)); - vaddr += PAGE_SIZE; - size -= PAGE_SIZE; - } - pfn = virt_to_pfn(vstart); - rsv_mm_info[i].pfn = pfn; - rsv_mm_info[i].vir_addr = vstart; - } - - sort_viraddr(rsv_mm_info, num_block); - - for (i = 0; i< num_block; i++) { - - /* - * This API is used to exchage MFN for getting a block of - * contiguous physical addresses, its maximum size is 2M. 
- */ - #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) - if (xen_create_contiguous_region(rsv_mm_info[i].vir_addr, - DOM0_CONTIG_NUM_ORDER, 0) == 0) { - #else - if (xen_create_contiguous_region(rsv_mm_info[i].pfn * PAGE_SIZE, - DOM0_CONTIG_NUM_ORDER, 0, &dma_handle) == 0) { - #endif - rsv_mm_info[i].exchange_flag = 1; - rsv_mm_info[i].mfn = - pfn_to_mfn(rsv_mm_info[i].pfn); - rsv_mm_info[i].used = 0; - } else { - XEN_ERR("exchange memeory fail\n"); - rsv_mm_info[i].exchange_flag = 0; - dom0_dev.fail_times++; - if (dom0_dev.fail_times > MAX_EXCHANGE_FAIL_TIME) { - dom0_memory_free(rsv_size); - return -EFAULT; - } - } - } - - return 0; -} - -static int -dom0_prepare_memsegs(struct memory_info *meminfo, struct dom0_mm_data *mm_data) -{ - uint32_t num_block; - int idx; - - /* check if there is a free name buffer */ - memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX); - mm_data->name[DOM0_NAME_MAX - 1] = '\0'; - idx = dom0_find_mempos(); - if (idx < 0) - return -1; - - num_block = meminfo->size / SIZE_PER_BLOCK; - /* find free memory and new memory segments*/ - find_free_memory(num_block, mm_data); - find_memseg(num_block, mm_data); - - /* update private memory data */ - mm_data->refcnt++; - mm_data->mem_size = meminfo->size; - - /* update global memory data */ - dom0_dev.mm_data[idx] = mm_data; - dom0_dev.num_mem_ctx++; - dom0_dev.used_memsize += mm_data->mem_size; - - return 0; -} - -static int -dom0_check_memory (struct memory_info *meminfo) -{ - int idx; - uint64_t mem_size; - - /* round memory size to the next even number. */ - if (meminfo->size % 2) - ++meminfo->size; - - mem_size = meminfo->size; - if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) { - XEN_ERR("Memory data space is full in Dom0 driver\n"); - return -1; - } - idx = dom0_find_memdata(meminfo->name); - if (idx >= 0) { - XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n", - meminfo->name); - return -1; - } - if ((dom0_dev.used_memsize + mem_size) > rsv_memsize) { - XEN_ERR("Total size can't be larger than reserved size.\n"); - return -1; - } - - return 0; -} - -static int __init -dom0_init(void) -{ - if (!xen_domain()) - return -ENODEV; - - if (rsv_memsize > DOM0_CONFIG_MEMSIZE) { - XEN_ERR("The reserved memory size cannot be greater than %d\n", - DOM0_CONFIG_MEMSIZE); - return -EINVAL; - } - - /* Setup the misc device */ - dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR; - dom0_dev.miscdev.name = "dom0_mm"; - dom0_dev.miscdev.fops = &data_fops; - - /* register misc char device */ - if (misc_register(&dom0_dev.miscdev) != 0) { - XEN_ERR("Misc device registration failed\n"); - return -EPERM; - } - - mutex_init(&dom0_dev.data_lock); - dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj); - - if (!dom0_kobj) { - XEN_ERR("dom0-mm object creation failed\n"); - misc_deregister(&dom0_dev.miscdev); - return -ENOMEM; - } - - if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) { - kobject_put(dom0_kobj); - misc_deregister(&dom0_dev.miscdev); - return -EPERM; - } - - if (dom0_memory_reserve(rsv_memsize) < 0) { - sysfs_remove_group(dom0_kobj, &dev_attr_grp); - kobject_put(dom0_kobj); - misc_deregister(&dom0_dev.miscdev); - return -ENOMEM; - } - - XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n"); - - return 0; -} - -static void __exit -dom0_exit(void) -{ - if (rsv_mm_info != NULL) - dom0_memory_free(rsv_memsize); - - sysfs_remove_group(dom0_kobj, &dev_attr_grp); - kobject_put(dom0_kobj); - misc_deregister(&dom0_dev.miscdev); - - XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n"); -} - -static int 
-dom0_open(struct inode *inode, struct file *file) -{ - file->private_data = NULL; - - XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n"); - return 0; -} - -static int -dom0_release(struct inode *inode, struct file *file) -{ - int ret = 0; - struct dom0_mm_data *mm_data = file->private_data; - - if (mm_data == NULL) - return ret; - - mutex_lock(&dom0_dev.data_lock); - if (--mm_data->refcnt == 0) - ret = dom0_memory_release(mm_data); - mutex_unlock(&dom0_dev.data_lock); - - file->private_data = NULL; - XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n"); - return ret; -} - -static int -dom0_mmap(struct file *file, struct vm_area_struct *vm) -{ - int status = 0; - uint32_t idx = vm->vm_pgoff; - uint64_t pfn, size = vm->vm_end - vm->vm_start; - struct dom0_mm_data *mm_data = file->private_data; - - if(mm_data == NULL) - return -EINVAL; - - mutex_lock(&dom0_dev.data_lock); - if (idx >= mm_data->num_memseg) { - mutex_unlock(&dom0_dev.data_lock); - return -EINVAL; - } - - if (size > mm_data->seg_info[idx].size){ - mutex_unlock(&dom0_dev.data_lock); - return -EINVAL; - } - - XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size); - - pfn = mm_data->seg_info[idx].pfn; - mutex_unlock(&dom0_dev.data_lock); - - status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED); - - return status; -} -static int -dom0_ioctl(struct file *file, - unsigned int ioctl_num, - unsigned long ioctl_param) -{ - int idx, ret; - char name[DOM0_NAME_MAX] = {0}; - struct memory_info meminfo; - struct dom0_mm_data *mm_data = file->private_data; - - XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param); - - /** - * Switch according to the ioctl called - */ - switch _IOC_NR(ioctl_num) { - case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG): - ret = copy_from_user(&meminfo, (void *)ioctl_param, - sizeof(struct memory_info)); - if (ret) - return -EFAULT; - - if (mm_data != NULL) { - XEN_ERR("Cannot create memory segment for the same" - " file descriptor\n"); - return -EINVAL; - } - - /* Allocate private data */ - mm_data = vmalloc(sizeof(struct dom0_mm_data)); - if (!mm_data) { - XEN_ERR("Unable to allocate device private data\n"); - return -ENOMEM; - } - memset(mm_data, 0, sizeof(struct dom0_mm_data)); - - mutex_lock(&dom0_dev.data_lock); - /* check if we can allocate memory*/ - if (dom0_check_memory(&meminfo) < 0) { - mutex_unlock(&dom0_dev.data_lock); - vfree(mm_data); - return -EINVAL; - } - - /* allocate memory and created memory segments*/ - if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) { - XEN_ERR("create memory segment fail.\n"); - mutex_unlock(&dom0_dev.data_lock); - return -EIO; - } - - file->private_data = mm_data; - mutex_unlock(&dom0_dev.data_lock); - break; - - /* support multiple process in term of memory mapping*/ - case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG): - ret = copy_from_user(name, (void *)ioctl_param, - sizeof(char) * DOM0_NAME_MAX); - if (ret) - return -EFAULT; - - mutex_lock(&dom0_dev.data_lock); - idx = dom0_find_memdata(name); - if (idx < 0) { - mutex_unlock(&dom0_dev.data_lock); - return -EINVAL; - } - - mm_data = dom0_dev.mm_data[idx]; - mm_data->refcnt++; - file->private_data = mm_data; - mutex_unlock(&dom0_dev.data_lock); - break; - - case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG): - ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg, - sizeof(int)); - if (ret) - return -EFAULT; - break; - - case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO): - ret = copy_to_user((void *)ioctl_param, - &mm_data->seg_info[0], - sizeof(struct memseg_info) * - mm_data->num_memseg); - if (ret) - return 
-EFAULT; - break; - default: - XEN_PRINT("IOCTL default \n"); - break; - } - - return 0; -} - -module_init(dom0_init); -module_exit(dom0_exit); - -module_param(rsv_memsize, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(rsv_memsize, "Xen-dom0 reserved memory size(MB).\n"); diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 3a8f1540..f4f46c1b 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -44,8 +44,6 @@ DPDK_2.0 { rte_free; rte_get_hpet_cycles; rte_get_hpet_hz; - rte_get_log_level; - rte_get_log_type; rte_get_tsc_hz; rte_hexdump; rte_intr_callback_register; @@ -62,9 +60,7 @@ DPDK_2.0 { rte_malloc_set_limit; rte_malloc_socket; rte_malloc_validate; - rte_malloc_virt2phy; rte_mem_lock_page; - rte_mem_phy2mch; rte_mem_virt2phy; rte_memdump; rte_memory_get_nchannel; @@ -78,8 +74,6 @@ DPDK_2.0 { rte_openlog_stream; rte_realloc; rte_set_application_usage_hook; - rte_set_log_level; - rte_set_log_type; rte_socket_id; rte_strerror; rte_strsplit; @@ -87,8 +81,6 @@ DPDK_2.0 { rte_thread_get_affinity; rte_thread_set_affinity; rte_vlog; - rte_xen_dom0_memory_attach; - rte_xen_dom0_memory_init; rte_zmalloc; rte_zmalloc_socket; @@ -118,8 +110,6 @@ DPDK_2.2 { rte_keepalive_dispatch_pings; rte_keepalive_mark_alive; rte_keepalive_register_core; - rte_xen_dom0_supported; - rte_xen_mem_phy2mch; } DPDK_2.1; @@ -134,7 +124,6 @@ DPDK_16.04 { DPDK_16.07 { global: - pci_get_sysfs_path; rte_keepalive_mark_sleep; rte_keepalive_register_relay_callback; rte_rtm_supported; @@ -174,25 +163,6 @@ DPDK_17.05 { rte_log_set_global_level; rte_log_set_level; rte_log_set_level_regexp; - rte_pci_detach; - rte_pci_dump; - rte_pci_ioport_map; - rte_pci_ioport_read; - rte_pci_ioport_unmap; - rte_pci_ioport_write; - rte_pci_map_device; - rte_pci_probe; - rte_pci_probe_one; - rte_pci_read_config; - rte_pci_register; - rte_pci_scan; - rte_pci_unmap_device; - rte_pci_unregister; - rte_pci_write_config; - rte_vdev_init; - rte_vdev_register; - rte_vdev_uninit; - rte_vdev_unregister; vfio_get_container_fd; vfio_get_group_fd; vfio_get_group_no; @@ -209,6 +179,27 @@ DPDK_17.08 { } DPDK_17.05; +DPDK_17.11 { + global: + + rte_eal_create_uio_dev; + rte_bus_get_iommu_class; + rte_eal_has_pci; + rte_eal_iova_mode; + rte_eal_mbuf_default_mempool_ops; + rte_eal_using_phys_addrs; + rte_eal_vfio_intr_mode; + rte_lcore_has_role; + rte_malloc_virt2iova; + rte_mem_virt2iova; + rte_vfio_enable; + rte_vfio_is_enabled; + rte_vfio_noiommu_is_enabled; + rte_vfio_release_device; + rte_vfio_setup_device; + +} DPDK_17.08; + EXPERIMENTAL { global: @@ -217,28 +208,31 @@ EXPERIMENTAL { rte_eal_devargs_remove; rte_eal_hotplug_add; rte_eal_hotplug_remove; - rte_service_disable_on_lcore; + rte_service_component_register; + rte_service_component_unregister; + rte_service_component_runstate_set; rte_service_dump; - rte_service_enable_on_lcore; rte_service_get_by_id; rte_service_get_by_name; rte_service_get_count; - rte_service_get_enabled_on_lcore; - rte_service_is_running; + rte_service_get_name; rte_service_lcore_add; rte_service_lcore_count; + rte_service_lcore_count_services; rte_service_lcore_del; rte_service_lcore_list; rte_service_lcore_reset_all; rte_service_lcore_start; rte_service_lcore_stop; + rte_service_map_lcore_get; + rte_service_map_lcore_set; rte_service_probe_capability; - rte_service_register; rte_service_reset; + rte_service_run_iter_on_app_lcore; + rte_service_runstate_get; + rte_service_runstate_set; + rte_service_set_runstate_mapped_check; 
rte_service_set_stats_enable; - rte_service_start; rte_service_start_with_defaults; - rte_service_stop; - rte_service_unregister; -} DPDK_17.08; +} DPDK_17.11; diff --git a/lib/librte_efd/Makefile b/lib/librte_efd/Makefile index b9277bc5..16e450e8 100644 --- a/lib/librte_efd/Makefile +++ b/lib/librte_efd/Makefile @@ -36,6 +36,7 @@ LIB = librte_efd.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +LDLIBS += -lrte_eal -lrte_ring -lrte_hash EXPORT_MAP := rte_efd_version.map diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c index 4d9a0887..8771d042 100644 --- a/lib/librte_efd/rte_efd.c +++ b/lib/librte_efd/rte_efd.c @@ -42,7 +42,6 @@ #include <rte_eal_memconfig.h> #include <rte_errno.h> #include <rte_malloc.h> -#include <rte_memzone.h> #include <rte_prefetch.h> #include <rte_branch_prediction.h> #include <rte_memcpy.h> @@ -1278,7 +1277,7 @@ efd_lookup_internal(const struct efd_online_group_entry * const group, switch (lookup_fn) { -#if defined(RTE_ARCH_X86) +#if defined(RTE_ARCH_X86) && defined(CC_SUPPORT_AVX2) case EFD_LOOKUP_AVX2: return efd_lookup_internal_avx2(group->hash_idx, group->lookup_table, diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile index db692ae4..394cc9c0 100644 --- a/lib/librte_ether/Makefile +++ b/lib/librte_ether/Makefile @@ -38,14 +38,18 @@ LIB = librte_ethdev.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_net -lrte_eal -lrte_mempool -lrte_ring +LDLIBS += -lrte_mbuf -EXPORT_MAP := rte_ether_version.map +EXPORT_MAP := rte_ethdev_version.map -LIBABIVER := 6 +LIBABIVER := 8 SRCS-y += rte_ethdev.c SRCS-y += rte_flow.c SRCS-y += rte_tm.c +SRCS-y += rte_mtr.c +SRCS-y += ethdev_profile.c # # Export include files @@ -59,5 +63,7 @@ SYMLINK-y-include += rte_flow.h SYMLINK-y-include += rte_flow_driver.h SYMLINK-y-include += rte_tm.h SYMLINK-y-include += rte_tm_driver.h +SYMLINK-y-include += rte_mtr.h +SYMLINK-y-include += rte_mtr_driver.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ether/ethdev_profile.c b/lib/librte_ether/ethdev_profile.c new file mode 100644 index 00000000..c9cb8420 --- /dev/null +++ b/lib/librte_ether/ethdev_profile.c @@ -0,0 +1,164 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ethdev_profile.h" + +/** + * This conditional block enables RX queues profiling by tracking wasted + * iterations, i.e. iterations which yielded no RX packets. Profiling is + * performed using the Instrumentation and Tracing Technology (ITT) API, + * employed by the Intel (R) VTune (TM) Amplifier. + */ +#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS + +#include <ittnotify.h> + +#define ITT_MAX_NAME_LEN (100) + +/** + * Auxiliary ITT structure belonging to Ethernet device and using to: + * - track RX queue state to determine whether it is wasting loop iterations + * - begin or end ITT task using task domain and task name (handle) + */ +struct itt_profile_rx_data { + /** + * ITT domains for each queue. + */ + __itt_domain *domains[RTE_MAX_QUEUES_PER_PORT]; + /** + * ITT task names for each queue. + */ + __itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT]; + /** + * Flags indicating the queues state. Possible values: + * 1 - queue is wasting iterations, + * 0 - otherwise. + */ + uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT]; +}; + +/** + * The pool of *itt_profile_rx_data* structures. + */ +struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS]; + + +/** + * This callback function manages ITT tasks collection on given port and queue. + * It must be registered with rte_eth_add_rx_callback() to be called from + * rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function + * type declaration. + */ +static uint16_t +collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id, + __rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, __rte_unused void *user_param) +{ + if (unlikely(nb_pkts == 0)) { + if (!itt_rx_data[port_id].queue_state[queue_id]) { + __itt_task_begin( + itt_rx_data[port_id].domains[queue_id], + __itt_null, __itt_null, + itt_rx_data[port_id].handles[queue_id]); + itt_rx_data[port_id].queue_state[queue_id] = 1; + } + } else { + if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) { + __itt_task_end( + itt_rx_data[port_id].domains[queue_id]); + itt_rx_data[port_id].queue_state[queue_id] = 0; + } + } + return nb_pkts; +} + +/** + * Initialization of itt_profile_rx_data for a given Ethernet device. + * This function must be invoked when ethernet device is being configured. + * Result will be stored in the global array *itt_rx_data*. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param port_name + * The name of the Ethernet device. + * @param rx_queue_num + * The number of RX queues on specified port. + * + * @return + * - On success, zero. + * - On failure, a negative value. 
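 *
 * The per-queue hook used here is the generic rte_eth_add_rx_callback()
 * mechanism, so the same pattern is available to applications for their
 * own per-queue statistics; a minimal sketch (callback and counter names
 * illustrative):
 *
 *   static uint16_t
 *   count_cb(uint16_t port_id, uint16_t queue_id,
 *            __rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts,
 *            __rte_unused uint16_t max_pkts, void *user_param)
 *   {
 *           *(uint64_t *)user_param += nb_pkts;
 *           return nb_pkts;
 *   }
 *
 *   rte_eth_add_rx_callback(port_id, queue_id, count_cb, &rx_counter);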
+ */ +static inline int +itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num) +{ + uint16_t q_id; + + for (q_id = 0; q_id < rx_queue_num; ++q_id) { + char domain_name[ITT_MAX_NAME_LEN]; + + snprintf(domain_name, sizeof(domain_name), + "RXBurst.WastedIterations.Port_%s.Queue_%d", + port_name, q_id); + itt_rx_data[port_id].domains[q_id] + = __itt_domain_create(domain_name); + + char task_name[ITT_MAX_NAME_LEN]; + + snprintf(task_name, sizeof(task_name), + "port id: %d; queue id: %d", + port_id, q_id); + itt_rx_data[port_id].handles[q_id] + = __itt_string_handle_create(task_name); + + itt_rx_data[port_id].queue_state[q_id] = 0; + + if (!rte_eth_add_rx_callback( + port_id, q_id, collect_itt_rx_burst_cb, NULL)) { + return -rte_errno; + } + } + + return 0; +} +#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */ + +int +__rte_eth_profile_rx_init(__rte_unused uint16_t port_id, + __rte_unused struct rte_eth_dev *dev) +{ +#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS + return itt_profile_rx_init( + port_id, dev->data->name, dev->data->nb_rx_queues); +#endif + return 0; +} diff --git a/lib/librte_ether/ethdev_profile.h b/lib/librte_ether/ethdev_profile.h new file mode 100644 index 00000000..697facff --- /dev/null +++ b/lib/librte_ether/ethdev_profile.h @@ -0,0 +1,56 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETHDEV_PROFILE_H_ +#define _RTE_ETHDEV_PROFILE_H_ + +#include "rte_ethdev.h" + +/** + * Initialization of profiling RX queues for the Ethernet device. + * Implementation of this function depends on chosen profiling method, + * defined in configs. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param dev + * Pointer to struct rte_eth_dev corresponding to given port_id. + * + * @return + * - On success, zero. + * - On failure, a negative value. 
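 *
 * The whole profiling path is compiled in only when
 * RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS is defined; by the usual
 * DPDK build convention that would map to a config entry of the form
 * below, though the exact option name is an assumption here:
 *
 *   CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS=y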
+ */ +int +__rte_eth_profile_rx_init(uint16_t port_id, struct rte_eth_dev *dev); + +#endif diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 0597641e..318af286 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -47,7 +47,6 @@ #include <rte_log.h> #include <rte_debug.h> #include <rte_interrupts.h> -#include <rte_pci.h> #include <rte_memory.h> #include <rte_memcpy.h> #include <rte_memzone.h> @@ -67,6 +66,7 @@ #include "rte_ether.h" #include "rte_ethdev.h" +#include "ethdev_profile.h" static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data"; struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS]; @@ -138,8 +138,8 @@ enum { STAT_QMAP_RX }; -uint8_t -rte_eth_find_next(uint8_t port_id) +uint16_t +rte_eth_find_next(uint16_t port_id) { while (port_id < RTE_MAX_ETHPORTS && rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED) @@ -178,16 +178,14 @@ rte_eth_dev_allocated(const char *name) unsigned i; for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - if (rte_eth_devices[i].state == RTE_ETH_DEV_ATTACHED && - rte_eth_devices[i].device) { - if (!strcmp(rte_eth_devices[i].device->name, name)) - return &rte_eth_devices[i]; - } + if ((rte_eth_devices[i].state == RTE_ETH_DEV_ATTACHED) && + strcmp(rte_eth_devices[i].data->name, name) == 0) + return &rte_eth_devices[i]; } return NULL; } -static uint8_t +static uint16_t rte_eth_dev_find_free_port(void) { unsigned i; @@ -200,7 +198,7 @@ rte_eth_dev_find_free_port(void) } static struct rte_eth_dev * -eth_dev_get(uint8_t port_id) +eth_dev_get(uint16_t port_id) { struct rte_eth_dev *eth_dev = &rte_eth_devices[port_id]; @@ -216,7 +214,7 @@ eth_dev_get(uint8_t port_id) struct rte_eth_dev * rte_eth_dev_allocate(const char *name) { - uint8_t port_id; + uint16_t port_id; struct rte_eth_dev *eth_dev; port_id = rte_eth_dev_find_free_port(); @@ -251,7 +249,7 @@ rte_eth_dev_allocate(const char *name) struct rte_eth_dev * rte_eth_dev_attach_secondary(const char *name) { - uint8_t i; + uint16_t i; struct rte_eth_dev *eth_dev; if (rte_eth_dev_data == NULL) @@ -285,7 +283,7 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) } int -rte_eth_dev_is_valid_port(uint8_t port_id) +rte_eth_dev_is_valid_port(uint16_t port_id) { if (port_id >= RTE_MAX_ETHPORTS || (rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED && @@ -296,17 +294,24 @@ rte_eth_dev_is_valid_port(uint8_t port_id) } int -rte_eth_dev_socket_id(uint8_t port_id) +rte_eth_dev_socket_id(uint16_t port_id) { RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1); return rte_eth_devices[port_id].data->numa_node; } -uint8_t +void * +rte_eth_dev_get_sec_ctx(uint8_t port_id) +{ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); + return rte_eth_devices[port_id].security_ctx; +} + +uint16_t rte_eth_dev_count(void) { - uint8_t p; - uint8_t count; + uint16_t p; + uint16_t count; count = 0; @@ -317,9 +322,9 @@ rte_eth_dev_count(void) } int -rte_eth_dev_get_name_by_port(uint8_t port_id, char *name) +rte_eth_dev_get_name_by_port(uint16_t port_id, char *name) { - const char *tmp; + char *tmp; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -330,15 +335,14 @@ rte_eth_dev_get_name_by_port(uint8_t port_id, char *name) /* shouldn't check 'rte_eth_devices[i].data', * because it might be overwritten by VDEV PMD */ - tmp = rte_eth_devices[port_id].device->name; + tmp = rte_eth_dev_data[port_id].name; strcpy(name, tmp); return 0; } int -rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) +rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id) { - int ret; int 
i; if (name == NULL) { @@ -347,37 +351,20 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) } RTE_ETH_FOREACH_DEV(i) { - if (!rte_eth_devices[i].device) - continue; + if (!strncmp(name, + rte_eth_dev_data[i].name, strlen(name))) { - ret = strncmp(name, rte_eth_devices[i].device->name, - strlen(name)); - if (ret == 0) { *port_id = i; + return 0; } } return -ENODEV; } -static int -rte_eth_dev_is_detachable(uint8_t port_id) -{ - uint32_t dev_flags; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev_flags = rte_eth_devices[port_id].data->dev_flags; - if ((dev_flags & RTE_ETH_DEV_DETACHABLE) && - (!(dev_flags & RTE_ETH_DEV_BONDED_SLAVE))) - return 0; - else - return 1; -} - /* attach the new device, then store port_id of the device */ int -rte_eth_dev_attach(const char *devargs, uint8_t *port_id) +rte_eth_dev_attach(const char *devargs, uint16_t *port_id) { int ret = -1; int current = rte_eth_dev_count(); @@ -423,21 +410,28 @@ err: /* detach the device, then store the name of the device */ int -rte_eth_dev_detach(uint8_t port_id, char *name) +rte_eth_dev_detach(uint16_t port_id, char *name) { + uint32_t dev_flags; int ret = -1; + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + if (name == NULL) { ret = -EINVAL; goto err; } - /* FIXME: move this to eal, once device flags are relocated there */ - if (rte_eth_dev_is_detachable(port_id)) + dev_flags = rte_eth_devices[port_id].data->dev_flags; + if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) { + RTE_LOG(ERR, EAL, "Port %" PRIu16 " is bonded, cannot detach\n", + port_id); + ret = -ENOTSUP; goto err; + } - snprintf(name, RTE_DEV_NAME_MAX_LEN, "%s", - rte_eth_devices[port_id].device->name); + snprintf(name, sizeof(rte_eth_devices[port_id].data->name), + "%s", rte_eth_devices[port_id].data->name); ret = rte_eal_dev_detach(rte_eth_devices[port_id].device); if (ret < 0) @@ -501,7 +495,7 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) } int -rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id) +rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id) { struct rte_eth_dev *dev; @@ -527,7 +521,7 @@ rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id) } int -rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id) +rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id) { struct rte_eth_dev *dev; @@ -553,7 +547,7 @@ rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id) } int -rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id) +rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id) { struct rte_eth_dev *dev; @@ -579,7 +573,7 @@ rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id) } int -rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id) +rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id) { struct rte_eth_dev *dev; @@ -687,12 +681,102 @@ rte_eth_speed_bitflag(uint32_t speed, int duplex) } } +/** + * A conversion function from rxmode bitfield API. 
+ */ +static void +rte_eth_convert_rx_offload_bitfield(const struct rte_eth_rxmode *rxmode, + uint64_t *rx_offloads) +{ + uint64_t offloads = 0; + + if (rxmode->header_split == 1) + offloads |= DEV_RX_OFFLOAD_HEADER_SPLIT; + if (rxmode->hw_ip_checksum == 1) + offloads |= DEV_RX_OFFLOAD_CHECKSUM; + if (rxmode->hw_vlan_filter == 1) + offloads |= DEV_RX_OFFLOAD_VLAN_FILTER; + if (rxmode->hw_vlan_strip == 1) + offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; + if (rxmode->hw_vlan_extend == 1) + offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND; + if (rxmode->jumbo_frame == 1) + offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; + if (rxmode->hw_strip_crc == 1) + offloads |= DEV_RX_OFFLOAD_CRC_STRIP; + if (rxmode->enable_scatter == 1) + offloads |= DEV_RX_OFFLOAD_SCATTER; + if (rxmode->enable_lro == 1) + offloads |= DEV_RX_OFFLOAD_TCP_LRO; + if (rxmode->hw_timestamp == 1) + offloads |= DEV_RX_OFFLOAD_TIMESTAMP; + if (rxmode->security == 1) + offloads |= DEV_RX_OFFLOAD_SECURITY; + + *rx_offloads = offloads; +} + +/** + * A conversion function from rxmode offloads API. + */ +static void +rte_eth_convert_rx_offloads(const uint64_t rx_offloads, + struct rte_eth_rxmode *rxmode) +{ + + if (rx_offloads & DEV_RX_OFFLOAD_HEADER_SPLIT) + rxmode->header_split = 1; + else + rxmode->header_split = 0; + if (rx_offloads & DEV_RX_OFFLOAD_CHECKSUM) + rxmode->hw_ip_checksum = 1; + else + rxmode->hw_ip_checksum = 0; + if (rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) + rxmode->hw_vlan_filter = 1; + else + rxmode->hw_vlan_filter = 0; + if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP) + rxmode->hw_vlan_strip = 1; + else + rxmode->hw_vlan_strip = 0; + if (rx_offloads & DEV_RX_OFFLOAD_VLAN_EXTEND) + rxmode->hw_vlan_extend = 1; + else + rxmode->hw_vlan_extend = 0; + if (rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) + rxmode->jumbo_frame = 1; + else + rxmode->jumbo_frame = 0; + if (rx_offloads & DEV_RX_OFFLOAD_CRC_STRIP) + rxmode->hw_strip_crc = 1; + else + rxmode->hw_strip_crc = 0; + if (rx_offloads & DEV_RX_OFFLOAD_SCATTER) + rxmode->enable_scatter = 1; + else + rxmode->enable_scatter = 0; + if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) + rxmode->enable_lro = 1; + else + rxmode->enable_lro = 0; + if (rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) + rxmode->hw_timestamp = 1; + else + rxmode->hw_timestamp = 0; + if (rx_offloads & DEV_RX_OFFLOAD_SECURITY) + rxmode->security = 1; + else + rxmode->security = 0; +} + int -rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, +rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, const struct rte_eth_conf *dev_conf) { struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; + struct rte_eth_conf local_conf = *dev_conf; int diag; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -722,8 +806,20 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, return -EBUSY; } + /* + * Convert between the offloads API to enable PMDs to support + * only one of them. 
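 *
 * From the application side, leaving ignore_offload_bitfield at 0 keeps
 * the legacy bitfield behaviour, while opting in to the new scheme looks
 * roughly like the sketch below (offload selection and queue counts
 * illustrative):
 *
 *   struct rte_eth_conf conf = { 0 };
 *
 *   conf.rxmode.ignore_offload_bitfield = 1;
 *   conf.rxmode.offloads = DEV_RX_OFFLOAD_CHECKSUM |
 *                          DEV_RX_OFFLOAD_CRC_STRIP;
 *   rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);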
+ */ + if ((dev_conf->rxmode.ignore_offload_bitfield == 0)) { + rte_eth_convert_rx_offload_bitfield( + &dev_conf->rxmode, &local_conf.rxmode.offloads); + } else { + rte_eth_convert_rx_offloads(dev_conf->rxmode.offloads, + &local_conf.rxmode); + } + /* Copy the dev_conf parameter into the dev structure */ - memcpy(&dev->data->dev_conf, dev_conf, sizeof(dev->data->dev_conf)); + memcpy(&dev->data->dev_conf, &local_conf, sizeof(dev->data->dev_conf)); /* * Check that the numbers of RX and TX queues are not greater @@ -767,7 +863,7 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, * If jumbo frames are enabled, check that the maximum RX packet * length is supported by the configured device. */ - if (dev_conf->rxmode.jumbo_frame == 1) { + if (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { if (dev_conf->rxmode.max_rx_pkt_len > dev_info.max_rx_pktlen) { RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u" @@ -819,6 +915,16 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, return diag; } + /* Initialize Rx profiling if enabled at compilation time. */ + diag = __rte_eth_profile_rx_init(port_id, dev); + if (diag != 0) { + RTE_PMD_DEBUG_TRACE("port%d __rte_eth_profile_rx_init = %d\n", + port_id, diag); + rte_eth_dev_rx_queue_config(dev, 0); + rte_eth_dev_tx_queue_config(dev, 0); + return diag; + } + return 0; } @@ -839,7 +945,7 @@ _rte_eth_dev_reset(struct rte_eth_dev *dev) } static void -rte_eth_dev_config_restore(uint8_t port_id) +rte_eth_dev_config_restore(uint16_t port_id) { struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; @@ -894,7 +1000,7 @@ rte_eth_dev_config_restore(uint8_t port_id) } int -rte_eth_dev_start(uint8_t port_id) +rte_eth_dev_start(uint16_t port_id) { struct rte_eth_dev *dev; int diag; @@ -906,7 +1012,7 @@ rte_eth_dev_start(uint8_t port_id) RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP); if (dev->data->dev_started != 0) { - RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu8 + RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16 " already started\n", port_id); return 0; @@ -928,7 +1034,7 @@ rte_eth_dev_start(uint8_t port_id) } void -rte_eth_dev_stop(uint8_t port_id) +rte_eth_dev_stop(uint16_t port_id) { struct rte_eth_dev *dev; @@ -938,7 +1044,7 @@ rte_eth_dev_stop(uint8_t port_id) RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop); if (dev->data->dev_started == 0) { - RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu8 + RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16 " already stopped\n", port_id); return; @@ -949,7 +1055,7 @@ rte_eth_dev_stop(uint8_t port_id) } int -rte_eth_dev_set_link_up(uint8_t port_id) +rte_eth_dev_set_link_up(uint16_t port_id) { struct rte_eth_dev *dev; @@ -962,7 +1068,7 @@ rte_eth_dev_set_link_up(uint8_t port_id) } int -rte_eth_dev_set_link_down(uint8_t port_id) +rte_eth_dev_set_link_down(uint16_t port_id) { struct rte_eth_dev *dev; @@ -975,7 +1081,7 @@ rte_eth_dev_set_link_down(uint8_t port_id) } void -rte_eth_dev_close(uint8_t port_id) +rte_eth_dev_close(uint16_t port_id) { struct rte_eth_dev *dev; @@ -995,7 +1101,24 @@ rte_eth_dev_close(uint8_t port_id) } int -rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, +rte_eth_dev_reset(uint16_t port_id) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_reset, -ENOTSUP); + + rte_eth_dev_stop(port_id); + ret = dev->dev_ops->dev_reset(dev); + + return ret; +} + +int 
+rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, uint16_t nb_rx_desc, unsigned int socket_id, const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp) @@ -1004,6 +1127,7 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, uint32_t mbp_buf_size; struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; + struct rte_eth_rxconf local_conf; void **rxq; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -1074,8 +1198,18 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, if (rx_conf == NULL) rx_conf = &dev_info.default_rxconf; + local_conf = *rx_conf; + if (dev->data->dev_conf.rxmode.ignore_offload_bitfield == 0) { + /** + * Reflect port offloads to queue offloads in order for + * offloads to not be discarded. + */ + rte_eth_convert_rx_offload_bitfield(&dev->data->dev_conf.rxmode, + &local_conf.offloads); + } + ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc, - socket_id, rx_conf, mp); + socket_id, &local_conf, mp); if (!ret) { if (!dev->data->min_rx_buf_size || dev->data->min_rx_buf_size > mbp_buf_size) @@ -1085,13 +1219,63 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, return ret; } +/** + * A conversion function from txq_flags API. + */ +static void +rte_eth_convert_txq_flags(const uint32_t txq_flags, uint64_t *tx_offloads) +{ + uint64_t offloads = 0; + + if (!(txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)) + offloads |= DEV_TX_OFFLOAD_MULTI_SEGS; + if (!(txq_flags & ETH_TXQ_FLAGS_NOVLANOFFL)) + offloads |= DEV_TX_OFFLOAD_VLAN_INSERT; + if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP)) + offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM; + if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMUDP)) + offloads |= DEV_TX_OFFLOAD_UDP_CKSUM; + if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMTCP)) + offloads |= DEV_TX_OFFLOAD_TCP_CKSUM; + if ((txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT) && + (txq_flags & ETH_TXQ_FLAGS_NOMULTMEMP)) + offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE; + + *tx_offloads = offloads; +} + +/** + * A conversion function from offloads API. + */ +static void +rte_eth_convert_txq_offloads(const uint64_t tx_offloads, uint32_t *txq_flags) +{ + uint32_t flags = 0; + + if (!(tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)) + flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + if (!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT)) + flags |= ETH_TXQ_FLAGS_NOVLANOFFL; + if (!(tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM)) + flags |= ETH_TXQ_FLAGS_NOXSUMSCTP; + if (!(tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) + flags |= ETH_TXQ_FLAGS_NOXSUMUDP; + if (!(tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)) + flags |= ETH_TXQ_FLAGS_NOXSUMTCP; + if (tx_offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE) + flags |= (ETH_TXQ_FLAGS_NOREFCOUNT | ETH_TXQ_FLAGS_NOMULTMEMP); + + *txq_flags = flags; +} + int -rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, +rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id, uint16_t nb_tx_desc, unsigned int socket_id, const struct rte_eth_txconf *tx_conf) { struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; + struct rte_eth_txconf local_conf; void **txq; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -1136,8 +1320,23 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, if (tx_conf == NULL) tx_conf = &dev_info.default_txconf; + /* + * Convert between the offloads API to enable PMDs to support + * only one of them. + */ + local_conf = *tx_conf; + if (tx_conf->txq_flags & ETH_TXQ_FLAGS_IGNORE) { + rte_eth_convert_txq_offloads(tx_conf->offloads, + &local_conf.txq_flags); + /* Keep the ignore flag. 
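A short sketch, not part of this patch, of a Tx queue set up through the new per-queue offload field rather than txq_flags; the MBUF_FAST_FREE request is an arbitrary example and is masked by the advertised capability:

#include <rte_ethdev.h>

static int
setup_txq_with_offloads(uint16_t port_id, uint16_t queue_id, uint16_t nb_desc)
{
        struct rte_eth_dev_info dev_info;
        struct rte_eth_txconf txconf;

        rte_eth_dev_info_get(port_id, &dev_info);
        txconf = dev_info.default_txconf;

        /* Ask ethdev to take offloads from the new field and to ignore
         * the legacy txq_flags bits. */
        txconf.txq_flags = ETH_TXQ_FLAGS_IGNORE;
        txconf.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE &
                          dev_info.tx_offload_capa;

        return rte_eth_tx_queue_setup(port_id, queue_id, nb_desc,
                        rte_eth_dev_socket_id(port_id), &txconf);
}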
*/ + local_conf.txq_flags |= ETH_TXQ_FLAGS_IGNORE; + } else { + rte_eth_convert_txq_flags(tx_conf->txq_flags, + &local_conf.offloads); + } + return (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc, - socket_id, tx_conf); + socket_id, &local_conf); } void @@ -1190,7 +1389,7 @@ rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size) } int -rte_eth_tx_done_cleanup(uint8_t port_id, uint16_t queue_id, uint32_t free_cnt) +rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt) { struct rte_eth_dev *dev = &rte_eth_devices[port_id]; @@ -1204,7 +1403,7 @@ rte_eth_tx_done_cleanup(uint8_t port_id, uint16_t queue_id, uint32_t free_cnt) } void -rte_eth_promiscuous_enable(uint8_t port_id) +rte_eth_promiscuous_enable(uint16_t port_id) { struct rte_eth_dev *dev; @@ -1217,7 +1416,7 @@ rte_eth_promiscuous_enable(uint8_t port_id) } void -rte_eth_promiscuous_disable(uint8_t port_id) +rte_eth_promiscuous_disable(uint16_t port_id) { struct rte_eth_dev *dev; @@ -1230,7 +1429,7 @@ rte_eth_promiscuous_disable(uint8_t port_id) } int -rte_eth_promiscuous_get(uint8_t port_id) +rte_eth_promiscuous_get(uint16_t port_id) { struct rte_eth_dev *dev; @@ -1241,7 +1440,7 @@ rte_eth_promiscuous_get(uint8_t port_id) } void -rte_eth_allmulticast_enable(uint8_t port_id) +rte_eth_allmulticast_enable(uint16_t port_id) { struct rte_eth_dev *dev; @@ -1254,7 +1453,7 @@ rte_eth_allmulticast_enable(uint8_t port_id) } void -rte_eth_allmulticast_disable(uint8_t port_id) +rte_eth_allmulticast_disable(uint16_t port_id) { struct rte_eth_dev *dev; @@ -1267,7 +1466,7 @@ rte_eth_allmulticast_disable(uint8_t port_id) } int -rte_eth_allmulticast_get(uint8_t port_id) +rte_eth_allmulticast_get(uint16_t port_id) { struct rte_eth_dev *dev; @@ -1292,7 +1491,7 @@ rte_eth_dev_atomic_read_link_status(struct rte_eth_dev *dev, } void -rte_eth_link_get(uint8_t port_id, struct rte_eth_link *eth_link) +rte_eth_link_get(uint16_t port_id, struct rte_eth_link *eth_link) { struct rte_eth_dev *dev; @@ -1309,7 +1508,7 @@ rte_eth_link_get(uint8_t port_id, struct rte_eth_link *eth_link) } void -rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link) +rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *eth_link) { struct rte_eth_dev *dev; @@ -1326,7 +1525,7 @@ rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link) } int -rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats) +rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats) { struct rte_eth_dev *dev; @@ -1337,25 +1536,42 @@ rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats) RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_get, -ENOTSUP); stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; - (*dev->dev_ops->stats_get)(dev, stats); - return 0; + return (*dev->dev_ops->stats_get)(dev, stats); } -void -rte_eth_stats_reset(uint8_t port_id) +int +rte_eth_stats_reset(uint16_t port_id) { struct rte_eth_dev *dev; - RTE_ETH_VALID_PORTID_OR_RET(port_id); + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_RET(*dev->dev_ops->stats_reset); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_reset, -ENOTSUP); (*dev->dev_ops->stats_reset)(dev); dev->data->rx_mbuf_alloc_failed = 0; + + return 0; +} + +static inline int +get_xstats_basic_count(struct rte_eth_dev *dev) +{ + uint16_t nb_rxqs, nb_txqs; + int count; + + nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + nb_txqs = RTE_MIN(dev->data->nb_tx_queues, 
RTE_ETHDEV_QUEUE_STAT_CNTRS); + + count = RTE_NB_STATS; + count += nb_rxqs * RTE_NB_RXQ_STATS; + count += nb_txqs * RTE_NB_TXQ_STATS; + + return count; } static int -get_xstats_count(uint8_t port_id) +get_xstats_count(uint16_t port_id) { struct rte_eth_dev *dev; int count; @@ -1375,16 +1591,14 @@ get_xstats_count(uint8_t port_id) } else count = 0; - count += RTE_NB_STATS; - count += RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS) * - RTE_NB_RXQ_STATS; - count += RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS) * - RTE_NB_TXQ_STATS; + + count += get_xstats_basic_count(dev); + return count; } int -rte_eth_xstats_get_id_by_name(uint8_t port_id, const char *xstat_name, +rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name, uint64_t *id) { int cnt_xstats, idx_xstat; @@ -1427,125 +1641,97 @@ rte_eth_xstats_get_id_by_name(uint8_t port_id, const char *xstat_name, return -EINVAL; } +/* retrieve ethdev extended statistics names */ int -rte_eth_xstats_get_names_by_id(uint8_t port_id, +rte_eth_xstats_get_names_by_id(uint16_t port_id, struct rte_eth_xstat_name *xstats_names, unsigned int size, uint64_t *ids) { - /* Get all xstats */ + struct rte_eth_xstat_name *xstats_names_copy; + unsigned int no_basic_stat_requested = 1; + unsigned int expected_entries; + struct rte_eth_dev *dev; + unsigned int i; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + ret = get_xstats_count(port_id); + if (ret < 0) + return ret; + expected_entries = (unsigned int)ret; + + /* Return max number of stats if no ids given */ if (!ids) { - struct rte_eth_dev *dev; - int cnt_used_entries; - int cnt_expected_entries; - int cnt_driver_entries; - uint32_t idx, id_queue; - uint16_t num_q; - - cnt_expected_entries = get_xstats_count(port_id); - if (xstats_names == NULL || cnt_expected_entries < 0 || - (int)size < cnt_expected_entries) - return cnt_expected_entries; - - /* port_id checked in get_xstats_count() */ - dev = &rte_eth_devices[port_id]; - cnt_used_entries = 0; - - for (idx = 0; idx < RTE_NB_STATS; idx++) { - snprintf(xstats_names[cnt_used_entries].name, - sizeof(xstats_names[0].name), - "%s", rte_stats_strings[idx].name); - cnt_used_entries++; - } - num_q = RTE_MIN(dev->data->nb_rx_queues, - RTE_ETHDEV_QUEUE_STAT_CNTRS); - for (id_queue = 0; id_queue < num_q; id_queue++) { - for (idx = 0; idx < RTE_NB_RXQ_STATS; idx++) { - snprintf(xstats_names[cnt_used_entries].name, - sizeof(xstats_names[0].name), - "rx_q%u%s", - id_queue, - rte_rxq_stats_strings[idx].name); - cnt_used_entries++; - } + if (!xstats_names) + return expected_entries; + else if (xstats_names && size < expected_entries) + return expected_entries; + } - } - num_q = RTE_MIN(dev->data->nb_tx_queues, - RTE_ETHDEV_QUEUE_STAT_CNTRS); - for (id_queue = 0; id_queue < num_q; id_queue++) { - for (idx = 0; idx < RTE_NB_TXQ_STATS; idx++) { - snprintf(xstats_names[cnt_used_entries].name, - sizeof(xstats_names[0].name), - "tx_q%u%s", - id_queue, - rte_txq_stats_strings[idx].name); - cnt_used_entries++; + if (ids && !xstats_names) + return -EINVAL; + + if (ids && dev->dev_ops->xstats_get_names_by_id != NULL && size > 0) { + unsigned int basic_count = get_xstats_basic_count(dev); + uint64_t ids_copy[size]; + + for (i = 0; i < size; i++) { + if (ids[i] < basic_count) { + no_basic_stat_requested = 0; + break; } - } - if (dev->dev_ops->xstats_get_names_by_id != NULL) { - /* If there are any driver-specific xstats, append them - * to end of list. 
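For illustration only, a sketch (not from this patch) of reading a single extended statistic through the by-name and by-id helpers reworked above; the statistic name is PMD dependent, "rx_good_packets" being a typical basic one:

#include <rte_ethdev.h>

static int
read_one_xstat(uint16_t port_id, const char *name, uint64_t *value)
{
        uint64_t id;

        /* Resolve the name to an id, then fetch only that id. */
        if (rte_eth_xstats_get_id_by_name(port_id, name, &id) != 0)
                return -1;
        if (rte_eth_xstats_get_by_id(port_id, &id, value, 1) != 1)
                return -1;

        return 0;
}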
+ /* + * Convert ids to xstats ids that PMD knows. + * ids known by user are basic + extended stats. */ - cnt_driver_entries = - (*dev->dev_ops->xstats_get_names_by_id)( - dev, - xstats_names + cnt_used_entries, - NULL, - size - cnt_used_entries); - if (cnt_driver_entries < 0) - return cnt_driver_entries; - cnt_used_entries += cnt_driver_entries; - - } else if (dev->dev_ops->xstats_get_names != NULL) { - /* If there are any driver-specific xstats, append them - * to end of list. - */ - cnt_driver_entries = (*dev->dev_ops->xstats_get_names)( - dev, - xstats_names + cnt_used_entries, - size - cnt_used_entries); - if (cnt_driver_entries < 0) - return cnt_driver_entries; - cnt_used_entries += cnt_driver_entries; + ids_copy[i] = ids[i] - basic_count; } - return cnt_used_entries; + if (no_basic_stat_requested) + return (*dev->dev_ops->xstats_get_names_by_id)(dev, + xstats_names, ids_copy, size); } - /* Get only xstats given by IDS */ - else { - uint16_t len, i; - struct rte_eth_xstat_name *xstats_names_copy; - len = rte_eth_xstats_get_names_by_id(port_id, NULL, 0, NULL); + /* Retrieve all stats */ + if (!ids) { + int num_stats = rte_eth_xstats_get_names(port_id, xstats_names, + expected_entries); + if (num_stats < 0 || num_stats > (int)expected_entries) + return num_stats; + else + return expected_entries; + } - xstats_names_copy = - malloc(sizeof(struct rte_eth_xstat_name) * len); - if (!xstats_names_copy) { - RTE_PMD_DEBUG_TRACE( - "ERROR: can't allocate memory for values_copy\n"); - free(xstats_names_copy); - return -1; - } + xstats_names_copy = calloc(expected_entries, + sizeof(struct rte_eth_xstat_name)); - rte_eth_xstats_get_names_by_id(port_id, xstats_names_copy, - len, NULL); + if (!xstats_names_copy) { + RTE_PMD_DEBUG_TRACE("ERROR: can't allocate memory"); + return -ENOMEM; + } - for (i = 0; i < size; i++) { - if (ids[i] >= len) { - RTE_PMD_DEBUG_TRACE( - "ERROR: id value isn't valid\n"); - return -1; - } - strcpy(xstats_names[i].name, - xstats_names_copy[ids[i]].name); + /* Fill xstats_names_copy structure */ + rte_eth_xstats_get_names(port_id, xstats_names_copy, expected_entries); + + /* Filter stats */ + for (i = 0; i < size; i++) { + if (ids[i] >= expected_entries) { + RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n"); + free(xstats_names_copy); + return -1; } - free(xstats_names_copy); - return size; + xstats_names[i] = xstats_names_copy[ids[i]]; } + + free(xstats_names_copy); + return size; } int -rte_eth_xstats_get_names(uint8_t port_id, +rte_eth_xstats_get_names(uint16_t port_id, struct rte_eth_xstat_name *xstats_names, unsigned int size) { @@ -1611,133 +1797,80 @@ rte_eth_xstats_get_names(uint8_t port_id, /* retrieve ethdev extended statistics */ int -rte_eth_xstats_get_by_id(uint8_t port_id, const uint64_t *ids, uint64_t *values, - unsigned int n) +rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids, + uint64_t *values, unsigned int size) { - /* If need all xstats */ - if (!ids) { - struct rte_eth_stats eth_stats; - struct rte_eth_dev *dev; - unsigned int count = 0, i, q; - signed int xcount = 0; - uint64_t val, *stats_ptr; - uint16_t nb_rxqs, nb_txqs; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, - RTE_ETHDEV_QUEUE_STAT_CNTRS); - nb_txqs = RTE_MIN(dev->data->nb_tx_queues, - RTE_ETHDEV_QUEUE_STAT_CNTRS); - - /* Return generic statistics */ - count = RTE_NB_STATS + (nb_rxqs * RTE_NB_RXQ_STATS) + - (nb_txqs * RTE_NB_TXQ_STATS); - - - /* implemented by the driver */ - if 
(dev->dev_ops->xstats_get_by_id != NULL) { - /* Retrieve the xstats from the driver at the end of the - * xstats struct. Retrieve all xstats. - */ - xcount = (*dev->dev_ops->xstats_get_by_id)(dev, - NULL, - values ? values + count : NULL, - (n > count) ? n - count : 0); - - if (xcount < 0) - return xcount; - /* implemented by the driver */ - } else if (dev->dev_ops->xstats_get != NULL) { - /* Retrieve the xstats from the driver at the end of the - * xstats struct. Retrieve all xstats. - * Compatibility for PMD without xstats_get_by_ids - */ - unsigned int size = (n > count) ? n - count : 1; - struct rte_eth_xstat xstats[size]; - - xcount = (*dev->dev_ops->xstats_get)(dev, - values ? xstats : NULL, size); - - if (xcount < 0) - return xcount; + unsigned int no_basic_stat_requested = 1; + unsigned int num_xstats_filled; + uint16_t expected_entries; + struct rte_eth_dev *dev; + unsigned int i; + int ret; - if (values != NULL) - for (i = 0 ; i < (unsigned int)xcount; i++) - values[i + count] = xstats[i].value; - } + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + expected_entries = get_xstats_count(port_id); + struct rte_eth_xstat xstats[expected_entries]; + dev = &rte_eth_devices[port_id]; - if (n < count + xcount || values == NULL) - return count + xcount; + /* Return max number of stats if no ids given */ + if (!ids) { + if (!values) + return expected_entries; + else if (values && size < expected_entries) + return expected_entries; + } - /* now fill the xstats structure */ - count = 0; - rte_eth_stats_get(port_id, ð_stats); + if (ids && !values) + return -EINVAL; - /* global stats */ - for (i = 0; i < RTE_NB_STATS; i++) { - stats_ptr = RTE_PTR_ADD(ð_stats, - rte_stats_strings[i].offset); - val = *stats_ptr; - values[count++] = val; - } + if (ids && dev->dev_ops->xstats_get_by_id != NULL && size) { + unsigned int basic_count = get_xstats_basic_count(dev); + uint64_t ids_copy[size]; - /* per-rxq stats */ - for (q = 0; q < nb_rxqs; q++) { - for (i = 0; i < RTE_NB_RXQ_STATS; i++) { - stats_ptr = RTE_PTR_ADD(ð_stats, - rte_rxq_stats_strings[i].offset + - q * sizeof(uint64_t)); - val = *stats_ptr; - values[count++] = val; + for (i = 0; i < size; i++) { + if (ids[i] < basic_count) { + no_basic_stat_requested = 0; + break; } - } - /* per-txq stats */ - for (q = 0; q < nb_txqs; q++) { - for (i = 0; i < RTE_NB_TXQ_STATS; i++) { - stats_ptr = RTE_PTR_ADD(ð_stats, - rte_txq_stats_strings[i].offset + - q * sizeof(uint64_t)); - val = *stats_ptr; - values[count++] = val; - } + /* + * Convert ids to xstats ids that PMD knows. + * ids known by user are basic + extended stats. 
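A sketch, not part of this patch, of the usual enumerate-then-fetch pattern these xstats functions serve; the first call with a NULL array only queries the required size:

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <rte_ethdev.h>

static void
dump_xstats(uint16_t port_id)
{
        struct rte_eth_xstat *xstats = NULL;
        struct rte_eth_xstat_name *names = NULL;
        int i, len;

        len = rte_eth_xstats_get(port_id, NULL, 0);
        if (len <= 0)
                return;

        xstats = calloc(len, sizeof(*xstats));
        names = calloc(len, sizeof(*names));
        if (xstats == NULL || names == NULL)
                goto out;

        if (rte_eth_xstats_get(port_id, xstats, len) != len ||
            rte_eth_xstats_get_names(port_id, names, len) != len)
                goto out;

        for (i = 0; i < len; i++)
                printf("%s: %" PRIu64 "\n", names[i].name, xstats[i].value);
out:
        free(xstats);
        free(names);
}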
+ */ + ids_copy[i] = ids[i] - basic_count; } - return count + xcount; + if (no_basic_stat_requested) + return (*dev->dev_ops->xstats_get_by_id)(dev, ids_copy, + values, size); } - /* Need only xstats given by IDS array */ - else { - uint16_t i, size; - uint64_t *values_copy; - - size = rte_eth_xstats_get_by_id(port_id, NULL, NULL, 0); - values_copy = malloc(sizeof(*values_copy) * size); - if (!values_copy) { - RTE_PMD_DEBUG_TRACE( - "ERROR: can't allocate memory for values_copy\n"); - return -1; - } + /* Fill the xstats structure */ + ret = rte_eth_xstats_get(port_id, xstats, expected_entries); + if (ret < 0) + return ret; + num_xstats_filled = (unsigned int)ret; - rte_eth_xstats_get_by_id(port_id, NULL, values_copy, size); + /* Return all stats */ + if (!ids) { + for (i = 0; i < num_xstats_filled; i++) + values[i] = xstats[i].value; + return expected_entries; + } - for (i = 0; i < n; i++) { - if (ids[i] >= size) { - RTE_PMD_DEBUG_TRACE( - "ERROR: id value isn't valid\n"); - return -1; - } - values[i] = values_copy[ids[i]]; + /* Filter stats */ + for (i = 0; i < size; i++) { + if (ids[i] >= expected_entries) { + RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n"); + return -1; } - free(values_copy); - return n; + values[i] = xstats[ids[i]].value; } + return size; } int -rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, +rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats, unsigned int n) { struct rte_eth_stats eth_stats; @@ -1819,7 +1952,7 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, /* reset ethdev extended statistics */ void -rte_eth_xstats_reset(uint8_t port_id) +rte_eth_xstats_reset(uint16_t port_id) { struct rte_eth_dev *dev; @@ -1837,7 +1970,7 @@ rte_eth_xstats_reset(uint8_t port_id) } static int -set_queue_stats_mapping(uint8_t port_id, uint16_t queue_id, uint8_t stat_idx, +set_queue_stats_mapping(uint16_t port_id, uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx) { struct rte_eth_dev *dev; @@ -1853,7 +1986,7 @@ set_queue_stats_mapping(uint8_t port_id, uint16_t queue_id, uint8_t stat_idx, int -rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, uint16_t tx_queue_id, +rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, uint16_t tx_queue_id, uint8_t stat_idx) { return set_queue_stats_mapping(port_id, tx_queue_id, stat_idx, @@ -1862,7 +1995,7 @@ rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, uint16_t tx_queue_id, int -rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id, +rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id, uint16_t rx_queue_id, uint8_t stat_idx) { return set_queue_stats_mapping(port_id, rx_queue_id, stat_idx, @@ -1870,7 +2003,7 @@ rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id, } int -rte_eth_dev_fw_version_get(uint8_t port_id, char *fw_version, size_t fw_size) +rte_eth_dev_fw_version_get(uint16_t port_id, char *fw_version, size_t fw_size) { struct rte_eth_dev *dev; @@ -1882,7 +2015,7 @@ rte_eth_dev_fw_version_get(uint8_t port_id, char *fw_version, size_t fw_size) } void -rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info) +rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info) { struct rte_eth_dev *dev; const struct rte_eth_desc_lim lim = { @@ -1906,7 +2039,7 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info) } int -rte_eth_dev_get_supported_ptypes(uint8_t port_id, uint32_t ptype_mask, +rte_eth_dev_get_supported_ptypes(uint16_t port_id, uint32_t ptype_mask, uint32_t 
*ptypes, int num) { int i, j; @@ -1932,7 +2065,7 @@ rte_eth_dev_get_supported_ptypes(uint8_t port_id, uint32_t ptype_mask, } void -rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr) +rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr) { struct rte_eth_dev *dev; @@ -1943,7 +2076,7 @@ rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr) int -rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu) +rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu) { struct rte_eth_dev *dev; @@ -1955,7 +2088,7 @@ rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu) } int -rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu) +rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu) { int ret; struct rte_eth_dev *dev; @@ -1972,14 +2105,15 @@ rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu) } int -rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on) +rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on) { struct rte_eth_dev *dev; int ret; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); dev = &rte_eth_devices[port_id]; - if (!(dev->data->dev_conf.rxmode.hw_vlan_filter)) { + if (!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER)) { RTE_PMD_DEBUG_TRACE("port %d: vlan-filtering disabled\n", port_id); return -ENOSYS; } @@ -2011,7 +2145,8 @@ rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on) } int -rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id, int on) +rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id, + int on) { struct rte_eth_dev *dev; @@ -2029,7 +2164,7 @@ rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id, int o } int -rte_eth_dev_set_vlan_ether_type(uint8_t port_id, +rte_eth_dev_set_vlan_ether_type(uint16_t port_id, enum rte_vlan_type vlan_type, uint16_t tpid) { @@ -2043,35 +2178,57 @@ rte_eth_dev_set_vlan_ether_type(uint8_t port_id, } int -rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask) +rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask) { struct rte_eth_dev *dev; int ret = 0; int mask = 0; int cur, org = 0; + uint64_t orig_offloads; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); dev = &rte_eth_devices[port_id]; + /* save original values in case of failure */ + orig_offloads = dev->data->dev_conf.rxmode.offloads; + /*check which option changed by application*/ cur = !!(offload_mask & ETH_VLAN_STRIP_OFFLOAD); - org = !!(dev->data->dev_conf.rxmode.hw_vlan_strip); + org = !!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_STRIP); if (cur != org) { - dev->data->dev_conf.rxmode.hw_vlan_strip = (uint8_t)cur; + if (cur) + dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_VLAN_STRIP; + else + dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_VLAN_STRIP; mask |= ETH_VLAN_STRIP_MASK; } cur = !!(offload_mask & ETH_VLAN_FILTER_OFFLOAD); - org = !!(dev->data->dev_conf.rxmode.hw_vlan_filter); + org = !!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER); if (cur != org) { - dev->data->dev_conf.rxmode.hw_vlan_filter = (uint8_t)cur; + if (cur) + dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_VLAN_FILTER; + else + dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_VLAN_FILTER; mask |= ETH_VLAN_FILTER_MASK; } cur = !!(offload_mask & ETH_VLAN_EXTEND_OFFLOAD); - org = !!(dev->data->dev_conf.rxmode.hw_vlan_extend); + org = !!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_EXTEND); if (cur != org) { - dev->data->dev_conf.rxmode.hw_vlan_extend = 
(uint8_t)cur; + if (cur) + dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_VLAN_EXTEND; + else + dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_VLAN_EXTEND; mask |= ETH_VLAN_EXTEND_MASK; } @@ -2080,13 +2237,26 @@ rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask) return ret; RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_offload_set, -ENOTSUP); - (*dev->dev_ops->vlan_offload_set)(dev, mask); + + /* + * Convert to the offload bitfield API just in case the underlying PMD + * still supporting it. + */ + rte_eth_convert_rx_offloads(dev->data->dev_conf.rxmode.offloads, + &dev->data->dev_conf.rxmode); + ret = (*dev->dev_ops->vlan_offload_set)(dev, mask); + if (ret) { + /* hit an error restore original values */ + dev->data->dev_conf.rxmode.offloads = orig_offloads; + rte_eth_convert_rx_offloads(dev->data->dev_conf.rxmode.offloads, + &dev->data->dev_conf.rxmode); + } return ret; } int -rte_eth_dev_get_vlan_offload(uint8_t port_id) +rte_eth_dev_get_vlan_offload(uint16_t port_id) { struct rte_eth_dev *dev; int ret = 0; @@ -2094,20 +2264,23 @@ rte_eth_dev_get_vlan_offload(uint8_t port_id) RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); dev = &rte_eth_devices[port_id]; - if (dev->data->dev_conf.rxmode.hw_vlan_strip) + if (dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_STRIP) ret |= ETH_VLAN_STRIP_OFFLOAD; - if (dev->data->dev_conf.rxmode.hw_vlan_filter) + if (dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER) ret |= ETH_VLAN_FILTER_OFFLOAD; - if (dev->data->dev_conf.rxmode.hw_vlan_extend) + if (dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_EXTEND) ret |= ETH_VLAN_EXTEND_OFFLOAD; return ret; } int -rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on) +rte_eth_dev_set_vlan_pvid(uint16_t port_id, uint16_t pvid, int on) { struct rte_eth_dev *dev; @@ -2120,7 +2293,7 @@ rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on) } int -rte_eth_dev_flow_ctrl_get(uint8_t port_id, struct rte_eth_fc_conf *fc_conf) +rte_eth_dev_flow_ctrl_get(uint16_t port_id, struct rte_eth_fc_conf *fc_conf) { struct rte_eth_dev *dev; @@ -2132,7 +2305,7 @@ rte_eth_dev_flow_ctrl_get(uint8_t port_id, struct rte_eth_fc_conf *fc_conf) } int -rte_eth_dev_flow_ctrl_set(uint8_t port_id, struct rte_eth_fc_conf *fc_conf) +rte_eth_dev_flow_ctrl_set(uint16_t port_id, struct rte_eth_fc_conf *fc_conf) { struct rte_eth_dev *dev; @@ -2148,7 +2321,8 @@ rte_eth_dev_flow_ctrl_set(uint8_t port_id, struct rte_eth_fc_conf *fc_conf) } int -rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id, struct rte_eth_pfc_conf *pfc_conf) +rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id, + struct rte_eth_pfc_conf *pfc_conf) { struct rte_eth_dev *dev; @@ -2214,7 +2388,7 @@ rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf, } int -rte_eth_dev_rss_reta_update(uint8_t port_id, +rte_eth_dev_rss_reta_update(uint16_t port_id, struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) { @@ -2240,7 +2414,7 @@ rte_eth_dev_rss_reta_update(uint8_t port_id, } int -rte_eth_dev_rss_reta_query(uint8_t port_id, +rte_eth_dev_rss_reta_query(uint16_t port_id, struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) { @@ -2260,26 +2434,19 @@ rte_eth_dev_rss_reta_query(uint8_t port_id, } int -rte_eth_dev_rss_hash_update(uint8_t port_id, struct rte_eth_rss_conf *rss_conf) +rte_eth_dev_rss_hash_update(uint16_t port_id, + struct rte_eth_rss_conf *rss_conf) { struct rte_eth_dev *dev; - uint16_t rss_hash_protos; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - 
rss_hash_protos = rss_conf->rss_hf; - if ((rss_hash_protos != 0) && - ((rss_hash_protos & ETH_RSS_PROTO_MASK) == 0)) { - RTE_PMD_DEBUG_TRACE("Invalid rss_hash_protos=0x%x\n", - rss_hash_protos); - return -EINVAL; - } dev = &rte_eth_devices[port_id]; RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_update, -ENOTSUP); return (*dev->dev_ops->rss_hash_update)(dev, rss_conf); } int -rte_eth_dev_rss_hash_conf_get(uint8_t port_id, +rte_eth_dev_rss_hash_conf_get(uint16_t port_id, struct rte_eth_rss_conf *rss_conf) { struct rte_eth_dev *dev; @@ -2291,7 +2458,7 @@ rte_eth_dev_rss_hash_conf_get(uint8_t port_id, } int -rte_eth_dev_udp_tunnel_port_add(uint8_t port_id, +rte_eth_dev_udp_tunnel_port_add(uint16_t port_id, struct rte_eth_udp_tunnel *udp_tunnel) { struct rte_eth_dev *dev; @@ -2313,7 +2480,7 @@ rte_eth_dev_udp_tunnel_port_add(uint8_t port_id, } int -rte_eth_dev_udp_tunnel_port_delete(uint8_t port_id, +rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id, struct rte_eth_udp_tunnel *udp_tunnel) { struct rte_eth_dev *dev; @@ -2336,7 +2503,7 @@ rte_eth_dev_udp_tunnel_port_delete(uint8_t port_id, } int -rte_eth_led_on(uint8_t port_id) +rte_eth_led_on(uint16_t port_id) { struct rte_eth_dev *dev; @@ -2347,7 +2514,7 @@ rte_eth_led_on(uint8_t port_id) } int -rte_eth_led_off(uint8_t port_id) +rte_eth_led_off(uint16_t port_id) { struct rte_eth_dev *dev; @@ -2362,7 +2529,7 @@ rte_eth_led_off(uint8_t port_id) * an empty spot. */ static int -get_mac_addr_index(uint8_t port_id, const struct ether_addr *addr) +get_mac_addr_index(uint16_t port_id, const struct ether_addr *addr) { struct rte_eth_dev_info dev_info; struct rte_eth_dev *dev = &rte_eth_devices[port_id]; @@ -2381,7 +2548,7 @@ get_mac_addr_index(uint8_t port_id, const struct ether_addr *addr) static const struct ether_addr null_mac_addr; int -rte_eth_dev_mac_addr_add(uint8_t port_id, struct ether_addr *addr, +rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *addr, uint32_t pool) { struct rte_eth_dev *dev; @@ -2434,7 +2601,7 @@ rte_eth_dev_mac_addr_add(uint8_t port_id, struct ether_addr *addr, } int -rte_eth_dev_mac_addr_remove(uint8_t port_id, struct ether_addr *addr) +rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *addr) { struct rte_eth_dev *dev; int index; @@ -2463,7 +2630,7 @@ rte_eth_dev_mac_addr_remove(uint8_t port_id, struct ether_addr *addr) } int -rte_eth_dev_default_mac_addr_set(uint8_t port_id, struct ether_addr *addr) +rte_eth_dev_default_mac_addr_set(uint16_t port_id, struct ether_addr *addr) { struct rte_eth_dev *dev; @@ -2489,7 +2656,7 @@ rte_eth_dev_default_mac_addr_set(uint8_t port_id, struct ether_addr *addr) * an empty spot. 
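A small sketch, not from this patch, of one of the MAC address helpers touched here; the locally administered address and the pool index 0 (only meaningful with VMDq) are illustrative assumptions:

#include <rte_ether.h>
#include <rte_ethdev.h>

static int
add_secondary_mac(uint16_t port_id)
{
        struct ether_addr addr = {
                .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }
        };

        return rte_eth_dev_mac_addr_add(port_id, &addr, 0);
}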
*/ static int -get_hash_mac_addr_index(uint8_t port_id, const struct ether_addr *addr) +get_hash_mac_addr_index(uint16_t port_id, const struct ether_addr *addr) { struct rte_eth_dev_info dev_info; struct rte_eth_dev *dev = &rte_eth_devices[port_id]; @@ -2508,7 +2675,7 @@ get_hash_mac_addr_index(uint8_t port_id, const struct ether_addr *addr) } int -rte_eth_dev_uc_hash_table_set(uint8_t port_id, struct ether_addr *addr, +rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr, uint8_t on) { int index; @@ -2560,7 +2727,7 @@ rte_eth_dev_uc_hash_table_set(uint8_t port_id, struct ether_addr *addr, } int -rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on) +rte_eth_dev_uc_all_hash_table_set(uint16_t port_id, uint8_t on) { struct rte_eth_dev *dev; @@ -2572,7 +2739,7 @@ rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on) return (*dev->dev_ops->uc_all_hash_table_set)(dev, on); } -int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, +int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx, uint16_t tx_rate) { struct rte_eth_dev *dev; @@ -2603,7 +2770,7 @@ int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, } int -rte_eth_mirror_rule_set(uint8_t port_id, +rte_eth_mirror_rule_set(uint16_t port_id, struct rte_eth_mirror_conf *mirror_conf, uint8_t rule_id, uint8_t on) { @@ -2641,7 +2808,7 @@ rte_eth_mirror_rule_set(uint8_t port_id, } int -rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id) +rte_eth_mirror_rule_reset(uint16_t port_id, uint8_t rule_id) { struct rte_eth_dev *dev; @@ -2654,7 +2821,7 @@ rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id) } int -rte_eth_dev_callback_register(uint8_t port_id, +rte_eth_dev_callback_register(uint16_t port_id, enum rte_eth_event_type event, rte_eth_dev_cb_fn cb_fn, void *cb_arg) { @@ -2694,7 +2861,7 @@ rte_eth_dev_callback_register(uint8_t port_id, } int -rte_eth_dev_callback_unregister(uint8_t port_id, +rte_eth_dev_callback_unregister(uint16_t port_id, enum rte_eth_event_type event, rte_eth_dev_cb_fn cb_fn, void *cb_arg) { @@ -2766,7 +2933,7 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev, } int -rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data) +rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data) { uint32_t vec; struct rte_eth_dev *dev; @@ -2818,16 +2985,11 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, if (mz) return mz; - if (rte_xen_dom0_supported()) - return rte_memzone_reserve_bounded(z_name, size, socket_id, - 0, align, RTE_PGSIZE_2M); - else - return rte_memzone_reserve_aligned(z_name, size, socket_id, - 0, align); + return rte_memzone_reserve_aligned(z_name, size, socket_id, 0, align); } int -rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id, +rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id, int epfd, int op, void *data) { uint32_t vec; @@ -2867,7 +3029,7 @@ rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id, } int -rte_eth_dev_rx_intr_enable(uint8_t port_id, +rte_eth_dev_rx_intr_enable(uint16_t port_id, uint16_t queue_id) { struct rte_eth_dev *dev; @@ -2881,7 +3043,7 @@ rte_eth_dev_rx_intr_enable(uint8_t port_id, } int -rte_eth_dev_rx_intr_disable(uint8_t port_id, +rte_eth_dev_rx_intr_disable(uint16_t port_id, uint16_t queue_id) { struct rte_eth_dev *dev; @@ -2896,7 +3058,8 @@ rte_eth_dev_rx_intr_disable(uint8_t port_id, int -rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type) 
+rte_eth_dev_filter_supported(uint16_t port_id, + enum rte_filter_type filter_type) { struct rte_eth_dev *dev; @@ -2909,7 +3072,7 @@ rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type) } int -rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type, +rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type, enum rte_filter_op filter_op, void *arg) { struct rte_eth_dev *dev; @@ -2922,7 +3085,7 @@ rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type, } void * -rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id, +rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id, rte_rx_callback_fn fn, void *user_param) { #ifndef RTE_ETHDEV_RXTX_CALLBACKS @@ -2964,7 +3127,7 @@ rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id, } void * -rte_eth_add_first_rx_callback(uint8_t port_id, uint16_t queue_id, +rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id, rte_rx_callback_fn fn, void *user_param) { #ifndef RTE_ETHDEV_RXTX_CALLBACKS @@ -2999,7 +3162,7 @@ rte_eth_add_first_rx_callback(uint8_t port_id, uint16_t queue_id, } void * -rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id, +rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id, rte_tx_callback_fn fn, void *user_param) { #ifndef RTE_ETHDEV_RXTX_CALLBACKS @@ -3042,7 +3205,7 @@ rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id, } int -rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id, +rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id, struct rte_eth_rxtx_callback *user_cb) { #ifndef RTE_ETHDEV_RXTX_CALLBACKS @@ -3076,7 +3239,7 @@ rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id, } int -rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id, +rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id, struct rte_eth_rxtx_callback *user_cb) { #ifndef RTE_ETHDEV_RXTX_CALLBACKS @@ -3110,7 +3273,7 @@ rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id, } int -rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id, +rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id, struct rte_eth_rxq_info *qinfo) { struct rte_eth_dev *dev; @@ -3134,7 +3297,7 @@ rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id, } int -rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id, +rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id, struct rte_eth_txq_info *qinfo) { struct rte_eth_dev *dev; @@ -3158,7 +3321,7 @@ rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id, } int -rte_eth_dev_set_mc_addr_list(uint8_t port_id, +rte_eth_dev_set_mc_addr_list(uint16_t port_id, struct ether_addr *mc_addr_set, uint32_t nb_mc_addr) { @@ -3172,7 +3335,7 @@ rte_eth_dev_set_mc_addr_list(uint8_t port_id, } int -rte_eth_timesync_enable(uint8_t port_id) +rte_eth_timesync_enable(uint16_t port_id) { struct rte_eth_dev *dev; @@ -3184,7 +3347,7 @@ rte_eth_timesync_enable(uint8_t port_id) } int -rte_eth_timesync_disable(uint8_t port_id) +rte_eth_timesync_disable(uint16_t port_id) { struct rte_eth_dev *dev; @@ -3196,7 +3359,7 @@ rte_eth_timesync_disable(uint8_t port_id) } int -rte_eth_timesync_read_rx_timestamp(uint8_t port_id, struct timespec *timestamp, +rte_eth_timesync_read_rx_timestamp(uint16_t port_id, struct timespec *timestamp, uint32_t flags) { struct rte_eth_dev *dev; @@ -3209,7 +3372,8 @@ rte_eth_timesync_read_rx_timestamp(uint8_t port_id, struct timespec *timestamp, } int -rte_eth_timesync_read_tx_timestamp(uint8_t port_id, struct timespec 
*timestamp) +rte_eth_timesync_read_tx_timestamp(uint16_t port_id, + struct timespec *timestamp) { struct rte_eth_dev *dev; @@ -3221,7 +3385,7 @@ rte_eth_timesync_read_tx_timestamp(uint8_t port_id, struct timespec *timestamp) } int -rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta) +rte_eth_timesync_adjust_time(uint16_t port_id, int64_t delta) { struct rte_eth_dev *dev; @@ -3233,7 +3397,7 @@ rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta) } int -rte_eth_timesync_read_time(uint8_t port_id, struct timespec *timestamp) +rte_eth_timesync_read_time(uint16_t port_id, struct timespec *timestamp) { struct rte_eth_dev *dev; @@ -3245,7 +3409,7 @@ rte_eth_timesync_read_time(uint8_t port_id, struct timespec *timestamp) } int -rte_eth_timesync_write_time(uint8_t port_id, const struct timespec *timestamp) +rte_eth_timesync_write_time(uint16_t port_id, const struct timespec *timestamp) { struct rte_eth_dev *dev; @@ -3257,7 +3421,7 @@ rte_eth_timesync_write_time(uint8_t port_id, const struct timespec *timestamp) } int -rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info) +rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info) { struct rte_eth_dev *dev; @@ -3269,7 +3433,7 @@ rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info) } int -rte_eth_dev_get_eeprom_length(uint8_t port_id) +rte_eth_dev_get_eeprom_length(uint16_t port_id) { struct rte_eth_dev *dev; @@ -3281,7 +3445,7 @@ rte_eth_dev_get_eeprom_length(uint8_t port_id) } int -rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info) +rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info) { struct rte_eth_dev *dev; @@ -3293,7 +3457,7 @@ rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info) } int -rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info) +rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info) { struct rte_eth_dev *dev; @@ -3305,7 +3469,7 @@ rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info) } int -rte_eth_dev_get_dcb_info(uint8_t port_id, +rte_eth_dev_get_dcb_info(uint16_t port_id, struct rte_eth_dcb_info *dcb_info) { struct rte_eth_dev *dev; @@ -3320,7 +3484,7 @@ rte_eth_dev_get_dcb_info(uint8_t port_id, } int -rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id, +rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id, struct rte_eth_l2_tunnel_conf *l2_tunnel) { struct rte_eth_dev *dev; @@ -3343,7 +3507,7 @@ rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id, } int -rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id, +rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id, struct rte_eth_l2_tunnel_conf *l2_tunnel, uint32_t mask, uint8_t en) @@ -3387,7 +3551,7 @@ rte_eth_dev_adjust_nb_desc(uint16_t *nb_desc, } int -rte_eth_dev_adjust_nb_rx_tx_desc(uint8_t port_id, +rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id, uint16_t *nb_rx_desc, uint16_t *nb_tx_desc) { @@ -3409,3 +3573,21 @@ rte_eth_dev_adjust_nb_rx_tx_desc(uint8_t port_id, return 0; } + +int +rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (pool == NULL) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + + if (*dev->dev_ops->pool_ops_supported == NULL) + return 1; /* all pools are supported */ + + return (*dev->dev_ops->pool_ops_supported)(dev, pool); +} diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 0adf3274..18e474db 100644 --- 
a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -180,6 +180,8 @@ extern "C" { #include <rte_dev.h> #include <rte_devargs.h> #include <rte_errno.h> +#include <rte_common.h> + #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -348,7 +350,18 @@ struct rte_eth_rxmode { enum rte_eth_rx_mq_mode mq_mode; uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */ uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/ + /** + * Per-port Rx offloads to be set using DEV_RX_OFFLOAD_* flags. + * Only offloads set on rx_offload_capa field on rte_eth_dev_info + * structure are allowed to be set. + */ + uint64_t offloads; __extension__ + /** + * Below bitfield API is obsolete. Application should + * enable per-port offloads using the offload field + * above. + */ uint16_t header_split : 1, /**< Header Split enable. */ hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */ hw_vlan_filter : 1, /**< VLAN filter enable. */ @@ -357,7 +370,19 @@ struct rte_eth_rxmode { jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */ hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */ enable_scatter : 1, /**< Enable scatter packets rx handler */ - enable_lro : 1; /**< Enable LRO */ + enable_lro : 1, /**< Enable LRO */ + hw_timestamp : 1, /**< Enable HW timestamp */ + security : 1, /**< Enable rte_security offloads */ + /** + * When set the offload bitfield should be ignored. + * Instead per-port Rx offloads should be set on offloads + * field above. + * Per-queue offloads shuold be set on rte_eth_rxq_conf + * structure. + * This bit is temporary till rxmode bitfield offloads API will + * be deprecated. + */ + ignore_offload_bitfield : 1; }; /** @@ -671,6 +696,12 @@ struct rte_eth_vmdq_rx_conf { */ struct rte_eth_txmode { enum rte_eth_tx_mq_mode mq_mode; /**< TX multi-queues mode. */ + /** + * Per-port Tx offloads to be set using DEV_TX_OFFLOAD_* flags. + * Only offloads set on tx_offload_capa field on rte_eth_dev_info + * structure are allowed to be set. + */ + uint64_t offloads; /* For i40e specifically */ uint16_t pvid; @@ -691,6 +722,12 @@ struct rte_eth_rxconf { uint16_t rx_free_thresh; /**< Drives the freeing of RX descriptors. */ uint8_t rx_drop_en; /**< Drop packets if no descriptors are available. */ uint8_t rx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */ + /** + * Per-queue Rx offloads to be set using DEV_RX_OFFLOAD_* flags. + * Only offloads set on rx_queue_offload_capa or rx_offload_capa + * fields on rte_eth_dev_info structure are allowed to be set. + */ + uint64_t offloads; }; #define ETH_TXQ_FLAGS_NOMULTSEGS 0x0001 /**< nb_segs=1 for all mbufs */ @@ -707,6 +744,15 @@ struct rte_eth_rxconf { (ETH_TXQ_FLAGS_NOXSUMSCTP | ETH_TXQ_FLAGS_NOXSUMUDP | \ ETH_TXQ_FLAGS_NOXSUMTCP) /** + * When set the txq_flags should be ignored, + * instead per-queue Tx offloads will be set on offloads field + * located on rte_eth_txq_conf struct. + * This flag is temporary till the rte_eth_txq_conf.txq_flags + * API will be deprecated. + */ +#define ETH_TXQ_FLAGS_IGNORE 0x8000 + +/** * A structure used to configure a TX ring of an Ethernet port. */ struct rte_eth_txconf { @@ -717,6 +763,12 @@ struct rte_eth_txconf { uint32_t txq_flags; /**< Set flags for the Tx queue */ uint8_t tx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */ + /** + * Per-queue Tx offloads to be set using DEV_TX_OFFLOAD_* flags. 
+ * Only offloads set on tx_queue_offload_capa or tx_offload_capa + * fields on rte_eth_dev_info structure are allowed to be set. + */ + uint64_t offloads; }; /** @@ -874,7 +926,7 @@ struct rte_eth_conf { /**< Port dcb RX configuration. */ struct rte_eth_vmdq_rx_conf vmdq_rx_conf; /**< Port vmdq RX configuration. */ - } rx_adv_conf; /**< Port RX filtering configuration (union). */ + } rx_adv_conf; /**< Port RX filtering configuration. */ union { struct rte_eth_vmdq_dcb_tx_conf vmdq_dcb_tx_conf; /**< Port vmdq+dcb TX configuration. */ @@ -907,6 +959,20 @@ struct rte_eth_conf { #define DEV_RX_OFFLOAD_QINQ_STRIP 0x00000020 #define DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000040 #define DEV_RX_OFFLOAD_MACSEC_STRIP 0x00000080 +#define DEV_RX_OFFLOAD_HEADER_SPLIT 0x00000100 +#define DEV_RX_OFFLOAD_VLAN_FILTER 0x00000200 +#define DEV_RX_OFFLOAD_VLAN_EXTEND 0x00000400 +#define DEV_RX_OFFLOAD_JUMBO_FRAME 0x00000800 +#define DEV_RX_OFFLOAD_CRC_STRIP 0x00001000 +#define DEV_RX_OFFLOAD_SCATTER 0x00002000 +#define DEV_RX_OFFLOAD_TIMESTAMP 0x00004000 +#define DEV_RX_OFFLOAD_SECURITY 0x00008000 +#define DEV_RX_OFFLOAD_CHECKSUM (DEV_RX_OFFLOAD_IPV4_CKSUM | \ + DEV_RX_OFFLOAD_UDP_CKSUM | \ + DEV_RX_OFFLOAD_TCP_CKSUM) +#define DEV_RX_OFFLOAD_VLAN (DEV_RX_OFFLOAD_VLAN_STRIP | \ + DEV_RX_OFFLOAD_VLAN_FILTER | \ + DEV_RX_OFFLOAD_VLAN_EXTEND) /** * TX offload capabilities of a device. @@ -929,6 +995,14 @@ struct rte_eth_conf { /**< Multiple threads can invoke rte_eth_tx_burst() concurrently on the same * tx queue without SW lock. */ +#define DEV_TX_OFFLOAD_MULTI_SEGS 0x00008000 +/**< Device supports multi segment send. */ +#define DEV_TX_OFFLOAD_MBUF_FAST_FREE 0x00010000 +/**< Device supports optimization for fast release of mbufs. + * When set application must guarantee that per-queue all mbufs comes from + * the same mempool and has refcnt = 1. + */ +#define DEV_TX_OFFLOAD_SECURITY 0x00020000 struct rte_pci_device; @@ -949,8 +1023,14 @@ struct rte_eth_dev_info { /** Maximum number of hash MAC addresses for MTA and UTA. */ uint16_t max_vfs; /**< Maximum number of VFs. */ uint16_t max_vmdq_pools; /**< Maximum number of VMDq pools. */ - uint32_t rx_offload_capa; /**< Device RX offload capabilities. */ - uint32_t tx_offload_capa; /**< Device TX offload capabilities. */ + uint64_t rx_offload_capa; + /**< Device per port RX offload capabilities. */ + uint64_t tx_offload_capa; + /**< Device per port TX offload capabilities. */ + uint64_t rx_queue_offload_capa; + /**< Device per queue RX offload capabilities. */ + uint64_t tx_queue_offload_capa; + /**< Device per queue TX offload capabilities. */ uint16_t reta_size; /**< Device redirection table size, the total number of entries. */ uint8_t hash_key_size; /**< Hash key size in bytes */ @@ -1076,8 +1156,6 @@ TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback); } \ } while (0) -#define RTE_ETH_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device) - /** * l2 tunnel configuration. */ @@ -1115,6 +1193,9 @@ typedef int (*eth_dev_set_link_down_t)(struct rte_eth_dev *dev); typedef void (*eth_dev_close_t)(struct rte_eth_dev *dev); /**< @internal Function used to close a configured Ethernet device. */ +typedef int (*eth_dev_reset_t)(struct rte_eth_dev *dev); +/** <@internal Function used to reset a configured Ethernet device. */ + typedef void (*eth_promiscuous_enable_t)(struct rte_eth_dev *dev); /**< @internal Function used to enable the RX promiscuous mode of an Ethernet device. 
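Since offload capabilities are now reported both per port and per queue, an application is expected to mask its requests against them. A minimal sketch, not part of this patch, with the wanted set supplied by the caller:

#include <rte_ethdev.h>

static uint64_t
usable_tx_offloads(uint16_t port_id, uint64_t wanted)
{
        struct rte_eth_dev_info dev_info;

        rte_eth_dev_info_get(port_id, &dev_info);

        /* tx_offload_capa covers the whole port; tx_queue_offload_capa
         * reports what may be set on individual queues. */
        return wanted & dev_info.tx_offload_capa;
}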
*/ @@ -1131,7 +1212,7 @@ typedef int (*eth_link_update_t)(struct rte_eth_dev *dev, int wait_to_complete); /**< @internal Get link speed, duplex mode and state (up/down) of an Ethernet device. */ -typedef void (*eth_stats_get_t)(struct rte_eth_dev *dev, +typedef int (*eth_stats_get_t)(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats); /**< @internal Get global I/O statistics of an Ethernet device. */ @@ -1245,7 +1326,7 @@ typedef int (*vlan_tpid_set_t)(struct rte_eth_dev *dev, enum rte_vlan_type type, uint16_t tpid); /**< @internal set the outer/inner VLAN-TPID by an Ethernet device. */ -typedef void (*vlan_offload_set_t)(struct rte_eth_dev *dev, int mask); +typedef int (*vlan_offload_set_t)(struct rte_eth_dev *dev, int mask); /**< @internal set VLAN offload function by an Ethernet device. */ typedef int (*vlan_pvid_set_t)(struct rte_eth_dev *dev, @@ -1421,10 +1502,17 @@ typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev, typedef int (*eth_tm_ops_get_t)(struct rte_eth_dev *dev, void *ops); /**< @internal Get Traffic Management (TM) operations on an Ethernet device */ +typedef int (*eth_mtr_ops_get_t)(struct rte_eth_dev *dev, void *ops); +/**< @internal Get Trafffic Metering and Policing (MTR) operations */ + typedef int (*eth_get_dcb_info)(struct rte_eth_dev *dev, struct rte_eth_dcb_info *dcb_info); /**< @internal Get dcb information on an Ethernet device */ +typedef int (*eth_pool_ops_supported_t)(struct rte_eth_dev *dev, + const char *pool); +/**< @internal Test if a port supports specific mempool ops */ + /** * @internal A structure containing the functions exported by an Ethernet driver. */ @@ -1435,6 +1523,7 @@ struct eth_dev_ops { eth_dev_set_link_up_t dev_set_link_up; /**< Device link up. */ eth_dev_set_link_down_t dev_set_link_down; /**< Device link down. */ eth_dev_close_t dev_close; /**< Close device. */ + eth_dev_reset_t dev_reset; /**< Reset device. */ eth_link_update_t link_update; /**< Get device link state. */ eth_promiscuous_enable_t promiscuous_enable; /**< Promiscuous ON. */ @@ -1544,6 +1633,12 @@ struct eth_dev_ops { eth_tm_ops_get_t tm_ops_get; /**< Get Traffic Management (TM) operations. */ + + eth_mtr_ops_get_t mtr_ops_get; + /**< Get Traffic Metering and Policing (MTR) operations. */ + + eth_pool_ops_supported_t pool_ops_supported; + /**< Test if a port supports specific mempool ops */ }; /** @@ -1568,7 +1663,7 @@ struct eth_dev_ops { * @return * The number of packets returned to the user. */ -typedef uint16_t (*rte_rx_callback_fn)(uint8_t port, uint16_t queue, +typedef uint16_t (*rte_rx_callback_fn)(uint16_t port, uint16_t queue, struct rte_mbuf *pkts[], uint16_t nb_pkts, uint16_t max_pkts, void *user_param); @@ -1592,7 +1687,7 @@ typedef uint16_t (*rte_rx_callback_fn)(uint8_t port, uint16_t queue, * @return * The number of packets to be written to the NIC. 
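An illustrative use, not from this patch, of the new pool_ops_supported hook via its public wrapper; "ring_mp_mc" is the default mempool handler name, and treating a negative return as "not supported" is the assumption made here:

#include <rte_ethdev.h>

static const char *
choose_mempool_ops(uint16_t port_id)
{
        if (rte_eth_dev_pool_ops_supported(port_id, "ring_mp_mc") < 0)
                return NULL; /* PMD needs a different mempool handler */

        return "ring_mp_mc";
}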
*/ -typedef uint16_t (*rte_tx_callback_fn)(uint8_t port, uint16_t queue, +typedef uint16_t (*rte_tx_callback_fn)(uint16_t port, uint16_t queue, struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param); /** @@ -1649,8 +1744,12 @@ struct rte_eth_dev { */ struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; enum rte_eth_dev_state state; /**< Flag indicating the port state */ + void *security_ctx; /**< Context for security ops */ } __rte_cache_aligned; +void * +rte_eth_dev_get_sec_ctx(uint8_t port_id); + struct rte_eth_dev_sriov { uint8_t active; /**< SRIOV is active with 16, 32 or 64 pools */ uint8_t nb_q_per_pool; /**< rx queue number per pool */ @@ -1695,7 +1794,7 @@ struct rte_eth_dev_data { /** bitmap array of associating Ethernet MAC addresses to pools */ struct ether_addr* hash_mac_addrs; /** Device Ethernet MAC addresses of hash filtering. */ - uint8_t port_id; /**< Device [external] port identifier. */ + uint16_t port_id; /**< Device [external] port identifier. */ __extension__ uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */ scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */ @@ -1713,8 +1812,6 @@ struct rte_eth_dev_data { /**< VLAN filter configuration. */ }; -/** Device supports hotplug detach */ -#define RTE_ETH_DEV_DETACHABLE 0x0001 /** Device supports link state interrupt */ #define RTE_ETH_DEV_INTR_LSC 0x0002 /** Device is a bonded slave */ @@ -1737,7 +1834,7 @@ extern struct rte_eth_dev rte_eth_devices[]; * @return * Next valid port id, RTE_MAX_ETHPORTS if there is none. */ -uint8_t rte_eth_find_next(uint8_t port_id); +uint16_t rte_eth_find_next(uint16_t port_id); /** * Macro to iterate over all enabled ethdev ports. @@ -1760,7 +1857,7 @@ uint8_t rte_eth_find_next(uint8_t port_id); * @return * - The total number of usable Ethernet devices. */ -uint8_t rte_eth_dev_count(void); +uint16_t rte_eth_dev_count(void); /** * @internal @@ -1821,7 +1918,7 @@ int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev); * @return * 0 on success and port_id is filled, negative on error */ -int rte_eth_dev_attach(const char *devargs, uint8_t *port_id); +int rte_eth_dev_attach(const char *devargs, uint16_t *port_id); /** * Detach a Ethernet device specified by port identifier. @@ -1836,7 +1933,7 @@ int rte_eth_dev_attach(const char *devargs, uint8_t *port_id); * @return * 0 on success and devname is filled, negative on error */ -int rte_eth_dev_detach(uint8_t port_id, char *devname); +int rte_eth_dev_detach(uint16_t port_id, char *devname); /** * Convert a numerical speed in Mbps to a bitmap flag that can be used in @@ -1870,6 +1967,9 @@ uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex); * each statically configurable offload hardware feature provided by * Ethernet devices, such as IP checksum or VLAN tag stripping for * example. + * The Rx offload bitfield API is obsolete and will be deprecated. + * Applications should set the ignore_bitfield_offloads bit on *rxmode* + * structure and use offloads field to set per-port offloads instead. * - the Receive Side Scaling (RSS) configuration when using multiple RX * queues per port. * @@ -1880,7 +1980,7 @@ uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex); * - 0: Success, device configured. * - <0: Error code returned by the driver configuration function. 
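With port identifiers widened to 16 bits, iteration code simply switches its index type. A trivial sketch, not part of this patch:

#include <stdio.h>
#include <rte_ethdev.h>

static void
list_ports(void)
{
        uint16_t port_id;

        printf("%u usable ports\n", (unsigned int)rte_eth_dev_count());

        RTE_ETH_FOREACH_DEV(port_id)
                printf("port %u on socket %d\n", port_id,
                       rte_eth_dev_socket_id(port_id));
}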
*/ -int rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_queue, +int rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_queue, uint16_t nb_tx_queue, const struct rte_eth_conf *eth_conf); /** @@ -1923,6 +2023,8 @@ void _rte_eth_dev_reset(struct rte_eth_dev *dev); * The *rx_conf* structure contains an *rx_thresh* structure with the values * of the Prefetch, Host, and Write-Back threshold registers of the receive * ring. + * In addition it contains the hardware offloads features to activate using + * the DEV_RX_OFFLOAD_* flags. * @param mb_pool * The pointer to the memory pool from which to allocate *rte_mbuf* network * memory buffers to populate each descriptor of the receive ring. @@ -1935,7 +2037,7 @@ void _rte_eth_dev_reset(struct rte_eth_dev *dev); * allocate network memory buffers from the memory pool when * initializing receive descriptors. */ -int rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, +int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, uint16_t nb_rx_desc, unsigned int socket_id, const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool); @@ -1976,6 +2078,11 @@ int rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, * - The *txq_flags* member contains flags to pass to the TX queue setup * function to configure the behavior of the TX queue. This should be set * to 0 if no special configuration is required. + * This API is obsolete and will be deprecated. Applications + * should set it to ETH_TXQ_FLAGS_IGNORE and use + * the offloads field below. + * - The *offloads* member contains Tx offloads to be enabled. + * Offloads which are not set cannot be used on the datapath. * * Note that setting *tx_free_thresh* or *tx_rs_thresh* value to 0 forces * the transmit function to use default values. @@ -1983,7 +2090,7 @@ int rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, * - 0: Success, the transmit queue is correctly set up. * - -ENOMEM: Unable to allocate the transmit ring descriptors. */ -int rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, +int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id, uint16_t nb_tx_desc, unsigned int socket_id, const struct rte_eth_txconf *tx_conf); @@ -1997,7 +2104,7 @@ int rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, * a default of zero if the socket could not be determined. * -1 is returned is the port_id value is out of range. */ -int rte_eth_dev_socket_id(uint8_t port_id); +int rte_eth_dev_socket_id(uint16_t port_id); /** * Check if port_id of device is attached @@ -2008,7 +2115,7 @@ int rte_eth_dev_socket_id(uint8_t port_id); * - 0 if port is out of range or not attached * - 1 if device is attached */ -int rte_eth_dev_is_valid_port(uint8_t port_id); +int rte_eth_dev_is_valid_port(uint16_t port_id); /** * Start specified RX queue of a port. It is used when rx_deferred_start @@ -2025,7 +2132,7 @@ int rte_eth_dev_is_valid_port(uint8_t port_id); * - -EINVAL: The port_id or the queue_id out of range. * - -ENOTSUP: The function not supported in PMD driver. */ -int rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id); +int rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id); /** * Stop specified RX queue of a port @@ -2041,7 +2148,7 @@ int rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id); * - -EINVAL: The port_id or the queue_id out of range. * - -ENOTSUP: The function not supported in PMD driver. 
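
To illustrate the per-queue offload fields mentioned above (the *offloads* members of the Rx/Tx queue configuration and ETH_TXQ_FLAGS_IGNORE), here is a hedged sketch of a queue setup helper; the helper name, queue sizes and the particular DEV_RX_OFFLOAD_*/DEV_TX_OFFLOAD_* flags are only examples, and whether a given PMD honours queue-level offloads in this release depends on the driver.

#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_mempool.h>

static int
setup_queues(uint16_t port_id, struct rte_mempool *mb_pool)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_rxconf rxq_conf;
	struct rte_eth_txconf txq_conf;
	int ret;

	rte_eth_dev_info_get(port_id, &dev_info);

	/* Start from the PMD defaults, then request Rx checksum offload. */
	rxq_conf = dev_info.default_rxconf;
	rxq_conf.offloads = DEV_RX_OFFLOAD_CHECKSUM;
	ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
				     &rxq_conf, mb_pool);
	if (ret < 0)
		return ret;

	/* Ignore the legacy txq_flags and use the offloads field instead. */
	txq_conf = dev_info.default_txconf;
	txq_conf.txq_flags = ETH_TXQ_FLAGS_IGNORE;
	txq_conf.offloads = DEV_TX_OFFLOAD_IPV4_CKSUM;
	return rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(),
				      &txq_conf);
}
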
*/ -int rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id); +int rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id); /** * Start TX for specified queue of a port. It is used when tx_deferred_start @@ -2058,7 +2165,7 @@ int rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id); * - -EINVAL: The port_id or the queue_id out of range. * - -ENOTSUP: The function not supported in PMD driver. */ -int rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id); +int rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id); /** * Stop specified TX queue of a port @@ -2074,7 +2181,7 @@ int rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id); * - -EINVAL: The port_id or the queue_id out of range. * - -ENOTSUP: The function not supported in PMD driver. */ -int rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id); +int rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id); @@ -2093,7 +2200,7 @@ int rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id); * - 0: Success, Ethernet device started. * - <0: Error code of the driver device start function. */ -int rte_eth_dev_start(uint8_t port_id); +int rte_eth_dev_start(uint16_t port_id); /** * Stop an Ethernet device. The device can be restarted with a call to @@ -2102,7 +2209,7 @@ int rte_eth_dev_start(uint8_t port_id); * @param port_id * The port identifier of the Ethernet device. */ -void rte_eth_dev_stop(uint8_t port_id); +void rte_eth_dev_stop(uint16_t port_id); /** @@ -2117,7 +2224,7 @@ void rte_eth_dev_stop(uint8_t port_id); * - 0: Success, Ethernet device linked up. * - <0: Error code of the driver device link up function. */ -int rte_eth_dev_set_link_up(uint8_t port_id); +int rte_eth_dev_set_link_up(uint16_t port_id); /** * Link down an Ethernet device. @@ -2128,7 +2235,7 @@ int rte_eth_dev_set_link_up(uint8_t port_id); * @param port_id * The port identifier of the Ethernet device. */ -int rte_eth_dev_set_link_down(uint8_t port_id); +int rte_eth_dev_set_link_down(uint16_t port_id); /** * Close a stopped Ethernet device. The device cannot be restarted! @@ -2138,7 +2245,46 @@ int rte_eth_dev_set_link_down(uint8_t port_id); * @param port_id * The port identifier of the Ethernet device. */ -void rte_eth_dev_close(uint8_t port_id); +void rte_eth_dev_close(uint16_t port_id); + +/** + * Reset a Ethernet device and keep its port id. + * + * When a port has to be reset passively, the DPDK application can invoke + * this function. For example when a PF is reset, all its VFs should also + * be reset. Normally a DPDK application can invoke this function when + * RTE_ETH_EVENT_INTR_RESET event is detected, but can also use it to start + * a port reset in other circumstances. + * + * When this function is called, it first stops the port and then calls the + * PMD specific dev_uninit( ) and dev_init( ) to return the port to initial + * state, in which no Tx and Rx queues are setup, as if the port has been + * reset and not started. The port keeps the port id it had before the + * function call. + * + * After calling rte_eth_dev_reset( ), the application should use + * rte_eth_dev_configure( ), rte_eth_rx_queue_setup( ), + * rte_eth_tx_queue_setup( ), and rte_eth_dev_start( ) + * to reconfigure the device as appropriate. + * + * Note: To avoid unexpected behavior, the application should stop calling + * Tx and Rx functions before calling rte_eth_dev_reset( ). For thread + * safety, all these controlling functions should be called from the same + * thread. 
+ * + * @param port_id + * The port identifier of the Ethernet device. + * + * @return + * - (0) if successful. + * - (-EINVAL) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support this function. + * - (-EPERM) if not ran from the primary process. + * - (-EIO) if re-initialisation failed. + * - (-ENOMEM) if the reset failed due to OOM. + * - (-EAGAIN) if the reset temporarily failed and should be retried later. + */ +int rte_eth_dev_reset(uint16_t port_id); /** * Enable receipt in promiscuous mode for an Ethernet device. @@ -2146,7 +2292,7 @@ void rte_eth_dev_close(uint8_t port_id); * @param port_id * The port identifier of the Ethernet device. */ -void rte_eth_promiscuous_enable(uint8_t port_id); +void rte_eth_promiscuous_enable(uint16_t port_id); /** * Disable receipt in promiscuous mode for an Ethernet device. @@ -2154,7 +2300,7 @@ void rte_eth_promiscuous_enable(uint8_t port_id); * @param port_id * The port identifier of the Ethernet device. */ -void rte_eth_promiscuous_disable(uint8_t port_id); +void rte_eth_promiscuous_disable(uint16_t port_id); /** * Return the value of promiscuous mode for an Ethernet device. @@ -2166,7 +2312,7 @@ void rte_eth_promiscuous_disable(uint8_t port_id); * - (0) if promiscuous is disabled. * - (-1) on error */ -int rte_eth_promiscuous_get(uint8_t port_id); +int rte_eth_promiscuous_get(uint16_t port_id); /** * Enable the receipt of any multicast frame by an Ethernet device. @@ -2174,7 +2320,7 @@ int rte_eth_promiscuous_get(uint8_t port_id); * @param port_id * The port identifier of the Ethernet device. */ -void rte_eth_allmulticast_enable(uint8_t port_id); +void rte_eth_allmulticast_enable(uint16_t port_id); /** * Disable the receipt of all multicast frames by an Ethernet device. @@ -2182,7 +2328,7 @@ void rte_eth_allmulticast_enable(uint8_t port_id); * @param port_id * The port identifier of the Ethernet device. */ -void rte_eth_allmulticast_disable(uint8_t port_id); +void rte_eth_allmulticast_disable(uint16_t port_id); /** * Return the value of allmulticast mode for an Ethernet device. @@ -2194,7 +2340,7 @@ void rte_eth_allmulticast_disable(uint8_t port_id); * - (0) if allmulticast is disabled. * - (-1) on error */ -int rte_eth_allmulticast_get(uint8_t port_id); +int rte_eth_allmulticast_get(uint16_t port_id); /** * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX @@ -2207,7 +2353,7 @@ int rte_eth_allmulticast_get(uint8_t port_id); * A pointer to an *rte_eth_link* structure to be filled with * the status, the speed and the mode of the Ethernet device link. */ -void rte_eth_link_get(uint8_t port_id, struct rte_eth_link *link); +void rte_eth_link_get(uint16_t port_id, struct rte_eth_link *link); /** * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX @@ -2220,7 +2366,7 @@ void rte_eth_link_get(uint8_t port_id, struct rte_eth_link *link); * A pointer to an *rte_eth_link* structure to be filled with * the status, the speed and the mode of the Ethernet device link. */ -void rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *link); +void rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *link); /** * Retrieve the general I/O statistics of an Ethernet device. @@ -2239,15 +2385,19 @@ void rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *link); * @return * Zero if successful. Non-zero otherwise. 
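
The rte_eth_dev_reset() sequence described above (reset, reconfigure, set up queues, restart) could be wrapped roughly as in the sketch below; the helper name, the single-queue configuration and the descriptor counts are assumptions made purely for illustration, and datapath threads are assumed to have stopped using the port beforehand.

#include <errno.h>
#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_mempool.h>

static int
recover_port(uint16_t port_id, const struct rte_eth_conf *port_conf,
	     struct rte_mempool *mb_pool)
{
	int ret;

	/* Stops the port and returns it to its initial, unconfigured state. */
	ret = rte_eth_dev_reset(port_id);
	if (ret == -EAGAIN)
		return ret;		/* transient failure, retry later */
	if (ret < 0)
		return ret;

	/* Reapply the configuration before restarting the port. */
	ret = rte_eth_dev_configure(port_id, 1, 1, port_conf);
	if (ret < 0)
		return ret;
	ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
				     NULL, mb_pool);
	if (ret < 0)
		return ret;
	ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), NULL);
	if (ret < 0)
		return ret;
	return rte_eth_dev_start(port_id);
}
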
*/ -int rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats); +int rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats); /** * Reset the general I/O statistics of an Ethernet device. * * @param port_id * The port identifier of the Ethernet device. + * @return + * - (0) if device notified to reset stats. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. */ -void rte_eth_stats_reset(uint8_t port_id); +int rte_eth_stats_reset(uint16_t port_id); /** * Retrieve names of extended statistics of an Ethernet device. @@ -2269,7 +2419,7 @@ void rte_eth_stats_reset(uint8_t port_id); * shall not be used by the caller. * - A negative value on error (invalid port id). */ -int rte_eth_xstats_get_names(uint8_t port_id, +int rte_eth_xstats_get_names(uint16_t port_id, struct rte_eth_xstat_name *xstats_names, unsigned int size); @@ -2295,7 +2445,7 @@ int rte_eth_xstats_get_names(uint8_t port_id, * shall not be used by the caller. * - A negative value on error (invalid port id). */ -int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, +int rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats, unsigned int n); /** @@ -2321,7 +2471,7 @@ int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, * - A negative value on error (invalid port id). */ int -rte_eth_xstats_get_names_by_id(uint8_t port_id, +rte_eth_xstats_get_names_by_id(uint16_t port_id, struct rte_eth_xstat_name *xstats_names, unsigned int size, uint64_t *ids); @@ -2333,23 +2483,23 @@ rte_eth_xstats_get_names_by_id(uint8_t port_id, * @param ids * A pointer to an ids array passed by application. This tells which * statistics values function should retrieve. This parameter - * can be set to NULL if n is 0. In this case function will retrieve + * can be set to NULL if size is 0. In this case function will retrieve * all avalible statistics. * @param values * A pointer to a table to be filled with device statistics values. - * @param n + * @param size * The size of the ids array (number of elements). * @return - * - A positive value lower or equal to n: success. The return value + * - A positive value lower or equal to size: success. The return value * is the number of entries filled in the stats table. - * - A positive value higher than n: error, the given statistics table + * - A positive value higher than size: error, the given statistics table * is too small. The return value corresponds to the size that should * be given to succeed. The entries in the table are not valid and * shall not be used by the caller. * - A negative value on error (invalid port id). */ -int rte_eth_xstats_get_by_id(uint8_t port_id, const uint64_t *ids, - uint64_t *values, unsigned int n); +int rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids, + uint64_t *values, unsigned int size); /** * Gets the ID of a statistic from its name. @@ -2368,7 +2518,7 @@ int rte_eth_xstats_get_by_id(uint8_t port_id, const uint64_t *ids, * -ENODEV for invalid port_id, * -EINVAL if the xstat_name doesn't exist in port_id */ -int rte_eth_xstats_get_id_by_name(uint8_t port_id, const char *xstat_name, +int rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name, uint64_t *id); /** @@ -2377,7 +2527,7 @@ int rte_eth_xstats_get_id_by_name(uint8_t port_id, const char *xstat_name, * @param port_id * The port identifier of the Ethernet device. 
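
A short sketch of the *_by_id xstats getters documented above; "rx_good_packets" is just an example counter name, since the available statistics are driver dependent and should be discovered with rte_eth_xstats_get_names().

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <rte_ethdev.h>

static void
print_one_xstat(uint16_t port_id)
{
	uint64_t id, value;

	if (rte_eth_xstats_get_id_by_name(port_id, "rx_good_packets",
					  &id) != 0)
		return;
	/* ids/values arrays hold a single element, so "size" is 1. */
	if (rte_eth_xstats_get_by_id(port_id, &id, &value, 1) == 1)
		printf("rx_good_packets: %" PRIu64 "\n", value);
}
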
*/ -void rte_eth_xstats_reset(uint8_t port_id); +void rte_eth_xstats_reset(uint16_t port_id); /** * Set a mapping for the specified transmit queue to the specified per-queue @@ -2396,7 +2546,7 @@ void rte_eth_xstats_reset(uint8_t port_id); * @return * Zero if successful. Non-zero otherwise. */ -int rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, +int rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, uint16_t tx_queue_id, uint8_t stat_idx); /** @@ -2416,7 +2566,7 @@ int rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, * @return * Zero if successful. Non-zero otherwise. */ -int rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, +int rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id, uint16_t rx_queue_id, uint8_t stat_idx); @@ -2429,7 +2579,7 @@ int rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, * A pointer to a structure of type *ether_addr* to be filled with * the Ethernet address of the Ethernet device. */ -void rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr); +void rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr); /** * Retrieve the contextual information of an Ethernet device. @@ -2440,7 +2590,7 @@ void rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr); * A pointer to a structure of type *rte_eth_dev_info* to be filled with * the contextual information of the Ethernet device. */ -void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info); +void rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info); /** * Retrieve the firmware version of a device. @@ -2460,7 +2610,7 @@ void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info); * - (>0) if *fw_size* is not enough to store firmware version, return * the size of the non truncated string. */ -int rte_eth_dev_fw_version_get(uint8_t port_id, +int rte_eth_dev_fw_version_get(uint16_t port_id, char *fw_version, size_t fw_size); /** @@ -2501,7 +2651,7 @@ int rte_eth_dev_fw_version_get(uint8_t port_id, * count of supported ptypes will be returned. * - (-ENODEV) if *port_id* invalid. */ -int rte_eth_dev_get_supported_ptypes(uint8_t port_id, uint32_t ptype_mask, +int rte_eth_dev_get_supported_ptypes(uint16_t port_id, uint32_t ptype_mask, uint32_t *ptypes, int num); /** @@ -2515,7 +2665,7 @@ int rte_eth_dev_get_supported_ptypes(uint8_t port_id, uint32_t ptype_mask, * - (0) if successful. * - (-ENODEV) if *port_id* invalid. */ -int rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu); +int rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu); /** * Change the MTU of an Ethernet device. @@ -2531,7 +2681,7 @@ int rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu); * - (-EINVAL) if *mtu* invalid. * - (-EBUSY) if operation is not allowed when the port is running */ -int rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu); +int rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu); /** * Enable/Disable hardware filtering by an Ethernet device of received @@ -2551,7 +2701,7 @@ int rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu); * - (-ENOSYS) if VLAN filtering on *port_id* disabled. * - (-EINVAL) if *vlan_id* > 4095. */ -int rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on); +int rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on); /** * Enable/Disable hardware VLAN Strip by a rx queue of an Ethernet device. @@ -2572,7 +2722,7 @@ int rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on); * - (-ENODEV) if *port_id* invalid. * - (-EINVAL) if *rx_queue_id* invalid. 
*/ -int rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id, +int rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id, int on); /** @@ -2591,7 +2741,7 @@ int rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id, * - (-ENOSUP) if hardware-assisted VLAN TPID setup is not supported. * - (-ENODEV) if *port_id* invalid. */ -int rte_eth_dev_set_vlan_ether_type(uint8_t port_id, +int rte_eth_dev_set_vlan_ether_type(uint16_t port_id, enum rte_vlan_type vlan_type, uint16_t tag_type); @@ -2615,7 +2765,7 @@ int rte_eth_dev_set_vlan_ether_type(uint8_t port_id, * - (-ENOSUP) if hardware-assisted VLAN filtering not configured. * - (-ENODEV) if *port_id* invalid. */ -int rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask); +int rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask); /** * Read VLAN Offload configuration from an Ethernet device @@ -2629,7 +2779,7 @@ int rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask); * ETH_VLAN_EXTEND_OFFLOAD * - (-ENODEV) if *port_id* invalid. */ -int rte_eth_dev_get_vlan_offload(uint8_t port_id); +int rte_eth_dev_get_vlan_offload(uint16_t port_id); /** * Set port based TX VLAN insertion on or off. @@ -2645,7 +2795,7 @@ int rte_eth_dev_get_vlan_offload(uint8_t port_id); * - (0) if successful. * - negative if failed. */ -int rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on); +int rte_eth_dev_set_vlan_pvid(uint16_t port_id, uint16_t pvid, int on); /** * @@ -2730,7 +2880,7 @@ int rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on); * *rx_pkts* array. */ static inline uint16_t -rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id, +rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id, struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) { struct rte_eth_dev *dev = &rte_eth_devices[port_id]; @@ -2775,7 +2925,7 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id, * (-ENOTSUP) if the device does not support this function */ static inline int -rte_eth_rx_queue_count(uint8_t port_id, uint16_t queue_id) +rte_eth_rx_queue_count(uint16_t port_id, uint16_t queue_id) { struct rte_eth_dev *dev; @@ -2804,7 +2954,7 @@ rte_eth_rx_queue_count(uint8_t port_id, uint16_t queue_id) * - (-ENOTSUP) if the device does not support this function */ static inline int -rte_eth_rx_descriptor_done(uint8_t port_id, uint16_t queue_id, uint16_t offset) +rte_eth_rx_descriptor_done(uint16_t port_id, uint16_t queue_id, uint16_t offset) { struct rte_eth_dev *dev = &rte_eth_devices[port_id]; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); @@ -2851,7 +3001,7 @@ rte_eth_rx_descriptor_done(uint8_t port_id, uint16_t queue_id, uint16_t offset) * - (-ENODEV) bad port or queue (only if compiled with debug). */ static inline int -rte_eth_rx_descriptor_status(uint8_t port_id, uint16_t queue_id, +rte_eth_rx_descriptor_status(uint16_t port_id, uint16_t queue_id, uint16_t offset) { struct rte_eth_dev *dev; @@ -2908,7 +3058,7 @@ rte_eth_rx_descriptor_status(uint8_t port_id, uint16_t queue_id, * - (-ENOTSUP) if the device does not support this function. * - (-ENODEV) bad port or queue (only if compiled with debug). */ -static inline int rte_eth_tx_descriptor_status(uint8_t port_id, +static inline int rte_eth_tx_descriptor_status(uint16_t port_id, uint16_t queue_id, uint16_t offset) { struct rte_eth_dev *dev; @@ -2992,7 +3142,7 @@ static inline int rte_eth_tx_descriptor_status(uint8_t port_id, * *tx_pkts* parameter when the transmit ring is full or has been filled up. 
*/ static inline uint16_t -rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, +rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { struct rte_eth_dev *dev = &rte_eth_devices[port_id]; @@ -3081,7 +3231,7 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, #ifndef RTE_ETHDEV_TX_PREPARE_NOOP static inline uint16_t -rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id, +rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { struct rte_eth_dev *dev; @@ -3123,7 +3273,8 @@ rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id, */ static inline uint16_t -rte_eth_tx_prepare(__rte_unused uint8_t port_id, __rte_unused uint16_t queue_id, +rte_eth_tx_prepare(__rte_unused uint16_t port_id, + __rte_unused uint16_t queue_id, __rte_unused struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { return nb_pkts; @@ -3192,7 +3343,7 @@ rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size); * callback is called for any packets which could not be sent. */ static inline uint16_t -rte_eth_tx_buffer_flush(uint8_t port_id, uint16_t queue_id, +rte_eth_tx_buffer_flush(uint16_t port_id, uint16_t queue_id, struct rte_eth_dev_tx_buffer *buffer) { uint16_t sent; @@ -3244,7 +3395,7 @@ rte_eth_tx_buffer_flush(uint8_t port_id, uint16_t queue_id, * the rest. */ static __rte_always_inline uint16_t -rte_eth_tx_buffer(uint8_t port_id, uint16_t queue_id, +rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id, struct rte_eth_dev_tx_buffer *buffer, struct rte_mbuf *tx_pkt) { buffer->pkts[buffer->length++] = tx_pkt; @@ -3360,7 +3511,7 @@ rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent, * are in use. */ int -rte_eth_tx_done_cleanup(uint8_t port_id, uint16_t queue_id, uint32_t free_cnt); +rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt); /** * The eth device event type for interrupt, and maybe others in the future. @@ -3378,7 +3529,7 @@ enum rte_eth_event_type { RTE_ETH_EVENT_MAX /**< max value of this enum */ }; -typedef int (*rte_eth_dev_cb_fn)(uint8_t port_id, +typedef int (*rte_eth_dev_cb_fn)(uint16_t port_id, enum rte_eth_event_type event, void *cb_arg, void *ret_param); /**< user application callback to be registered for interrupts */ @@ -3400,7 +3551,7 @@ typedef int (*rte_eth_dev_cb_fn)(uint8_t port_id, * - On success, zero. * - On failure, a negative value. */ -int rte_eth_dev_callback_register(uint8_t port_id, +int rte_eth_dev_callback_register(uint16_t port_id, enum rte_eth_event_type event, rte_eth_dev_cb_fn cb_fn, void *cb_arg); @@ -3421,7 +3572,7 @@ int rte_eth_dev_callback_register(uint8_t port_id, * - On success, zero. * - On failure, a negative value. */ -int rte_eth_dev_callback_unregister(uint8_t port_id, +int rte_eth_dev_callback_unregister(uint16_t port_id, enum rte_eth_event_type event, rte_eth_dev_cb_fn cb_fn, void *cb_arg); @@ -3467,7 +3618,7 @@ int _rte_eth_dev_callback_process(struct rte_eth_dev *dev, * that operation. * - (-ENODEV) if *port_id* invalid. */ -int rte_eth_dev_rx_intr_enable(uint8_t port_id, uint16_t queue_id); +int rte_eth_dev_rx_intr_enable(uint16_t port_id, uint16_t queue_id); /** * When lcore wakes up from rx interrupt indicating packet coming, disable rx @@ -3488,7 +3639,7 @@ int rte_eth_dev_rx_intr_enable(uint8_t port_id, uint16_t queue_id); * that operation. * - (-ENODEV) if *port_id* invalid. 
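
To show the updated rte_eth_dev_cb_fn prototype (16-bit port id) together with rte_eth_dev_callback_register(), here is a hedged sketch of a reset-event hook; the flag and helper names are invented, and the callback only records the event so that the heavier recovery work can run in the application's own control thread.

#include <rte_ethdev.h>

static volatile int reset_pending;

static int
port_event_cb(uint16_t port_id, enum rte_eth_event_type event,
	      void *cb_arg, void *ret_param)
{
	(void)port_id;
	(void)cb_arg;
	(void)ret_param;
	if (event == RTE_ETH_EVENT_INTR_RESET)
		reset_pending = 1;	/* picked up later by the main loop */
	return 0;
}

static int
register_reset_cb(uint16_t port_id)
{
	return rte_eth_dev_callback_register(port_id,
			RTE_ETH_EVENT_INTR_RESET, port_event_cb, NULL);
}
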
*/ -int rte_eth_dev_rx_intr_disable(uint8_t port_id, uint16_t queue_id); +int rte_eth_dev_rx_intr_disable(uint16_t port_id, uint16_t queue_id); /** * RX Interrupt control per port. @@ -3507,7 +3658,7 @@ int rte_eth_dev_rx_intr_disable(uint8_t port_id, uint16_t queue_id); * - On success, zero. * - On failure, a negative value. */ -int rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data); +int rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data); /** * RX Interrupt control per queue. @@ -3530,7 +3681,7 @@ int rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data); * - On success, zero. * - On failure, a negative value. */ -int rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id, +int rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id, int epfd, int op, void *data); /** @@ -3545,7 +3696,7 @@ int rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id, * that operation. * - (-ENODEV) if *port_id* invalid. */ -int rte_eth_led_on(uint8_t port_id); +int rte_eth_led_on(uint16_t port_id); /** * Turn off the LED on the Ethernet device. @@ -3559,7 +3710,7 @@ int rte_eth_led_on(uint8_t port_id); * that operation. * - (-ENODEV) if *port_id* invalid. */ -int rte_eth_led_off(uint8_t port_id); +int rte_eth_led_off(uint16_t port_id); /** * Get current status of the Ethernet link flow control for Ethernet device @@ -3573,7 +3724,7 @@ int rte_eth_led_off(uint8_t port_id); * - (-ENOTSUP) if hardware doesn't support flow control. * - (-ENODEV) if *port_id* invalid. */ -int rte_eth_dev_flow_ctrl_get(uint8_t port_id, +int rte_eth_dev_flow_ctrl_get(uint16_t port_id, struct rte_eth_fc_conf *fc_conf); /** @@ -3590,7 +3741,7 @@ int rte_eth_dev_flow_ctrl_get(uint8_t port_id, * - (-EINVAL) if bad parameter * - (-EIO) if flow control setup failure */ -int rte_eth_dev_flow_ctrl_set(uint8_t port_id, +int rte_eth_dev_flow_ctrl_set(uint16_t port_id, struct rte_eth_fc_conf *fc_conf); /** @@ -3608,7 +3759,7 @@ int rte_eth_dev_flow_ctrl_set(uint8_t port_id, * - (-EINVAL) if bad parameter * - (-EIO) if flow control setup failure */ -int rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id, +int rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id, struct rte_eth_pfc_conf *pfc_conf); /** @@ -3629,7 +3780,7 @@ int rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id, * - (-ENOSPC) if no more MAC addresses can be added. * - (-EINVAL) if MAC address is invalid. */ -int rte_eth_dev_mac_addr_add(uint8_t port, struct ether_addr *mac_addr, +int rte_eth_dev_mac_addr_add(uint16_t port, struct ether_addr *mac_addr, uint32_t pool); /** @@ -3645,7 +3796,7 @@ int rte_eth_dev_mac_addr_add(uint8_t port, struct ether_addr *mac_addr, * - (-ENODEV) if *port* invalid. * - (-EADDRINUSE) if attempting to remove the default MAC address */ -int rte_eth_dev_mac_addr_remove(uint8_t port, struct ether_addr *mac_addr); +int rte_eth_dev_mac_addr_remove(uint16_t port, struct ether_addr *mac_addr); /** * Set the default MAC address. @@ -3660,8 +3811,8 @@ int rte_eth_dev_mac_addr_remove(uint8_t port, struct ether_addr *mac_addr); * - (-ENODEV) if *port* invalid. * - (-EINVAL) if MAC address is invalid. */ -int rte_eth_dev_default_mac_addr_set(uint8_t port, struct ether_addr *mac_addr); - +int rte_eth_dev_default_mac_addr_set(uint16_t port, + struct ether_addr *mac_addr); /** * Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device. 
@@ -3678,7 +3829,7 @@ int rte_eth_dev_default_mac_addr_set(uint8_t port, struct ether_addr *mac_addr); * - (-ENOTSUP) if hardware doesn't support. * - (-EINVAL) if bad parameter. */ -int rte_eth_dev_rss_reta_update(uint8_t port, +int rte_eth_dev_rss_reta_update(uint16_t port, struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size); @@ -3697,7 +3848,7 @@ int rte_eth_dev_rss_reta_update(uint8_t port, * - (-ENOTSUP) if hardware doesn't support. * - (-EINVAL) if bad parameter. */ -int rte_eth_dev_rss_reta_query(uint8_t port, +int rte_eth_dev_rss_reta_query(uint16_t port, struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size); @@ -3719,8 +3870,8 @@ int rte_eth_dev_rss_reta_query(uint8_t port, * - (-ENODEV) if *port_id* invalid. * - (-EINVAL) if bad parameter. */ -int rte_eth_dev_uc_hash_table_set(uint8_t port,struct ether_addr *addr, - uint8_t on); +int rte_eth_dev_uc_hash_table_set(uint16_t port, struct ether_addr *addr, + uint8_t on); /** * Updates all unicast hash bitmaps for receiving packet with any Unicast @@ -3739,7 +3890,7 @@ int rte_eth_dev_uc_hash_table_set(uint8_t port,struct ether_addr *addr, * - (-ENODEV) if *port_id* invalid. * - (-EINVAL) if bad parameter. */ -int rte_eth_dev_uc_all_hash_table_set(uint8_t port,uint8_t on); +int rte_eth_dev_uc_all_hash_table_set(uint16_t port, uint8_t on); /** * Set a traffic mirroring rule on an Ethernet device @@ -3762,7 +3913,7 @@ int rte_eth_dev_uc_all_hash_table_set(uint8_t port,uint8_t on); * - (-ENODEV) if *port_id* invalid. * - (-EINVAL) if the mr_conf information is not correct. */ -int rte_eth_mirror_rule_set(uint8_t port_id, +int rte_eth_mirror_rule_set(uint16_t port_id, struct rte_eth_mirror_conf *mirror_conf, uint8_t rule_id, uint8_t on); @@ -3780,7 +3931,7 @@ int rte_eth_mirror_rule_set(uint8_t port_id, * - (-ENODEV) if *port_id* invalid. * - (-EINVAL) if bad parameter. */ -int rte_eth_mirror_rule_reset(uint8_t port_id, +int rte_eth_mirror_rule_reset(uint16_t port_id, uint8_t rule_id); /** @@ -3798,7 +3949,7 @@ int rte_eth_mirror_rule_reset(uint8_t port_id, * - (-ENODEV) if *port_id* invalid. * - (-EINVAL) if bad parameter. */ -int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, +int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx, uint16_t tx_rate); /** @@ -3814,7 +3965,7 @@ int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, * - (-ENOTSUP) if hardware doesn't support. * - (-EINVAL) if bad parameter. */ -int rte_eth_dev_rss_hash_update(uint8_t port_id, +int rte_eth_dev_rss_hash_update(uint16_t port_id, struct rte_eth_rss_conf *rss_conf); /** @@ -3831,7 +3982,7 @@ int rte_eth_dev_rss_hash_update(uint8_t port_id, * - (-ENOTSUP) if hardware doesn't support RSS. */ int -rte_eth_dev_rss_hash_conf_get(uint8_t port_id, +rte_eth_dev_rss_hash_conf_get(uint16_t port_id, struct rte_eth_rss_conf *rss_conf); /** @@ -3852,7 +4003,7 @@ rte_eth_dev_rss_hash_conf_get(uint8_t port_id, * - (-ENOTSUP) if hardware doesn't support tunnel type. */ int -rte_eth_dev_udp_tunnel_port_add(uint8_t port_id, +rte_eth_dev_udp_tunnel_port_add(uint16_t port_id, struct rte_eth_udp_tunnel *tunnel_udp); /** @@ -3874,7 +4025,7 @@ rte_eth_dev_udp_tunnel_port_add(uint8_t port_id, * - (-ENOTSUP) if hardware doesn't support tunnel type. 
*/ int -rte_eth_dev_udp_tunnel_port_delete(uint8_t port_id, +rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id, struct rte_eth_udp_tunnel *tunnel_udp); /** @@ -3890,7 +4041,8 @@ rte_eth_dev_udp_tunnel_port_delete(uint8_t port_id, * - (-ENOTSUP) if hardware doesn't support this filter type. * - (-ENODEV) if *port_id* invalid. */ -int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type); +int rte_eth_dev_filter_supported(uint16_t port_id, + enum rte_filter_type filter_type); /** * Take operations to assigned filter type on an Ethernet device. @@ -3910,7 +4062,7 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty * - (-ENODEV) if *port_id* invalid. * - others depends on the specific operations implementation. */ -int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type, +int rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type, enum rte_filter_op filter_op, void *arg); /** @@ -3925,7 +4077,7 @@ int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type, * - (-ENODEV) if port identifier is invalid. * - (-ENOTSUP) if hardware doesn't support. */ -int rte_eth_dev_get_dcb_info(uint8_t port_id, +int rte_eth_dev_get_dcb_info(uint16_t port_id, struct rte_eth_dcb_info *dcb_info); /** @@ -3952,7 +4104,7 @@ int rte_eth_dev_get_dcb_info(uint8_t port_id, * NULL on error. * On success, a pointer value which can later be used to remove the callback. */ -void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id, +void *rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id, rte_rx_callback_fn fn, void *user_param); /** @@ -3980,7 +4132,7 @@ void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id, * NULL on error. * On success, a pointer value which can later be used to remove the callback. */ -void *rte_eth_add_first_rx_callback(uint8_t port_id, uint16_t queue_id, +void *rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id, rte_rx_callback_fn fn, void *user_param); /** @@ -4007,7 +4159,7 @@ void *rte_eth_add_first_rx_callback(uint8_t port_id, uint16_t queue_id, * NULL on error. * On success, a pointer value which can later be used to remove the callback. */ -void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id, +void *rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id, rte_tx_callback_fn fn, void *user_param); /** @@ -4040,7 +4192,7 @@ void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id, * - -EINVAL: The port_id or the queue_id is out of range, or the callback * is NULL or not found for the port/queue. */ -int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id, +int rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id, struct rte_eth_rxtx_callback *user_cb); /** @@ -4073,7 +4225,7 @@ int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id, * - -EINVAL: The port_id or the queue_id is out of range, or the callback * is NULL or not found for the port/queue. */ -int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id, +int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id, struct rte_eth_rxtx_callback *user_cb); /** @@ -4093,7 +4245,7 @@ int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id, * - -ENOTSUP: routine is not supported by the device PMD. * - -EINVAL: The port_id or the queue_id is out of range. 
*/ -int rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id, +int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id, struct rte_eth_rxq_info *qinfo); /** @@ -4113,7 +4265,7 @@ int rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id, * - -ENOTSUP: routine is not supported by the device PMD. * - -EINVAL: The port_id or the queue_id is out of range. */ -int rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id, +int rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id, struct rte_eth_txq_info *qinfo); /** @@ -4132,7 +4284,7 @@ int rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id, * - (-ENODEV) if *port_id* invalid. * - others depends on the specific operations implementation. */ -int rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info); +int rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info); /** * Retrieve size of device EEPROM @@ -4145,7 +4297,7 @@ int rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info); * - (-ENODEV) if *port_id* invalid. * - others depends on the specific operations implementation. */ -int rte_eth_dev_get_eeprom_length(uint8_t port_id); +int rte_eth_dev_get_eeprom_length(uint16_t port_id); /** * Retrieve EEPROM and EEPROM attribute @@ -4161,7 +4313,7 @@ int rte_eth_dev_get_eeprom_length(uint8_t port_id); * - (-ENODEV) if *port_id* invalid. * - others depends on the specific operations implementation. */ -int rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info); +int rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info); /** * Program EEPROM with provided data @@ -4177,7 +4329,7 @@ int rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info); * - (-ENODEV) if *port_id* invalid. * - others depends on the specific operations implementation. */ -int rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info); +int rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info); /** * Set the list of multicast addresses to filter on an Ethernet device. @@ -4196,7 +4348,7 @@ int rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info); * - (-ENOTSUP) if PMD of *port_id* doesn't support multicast filtering. * - (-ENOSPC) if *port_id* has not enough multicast filtering resources. */ -int rte_eth_dev_set_mc_addr_list(uint8_t port_id, +int rte_eth_dev_set_mc_addr_list(uint16_t port_id, struct ether_addr *mc_addr_set, uint32_t nb_mc_addr); @@ -4211,7 +4363,7 @@ int rte_eth_dev_set_mc_addr_list(uint8_t port_id, * - -ENODEV: The port ID is invalid. * - -ENOTSUP: The function is not supported by the Ethernet driver. */ -int rte_eth_timesync_enable(uint8_t port_id); +int rte_eth_timesync_enable(uint16_t port_id); /** * Disable IEEE1588/802.1AS timestamping for an Ethernet device. @@ -4224,7 +4376,7 @@ int rte_eth_timesync_enable(uint8_t port_id); * - -ENODEV: The port ID is invalid. * - -ENOTSUP: The function is not supported by the Ethernet driver. */ -int rte_eth_timesync_disable(uint8_t port_id); +int rte_eth_timesync_disable(uint16_t port_id); /** * Read an IEEE1588/802.1AS RX timestamp from an Ethernet device. @@ -4243,7 +4395,7 @@ int rte_eth_timesync_disable(uint8_t port_id); * - -ENODEV: The port ID is invalid. * - -ENOTSUP: The function is not supported by the Ethernet driver. 
*/ -int rte_eth_timesync_read_rx_timestamp(uint8_t port_id, +int rte_eth_timesync_read_rx_timestamp(uint16_t port_id, struct timespec *timestamp, uint32_t flags); /** @@ -4260,7 +4412,7 @@ int rte_eth_timesync_read_rx_timestamp(uint8_t port_id, * - -ENODEV: The port ID is invalid. * - -ENOTSUP: The function is not supported by the Ethernet driver. */ -int rte_eth_timesync_read_tx_timestamp(uint8_t port_id, +int rte_eth_timesync_read_tx_timestamp(uint16_t port_id, struct timespec *timestamp); /** @@ -4279,7 +4431,7 @@ int rte_eth_timesync_read_tx_timestamp(uint8_t port_id, * - -ENODEV: The port ID is invalid. * - -ENOTSUP: The function is not supported by the Ethernet driver. */ -int rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta); +int rte_eth_timesync_adjust_time(uint16_t port_id, int64_t delta); /** * Read the time from the timesync clock on an Ethernet device. @@ -4295,7 +4447,7 @@ int rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta); * @return * - 0: Success. */ -int rte_eth_timesync_read_time(uint8_t port_id, struct timespec *time); +int rte_eth_timesync_read_time(uint16_t port_id, struct timespec *time); /** * Set the time of the timesync clock on an Ethernet device. @@ -4314,7 +4466,7 @@ int rte_eth_timesync_read_time(uint8_t port_id, struct timespec *time); * - -ENODEV: The port ID is invalid. * - -ENOTSUP: The function is not supported by the Ethernet driver. */ -int rte_eth_timesync_write_time(uint8_t port_id, const struct timespec *time); +int rte_eth_timesync_write_time(uint16_t port_id, const struct timespec *time); /** * Create memzone for HW rings. @@ -4355,7 +4507,7 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name, * - (-ENOTSUP) if hardware doesn't support tunnel type. */ int -rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id, +rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id, struct rte_eth_l2_tunnel_conf *l2_tunnel); /** @@ -4382,7 +4534,7 @@ rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id, * - (-ENOTSUP) if hardware doesn't support tunnel type. */ int -rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id, +rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id, struct rte_eth_l2_tunnel_conf *l2_tunnel, uint32_t mask, uint8_t en); @@ -4400,7 +4552,7 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id, * - (-ENODEV or -EINVAL) on failure. */ int -rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id); +rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id); /** * Get the device name from port id @@ -4414,7 +4566,7 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id); * - (-EINVAL) on failure. */ int -rte_eth_dev_get_name_by_port(uint8_t port_id, char *name); +rte_eth_dev_get_name_by_port(uint16_t port_id, char *name); /** * Check that numbers of Rx and Tx descriptors satisfy descriptors limits from @@ -4432,10 +4584,28 @@ rte_eth_dev_get_name_by_port(uint8_t port_id, char *name); * - (0) if successful. * - (-ENOTSUP, -ENODEV or -EINVAL) on failure. */ -int rte_eth_dev_adjust_nb_rx_tx_desc(uint8_t port_id, +int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id, uint16_t *nb_rx_desc, uint16_t *nb_tx_desc); + +/** + * Test if a port supports specific mempool ops. + * + * @param port_id + * Port identifier of the Ethernet device. + * @param [in] pool + * The name of the pool operations to test. + * @return + * - 0: best mempool ops choice for this port. + * - 1: mempool ops are supported for this port. + * - -ENOTSUP: mempool ops not supported for this port. 
+ * - -ENODEV: Invalid port Identifier. + * - -EINVAL: Pool param is null. + */ +int +rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool); + #ifdef __cplusplus } #endif diff --git a/lib/librte_ether/rte_ethdev_pci.h b/lib/librte_ether/rte_ethdev_pci.h index 56b10721..722075e0 100644 --- a/lib/librte_ether/rte_ethdev_pci.h +++ b/lib/librte_ether/rte_ethdev_pci.h @@ -36,6 +36,7 @@ #include <rte_malloc.h> #include <rte_pci.h> +#include <rte_bus_pci.h> #include <rte_ethdev.h> /** diff --git a/lib/librte_ether/rte_ethdev_vdev.h b/lib/librte_ether/rte_ethdev_vdev.h index 4d2c3e2b..ff92e6ed 100644 --- a/lib/librte_ether/rte_ethdev_vdev.h +++ b/lib/librte_ether/rte_ethdev_vdev.h @@ -35,7 +35,7 @@ #define _RTE_ETHDEV_VDEV_H_ #include <rte_malloc.h> -#include <rte_vdev.h> +#include <rte_bus_vdev.h> #include <rte_ethdev.h> /** diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ethdev_version.map index 42837285..e9681ac8 100644 --- a/lib/librte_ether/rte_ether_version.map +++ b/lib/librte_ether/rte_ethdev_version.map @@ -187,3 +187,31 @@ DPDK_17.08 { rte_tm_wred_profile_delete; } DPDK_17.05; + +DPDK_17.11 { + global: + + rte_eth_dev_get_sec_ctx; + rte_eth_dev_pool_ops_supported; + rte_eth_dev_reset; + rte_flow_error_set; + +} DPDK_17.08; + +EXPERIMENTAL { + global: + + rte_mtr_capabilities_get; + rte_mtr_create; + rte_mtr_destroy; + rte_mtr_meter_disable; + rte_mtr_meter_dscp_table_update; + rte_mtr_meter_enable; + rte_mtr_meter_profile_add; + rte_mtr_meter_profile_delete; + rte_mtr_meter_profile_update; + rte_mtr_policer_actions_update; + rte_mtr_stats_read; + rte_mtr_stats_update; + +} DPDK_17.11; diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c index 2001fbbf..66590630 100644 --- a/lib/librte_ether/rte_flow.c +++ b/lib/librte_ether/rte_flow.c @@ -108,7 +108,7 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = { /* Get generic flow operations structure from a port. */ const struct rte_flow_ops * -rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error) +rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error) { struct rte_eth_dev *dev = &rte_eth_devices[port_id]; const struct rte_flow_ops *ops; @@ -132,7 +132,7 @@ rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error) /* Check whether a flow rule can be created on a given port. */ int -rte_flow_validate(uint8_t port_id, +rte_flow_validate(uint16_t port_id, const struct rte_flow_attr *attr, const struct rte_flow_item pattern[], const struct rte_flow_action actions[], @@ -145,14 +145,14 @@ rte_flow_validate(uint8_t port_id, return -rte_errno; if (likely(!!ops->validate)) return ops->validate(dev, attr, pattern, actions, error); - return -rte_flow_error_set(error, ENOSYS, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(ENOSYS)); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); } /* Create a flow rule on a given port. */ struct rte_flow * -rte_flow_create(uint8_t port_id, +rte_flow_create(uint16_t port_id, const struct rte_flow_attr *attr, const struct rte_flow_item pattern[], const struct rte_flow_action actions[], @@ -172,7 +172,7 @@ rte_flow_create(uint8_t port_id, /* Destroy a flow rule on a given port. 
*/ int -rte_flow_destroy(uint8_t port_id, +rte_flow_destroy(uint16_t port_id, struct rte_flow *flow, struct rte_flow_error *error) { @@ -183,14 +183,14 @@ rte_flow_destroy(uint8_t port_id, return -rte_errno; if (likely(!!ops->destroy)) return ops->destroy(dev, flow, error); - return -rte_flow_error_set(error, ENOSYS, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(ENOSYS)); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); } /* Destroy all flow rules associated with a port. */ int -rte_flow_flush(uint8_t port_id, +rte_flow_flush(uint16_t port_id, struct rte_flow_error *error) { struct rte_eth_dev *dev = &rte_eth_devices[port_id]; @@ -200,14 +200,14 @@ rte_flow_flush(uint8_t port_id, return -rte_errno; if (likely(!!ops->flush)) return ops->flush(dev, error); - return -rte_flow_error_set(error, ENOSYS, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(ENOSYS)); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); } /* Query an existing flow rule. */ int -rte_flow_query(uint8_t port_id, +rte_flow_query(uint16_t port_id, struct rte_flow *flow, enum rte_flow_action_type action, void *data, @@ -220,14 +220,14 @@ rte_flow_query(uint8_t port_id, return -rte_errno; if (likely(!!ops->query)) return ops->query(dev, flow, action, data, error); - return -rte_flow_error_set(error, ENOSYS, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(ENOSYS)); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); } /* Restrict ingress traffic to the defined flow rules. */ int -rte_flow_isolate(uint8_t port_id, +rte_flow_isolate(uint16_t port_id, int set, struct rte_flow_error *error) { @@ -238,9 +238,28 @@ rte_flow_isolate(uint8_t port_id, return -rte_errno; if (likely(!!ops->isolate)) return ops->isolate(dev, set, error); - return -rte_flow_error_set(error, ENOSYS, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(ENOSYS)); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Initialize flow error structure. */ +int +rte_flow_error_set(struct rte_flow_error *error, + int code, + enum rte_flow_error_type type, + const void *cause, + const char *message) +{ + if (error) { + *error = (struct rte_flow_error){ + .type = type, + .cause = cause, + .message = message, + }; + } + rte_errno = code; + return -code; } /** Compute storage space needed by item specification. */ diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h index bba6169f..47c88ea5 100644 --- a/lib/librte_ether/rte_flow.h +++ b/lib/librte_ether/rte_flow.h @@ -50,6 +50,7 @@ #include <rte_tcp.h> #include <rte_udp.h> #include <rte_byteorder.h> +#include <rte_esp.h> #ifdef __cplusplus extern "C" { @@ -309,6 +310,40 @@ enum rte_flow_item_type { * See struct rte_flow_item_fuzzy. */ RTE_FLOW_ITEM_TYPE_FUZZY, + + /** + * Matches a GTP header. + * + * Configure flow for GTP packets. + * + * See struct rte_flow_item_gtp. + */ + RTE_FLOW_ITEM_TYPE_GTP, + + /** + * Matches a GTP header. + * + * Configure flow for GTP-C packets. + * + * See struct rte_flow_item_gtp. + */ + RTE_FLOW_ITEM_TYPE_GTPC, + + /** + * Matches a GTP header. + * + * Configure flow for GTP-U packets. + * + * See struct rte_flow_item_gtp. + */ + RTE_FLOW_ITEM_TYPE_GTPU, + + /** + * Matches a ESP header. + * + * See struct rte_flow_item_esp. 
+ */ + RTE_FLOW_ITEM_TYPE_ESP, }; /** @@ -735,6 +770,49 @@ static const struct rte_flow_item_fuzzy rte_flow_item_fuzzy_mask = { #endif /** + * RTE_FLOW_ITEM_TYPE_GTP. + * + * Matches a GTPv1 header. + */ +struct rte_flow_item_gtp { + /** + * Version (3b), protocol type (1b), reserved (1b), + * Extension header flag (1b), + * Sequence number flag (1b), + * N-PDU number flag (1b). + */ + uint8_t v_pt_rsv_flags; + uint8_t msg_type; /**< Message type. */ + rte_be16_t msg_len; /**< Message length. */ + rte_be32_t teid; /**< Tunnel endpoint identifier. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_GTP. */ +#ifndef __cplusplus +static const struct rte_flow_item_gtp rte_flow_item_gtp_mask = { + .teid = RTE_BE32(0xffffffff), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ESP + * + * Matches an ESP header. + */ +struct rte_flow_item_esp { + struct esp_hdr hdr; /**< ESP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ESP. */ +#ifndef __cplusplus +static const struct rte_flow_item_esp rte_flow_item_esp_mask = { + .hdr = { + .spi = 0xffffffff, + }, +}; +#endif + +/** * Matching pattern item definition. * * A pattern is formed by stacking items starting from the lowest protocol @@ -915,6 +993,22 @@ enum rte_flow_action_type { * See struct rte_flow_action_vf. */ RTE_FLOW_ACTION_TYPE_VF, + + /** + * Traffic metering and policing (MTR). + * + * See struct rte_flow_action_meter. + * See file rte_mtr.h for MTR object configuration. + */ + RTE_FLOW_ACTION_TYPE_METER, + + /** + * Redirects packets to security engine of current device for security + * processing as specified by security session. + * + * See struct rte_flow_action_security. + */ + RTE_FLOW_ACTION_TYPE_SECURITY }; /** @@ -1008,6 +1102,51 @@ struct rte_flow_action_vf { }; /** + * RTE_FLOW_ACTION_TYPE_METER + * + * Traffic metering and policing (MTR). + * + * Packets matched by items of this type can be either dropped or passed to the + * next item with their color set by the MTR object. + * + * Non-terminating by default. + */ +struct rte_flow_action_meter { + uint32_t mtr_id; /**< MTR object ID created with rte_mtr_create(). */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_SECURITY + * + * Perform the security action on flows matched by the pattern items + * according to the configuration of the security session. + * + * This action modifies the payload of matched flows. For INLINE_CRYPTO, the + * security protocol headers and IV are fully provided by the application as + * specified in the flow pattern. The payload of matching packets is + * encrypted on egress, and decrypted and authenticated on ingress. + * For INLINE_PROTOCOL, the security protocol is fully offloaded to HW, + * providing full encapsulation and decapsulation of packets in security + * protocols. The flow pattern specifies both the outer security header fields + * and the inner packet fields. The security session specified in the action + * must match the pattern parameters. + * + * The security session specified in the action must be created on the same + * port as the flow action that is being specified. + * + * The ingress/egress flow attribute should match that specified in the + * security session if the security session supports the definition of the + * direction. + * + * Multiple flows can be configured to use the same security session. + * + * Non-terminating by default. + */ +struct rte_flow_action_security { + void *security_session; /**< Pointer to security session structure. */ +}; + +/** * Definition of a single action. 
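
As a usage sketch for the METER action defined above, the snippet below attaches an MTR object (created beforehand with rte_mtr_create(), see rte_mtr.h) to ingress IPv4 traffic; the helper name and the minimal pattern are assumptions, and whether such a rule is accepted ultimately depends on the PMD.

#include <stdint.h>
#include <rte_flow.h>

static struct rte_flow *
meter_ipv4_ingress(uint16_t port_id, uint32_t mtr_id,
		   struct rte_flow_error *error)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_action_meter meter = { .mtr_id = mtr_id };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_METER, .conf = &meter },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return rte_flow_create(port_id, &attr, pattern, actions, error);
}
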
* * A list of actions is terminated by a END action. @@ -1116,7 +1255,7 @@ struct rte_flow_error { * state (see rte_eth_dev_rx_queue_stop() and rte_eth_dev_stop()). */ int -rte_flow_validate(uint8_t port_id, +rte_flow_validate(uint16_t port_id, const struct rte_flow_attr *attr, const struct rte_flow_item pattern[], const struct rte_flow_action actions[], @@ -1143,7 +1282,7 @@ rte_flow_validate(uint8_t port_id, * rte_flow_validate(). */ struct rte_flow * -rte_flow_create(uint8_t port_id, +rte_flow_create(uint16_t port_id, const struct rte_flow_attr *attr, const struct rte_flow_item pattern[], const struct rte_flow_action actions[], @@ -1170,7 +1309,7 @@ rte_flow_create(uint8_t port_id, * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -rte_flow_destroy(uint8_t port_id, +rte_flow_destroy(uint16_t port_id, struct rte_flow *flow, struct rte_flow_error *error); @@ -1191,7 +1330,7 @@ rte_flow_destroy(uint8_t port_id, * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -rte_flow_flush(uint8_t port_id, +rte_flow_flush(uint16_t port_id, struct rte_flow_error *error); /** @@ -1219,7 +1358,7 @@ rte_flow_flush(uint8_t port_id, * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -rte_flow_query(uint8_t port_id, +rte_flow_query(uint16_t port_id, struct rte_flow *flow, enum rte_flow_action_type action, void *data, @@ -1267,7 +1406,31 @@ rte_flow_query(uint8_t port_id, * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -rte_flow_isolate(uint8_t port_id, int set, struct rte_flow_error *error); +rte_flow_isolate(uint16_t port_id, int set, struct rte_flow_error *error); + +/** + * Initialize flow error structure. + * + * @param[out] error + * Pointer to flow error structure (may be NULL). + * @param code + * Related error code (rte_errno). + * @param type + * Cause field and error types. + * @param cause + * Object responsible for the error. + * @param message + * Human-readable error message. + * + * @return + * Negative error code (errno value) and rte_errno is set. + */ +int +rte_flow_error_set(struct rte_flow_error *error, + int code, + enum rte_flow_error_type type, + const void *cause, + const char *message); /** * Generic flow representation. diff --git a/lib/librte_ether/rte_flow_driver.h b/lib/librte_ether/rte_flow_driver.h index 4d95391d..254d1cb2 100644 --- a/lib/librte_ether/rte_flow_driver.h +++ b/lib/librte_ether/rte_flow_driver.h @@ -45,7 +45,6 @@ #include <stdint.h> -#include <rte_errno.h> #include "rte_ethdev.h" #include "rte_flow.h" @@ -128,43 +127,6 @@ struct rte_flow_ops { }; /** - * Initialize generic flow error structure. - * - * This function also sets rte_errno to a given value. - * - * @param[out] error - * Pointer to flow error structure (may be NULL). - * @param code - * Related error code (rte_errno). - * @param type - * Cause field and error types. - * @param cause - * Object responsible for the error. - * @param message - * Human-readable error message. - * - * @return - * Error code. - */ -static inline int -rte_flow_error_set(struct rte_flow_error *error, - int code, - enum rte_flow_error_type type, - const void *cause, - const char *message) -{ - if (error) { - *error = (struct rte_flow_error){ - .type = type, - .cause = cause, - .message = message, - }; - } - rte_errno = code; - return code; -} - -/** * Get generic flow operations structure from a port. * * @param port_id @@ -178,7 +140,7 @@ rte_flow_error_set(struct rte_flow_error *error, * additional details. 
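
Since rte_flow_error_set() is now an exported function that fills the error structure, sets rte_errno and returns the negative errno value, a driver-side flow callback can simply return it. A purely illustrative sketch (the callback and the rejected attribute are made up for the example):

#include <errno.h>
#include <rte_ethdev.h>
#include <rte_flow.h>

static int
example_flow_validate(struct rte_eth_dev *dev,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item pattern[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
{
	(void)dev;
	(void)pattern;
	(void)actions;
	if (attr->group != 0)
		return rte_flow_error_set(error, ENOTSUP,
				RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
				"flow groups are not supported");
	return 0;
}
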
*/ const struct rte_flow_ops * -rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error); +rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error); #ifdef __cplusplus } diff --git a/lib/librte_ether/rte_mtr.c b/lib/librte_ether/rte_mtr.c new file mode 100644 index 00000000..4f56f871 --- /dev/null +++ b/lib/librte_ether/rte_mtr.c @@ -0,0 +1,229 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> + +#include <rte_errno.h> +#include "rte_ethdev.h" +#include "rte_mtr_driver.h" +#include "rte_mtr.h" + +/* Get generic traffic metering & policing operations structure from a port. 
*/ +const struct rte_mtr_ops * +rte_mtr_ops_get(uint16_t port_id, struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_mtr_ops *ops; + + if (!rte_eth_dev_is_valid_port(port_id)) { + rte_mtr_error_set(error, + ENODEV, + RTE_MTR_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(ENODEV)); + return NULL; + } + + if ((dev->dev_ops->mtr_ops_get == NULL) || + (dev->dev_ops->mtr_ops_get(dev, &ops) != 0) || + (ops == NULL)) { + rte_mtr_error_set(error, + ENOSYS, + RTE_MTR_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(ENOSYS)); + return NULL; + } + + return ops; +} + +#define RTE_MTR_FUNC(port_id, func) \ +({ \ + const struct rte_mtr_ops *ops = \ + rte_mtr_ops_get(port_id, error); \ + if (ops == NULL) \ + return -rte_errno; \ + \ + if (ops->func == NULL) \ + return -rte_mtr_error_set(error, \ + ENOSYS, \ + RTE_MTR_ERROR_TYPE_UNSPECIFIED, \ + NULL, \ + rte_strerror(ENOSYS)); \ + \ + ops->func; \ +}) + +/* MTR capabilities get */ +int +rte_mtr_capabilities_get(uint16_t port_id, + struct rte_mtr_capabilities *cap, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, capabilities_get)(dev, + cap, error); +} + +/* MTR meter profile add */ +int +rte_mtr_meter_profile_add(uint16_t port_id, + uint32_t meter_profile_id, + struct rte_mtr_meter_profile *profile, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_profile_add)(dev, + meter_profile_id, profile, error); +} + +/** MTR meter profile delete */ +int +rte_mtr_meter_profile_delete(uint16_t port_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_profile_delete)(dev, + meter_profile_id, error); +} + +/** MTR object create */ +int +rte_mtr_create(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_params *params, + int shared, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, create)(dev, + mtr_id, params, shared, error); +} + +/** MTR object destroy */ +int +rte_mtr_destroy(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, destroy)(dev, + mtr_id, error); +} + +/** MTR object meter enable */ +int +rte_mtr_meter_enable(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_enable)(dev, + mtr_id, error); +} + +/** MTR object meter disable */ +int +rte_mtr_meter_disable(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_disable)(dev, + mtr_id, error); +} + +/** MTR object meter profile update */ +int +rte_mtr_meter_profile_update(uint16_t port_id, + uint32_t mtr_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_profile_update)(dev, + mtr_id, meter_profile_id, error); +} + +/** MTR object meter DSCP table update */ +int +rte_mtr_meter_dscp_table_update(uint16_t port_id, + uint32_t mtr_id, + enum rte_mtr_color *dscp_table, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, 
meter_dscp_table_update)(dev, + mtr_id, dscp_table, error); +} + +/** MTR object policer action update */ +int +rte_mtr_policer_actions_update(uint16_t port_id, + uint32_t mtr_id, + uint32_t action_mask, + enum rte_mtr_policer_action *actions, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, policer_actions_update)(dev, + mtr_id, action_mask, actions, error); +} + +/** MTR object enabled stats update */ +int +rte_mtr_stats_update(uint16_t port_id, + uint32_t mtr_id, + uint64_t stats_mask, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, stats_update)(dev, + mtr_id, stats_mask, error); +} + +/** MTR object stats read */ +int +rte_mtr_stats_read(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, stats_read)(dev, + mtr_id, stats, stats_mask, clear, error); +} diff --git a/lib/librte_ether/rte_mtr.h b/lib/librte_ether/rte_mtr.h new file mode 100644 index 00000000..f6b6ef3b --- /dev/null +++ b/lib/librte_ether/rte_mtr.h @@ -0,0 +1,730 @@ +/*- + * BSD LICENSE + * + * Copyright 2017 Intel Corporation + * Copyright 2017 NXP + * Copyright 2017 Cavium + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_MTR_H__ +#define __INCLUDE_RTE_MTR_H__ + +/** + * @file + * RTE Generic Traffic Metering and Policing API + * + * This interface provides the ability to configure the traffic metering and + * policing (MTR) in a generic way. + * + * The processing done for each input packet hitting a MTR object is: + * A) Traffic metering: The packet is assigned a color (the meter output + * color), based on the previous history of the flow reflected in the + * current state of the MTR object, according to the specific traffic + * metering algorithm. 
The traffic metering algorithm can typically work + * in color aware mode, in which case the input packet already has an + * initial color (the input color), or in color blind mode, which is + * equivalent to considering all input packets initially colored as green. + * B) Policing: There is a separate policer action configured for each meter + * output color, which can: + * a) Drop the packet. + * b) Keep the same packet color: the policer output color matches the + * meter output color (essentially a no-op action). + * c) Recolor the packet: the policer output color is different than + * the meter output color. + * The policer output color is the output color of the packet, which is + * set in the packet meta-data (i.e. struct rte_mbuf::sched::color). + * C) Statistics: The set of counters maintained for each MTR object is + * configurable and subject to the implementation support. This set + * includes the number of packets and bytes dropped or passed for each + * output color. + * + * Once successfully created, an MTR object is linked to one or several flows + * through the meter action of the flow API. + * A) Whether an MTR object is private to a flow or potentially shared by + * several flows has to be specified at creation time. + * B) Several meter actions can be potentially registered for the same flow. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + */ +#include <stdint.h> + +#include <rte_common.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Color + */ +enum rte_mtr_color { + RTE_MTR_GREEN = 0, /**< Green */ + RTE_MTR_YELLOW, /**< Yellow */ + RTE_MTR_RED, /**< Red */ + RTE_MTR_COLORS /**< Number of colors. */ +}; + +/** + * Statistics counter type + */ +enum rte_mtr_stats_type { + /** Number of packets passed as green by the policer. */ + RTE_MTR_STATS_N_PKTS_GREEN = 1 << 0, + + /** Number of packets passed as yellow by the policer. */ + RTE_MTR_STATS_N_PKTS_YELLOW = 1 << 1, + + /** Number of packets passed as red by the policer. */ + RTE_MTR_STATS_N_PKTS_RED = 1 << 2, + + /** Number of packets dropped by the policer. */ + RTE_MTR_STATS_N_PKTS_DROPPED = 1 << 3, + + /** Number of bytes passed as green by the policer. */ + RTE_MTR_STATS_N_BYTES_GREEN = 1 << 4, + + /** Number of bytes passed as yellow by the policer. */ + RTE_MTR_STATS_N_BYTES_YELLOW = 1 << 5, + + /** Number of bytes passed as red by the policer. */ + RTE_MTR_STATS_N_BYTES_RED = 1 << 6, + + /** Number of bytes dropped by the policer. */ + RTE_MTR_STATS_N_BYTES_DROPPED = 1 << 7, +}; + +/** + * Statistics counters + */ +struct rte_mtr_stats { + /** Number of packets passed by the policer (per color). */ + uint64_t n_pkts[RTE_MTR_COLORS]; + + /** Number of bytes passed by the policer (per color). */ + uint64_t n_bytes[RTE_MTR_COLORS]; + + /** Number of packets dropped by the policer. */ + uint64_t n_pkts_dropped; + + /** Number of bytes passed by the policer. */ + uint64_t n_bytes_dropped; +}; + +/** + * Traffic metering algorithms + */ +enum rte_mtr_algorithm { + /** No traffic metering performed, the output color is the same as the + * input color for every input packet. The meter of the MTR object is + * working in pass-through mode, having same effect as meter disable. + * @see rte_mtr_meter_disable() + */ + RTE_MTR_NONE = 0, + + /** Single Rate Three Color Marker (srTCM) - IETF RFC 2697. */ + RTE_MTR_SRTCM_RFC2697, + + /** Two Rate Three Color Marker (trTCM) - IETF RFC 2698. */ + RTE_MTR_TRTCM_RFC2698, + + /** Two Rate Three Color Marker (trTCM) - IETF RFC 4115. 
*/ + RTE_MTR_TRTCM_RFC4115, +}; + +/** + * Meter profile + */ +struct rte_mtr_meter_profile { + /** Traffic metering algorithm. */ + enum rte_mtr_algorithm alg; + + RTE_STD_C11 + union { + /** Items only valid when *alg* is set to srTCM - RFC 2697. */ + struct { + /** Committed Information Rate (CIR) (bytes/second). */ + uint64_t cir; + + /** Committed Burst Size (CBS) (bytes). */ + uint64_t cbs; + + /** Excess Burst Size (EBS) (bytes). */ + uint64_t ebs; + } srtcm_rfc2697; + + /** Items only valid when *alg* is set to trTCM - RFC 2698. */ + struct { + /** Committed Information Rate (CIR) (bytes/second). */ + uint64_t cir; + + /** Peak Information Rate (PIR) (bytes/second). */ + uint64_t pir; + + /** Committed Burst Size (CBS) (bytes). */ + uint64_t cbs; + + /** Peak Burst Size (PBS) (bytes). */ + uint64_t pbs; + } trtcm_rfc2698; + + /** Items only valid when *alg* is set to trTCM - RFC 4115. */ + struct { + /** Committed Information Rate (CIR) (bytes/second). */ + uint64_t cir; + + /** Excess Information Rate (EIR) (bytes/second). */ + uint64_t eir; + + /** Committed Burst Size (CBS) (bytes). */ + uint64_t cbs; + + /** Excess Burst Size (EBS) (bytes). */ + uint64_t ebs; + } trtcm_rfc4115; + }; +}; + +/** + * Policer actions + */ +enum rte_mtr_policer_action { + /** Recolor the packet as green. */ + MTR_POLICER_ACTION_COLOR_GREEN = 0, + + /** Recolor the packet as yellow. */ + MTR_POLICER_ACTION_COLOR_YELLOW, + + /** Recolor the packet as red. */ + MTR_POLICER_ACTION_COLOR_RED, + + /** Drop the packet. */ + MTR_POLICER_ACTION_DROP, +}; + +/** + * Parameters for each traffic metering & policing object + * + * @see enum rte_mtr_stats_type + */ +struct rte_mtr_params { + /** Meter profile ID. */ + uint32_t meter_profile_id; + + /** Meter input color in case of MTR object chaining. When non-zero: if + * a previous MTR object is enabled in the same flow, then the color + * determined by the latest MTR object in the same flow is used as the + * input color by the current MTR object, otherwise the current MTR + * object uses the *dscp_table* to determine the input color. When zero: + * the color determined by any previous MTR object in the same flow is + * ignored by the current MTR object, which uses the *dscp_table* to + * determine the input color. + */ + int use_prev_mtr_color; + + /** Meter input color. When non-NULL: it points to a pre-allocated and + * pre-populated table with exactly 64 elements providing the input + * color for each value of the IPv4/IPv6 Differentiated Services Code + * Point (DSCP) input packet field. When NULL: it is equivalent to + * setting this parameter to an all-green populated table (i.e. table + * with all the 64 elements set to green color). The color blind mode + * is configured by setting *use_prev_mtr_color* to 0 and *dscp_table* + * to either NULL or to an all-green populated table. When + * *use_prev_mtr_color* is a non-zero value or when *dscp_table* contains + * at least one yellow or red color element, then the color aware mode + * is configured. + */ + enum rte_mtr_color *dscp_table; + + /** Non-zero to enable the meter, zero to disable the meter at the time + * of MTR object creation. Ignored when the meter profile indicated by + * *meter_profile_id* is set to NONE. + * @see rte_mtr_meter_disable() + */ + int meter_enable; + + /** Policer actions (per meter output color). */ + enum rte_mtr_policer_action action[RTE_MTR_COLORS]; + + /** Set of stats counters to be enabled.
+ * @see enum rte_mtr_stats_type + */ + uint64_t stats_mask; +}; + +/** + * MTR capabilities + */ +struct rte_mtr_capabilities { + /** Maximum number of MTR objects. */ + uint32_t n_max; + + /** Maximum number of MTR objects that can be shared by multiple flows. + * The value of zero indicates that shared MTR objects are not + * supported. The maximum value is *n_max*. + */ + uint32_t n_shared_max; + + /** When non-zero, this flag indicates that all the MTR objects that + * cannot be shared by multiple flows have identical capability set. + */ + int identical; + + /** When non-zero, this flag indicates that all the MTR objects that + * can be shared by multiple flows have identical capability set. + */ + int shared_identical; + + /** Maximum number of flows that can share the same MTR object. The + * value of zero is invalid. The value of 1 means that shared MTR + * objects not supported. + */ + uint32_t shared_n_flows_per_mtr_max; + + /** Maximum number of MTR objects that can be part of the same flow. The + * value of zero is invalid. The value of 1 indicates that MTR object + * chaining is not supported. The maximum value is *n_max*. + */ + uint32_t chaining_n_mtrs_per_flow_max; + + /** + * When non-zero, it indicates that the packet color identified by one + * MTR object can be used as the packet input color by any subsequent + * MTR object from the same flow. When zero, it indicates that the color + * determined by one MTR object is always ignored by any subsequent MTR + * object from the same flow. Only valid when MTR chaining is supported, + * i.e. *chaining_n_mtrs_per_flow_max* is greater than 1. When non-zero, + * it also means that the color aware mode is supported by at least one + * metering algorithm. + */ + int chaining_use_prev_mtr_color_supported; + + /** + * When non-zero, it indicates that the packet color identified by one + * MTR object is always used as the packet input color by any subsequent + * MTR object that is part of the same flow. When zero, it indicates + * that whether the color determined by one MTR object is either ignored + * or used as the packet input color by any subsequent MTR object from + * the same flow is individually configurable for each MTR object. Only + * valid when *chaining_use_prev_mtr_color_supported* is non-zero. + */ + int chaining_use_prev_mtr_color_enforced; + + /** Maximum number of MTR objects that can have their meter configured + * to run the srTCM RFC 2697 algorithm. The value of 0 indicates this + * metering algorithm is not supported. The maximum value is *n_max*. + */ + uint32_t meter_srtcm_rfc2697_n_max; + + /** Maximum number of MTR objects that can have their meter configured + * to run the trTCM RFC 2698 algorithm. The value of 0 indicates this + * metering algorithm is not supported. The maximum value is *n_max*. + */ + uint32_t meter_trtcm_rfc2698_n_max; + + /** Maximum number of MTR objects that can have their meter configured + * to run the trTCM RFC 4115 algorithm. The value of 0 indicates this + * metering algorithm is not supported. The maximum value is *n_max*. + */ + uint32_t meter_trtcm_rfc4115_n_max; + + /** Maximum traffic rate that can be metered by a single MTR object. For + * srTCM RFC 2697, this is the maximum CIR rate. For trTCM RFC 2698, + * this is the maximum PIR rate. For trTCM RFC 4115, this is the maximum + * value for the sum of PIR and EIR rates. + */ + uint64_t meter_rate_max; + + /** + * When non-zero, it indicates that color aware mode is supported for + * the srTCM RFC 2697 metering algorithm. 
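A brief hedged snippet showing how an application could consult these capability fields before configuring meters; port_id and the chosen field are illustrative assumptions.

/* Illustrative capability check inside an init routine: bail out early
 * when the port cannot run any srTCM RFC 2697 meter.
 */
struct rte_mtr_capabilities cap;
struct rte_mtr_error error;

if (rte_mtr_capabilities_get(port_id, &cap, &error) != 0 ||
		cap.meter_srtcm_rfc2697_n_max == 0)
	return -ENOTSUP; /* metering algorithm not available on this port */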
+ */ + int color_aware_srtcm_rfc2697_supported; + + /** + * When non-zero, it indicates that color aware mode is supported for + * the trTCM RFC 2698 metering algorithm. + */ + int color_aware_trtcm_rfc2698_supported; + + /** + * When non-zero, it indicates that color aware mode is supported for + * the trTCM RFC 4115 metering algorithm. + */ + int color_aware_trtcm_rfc4115_supported; + + /** When non-zero, it indicates that the policer packet recolor actions + * are supported. + * @see enum rte_mtr_policer_action + */ + int policer_action_recolor_supported; + + /** When non-zero, it indicates that the policer packet drop action is + * supported. + * @see enum rte_mtr_policer_action + */ + int policer_action_drop_supported; + + /** Set of supported statistics counter types. + * @see enum rte_mtr_stats_type + */ + uint64_t stats_mask; +}; + +/** + * Verbose error types. + * + * Most of them provide the type of the object referenced by struct + * rte_mtr_error::cause. + */ +enum rte_mtr_error_type { + RTE_MTR_ERROR_TYPE_NONE, /**< No error. */ + RTE_MTR_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ + RTE_MTR_ERROR_TYPE_METER_PROFILE_ID, + RTE_MTR_ERROR_TYPE_METER_PROFILE, + RTE_MTR_ERROR_TYPE_MTR_ID, + RTE_MTR_ERROR_TYPE_MTR_PARAMS, + RTE_MTR_ERROR_TYPE_POLICER_ACTION_GREEN, + RTE_MTR_ERROR_TYPE_POLICER_ACTION_YELLOW, + RTE_MTR_ERROR_TYPE_POLICER_ACTION_RED, + RTE_MTR_ERROR_TYPE_STATS_MASK, + RTE_MTR_ERROR_TYPE_STATS, + RTE_MTR_ERROR_TYPE_SHARED, +}; + +/** + * Verbose error structure definition. + * + * This object is normally allocated by applications and set by PMDs, the + * message points to a constant string which does not need to be freed by + * the application, however its pointer can be considered valid only as long + * as its associated DPDK port remains configured. Closing the underlying + * device or unloading the PMD invalidates it. + * + * Both cause and message may be NULL regardless of the error type. + */ +struct rte_mtr_error { + enum rte_mtr_error_type type; /**< Cause field and error type. */ + const void *cause; /**< Object responsible for the error. */ + const char *message; /**< Human-readable error message. */ +}; + +/** + * MTR capabilities get + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] cap + * MTR capabilities. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_mtr_capabilities_get(uint16_t port_id, + struct rte_mtr_capabilities *cap, + struct rte_mtr_error *error); + +/** + * Meter profile add + * + * Create a new meter profile with ID set to *meter_profile_id*. The new profile + * is used to create one or several MTR objects. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] meter_profile_id + * ID for the new meter profile. Needs to be unused by any of the existing + * meter profiles added for the current port. + * @param[in] profile + * Meter profile parameters. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_mtr_meter_profile_add(uint16_t port_id, + uint32_t meter_profile_id, + struct rte_mtr_meter_profile *profile, + struct rte_mtr_error *error); + +/** + * Meter profile delete + * + * Delete an existing meter profile. 
This operation fails when there is + * currently at least one user (i.e. MTR object) of this profile. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] meter_profile_id + * Meter profile ID. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_mtr_meter_profile_delete(uint16_t port_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error); + +/** + * MTR object create + * + * Create a new MTR object for the current port. This object is run as part of + * the associated flow action for traffic metering and policing. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be unused by any of the existing MTR objects + * created for the current port. + * @param[in] params + * MTR object params. Needs to be pre-allocated and valid. + * @param[in] shared + * Non-zero when this MTR object can be shared by multiple flows, zero when + * this MTR object can be used by a single flow. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_flow_action_type::RTE_FLOW_ACTION_TYPE_METER + */ +int +rte_mtr_create(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_params *params, + int shared, + struct rte_mtr_error *error); + +/** + * MTR object destroy + * + * Delete an existing MTR object. This operation fails when there is currently + * at least one user (i.e. flow) of this MTR object. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid and created for the current port. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_mtr_destroy(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error); + +/** + * MTR object meter disable + * + * Disable the meter of an existing MTR object. In disabled state, the meter of + * the current MTR object works in pass-through mode, meaning that for each + * input packet the meter output color is always the same as the input color. In + * particular, when the meter of the current MTR object is configured in color + * blind mode, the input color is always green, so the meter output color is + * also always green. Note that the policer and the statistics of the current + * MTR object are working as usual while the meter is disabled. No action is + * taken and this function returns successfully when the meter of the current + * MTR object is already disabled. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_mtr_meter_disable(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error); + +/** + * MTR object meter enable + * + * Enable the meter of an existing MTR object. If the MTR object has its meter + * already enabled, then no action is taken and this function returns + * successfully. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. + * @param[out] error + * Error details. Filled in only on error, when not NULL.
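As a usage note for the calls documented above, a hedged application-level sketch: add a meter profile, then create an MTR object that references it. The IDs, rates and port_id below are illustrative assumptions, not values from this patch.

/* Illustrative only: ~10 Mbit/s srTCM RFC 2697 profile (CIR is in bytes
 * per second), then a non-shared MTR object that drops red packets and
 * counts the drops.
 */
struct rte_mtr_meter_profile profile = {
	.alg = RTE_MTR_SRTCM_RFC2697,
	.srtcm_rfc2697 = { .cir = 1250000, .cbs = 2048, .ebs = 2048 },
};
struct rte_mtr_params params = {
	.meter_profile_id = 0,
	.meter_enable = 1,
	.action = {
		[RTE_MTR_GREEN] = MTR_POLICER_ACTION_COLOR_GREEN,
		[RTE_MTR_YELLOW] = MTR_POLICER_ACTION_COLOR_YELLOW,
		[RTE_MTR_RED] = MTR_POLICER_ACTION_DROP,
	},
	.stats_mask = RTE_MTR_STATS_N_PKTS_DROPPED,
};
struct rte_mtr_error error;

if (rte_mtr_meter_profile_add(port_id, 0, &profile, &error) == 0 &&
		rte_mtr_create(port_id, 0, &params, 0 /* not shared */, &error) == 0) {
	/* MTR object 0 can now be referenced by a flow rule meter action. */
}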
+ * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_mtr_meter_enable(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error); + +/** + * MTR object meter profile update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[in] meter_profile_id + * Meter profile ID for the current MTR object. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_mtr_meter_profile_update(uint16_t port_id, + uint32_t mtr_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error); + +/** + * MTR object DSCP table update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[in] dscp_table + * When non-NULL: it points to a pre-allocated and pre-populated table with + * exactly 64 elements providing the input color for each value of the + * IPv4/IPv6 Differentiated Services Code Point (DSCP) input packet field. + * When NULL: it is equivalent to setting this parameter to an “all-green” + * populated table (i.e. table with all the 64 elements set to green color). + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_mtr_meter_dscp_table_update(uint16_t port_id, + uint32_t mtr_id, + enum rte_mtr_color *dscp_table, + struct rte_mtr_error *error); + +/** + * MTR object policer actions update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[in] action_mask + * Bit mask indicating which policer actions need to be updated. One or more + * policer actions can be updated in a single function invocation. To update + * the policer action associated with color C, bit (1 << C) needs to be set in + * *action_mask* and element at position C in the *actions* array needs to be + * valid. + * @param[in] actions + * Pre-allocated and pre-populated array of policer actions. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_mtr_policer_actions_update(uint16_t port_id, + uint32_t mtr_id, + uint32_t action_mask, + enum rte_mtr_policer_action *actions, + struct rte_mtr_error *error); + +/** + * MTR object enabled statistics counters update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[in] stats_mask + * Mask of statistics counter types to be enabled for the current MTR object. + * Any statistics counter type not included in this set is to be disabled for + * the current MTR object. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_mtr_stats_type + */ +int +rte_mtr_stats_update(uint16_t port_id, + uint32_t mtr_id, + uint64_t stats_mask, + struct rte_mtr_error *error); + +/** + * MTR object statistics counters read + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[out] stats + * When non-NULL, it contains the current value for the statistics counters + * enabled for the current MTR object. 
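The (1 << C) convention used by *action_mask* above may be easier to read as a snippet; the variable names are assumptions.

/* Update only the RED policer action: start dropping red packets.
 * Bit (1 << RTE_MTR_RED) marks actions[RTE_MTR_RED] as the one entry
 * the driver should treat as valid.
 */
enum rte_mtr_policer_action actions[RTE_MTR_COLORS];
struct rte_mtr_error error;

actions[RTE_MTR_RED] = MTR_POLICER_ACTION_DROP;
rte_mtr_policer_actions_update(port_id, mtr_id,
		1 << RTE_MTR_RED, actions, &error);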
+ * @param[out] stats_mask + * When non-NULL, it contains the mask of statistics counter types that are + * currently enabled for this MTR object, indicating which of the counters + * retrieved with the *stats* structure are valid. + * @param[in] clear + * When this parameter has a non-zero value, the statistics counters are + * cleared (i.e. set to zero) immediately after they have been read, + * otherwise the statistics counters are left untouched. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_mtr_stats_type + */ +int +rte_mtr_stats_read(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_mtr_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_MTR_H__ */ diff --git a/lib/librte_ether/rte_mtr_driver.h b/lib/librte_ether/rte_mtr_driver.h new file mode 100644 index 00000000..6a289ef1 --- /dev/null +++ b/lib/librte_ether/rte_mtr_driver.h @@ -0,0 +1,221 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_MTR_DRIVER_H__ +#define __INCLUDE_RTE_MTR_DRIVER_H__ + +/** + * @file + * RTE Generic Traffic Metering and Policing API (Driver Side) + * + * This file provides implementation helpers for internal use by PMDs, they + * are not intended to be exposed to applications and are not subject to ABI + * versioning. 
+ */ + +#include <stdint.h> + +#include <rte_errno.h> +#include "rte_ethdev.h" +#include "rte_mtr.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int (*rte_mtr_capabilities_get_t)(struct rte_eth_dev *dev, + struct rte_mtr_capabilities *cap, + struct rte_mtr_error *error); +/**< @internal MTR capabilities get */ + +typedef int (*rte_mtr_meter_profile_add_t)(struct rte_eth_dev *dev, + uint32_t meter_profile_id, + struct rte_mtr_meter_profile *profile, + struct rte_mtr_error *error); +/**< @internal MTR meter profile add */ + +typedef int (*rte_mtr_meter_profile_delete_t)(struct rte_eth_dev *dev, + uint32_t meter_profile_id, + struct rte_mtr_error *error); +/**< @internal MTR meter profile delete */ + +typedef int (*rte_mtr_create_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_params *params, + int shared, + struct rte_mtr_error *error); +/**< @internal MTR object create */ + +typedef int (*rte_mtr_destroy_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_error *error); +/**< @internal MTR object destroy */ + +typedef int (*rte_mtr_meter_enable_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_error *error); +/**< @internal MTR object meter enable */ + +typedef int (*rte_mtr_meter_disable_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_error *error); +/**< @internal MTR object meter disable */ + +typedef int (*rte_mtr_meter_profile_update_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error); +/**< @internal MTR object meter profile update */ + +typedef int (*rte_mtr_meter_dscp_table_update_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + enum rte_mtr_color *dscp_table, + struct rte_mtr_error *error); +/**< @internal MTR object meter DSCP table update */ + +typedef int (*rte_mtr_policer_actions_update_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + uint32_t action_mask, + enum rte_mtr_policer_action *actions, + struct rte_mtr_error *error); +/**< @internal MTR object policer action update*/ + +typedef int (*rte_mtr_stats_update_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + uint64_t stats_mask, + struct rte_mtr_error *error); +/**< @internal MTR object enabled stats update */ + +typedef int (*rte_mtr_stats_read_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_mtr_error *error); +/**< @internal MTR object stats read */ + +struct rte_mtr_ops { + /** MTR capabilities get */ + rte_mtr_capabilities_get_t capabilities_get; + + /** MTR meter profile add */ + rte_mtr_meter_profile_add_t meter_profile_add; + + /** MTR meter profile delete */ + rte_mtr_meter_profile_delete_t meter_profile_delete; + + /** MTR object create */ + rte_mtr_create_t create; + + /** MTR object destroy */ + rte_mtr_destroy_t destroy; + + /** MTR object meter enable */ + rte_mtr_meter_enable_t meter_enable; + + /** MTR object meter disable */ + rte_mtr_meter_disable_t meter_disable; + + /** MTR object meter profile update */ + rte_mtr_meter_profile_update_t meter_profile_update; + + /** MTR object meter DSCP table update */ + rte_mtr_meter_dscp_table_update_t meter_dscp_table_update; + + /** MTR object policer action update */ + rte_mtr_policer_actions_update_t policer_actions_update; + + /** MTR object enabled stats update */ + rte_mtr_stats_update_t stats_update; + + /** MTR object stats read */ + rte_mtr_stats_read_t stats_read; +}; + +/** + * Initialize generic error structure. 
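For orientation, a hedged sketch of the driver-side glue these typedefs imply: a PMD fills a static rte_mtr_ops table and exposes it through the ethdev mtr_ops_get hook that rte_mtr_ops_get() dereferences. The names are hypothetical and, since the ethdev-level hook prototype is not part of this hunk, the void * signature is an assumption based on how rte_mtr.c calls it.

/* Hypothetical PMD glue, for illustration only. */
static int
example_mtr_capabilities_get(struct rte_eth_dev *dev,
		struct rte_mtr_capabilities *cap,
		struct rte_mtr_error *error)
{
	(void)dev;
	(void)error;
	*cap = (struct rte_mtr_capabilities){ .n_max = 1 };
	return 0;
}

static const struct rte_mtr_ops example_mtr_ops = {
	.capabilities_get = example_mtr_capabilities_get,
	/* Callbacks left NULL make the generic layer return -ENOSYS for the
	 * corresponding rte_mtr_*() calls (see RTE_MTR_FUNC in rte_mtr.c).
	 */
};

static int
example_mtr_ops_get(struct rte_eth_dev *dev, void *ops)
{
	(void)dev;
	*(const struct rte_mtr_ops **)ops = &example_mtr_ops;
	return 0;
}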
+ * + * This function also sets rte_errno to a given value. + * + * @param[out] error + * Pointer to error structure (may be NULL). + * @param[in] code + * Related error code (rte_errno). + * @param[in] type + * Cause field and error type. + * @param[in] cause + * Object responsible for the error. + * @param[in] message + * Human-readable error message. + * + * @return + * Error code. + */ +static inline int +rte_mtr_error_set(struct rte_mtr_error *error, + int code, + enum rte_mtr_error_type type, + const void *cause, + const char *message) +{ + if (error) { + *error = (struct rte_mtr_error){ + .type = type, + .cause = cause, + .message = message, + }; + } + rte_errno = code; + return code; +} + +/** + * Get generic traffic metering and policing operations structure from a port + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] error + * Error details + * + * @return + * The traffic metering and policing operations structure associated with + * port_id on success, NULL otherwise. + */ +const struct rte_mtr_ops * +rte_mtr_ops_get(uint16_t port_id, struct rte_mtr_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_MTR_DRIVER_H__ */ diff --git a/lib/librte_ether/rte_tm.c b/lib/librte_ether/rte_tm.c index 71679650..ceac3411 100644 --- a/lib/librte_ether/rte_tm.c +++ b/lib/librte_ether/rte_tm.c @@ -40,7 +40,7 @@ /* Get generic traffic manager operations structure from a port. */ const struct rte_tm_ops * -rte_tm_ops_get(uint8_t port_id, struct rte_tm_error *error) +rte_tm_ops_get(uint16_t port_id, struct rte_tm_error *error) { struct rte_eth_dev *dev = &rte_eth_devices[port_id]; const struct rte_tm_ops *ops; @@ -87,7 +87,7 @@ rte_tm_ops_get(uint8_t port_id, struct rte_tm_error *error) /* Get number of leaf nodes */ int -rte_tm_get_number_of_leaf_nodes(uint8_t port_id, +rte_tm_get_number_of_leaf_nodes(uint16_t port_id, uint32_t *n_leaf_nodes, struct rte_tm_error *error) { @@ -113,7 +113,7 @@ rte_tm_get_number_of_leaf_nodes(uint8_t port_id, /* Check node type (leaf or non-leaf) */ int -rte_tm_node_type_get(uint8_t port_id, +rte_tm_node_type_get(uint16_t port_id, uint32_t node_id, int *is_leaf, struct rte_tm_error *error) @@ -124,7 +124,7 @@ rte_tm_node_type_get(uint8_t port_id, } /* Get capabilities */ -int rte_tm_capabilities_get(uint8_t port_id, +int rte_tm_capabilities_get(uint16_t port_id, struct rte_tm_capabilities *cap, struct rte_tm_error *error) { @@ -134,7 +134,7 @@ int rte_tm_capabilities_get(uint8_t port_id, } /* Get level capabilities */ -int rte_tm_level_capabilities_get(uint8_t port_id, +int rte_tm_level_capabilities_get(uint16_t port_id, uint32_t level_id, struct rte_tm_level_capabilities *cap, struct rte_tm_error *error) @@ -145,7 +145,7 @@ int rte_tm_level_capabilities_get(uint8_t port_id, } /* Get node capabilities */ -int rte_tm_node_capabilities_get(uint8_t port_id, +int rte_tm_node_capabilities_get(uint16_t port_id, uint32_t node_id, struct rte_tm_node_capabilities *cap, struct rte_tm_error *error) @@ -156,7 +156,7 @@ int rte_tm_node_capabilities_get(uint8_t port_id, } /* Add WRED profile */ -int rte_tm_wred_profile_add(uint8_t port_id, +int rte_tm_wred_profile_add(uint16_t port_id, uint32_t wred_profile_id, struct rte_tm_wred_params *profile, struct rte_tm_error *error) @@ -167,7 +167,7 @@ int rte_tm_wred_profile_add(uint8_t port_id, } /* Delete WRED profile */ -int rte_tm_wred_profile_delete(uint8_t port_id, +int rte_tm_wred_profile_delete(uint16_t port_id, uint32_t wred_profile_id, struct rte_tm_error *error) 
{ @@ -177,7 +177,7 @@ int rte_tm_wred_profile_delete(uint8_t port_id, } /* Add/update shared WRED context */ -int rte_tm_shared_wred_context_add_update(uint8_t port_id, +int rte_tm_shared_wred_context_add_update(uint16_t port_id, uint32_t shared_wred_context_id, uint32_t wred_profile_id, struct rte_tm_error *error) @@ -188,7 +188,7 @@ int rte_tm_shared_wred_context_add_update(uint8_t port_id, } /* Delete shared WRED context */ -int rte_tm_shared_wred_context_delete(uint8_t port_id, +int rte_tm_shared_wred_context_delete(uint16_t port_id, uint32_t shared_wred_context_id, struct rte_tm_error *error) { @@ -198,7 +198,7 @@ int rte_tm_shared_wred_context_delete(uint8_t port_id, } /* Add shaper profile */ -int rte_tm_shaper_profile_add(uint8_t port_id, +int rte_tm_shaper_profile_add(uint16_t port_id, uint32_t shaper_profile_id, struct rte_tm_shaper_params *profile, struct rte_tm_error *error) @@ -209,7 +209,7 @@ int rte_tm_shaper_profile_add(uint8_t port_id, } /* Delete WRED profile */ -int rte_tm_shaper_profile_delete(uint8_t port_id, +int rte_tm_shaper_profile_delete(uint16_t port_id, uint32_t shaper_profile_id, struct rte_tm_error *error) { @@ -219,7 +219,7 @@ int rte_tm_shaper_profile_delete(uint8_t port_id, } /* Add shared shaper */ -int rte_tm_shared_shaper_add_update(uint8_t port_id, +int rte_tm_shared_shaper_add_update(uint16_t port_id, uint32_t shared_shaper_id, uint32_t shaper_profile_id, struct rte_tm_error *error) @@ -230,7 +230,7 @@ int rte_tm_shared_shaper_add_update(uint8_t port_id, } /* Delete shared shaper */ -int rte_tm_shared_shaper_delete(uint8_t port_id, +int rte_tm_shared_shaper_delete(uint16_t port_id, uint32_t shared_shaper_id, struct rte_tm_error *error) { @@ -240,7 +240,7 @@ int rte_tm_shared_shaper_delete(uint8_t port_id, } /* Add node to port traffic manager hierarchy */ -int rte_tm_node_add(uint8_t port_id, +int rte_tm_node_add(uint16_t port_id, uint32_t node_id, uint32_t parent_node_id, uint32_t priority, @@ -256,7 +256,7 @@ int rte_tm_node_add(uint8_t port_id, } /* Delete node from traffic manager hierarchy */ -int rte_tm_node_delete(uint8_t port_id, +int rte_tm_node_delete(uint16_t port_id, uint32_t node_id, struct rte_tm_error *error) { @@ -266,7 +266,7 @@ int rte_tm_node_delete(uint8_t port_id, } /* Suspend node */ -int rte_tm_node_suspend(uint8_t port_id, +int rte_tm_node_suspend(uint16_t port_id, uint32_t node_id, struct rte_tm_error *error) { @@ -276,7 +276,7 @@ int rte_tm_node_suspend(uint8_t port_id, } /* Resume node */ -int rte_tm_node_resume(uint8_t port_id, +int rte_tm_node_resume(uint16_t port_id, uint32_t node_id, struct rte_tm_error *error) { @@ -286,7 +286,7 @@ int rte_tm_node_resume(uint8_t port_id, } /* Commit the initial port traffic manager hierarchy */ -int rte_tm_hierarchy_commit(uint8_t port_id, +int rte_tm_hierarchy_commit(uint16_t port_id, int clear_on_fail, struct rte_tm_error *error) { @@ -296,7 +296,7 @@ int rte_tm_hierarchy_commit(uint8_t port_id, } /* Update node parent */ -int rte_tm_node_parent_update(uint8_t port_id, +int rte_tm_node_parent_update(uint16_t port_id, uint32_t node_id, uint32_t parent_node_id, uint32_t priority, @@ -309,7 +309,7 @@ int rte_tm_node_parent_update(uint8_t port_id, } /* Update node private shaper */ -int rte_tm_node_shaper_update(uint8_t port_id, +int rte_tm_node_shaper_update(uint16_t port_id, uint32_t node_id, uint32_t shaper_profile_id, struct rte_tm_error *error) @@ -320,7 +320,7 @@ int rte_tm_node_shaper_update(uint8_t port_id, } /* Update node shared shapers */ -int 
rte_tm_node_shared_shaper_update(uint8_t port_id, +int rte_tm_node_shared_shaper_update(uint16_t port_id, uint32_t node_id, uint32_t shared_shaper_id, int add, @@ -332,7 +332,7 @@ int rte_tm_node_shared_shaper_update(uint8_t port_id, } /* Update node stats */ -int rte_tm_node_stats_update(uint8_t port_id, +int rte_tm_node_stats_update(uint16_t port_id, uint32_t node_id, uint64_t stats_mask, struct rte_tm_error *error) @@ -343,7 +343,7 @@ int rte_tm_node_stats_update(uint8_t port_id, } /* Update WFQ weight mode */ -int rte_tm_node_wfq_weight_mode_update(uint8_t port_id, +int rte_tm_node_wfq_weight_mode_update(uint16_t port_id, uint32_t node_id, int *wfq_weight_mode, uint32_t n_sp_priorities, @@ -355,7 +355,7 @@ int rte_tm_node_wfq_weight_mode_update(uint8_t port_id, } /* Update node congestion management mode */ -int rte_tm_node_cman_update(uint8_t port_id, +int rte_tm_node_cman_update(uint16_t port_id, uint32_t node_id, enum rte_tm_cman_mode cman, struct rte_tm_error *error) @@ -366,7 +366,7 @@ int rte_tm_node_cman_update(uint8_t port_id, } /* Update node private WRED context */ -int rte_tm_node_wred_context_update(uint8_t port_id, +int rte_tm_node_wred_context_update(uint16_t port_id, uint32_t node_id, uint32_t wred_profile_id, struct rte_tm_error *error) @@ -377,7 +377,7 @@ int rte_tm_node_wred_context_update(uint8_t port_id, } /* Update node shared WRED context */ -int rte_tm_node_shared_wred_context_update(uint8_t port_id, +int rte_tm_node_shared_wred_context_update(uint16_t port_id, uint32_t node_id, uint32_t shared_wred_context_id, int add, @@ -389,7 +389,7 @@ int rte_tm_node_shared_wred_context_update(uint8_t port_id, } /* Read and/or clear stats counters for specific node */ -int rte_tm_node_stats_read(uint8_t port_id, +int rte_tm_node_stats_read(uint16_t port_id, uint32_t node_id, struct rte_tm_node_stats *stats, uint64_t *stats_mask, @@ -402,7 +402,7 @@ int rte_tm_node_stats_read(uint8_t port_id, } /* Packet marking - VLAN DEI */ -int rte_tm_mark_vlan_dei(uint8_t port_id, +int rte_tm_mark_vlan_dei(uint16_t port_id, int mark_green, int mark_yellow, int mark_red, @@ -414,7 +414,7 @@ int rte_tm_mark_vlan_dei(uint8_t port_id, } /* Packet marking - IPv4/IPv6 ECN */ -int rte_tm_mark_ip_ecn(uint8_t port_id, +int rte_tm_mark_ip_ecn(uint16_t port_id, int mark_green, int mark_yellow, int mark_red, @@ -426,7 +426,7 @@ int rte_tm_mark_ip_ecn(uint8_t port_id, } /* Packet marking - IPv4/IPv6 DSCP */ -int rte_tm_mark_ip_dscp(uint8_t port_id, +int rte_tm_mark_ip_dscp(uint16_t port_id, int mark_green, int mark_yellow, int mark_red, diff --git a/lib/librte_ether/rte_tm.h b/lib/librte_ether/rte_tm.h index ebbfa1ee..2b25a871 100644 --- a/lib/librte_ether/rte_tm.h +++ b/lib/librte_ether/rte_tm.h @@ -1040,7 +1040,7 @@ struct rte_tm_error { * 0 on success, non-zero error code otherwise. */ int -rte_tm_get_number_of_leaf_nodes(uint8_t port_id, +rte_tm_get_number_of_leaf_nodes(uint16_t port_id, uint32_t *n_leaf_nodes, struct rte_tm_error *error); @@ -1064,7 +1064,7 @@ rte_tm_get_number_of_leaf_nodes(uint8_t port_id, * 0 on success, non-zero error code otherwise. */ int -rte_tm_node_type_get(uint8_t port_id, +rte_tm_node_type_get(uint16_t port_id, uint32_t node_id, int *is_leaf, struct rte_tm_error *error); @@ -1082,7 +1082,7 @@ rte_tm_node_type_get(uint8_t port_id, * 0 on success, non-zero error code otherwise. 
*/ int -rte_tm_capabilities_get(uint8_t port_id, +rte_tm_capabilities_get(uint16_t port_id, struct rte_tm_capabilities *cap, struct rte_tm_error *error); @@ -1102,7 +1102,7 @@ rte_tm_capabilities_get(uint8_t port_id, * 0 on success, non-zero error code otherwise. */ int -rte_tm_level_capabilities_get(uint8_t port_id, +rte_tm_level_capabilities_get(uint16_t port_id, uint32_t level_id, struct rte_tm_level_capabilities *cap, struct rte_tm_error *error); @@ -1122,7 +1122,7 @@ rte_tm_level_capabilities_get(uint8_t port_id, * 0 on success, non-zero error code otherwise. */ int -rte_tm_node_capabilities_get(uint8_t port_id, +rte_tm_node_capabilities_get(uint16_t port_id, uint32_t node_id, struct rte_tm_node_capabilities *cap, struct rte_tm_error *error); @@ -1147,7 +1147,7 @@ rte_tm_node_capabilities_get(uint8_t port_id, * @see struct rte_tm_capabilities::cman_wred_context_n_max */ int -rte_tm_wred_profile_add(uint8_t port_id, +rte_tm_wred_profile_add(uint16_t port_id, uint32_t wred_profile_id, struct rte_tm_wred_params *profile, struct rte_tm_error *error); @@ -1170,7 +1170,7 @@ rte_tm_wred_profile_add(uint8_t port_id, * @see struct rte_tm_capabilities::cman_wred_context_n_max */ int -rte_tm_wred_profile_delete(uint8_t port_id, +rte_tm_wred_profile_delete(uint16_t port_id, uint32_t wred_profile_id, struct rte_tm_error *error); @@ -1201,7 +1201,7 @@ rte_tm_wred_profile_delete(uint8_t port_id, * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max */ int -rte_tm_shared_wred_context_add_update(uint8_t port_id, +rte_tm_shared_wred_context_add_update(uint16_t port_id, uint32_t shared_wred_context_id, uint32_t wred_profile_id, struct rte_tm_error *error); @@ -1225,7 +1225,7 @@ rte_tm_shared_wred_context_add_update(uint8_t port_id, * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max */ int -rte_tm_shared_wred_context_delete(uint8_t port_id, +rte_tm_shared_wred_context_delete(uint16_t port_id, uint32_t shared_wred_context_id, struct rte_tm_error *error); @@ -1249,7 +1249,7 @@ rte_tm_shared_wred_context_delete(uint8_t port_id, * @see struct rte_tm_capabilities::shaper_n_max */ int -rte_tm_shaper_profile_add(uint8_t port_id, +rte_tm_shaper_profile_add(uint16_t port_id, uint32_t shaper_profile_id, struct rte_tm_shaper_params *profile, struct rte_tm_error *error); @@ -1272,7 +1272,7 @@ rte_tm_shaper_profile_add(uint8_t port_id, * @see struct rte_tm_capabilities::shaper_n_max */ int -rte_tm_shaper_profile_delete(uint8_t port_id, +rte_tm_shaper_profile_delete(uint16_t port_id, uint32_t shaper_profile_id, struct rte_tm_error *error); @@ -1301,7 +1301,7 @@ rte_tm_shaper_profile_delete(uint8_t port_id, * @see struct rte_tm_capabilities::shaper_shared_n_max */ int -rte_tm_shared_shaper_add_update(uint8_t port_id, +rte_tm_shared_shaper_add_update(uint16_t port_id, uint32_t shared_shaper_id, uint32_t shaper_profile_id, struct rte_tm_error *error); @@ -1324,7 +1324,7 @@ rte_tm_shared_shaper_add_update(uint8_t port_id, * @see struct rte_tm_capabilities::shaper_shared_n_max */ int -rte_tm_shared_shaper_delete(uint8_t port_id, +rte_tm_shared_shaper_delete(uint16_t port_id, uint32_t shared_shaper_id, struct rte_tm_error *error); @@ -1392,7 +1392,7 @@ rte_tm_shared_shaper_delete(uint8_t port_id, * @see struct rte_tm_capabilities */ int -rte_tm_node_add(uint8_t port_id, +rte_tm_node_add(uint16_t port_id, uint32_t node_id, uint32_t parent_node_id, uint32_t priority, @@ -1425,7 +1425,7 @@ rte_tm_node_add(uint8_t port_id, * @see RTE_TM_UPDATE_NODE_ADD_DELETE */ int -rte_tm_node_delete(uint8_t 
port_id, +rte_tm_node_delete(uint16_t port_id, uint32_t node_id, struct rte_tm_error *error); @@ -1449,7 +1449,7 @@ rte_tm_node_delete(uint8_t port_id, * @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME */ int -rte_tm_node_suspend(uint8_t port_id, +rte_tm_node_suspend(uint16_t port_id, uint32_t node_id, struct rte_tm_error *error); @@ -1472,7 +1472,7 @@ rte_tm_node_suspend(uint8_t port_id, * @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME */ int -rte_tm_node_resume(uint8_t port_id, +rte_tm_node_resume(uint16_t port_id, uint32_t node_id, struct rte_tm_error *error); @@ -1513,7 +1513,7 @@ rte_tm_node_resume(uint8_t port_id, * @see rte_tm_node_delete() */ int -rte_tm_hierarchy_commit(uint8_t port_id, +rte_tm_hierarchy_commit(uint16_t port_id, int clear_on_fail, struct rte_tm_error *error); @@ -1549,7 +1549,7 @@ rte_tm_hierarchy_commit(uint8_t port_id, * @see RTE_TM_UPDATE_NODE_PARENT_CHANGE_LEVEL */ int -rte_tm_node_parent_update(uint8_t port_id, +rte_tm_node_parent_update(uint16_t port_id, uint32_t node_id, uint32_t parent_node_id, uint32_t priority, @@ -1578,7 +1578,7 @@ rte_tm_node_parent_update(uint8_t port_id, * @see struct rte_tm_capabilities::shaper_private_n_max */ int -rte_tm_node_shaper_update(uint8_t port_id, +rte_tm_node_shaper_update(uint16_t port_id, uint32_t node_id, uint32_t shaper_profile_id, struct rte_tm_error *error); @@ -1605,7 +1605,7 @@ rte_tm_node_shaper_update(uint8_t port_id, * @see struct rte_tm_capabilities::shaper_shared_n_max */ int -rte_tm_node_shared_shaper_update(uint8_t port_id, +rte_tm_node_shared_shaper_update(uint16_t port_id, uint32_t node_id, uint32_t shared_shaper_id, int add, @@ -1632,7 +1632,7 @@ rte_tm_node_shared_shaper_update(uint8_t port_id, * @see RTE_TM_UPDATE_NODE_STATS */ int -rte_tm_node_stats_update(uint8_t port_id, +rte_tm_node_stats_update(uint16_t port_id, uint32_t node_id, uint64_t stats_mask, struct rte_tm_error *error); @@ -1660,7 +1660,7 @@ rte_tm_node_stats_update(uint8_t port_id, * @see RTE_TM_UPDATE_NODE_N_SP_PRIORITIES */ int -rte_tm_node_wfq_weight_mode_update(uint8_t port_id, +rte_tm_node_wfq_weight_mode_update(uint16_t port_id, uint32_t node_id, int *wfq_weight_mode, uint32_t n_sp_priorities, @@ -1683,7 +1683,7 @@ rte_tm_node_wfq_weight_mode_update(uint8_t port_id, * @see RTE_TM_UPDATE_NODE_CMAN */ int -rte_tm_node_cman_update(uint8_t port_id, +rte_tm_node_cman_update(uint16_t port_id, uint32_t node_id, enum rte_tm_cman_mode cman, struct rte_tm_error *error); @@ -1707,7 +1707,7 @@ rte_tm_node_cman_update(uint8_t port_id, * @see struct rte_tm_capabilities::cman_wred_context_private_n_max */ int -rte_tm_node_wred_context_update(uint8_t port_id, +rte_tm_node_wred_context_update(uint16_t port_id, uint32_t node_id, uint32_t wred_profile_id, struct rte_tm_error *error); @@ -1732,7 +1732,7 @@ rte_tm_node_wred_context_update(uint8_t port_id, * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max */ int -rte_tm_node_shared_wred_context_update(uint8_t port_id, +rte_tm_node_shared_wred_context_update(uint16_t port_id, uint32_t node_id, uint32_t shared_wred_context_id, int add, @@ -1764,7 +1764,7 @@ rte_tm_node_shared_wred_context_update(uint8_t port_id, * @see enum rte_tm_stats_type */ int -rte_tm_node_stats_read(uint8_t port_id, +rte_tm_node_stats_read(uint16_t port_id, uint32_t node_id, struct rte_tm_node_stats *stats, uint64_t *stats_mask, @@ -1801,7 +1801,7 @@ rte_tm_node_stats_read(uint8_t port_id, * @see struct rte_tm_capabilities::mark_vlan_dei_supported */ int -rte_tm_mark_vlan_dei(uint8_t port_id, +rte_tm_mark_vlan_dei(uint16_t port_id, 
int mark_green, int mark_yellow, int mark_red, @@ -1851,7 +1851,7 @@ rte_tm_mark_vlan_dei(uint8_t port_id, * @see struct rte_tm_capabilities::mark_ip_ecn_sctp_supported */ int -rte_tm_mark_ip_ecn(uint8_t port_id, +rte_tm_mark_ip_ecn(uint16_t port_id, int mark_green, int mark_yellow, int mark_red, @@ -1899,7 +1899,7 @@ rte_tm_mark_ip_ecn(uint8_t port_id, * @see struct rte_tm_capabilities::mark_ip_dscp_supported */ int -rte_tm_mark_ip_dscp(uint8_t port_id, +rte_tm_mark_ip_dscp(uint16_t port_id, int mark_green, int mark_yellow, int mark_red, diff --git a/lib/librte_ether/rte_tm_driver.h b/lib/librte_ether/rte_tm_driver.h index a5b698fe..b2e8ccf8 100644 --- a/lib/librte_ether/rte_tm_driver.h +++ b/lib/librte_ether/rte_tm_driver.h @@ -357,7 +357,7 @@ rte_tm_error_set(struct rte_tm_error *error, * success, NULL otherwise. */ const struct rte_tm_ops * -rte_tm_ops_get(uint8_t port_id, struct rte_tm_error *error); +rte_tm_ops_get(uint16_t port_id, struct rte_tm_error *error); #ifdef __cplusplus } diff --git a/lib/librte_eventdev/Makefile b/lib/librte_eventdev/Makefile index 410578a1..5ac22cde 100644 --- a/lib/librte_eventdev/Makefile +++ b/lib/librte_eventdev/Makefile @@ -34,15 +34,17 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_eventdev.a # library version -LIBABIVER := 2 +LIBABIVER := 3 # build flags CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_eal -lrte_ring -lrte_ethdev -lrte_hash # library source files SRCS-y += rte_eventdev.c SRCS-y += rte_event_ring.c +SRCS-y += rte_event_eth_rx_adapter.c # export include files SYMLINK-y-include += rte_eventdev.h @@ -50,6 +52,7 @@ SYMLINK-y-include += rte_eventdev_pmd.h SYMLINK-y-include += rte_eventdev_pmd_pci.h SYMLINK-y-include += rte_eventdev_pmd_vdev.h SYMLINK-y-include += rte_event_ring.h +SYMLINK-y-include += rte_event_eth_rx_adapter.h # versioning export map EXPORT_MAP := rte_eventdev_version.map diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/lib/librte_eventdev/rte_event_eth_rx_adapter.c new file mode 100644 index 00000000..90106e6c --- /dev/null +++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c @@ -0,0 +1,1240 @@ +#include <rte_cycles.h> +#include <rte_common.h> +#include <rte_dev.h> +#include <rte_errno.h> +#include <rte_ethdev.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_service_component.h> +#include <rte_thash.h> + +#include "rte_eventdev.h" +#include "rte_eventdev_pmd.h" +#include "rte_event_eth_rx_adapter.h" + +#define BATCH_SIZE 32 +#define BLOCK_CNT_THRESHOLD 10 +#define ETH_EVENT_BUFFER_SIZE (4*BATCH_SIZE) + +#define ETH_RX_ADAPTER_SERVICE_NAME_LEN 32 +#define ETH_RX_ADAPTER_MEM_NAME_LEN 32 + +#define RSS_KEY_SIZE 40 + +/* + * There is an instance of this struct per polled Rx queue added to the + * adapter + */ +struct eth_rx_poll_entry { + /* Eth port to poll */ + uint8_t eth_dev_id; + /* Eth rx queue to poll */ + uint16_t eth_rx_qid; +}; + +/* Instance per adapter */ +struct rte_eth_event_enqueue_buffer { + /* Count of events in this buffer */ + uint16_t count; + /* Array of events in this buffer */ + struct rte_event events[ETH_EVENT_BUFFER_SIZE]; +}; + +struct rte_event_eth_rx_adapter { + /* RSS key */ + uint8_t rss_key_be[RSS_KEY_SIZE]; + /* Event device identifier */ + uint8_t eventdev_id; + /* Per ethernet device structure */ + struct eth_device_info *eth_devices; + /* Event port identifier */ + uint8_t event_port_id; + /* Lock to serialize config updates with service function */ + rte_spinlock_t rx_lock; + /* Max mbufs processed in any service function invocation */ + uint32_t 
max_nb_rx; + /* Receive queues that need to be polled */ + struct eth_rx_poll_entry *eth_rx_poll; + /* Size of the eth_rx_poll array */ + uint16_t num_rx_polled; + /* Weighted round robin schedule */ + uint32_t *wrr_sched; + /* wrr_sched[] size */ + uint32_t wrr_len; + /* Next entry in wrr[] to begin polling */ + uint32_t wrr_pos; + /* Event burst buffer */ + struct rte_eth_event_enqueue_buffer event_enqueue_buffer; + /* Per adapter stats */ + struct rte_event_eth_rx_adapter_stats stats; + /* Block count, counts up to BLOCK_CNT_THRESHOLD */ + uint16_t enq_block_count; + /* Block start ts */ + uint64_t rx_enq_block_start_ts; + /* Configuration callback for rte_service configuration */ + rte_event_eth_rx_adapter_conf_cb conf_cb; + /* Configuration callback argument */ + void *conf_arg; + /* Set if default_cb is being used */ + int default_cb_arg; + /* Service initialization state */ + uint8_t service_inited; + /* Total count of Rx queues in adapter */ + uint32_t nb_queues; + /* Memory allocation name */ + char mem_name[ETH_RX_ADAPTER_MEM_NAME_LEN]; + /* Socket identifier cached from eventdev */ + int socket_id; + /* Per adapter EAL service */ + uint32_t service_id; +} __rte_cache_aligned; + +/* Per eth device */ +struct eth_device_info { + struct rte_eth_dev *dev; + struct eth_rx_queue_info *rx_queue; + /* Set if ethdev->eventdev packet transfer uses a + * hardware mechanism + */ + uint8_t internal_event_port; + /* Set if the adapter is processing rx queues for + * this eth device and packet processing has been + * started, allows for the code to know if the PMD + * rx_adapter_stop callback needs to be invoked + */ + uint8_t dev_rx_started; + /* If nb_dev_queues > 0, the start callback will + * be invoked if not already invoked + */ + uint16_t nb_dev_queues; +}; + +/* Per Rx queue */ +struct eth_rx_queue_info { + int queue_enabled; /* True if added */ + uint16_t wt; /* Polling weight */ + uint8_t event_queue_id; /* Event queue to enqueue packets to */ + uint8_t sched_type; /* Sched type for events */ + uint8_t priority; /* Event priority */ + uint32_t flow_id; /* App provided flow identifier */ + uint32_t flow_id_mask; /* Set to ~0 if app provides flow id else 0 */ +}; + +static struct rte_event_eth_rx_adapter **event_eth_rx_adapter; + +static inline int +valid_id(uint8_t id) +{ + return id < RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE; +} + +#define RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, retval) do { \ + if (!valid_id(id)) { \ + RTE_EDEV_LOG_ERR("Invalid eth Rx adapter id = %d\n", id); \ + return retval; \ + } \ +} while (0) + +static inline int +sw_rx_adapter_queue_count(struct rte_event_eth_rx_adapter *rx_adapter) +{ + return rx_adapter->num_rx_polled; +} + +/* Greatest common divisor */ +static uint16_t gcd_u16(uint16_t a, uint16_t b) +{ + uint16_t r = a % b; + + return r ? 
gcd_u16(b, r) : b; +} + +/* Returns the next queue in the polling sequence + * + * http://kb.linuxvirtualserver.org/wiki/Weighted_Round-Robin_Scheduling + */ +static int +wrr_next(struct rte_event_eth_rx_adapter *rx_adapter, + unsigned int n, int *cw, + struct eth_rx_poll_entry *eth_rx_poll, uint16_t max_wt, + uint16_t gcd, int prev) +{ + int i = prev; + uint16_t w; + + while (1) { + uint16_t q; + uint8_t d; + + i = (i + 1) % n; + if (i == 0) { + *cw = *cw - gcd; + if (*cw <= 0) + *cw = max_wt; + } + + q = eth_rx_poll[i].eth_rx_qid; + d = eth_rx_poll[i].eth_dev_id; + w = rx_adapter->eth_devices[d].rx_queue[q].wt; + + if ((int)w >= *cw) + return i; + } +} + +/* Precalculate WRR polling sequence for all queues in rx_adapter */ +static int +eth_poll_wrr_calc(struct rte_event_eth_rx_adapter *rx_adapter) +{ + uint8_t d; + uint16_t q; + unsigned int i; + + /* Initialize variables for calculation of wrr schedule */ + uint16_t max_wrr_pos = 0; + unsigned int poll_q = 0; + uint16_t max_wt = 0; + uint16_t gcd = 0; + + struct eth_rx_poll_entry *rx_poll = NULL; + uint32_t *rx_wrr = NULL; + + if (rx_adapter->num_rx_polled) { + size_t len = RTE_ALIGN(rx_adapter->num_rx_polled * + sizeof(*rx_adapter->eth_rx_poll), + RTE_CACHE_LINE_SIZE); + rx_poll = rte_zmalloc_socket(rx_adapter->mem_name, + len, + RTE_CACHE_LINE_SIZE, + rx_adapter->socket_id); + if (rx_poll == NULL) + return -ENOMEM; + + /* Generate array of all queues to poll, the size of this + * array is poll_q + */ + for (d = 0; d < rte_eth_dev_count(); d++) { + uint16_t nb_rx_queues; + struct eth_device_info *dev_info = + &rx_adapter->eth_devices[d]; + nb_rx_queues = dev_info->dev->data->nb_rx_queues; + if (dev_info->rx_queue == NULL) + continue; + for (q = 0; q < nb_rx_queues; q++) { + struct eth_rx_queue_info *queue_info = + &dev_info->rx_queue[q]; + if (queue_info->queue_enabled == 0) + continue; + + uint16_t wt = queue_info->wt; + rx_poll[poll_q].eth_dev_id = d; + rx_poll[poll_q].eth_rx_qid = q; + max_wrr_pos += wt; + max_wt = RTE_MAX(max_wt, wt); + gcd = (gcd) ? 
gcd_u16(gcd, wt) : wt; + poll_q++; + } + } + + len = RTE_ALIGN(max_wrr_pos * sizeof(*rx_wrr), + RTE_CACHE_LINE_SIZE); + rx_wrr = rte_zmalloc_socket(rx_adapter->mem_name, + len, + RTE_CACHE_LINE_SIZE, + rx_adapter->socket_id); + if (rx_wrr == NULL) { + rte_free(rx_poll); + return -ENOMEM; + } + + /* Generate polling sequence based on weights */ + int prev = -1; + int cw = -1; + for (i = 0; i < max_wrr_pos; i++) { + rx_wrr[i] = wrr_next(rx_adapter, poll_q, &cw, + rx_poll, max_wt, gcd, prev); + prev = rx_wrr[i]; + } + } + + rte_free(rx_adapter->eth_rx_poll); + rte_free(rx_adapter->wrr_sched); + + rx_adapter->eth_rx_poll = rx_poll; + rx_adapter->wrr_sched = rx_wrr; + rx_adapter->wrr_len = max_wrr_pos; + + return 0; +} + +static inline void +mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr, + struct ipv6_hdr **ipv6_hdr) +{ + struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + struct vlan_hdr *vlan_hdr; + + *ipv4_hdr = NULL; + *ipv6_hdr = NULL; + + switch (eth_hdr->ether_type) { + case RTE_BE16(ETHER_TYPE_IPv4): + *ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + break; + + case RTE_BE16(ETHER_TYPE_IPv6): + *ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1); + break; + + case RTE_BE16(ETHER_TYPE_VLAN): + vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); + switch (vlan_hdr->eth_proto) { + case RTE_BE16(ETHER_TYPE_IPv4): + *ipv4_hdr = (struct ipv4_hdr *)(vlan_hdr + 1); + break; + case RTE_BE16(ETHER_TYPE_IPv6): + *ipv6_hdr = (struct ipv6_hdr *)(vlan_hdr + 1); + break; + default: + break; + } + break; + + default: + break; + } +} + +/* Calculate RSS hash for IPv4/6 */ +static inline uint32_t +do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be) +{ + uint32_t input_len; + void *tuple; + struct rte_ipv4_tuple ipv4_tuple; + struct rte_ipv6_tuple ipv6_tuple; + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + + mtoip(m, &ipv4_hdr, &ipv6_hdr); + + if (ipv4_hdr) { + ipv4_tuple.src_addr = rte_be_to_cpu_32(ipv4_hdr->src_addr); + ipv4_tuple.dst_addr = rte_be_to_cpu_32(ipv4_hdr->dst_addr); + tuple = &ipv4_tuple; + input_len = RTE_THASH_V4_L3_LEN; + } else if (ipv6_hdr) { + rte_thash_load_v6_addrs(ipv6_hdr, + (union rte_thash_tuple *)&ipv6_tuple); + tuple = &ipv6_tuple; + input_len = RTE_THASH_V6_L3_LEN; + } else + return 0; + + return rte_softrss_be(tuple, input_len, rss_key_be); +} + +static inline int +rx_enq_blocked(struct rte_event_eth_rx_adapter *rx_adapter) +{ + return !!rx_adapter->enq_block_count; +} + +static inline void +rx_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter) +{ + if (rx_adapter->rx_enq_block_start_ts) + return; + + rx_adapter->enq_block_count++; + if (rx_adapter->enq_block_count < BLOCK_CNT_THRESHOLD) + return; + + rx_adapter->rx_enq_block_start_ts = rte_get_tsc_cycles(); +} + +static inline void +rx_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter, + struct rte_event_eth_rx_adapter_stats *stats) +{ + if (unlikely(!stats->rx_enq_start_ts)) + stats->rx_enq_start_ts = rte_get_tsc_cycles(); + + if (likely(!rx_enq_blocked(rx_adapter))) + return; + + rx_adapter->enq_block_count = 0; + if (rx_adapter->rx_enq_block_start_ts) { + stats->rx_enq_end_ts = rte_get_tsc_cycles(); + stats->rx_enq_block_cycles += stats->rx_enq_end_ts - + rx_adapter->rx_enq_block_start_ts; + rx_adapter->rx_enq_block_start_ts = 0; + } +} + +/* Add event to buffer, free space check is done prior to calling + * this function + */ +static inline void +buf_event_enqueue(struct rte_event_eth_rx_adapter *rx_adapter, + struct rte_event *ev) +{ + struct 
rte_eth_event_enqueue_buffer *buf = + &rx_adapter->event_enqueue_buffer; + rte_memcpy(&buf->events[buf->count++], ev, sizeof(struct rte_event)); +} + +/* Enqueue buffered events to event device */ +static inline uint16_t +flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter) +{ + struct rte_eth_event_enqueue_buffer *buf = + &rx_adapter->event_enqueue_buffer; + struct rte_event_eth_rx_adapter_stats *stats = &rx_adapter->stats; + + uint16_t n = rte_event_enqueue_new_burst(rx_adapter->eventdev_id, + rx_adapter->event_port_id, + buf->events, + buf->count); + if (n != buf->count) { + memmove(buf->events, + &buf->events[n], + (buf->count - n) * sizeof(struct rte_event)); + stats->rx_enq_retry++; + } + + n ? rx_enq_block_end_ts(rx_adapter, stats) : + rx_enq_block_start_ts(rx_adapter); + + buf->count -= n; + stats->rx_enq_count += n; + + return n; +} + +static inline void +fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter, + uint8_t dev_id, + uint16_t rx_queue_id, + struct rte_mbuf **mbufs, + uint16_t num) +{ + uint32_t i; + struct eth_device_info *eth_device_info = + &rx_adapter->eth_devices[dev_id]; + struct eth_rx_queue_info *eth_rx_queue_info = + &eth_device_info->rx_queue[rx_queue_id]; + + int32_t qid = eth_rx_queue_info->event_queue_id; + uint8_t sched_type = eth_rx_queue_info->sched_type; + uint8_t priority = eth_rx_queue_info->priority; + uint32_t flow_id; + struct rte_event events[BATCH_SIZE]; + struct rte_mbuf *m = mbufs[0]; + uint32_t rss_mask; + uint32_t rss; + int do_rss; + + /* 0xffff ffff if PKT_RX_RSS_HASH is set, otherwise 0 */ + rss_mask = ~(((m->ol_flags & PKT_RX_RSS_HASH) != 0) - 1); + do_rss = !rss_mask && !eth_rx_queue_info->flow_id_mask; + + for (i = 0; i < num; i++) { + m = mbufs[i]; + struct rte_event *ev = &events[i]; + + rss = do_rss ? + do_softrss(m, rx_adapter->rss_key_be) : m->hash.rss; + flow_id = + eth_rx_queue_info->flow_id & + eth_rx_queue_info->flow_id_mask; + flow_id |= rss & ~eth_rx_queue_info->flow_id_mask; + + ev->flow_id = flow_id; + ev->op = RTE_EVENT_OP_NEW; + ev->sched_type = sched_type; + ev->queue_id = qid; + ev->event_type = RTE_EVENT_TYPE_ETH_RX_ADAPTER; + ev->sub_event_type = 0; + ev->priority = priority; + ev->mbuf = m; + + buf_event_enqueue(rx_adapter, ev); + } +} + +/* + * Polls receive queues added to the event adapter and enqueues received + * packets to the event device. + * + * The receive code enqueues initially to a temporary buffer, the + * temporary buffer is drained anytime it holds >= BATCH_SIZE packets + * + * If there isn't space available in the temporary buffer, packets from the + * Rx queue aren't dequeued from the eth device, this back pressures the + * eth device, in virtual device environments this back pressure is relayed to + * the hypervisor's switching layer where adjustments can be made to deal with + * it.
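[Editorial note: the poll loop that follows walks the weighted round robin schedule precomputed by eth_poll_wrr_calc()/wrr_next() above. A minimal standalone sketch of that interleaving, assuming two hypothetical queues with polling weights 3 and 1 (schedule length is the sum of the weights):]

#include <stdio.h>

int main(void)
{
	const unsigned int wt[] = { 3, 1 };	/* hypothetical per-queue weights */
	const unsigned int n = 2;		/* number of polled queues */
	const unsigned int max_wt = 3;		/* largest weight */
	const unsigned int gcd = 1;		/* gcd of all weights */
	unsigned int pos, total = wt[0] + wt[1];
	int i = -1, cw = -1;

	/* Same interleaving rule as wrr_next(): decrement the current weight
	 * by the gcd on each wrap and emit queues whose weight still covers it.
	 */
	for (pos = 0; pos < total; pos++) {
		for (;;) {
			i = (i + 1) % n;
			if (i == 0) {
				cw -= gcd;
				if (cw <= 0)
					cw = max_wt;
			}
			if ((int)wt[i] >= cw)
				break;
		}
		printf("slot %u -> queue %d\n", pos, i);	/* 0, 0, 0, 1 */
	}
	return 0;
}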
+ */ +static inline uint32_t +eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter) +{ + uint32_t num_queue; + uint16_t n; + uint32_t nb_rx = 0; + struct rte_mbuf *mbufs[BATCH_SIZE]; + struct rte_eth_event_enqueue_buffer *buf; + uint32_t wrr_pos; + uint32_t max_nb_rx; + + wrr_pos = rx_adapter->wrr_pos; + max_nb_rx = rx_adapter->max_nb_rx; + buf = &rx_adapter->event_enqueue_buffer; + struct rte_event_eth_rx_adapter_stats *stats = &rx_adapter->stats; + + /* Iterate through a WRR sequence */ + for (num_queue = 0; num_queue < rx_adapter->wrr_len; num_queue++) { + unsigned int poll_idx = rx_adapter->wrr_sched[wrr_pos]; + uint16_t qid = rx_adapter->eth_rx_poll[poll_idx].eth_rx_qid; + uint8_t d = rx_adapter->eth_rx_poll[poll_idx].eth_dev_id; + + /* Don't do a batch dequeue from the rx queue if there isn't + * enough space in the enqueue buffer. + */ + if (buf->count >= BATCH_SIZE) + flush_event_buffer(rx_adapter); + if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count)) + break; + + stats->rx_poll_count++; + n = rte_eth_rx_burst(d, qid, mbufs, BATCH_SIZE); + + if (n) { + stats->rx_packets += n; + /* The check before rte_eth_rx_burst() ensures that + * all n mbufs can be buffered + */ + fill_event_buffer(rx_adapter, d, qid, mbufs, n); + nb_rx += n; + if (nb_rx > max_nb_rx) { + rx_adapter->wrr_pos = + (wrr_pos + 1) % rx_adapter->wrr_len; + return nb_rx; + } + } + + if (++wrr_pos == rx_adapter->wrr_len) + wrr_pos = 0; + } + + return nb_rx; +} + +static int +event_eth_rx_adapter_service_func(void *args) +{ + struct rte_event_eth_rx_adapter *rx_adapter = args; + struct rte_eth_event_enqueue_buffer *buf; + + buf = &rx_adapter->event_enqueue_buffer; + if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0) + return 0; + if (eth_rx_poll(rx_adapter) == 0 && buf->count) + flush_event_buffer(rx_adapter); + rte_spinlock_unlock(&rx_adapter->rx_lock); + return 0; +} + +static int +rte_event_eth_rx_adapter_init(void) +{ + const char *name = "rte_event_eth_rx_adapter_array"; + const struct rte_memzone *mz; + unsigned int sz; + + sz = sizeof(*event_eth_rx_adapter) * + RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE; + sz = RTE_ALIGN(sz, RTE_CACHE_LINE_SIZE); + + mz = rte_memzone_lookup(name); + if (mz == NULL) { + mz = rte_memzone_reserve_aligned(name, sz, rte_socket_id(), 0, + RTE_CACHE_LINE_SIZE); + if (mz == NULL) { + RTE_EDEV_LOG_ERR("failed to reserve memzone err = %" + PRId32, rte_errno); + return -rte_errno; + } + } + + event_eth_rx_adapter = mz->addr; + return 0; +} + +static inline struct rte_event_eth_rx_adapter * +id_to_rx_adapter(uint8_t id) +{ + return event_eth_rx_adapter ? 
+ event_eth_rx_adapter[id] : NULL; +} + +static int +default_conf_cb(uint8_t id, uint8_t dev_id, + struct rte_event_eth_rx_adapter_conf *conf, void *arg) +{ + int ret; + struct rte_eventdev *dev; + struct rte_event_dev_config dev_conf; + int started; + uint8_t port_id; + struct rte_event_port_conf *port_conf = arg; + struct rte_event_eth_rx_adapter *rx_adapter = id_to_rx_adapter(id); + + dev = &rte_eventdevs[rx_adapter->eventdev_id]; + dev_conf = dev->data->dev_conf; + + started = dev->data->dev_started; + if (started) + rte_event_dev_stop(dev_id); + port_id = dev_conf.nb_event_ports; + dev_conf.nb_event_ports += 1; + ret = rte_event_dev_configure(dev_id, &dev_conf); + if (ret) { + RTE_EDEV_LOG_ERR("failed to configure event dev %u\n", + dev_id); + if (started) + rte_event_dev_start(dev_id); + return ret; + } + + ret = rte_event_port_setup(dev_id, port_id, port_conf); + if (ret) { + RTE_EDEV_LOG_ERR("failed to setup event port %u\n", + port_id); + return ret; + } + + conf->event_port_id = port_id; + conf->max_nb_rx = 128; + if (started) + rte_event_dev_start(dev_id); + rx_adapter->default_cb_arg = 1; + return ret; +} + +static int +init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id) +{ + int ret; + struct rte_service_spec service; + struct rte_event_eth_rx_adapter_conf rx_adapter_conf; + + if (rx_adapter->service_inited) + return 0; + + memset(&service, 0, sizeof(service)); + snprintf(service.name, ETH_RX_ADAPTER_SERVICE_NAME_LEN, + "rte_event_eth_rx_adapter_%d", id); + service.socket_id = rx_adapter->socket_id; + service.callback = event_eth_rx_adapter_service_func; + service.callback_userdata = rx_adapter; + /* Service function handles locking for queue add/del updates */ + service.capabilities = RTE_SERVICE_CAP_MT_SAFE; + ret = rte_service_component_register(&service, &rx_adapter->service_id); + if (ret) { + RTE_EDEV_LOG_ERR("failed to register service %s err = %" PRId32, + service.name, ret); + return ret; + } + + ret = rx_adapter->conf_cb(id, rx_adapter->eventdev_id, + &rx_adapter_conf, rx_adapter->conf_arg); + if (ret) { + RTE_EDEV_LOG_ERR("configuration callback failed err = %" PRId32, + ret); + goto err_done; + } + rx_adapter->event_port_id = rx_adapter_conf.event_port_id; + rx_adapter->max_nb_rx = rx_adapter_conf.max_nb_rx; + rx_adapter->service_inited = 1; + return 0; + +err_done: + rte_service_component_unregister(rx_adapter->service_id); + return ret; +} + + +static void +update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + int32_t rx_queue_id, + uint8_t add) +{ + struct eth_rx_queue_info *queue_info; + int enabled; + uint16_t i; + + if (dev_info->rx_queue == NULL) + return; + + if (rx_queue_id == -1) { + for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++) + update_queue_info(rx_adapter, dev_info, i, add); + } else { + queue_info = &dev_info->rx_queue[rx_queue_id]; + enabled = queue_info->queue_enabled; + if (add) { + rx_adapter->nb_queues += !enabled; + dev_info->nb_dev_queues += !enabled; + } else { + rx_adapter->nb_queues -= enabled; + dev_info->nb_dev_queues -= enabled; + } + queue_info->queue_enabled = !!add; + } +} + +static int +event_eth_rx_adapter_queue_del(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + uint16_t rx_queue_id) +{ + struct eth_rx_queue_info *queue_info; + + if (rx_adapter->nb_queues == 0) + return 0; + + queue_info = &dev_info->rx_queue[rx_queue_id]; + rx_adapter->num_rx_polled -= queue_info->queue_enabled; + update_queue_info(rx_adapter, dev_info, 
rx_queue_id, 0); + return 0; +} + +static void +event_eth_rx_adapter_queue_add(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + uint16_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *conf) + +{ + struct eth_rx_queue_info *queue_info; + const struct rte_event *ev = &conf->ev; + + queue_info = &dev_info->rx_queue[rx_queue_id]; + queue_info->event_queue_id = ev->queue_id; + queue_info->sched_type = ev->sched_type; + queue_info->priority = ev->priority; + queue_info->wt = conf->servicing_weight; + + if (conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID) { + queue_info->flow_id = ev->flow_id; + queue_info->flow_id_mask = ~0; + } + + /* The same queue can be added more than once */ + rx_adapter->num_rx_polled += !queue_info->queue_enabled; + update_queue_info(rx_adapter, dev_info, rx_queue_id, 1); +} + +static int add_rx_queue(struct rte_event_eth_rx_adapter *rx_adapter, + uint8_t eth_dev_id, + int rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct eth_device_info *dev_info = &rx_adapter->eth_devices[eth_dev_id]; + struct rte_event_eth_rx_adapter_queue_conf temp_conf; + uint32_t i; + int ret; + + if (queue_conf->servicing_weight == 0) { + + struct rte_eth_dev_data *data = dev_info->dev->data; + if (data->dev_conf.intr_conf.rxq) { + RTE_EDEV_LOG_ERR("Interrupt driven queues" + " not supported"); + return -ENOTSUP; + } + temp_conf = *queue_conf; + + /* If Rx interrupts are disabled set wt = 1 */ + temp_conf.servicing_weight = 1; + queue_conf = &temp_conf; + } + + if (dev_info->rx_queue == NULL) { + dev_info->rx_queue = + rte_zmalloc_socket(rx_adapter->mem_name, + dev_info->dev->data->nb_rx_queues * + sizeof(struct eth_rx_queue_info), 0, + rx_adapter->socket_id); + if (dev_info->rx_queue == NULL) + return -ENOMEM; + } + + if (rx_queue_id == -1) { + for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++) + event_eth_rx_adapter_queue_add(rx_adapter, + dev_info, i, + queue_conf); + } else { + event_eth_rx_adapter_queue_add(rx_adapter, dev_info, + (uint16_t)rx_queue_id, + queue_conf); + } + + ret = eth_poll_wrr_calc(rx_adapter); + if (ret) { + event_eth_rx_adapter_queue_del(rx_adapter, + dev_info, rx_queue_id); + return ret; + } + + return ret; +} + +static int +rx_adapter_ctrl(uint8_t id, int start) +{ + struct rte_event_eth_rx_adapter *rx_adapter; + struct rte_eventdev *dev; + struct eth_device_info *dev_info; + uint32_t i; + int use_service = 0; + int stop = !start; + + RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + rx_adapter = id_to_rx_adapter(id); + if (rx_adapter == NULL) + return -EINVAL; + + dev = &rte_eventdevs[rx_adapter->eventdev_id]; + + for (i = 0; i < rte_eth_dev_count(); i++) { + dev_info = &rx_adapter->eth_devices[i]; + /* if start check for num dev queues */ + if (start && !dev_info->nb_dev_queues) + continue; + /* if stop check if dev has been started */ + if (stop && !dev_info->dev_rx_started) + continue; + use_service |= !dev_info->internal_event_port; + dev_info->dev_rx_started = start; + if (dev_info->internal_event_port == 0) + continue; + start ? 
(*dev->dev_ops->eth_rx_adapter_start)(dev, + &rte_eth_devices[i]) : + (*dev->dev_ops->eth_rx_adapter_stop)(dev, + &rte_eth_devices[i]); + } + + if (use_service) + rte_service_runstate_set(rx_adapter->service_id, start); + + return 0; +} + +int +rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id, + rte_event_eth_rx_adapter_conf_cb conf_cb, + void *conf_arg) +{ + struct rte_event_eth_rx_adapter *rx_adapter; + int ret; + int socket_id; + uint8_t i; + char mem_name[ETH_RX_ADAPTER_SERVICE_NAME_LEN]; + const uint8_t default_rss_key[] = { + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa, + }; + + RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + if (conf_cb == NULL) + return -EINVAL; + + if (event_eth_rx_adapter == NULL) { + ret = rte_event_eth_rx_adapter_init(); + if (ret) + return ret; + } + + rx_adapter = id_to_rx_adapter(id); + if (rx_adapter != NULL) { + RTE_EDEV_LOG_ERR("Eth Rx adapter exists id = %" PRIu8, id); + return -EEXIST; + } + + socket_id = rte_event_dev_socket_id(dev_id); + snprintf(mem_name, ETH_RX_ADAPTER_MEM_NAME_LEN, + "rte_event_eth_rx_adapter_%d", + id); + + rx_adapter = rte_zmalloc_socket(mem_name, sizeof(*rx_adapter), + RTE_CACHE_LINE_SIZE, socket_id); + if (rx_adapter == NULL) { + RTE_EDEV_LOG_ERR("failed to get mem for rx adapter"); + return -ENOMEM; + } + + rx_adapter->eventdev_id = dev_id; + rx_adapter->socket_id = socket_id; + rx_adapter->conf_cb = conf_cb; + rx_adapter->conf_arg = conf_arg; + strcpy(rx_adapter->mem_name, mem_name); + rx_adapter->eth_devices = rte_zmalloc_socket(rx_adapter->mem_name, + rte_eth_dev_count() * + sizeof(struct eth_device_info), 0, + socket_id); + rte_convert_rss_key((const uint32_t *)default_rss_key, + (uint32_t *)rx_adapter->rss_key_be, + RTE_DIM(default_rss_key)); + + if (rx_adapter->eth_devices == NULL) { + RTE_EDEV_LOG_ERR("failed to get mem for eth devices\n"); + rte_free(rx_adapter); + return -ENOMEM; + } + rte_spinlock_init(&rx_adapter->rx_lock); + for (i = 0; i < rte_eth_dev_count(); i++) + rx_adapter->eth_devices[i].dev = &rte_eth_devices[i]; + + event_eth_rx_adapter[id] = rx_adapter; + if (conf_cb == default_conf_cb) + rx_adapter->default_cb_arg = 1; + return 0; +} + +int +rte_event_eth_rx_adapter_create(uint8_t id, uint8_t dev_id, + struct rte_event_port_conf *port_config) +{ + struct rte_event_port_conf *pc; + int ret; + + if (port_config == NULL) + return -EINVAL; + RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + pc = rte_malloc(NULL, sizeof(*pc), 0); + if (pc == NULL) + return -ENOMEM; + *pc = *port_config; + ret = rte_event_eth_rx_adapter_create_ext(id, dev_id, + default_conf_cb, + pc); + if (ret) + rte_free(pc); + return ret; +} + +int +rte_event_eth_rx_adapter_free(uint8_t id) +{ + struct rte_event_eth_rx_adapter *rx_adapter; + + RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + rx_adapter = id_to_rx_adapter(id); + if (rx_adapter == NULL) + return -EINVAL; + + if (rx_adapter->nb_queues) { + RTE_EDEV_LOG_ERR("%" PRIu16 " Rx queues not deleted", + rx_adapter->nb_queues); + return -EBUSY; + } + + if (rx_adapter->default_cb_arg) + rte_free(rx_adapter->conf_arg); + rte_free(rx_adapter->eth_devices); + rte_free(rx_adapter); + event_eth_rx_adapter[id] = NULL; + + return 0; +} + +int +rte_event_eth_rx_adapter_queue_add(uint8_t 
id, + uint8_t eth_dev_id, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + int ret; + uint32_t cap; + struct rte_event_eth_rx_adapter *rx_adapter; + struct rte_eventdev *dev; + struct eth_device_info *dev_info; + int start_service; + + RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL); + + rx_adapter = id_to_rx_adapter(id); + if ((rx_adapter == NULL) || (queue_conf == NULL)) + return -EINVAL; + + dev = &rte_eventdevs[rx_adapter->eventdev_id]; + ret = rte_event_eth_rx_adapter_caps_get(rx_adapter->eventdev_id, + eth_dev_id, + &cap); + if (ret) { + RTE_EDEV_LOG_ERR("Failed to get adapter caps edev %" PRIu8 + "eth port %" PRIu8, id, eth_dev_id); + return ret; + } + + if ((cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID) == 0 + && (queue_conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)) { + RTE_EDEV_LOG_ERR("Flow ID override is not supported," + " eth port: %" PRIu8 " adapter id: %" PRIu8, + eth_dev_id, id); + return -EINVAL; + } + + if ((cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ) == 0 && + (rx_queue_id != -1)) { + RTE_EDEV_LOG_ERR("Rx queues can only be connected to single " + "event queue id %u eth port %u", id, eth_dev_id); + return -EINVAL; + } + + if (rx_queue_id != -1 && (uint16_t)rx_queue_id >= + rte_eth_devices[eth_dev_id].data->nb_rx_queues) { + RTE_EDEV_LOG_ERR("Invalid rx queue_id %" PRIu16, + (uint16_t)rx_queue_id); + return -EINVAL; + } + + start_service = 0; + dev_info = &rx_adapter->eth_devices[eth_dev_id]; + + if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->eth_rx_adapter_queue_add, + -ENOTSUP); + if (dev_info->rx_queue == NULL) { + dev_info->rx_queue = + rte_zmalloc_socket(rx_adapter->mem_name, + dev_info->dev->data->nb_rx_queues * + sizeof(struct eth_rx_queue_info), 0, + rx_adapter->socket_id); + if (dev_info->rx_queue == NULL) + return -ENOMEM; + } + + ret = (*dev->dev_ops->eth_rx_adapter_queue_add)(dev, + &rte_eth_devices[eth_dev_id], + rx_queue_id, queue_conf); + if (ret == 0) { + update_queue_info(rx_adapter, + &rx_adapter->eth_devices[eth_dev_id], + rx_queue_id, + 1); + } + } else { + rte_spinlock_lock(&rx_adapter->rx_lock); + ret = init_service(rx_adapter, id); + if (ret == 0) + ret = add_rx_queue(rx_adapter, eth_dev_id, rx_queue_id, + queue_conf); + rte_spinlock_unlock(&rx_adapter->rx_lock); + if (ret == 0) + start_service = !!sw_rx_adapter_queue_count(rx_adapter); + } + + if (ret) + return ret; + + if (start_service) + rte_service_component_runstate_set(rx_adapter->service_id, 1); + + return 0; +} + +int +rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id, + int32_t rx_queue_id) +{ + int ret = 0; + struct rte_eventdev *dev; + struct rte_event_eth_rx_adapter *rx_adapter; + struct eth_device_info *dev_info; + uint32_t cap; + uint16_t i; + + RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL); + + rx_adapter = id_to_rx_adapter(id); + if (rx_adapter == NULL) + return -EINVAL; + + dev = &rte_eventdevs[rx_adapter->eventdev_id]; + ret = rte_event_eth_rx_adapter_caps_get(rx_adapter->eventdev_id, + eth_dev_id, + &cap); + if (ret) + return ret; + + if (rx_queue_id != -1 && (uint16_t)rx_queue_id >= + rte_eth_devices[eth_dev_id].data->nb_rx_queues) { + RTE_EDEV_LOG_ERR("Invalid rx queue_id %" PRIu16, + (uint16_t)rx_queue_id); + return -EINVAL; + } + + dev_info = &rx_adapter->eth_devices[eth_dev_id]; + + if (cap & 
RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->eth_rx_adapter_queue_del, + -ENOTSUP); + ret = (*dev->dev_ops->eth_rx_adapter_queue_del)(dev, + &rte_eth_devices[eth_dev_id], + rx_queue_id); + if (ret == 0) { + update_queue_info(rx_adapter, + &rx_adapter->eth_devices[eth_dev_id], + rx_queue_id, + 0); + if (dev_info->nb_dev_queues == 0) { + rte_free(dev_info->rx_queue); + dev_info->rx_queue = NULL; + } + } + } else { + int rc; + rte_spinlock_lock(&rx_adapter->rx_lock); + if (rx_queue_id == -1) { + for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++) + event_eth_rx_adapter_queue_del(rx_adapter, + dev_info, + i); + } else { + event_eth_rx_adapter_queue_del(rx_adapter, + dev_info, + (uint16_t)rx_queue_id); + } + + rc = eth_poll_wrr_calc(rx_adapter); + if (rc) + RTE_EDEV_LOG_ERR("WRR recalculation failed %" PRId32, + rc); + + if (dev_info->nb_dev_queues == 0) { + rte_free(dev_info->rx_queue); + dev_info->rx_queue = NULL; + } + + rte_spinlock_unlock(&rx_adapter->rx_lock); + rte_service_component_runstate_set(rx_adapter->service_id, + sw_rx_adapter_queue_count(rx_adapter)); + } + + return ret; +} + + +int +rte_event_eth_rx_adapter_start(uint8_t id) +{ + return rx_adapter_ctrl(id, 1); +} + +int +rte_event_eth_rx_adapter_stop(uint8_t id) +{ + return rx_adapter_ctrl(id, 0); +} + +int +rte_event_eth_rx_adapter_stats_get(uint8_t id, + struct rte_event_eth_rx_adapter_stats *stats) +{ + struct rte_event_eth_rx_adapter *rx_adapter; + struct rte_event_eth_rx_adapter_stats dev_stats_sum = { 0 }; + struct rte_event_eth_rx_adapter_stats dev_stats; + struct rte_eventdev *dev; + struct eth_device_info *dev_info; + uint32_t i; + int ret; + + RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + rx_adapter = id_to_rx_adapter(id); + if (rx_adapter == NULL || stats == NULL) + return -EINVAL; + + dev = &rte_eventdevs[rx_adapter->eventdev_id]; + memset(stats, 0, sizeof(*stats)); + for (i = 0; i < rte_eth_dev_count(); i++) { + dev_info = &rx_adapter->eth_devices[i]; + if (dev_info->internal_event_port == 0 || + dev->dev_ops->eth_rx_adapter_stats_get == NULL) + continue; + ret = (*dev->dev_ops->eth_rx_adapter_stats_get)(dev, + &rte_eth_devices[i], + &dev_stats); + if (ret) + continue; + dev_stats_sum.rx_packets += dev_stats.rx_packets; + dev_stats_sum.rx_enq_count += dev_stats.rx_enq_count; + } + + if (rx_adapter->service_inited) + *stats = rx_adapter->stats; + + stats->rx_packets += dev_stats_sum.rx_packets; + stats->rx_enq_count += dev_stats_sum.rx_enq_count; + return 0; +} + +int +rte_event_eth_rx_adapter_stats_reset(uint8_t id) +{ + struct rte_event_eth_rx_adapter *rx_adapter; + struct rte_eventdev *dev; + struct eth_device_info *dev_info; + uint32_t i; + + RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + rx_adapter = id_to_rx_adapter(id); + if (rx_adapter == NULL) + return -EINVAL; + + dev = &rte_eventdevs[rx_adapter->eventdev_id]; + for (i = 0; i < rte_eth_dev_count(); i++) { + dev_info = &rx_adapter->eth_devices[i]; + if (dev_info->internal_event_port == 0 || + dev->dev_ops->eth_rx_adapter_stats_reset == NULL) + continue; + (*dev->dev_ops->eth_rx_adapter_stats_reset)(dev, + &rte_eth_devices[i]); + } + + memset(&rx_adapter->stats, 0, sizeof(rx_adapter->stats)); + return 0; +} + +int +rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id) +{ + struct rte_event_eth_rx_adapter *rx_adapter; + + RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + rx_adapter = id_to_rx_adapter(id); + if (rx_adapter == NULL || service_id == 
NULL) + return -EINVAL; + + if (rx_adapter->service_inited) + *service_id = rx_adapter->service_id; + + return rx_adapter->service_inited ? 0 : -ESRCH; +} diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.h b/lib/librte_eventdev/rte_event_eth_rx_adapter.h new file mode 100644 index 00000000..6a9e7edf --- /dev/null +++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.h @@ -0,0 +1,444 @@ +/* + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_EVENT_ETH_RX_ADAPTER_ +#define _RTE_EVENT_ETH_RX_ADAPTER_ + +/** + * @file + * + * RTE Event Ethernet Rx Adapter + * + * An eventdev-based packet processing application enqueues/dequeues mbufs + * to/from the event device. Packet flow from the ethernet device to the event + * device can be accomplished using either HW or SW mechanisms depending on the + * platform and the particular combination of ethernet and event devices. The + * event ethernet Rx adapter provides common APIs to configure the packet flow + * from the ethernet devices to event devices across both these transfer + * mechanisms. + * + * The adapter uses a EAL service core function for SW based packet transfer + * and uses the eventdev PMD functions to configure HW based packet transfer + * between the ethernet device and the event device. + * + * The ethernet Rx event adapter's functions are: + * - rte_event_eth_rx_adapter_create_ext() + * - rte_event_eth_rx_adapter_create() + * - rte_event_eth_rx_adapter_free() + * - rte_event_eth_rx_adapter_queue_add() + * - rte_event_eth_rx_adapter_queue_del() + * - rte_event_eth_rx_adapter_start() + * - rte_event_eth_rx_adapter_stop() + * - rte_event_eth_rx_adapter_stats_get() + * - rte_event_eth_rx_adapter_stats_reset() + * + * The application creates an ethernet to event adapter using + * rte_event_eth_rx_adapter_create_ext() or rte_event_eth_rx_adapter_create() + * functions. 
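[Editorial note: whether a given ethdev/eventdev pair needs the SW service path can be checked with rte_event_eth_rx_adapter_caps_get(), added in rte_eventdev.c further down in this patch. A small sketch, assuming the RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT macro from rte_eventdev.h:]

#include <rte_eventdev.h>

/* Returns 1 if a service core is needed for the ethdev->eventdev transfer,
 * 0 if the PMD provides an internal event port (HW transfer), <0 on error.
 */
static int
rx_transfer_needs_service_core(uint8_t ev_dev_id, uint8_t eth_port_id)
{
	uint32_t caps = 0;
	int ret;

	ret = rte_event_eth_rx_adapter_caps_get(ev_dev_id, eth_port_id, &caps);
	if (ret)
		return ret;

	return !(caps & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT);
}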
+ * The adapter needs to know which ethernet rx queues to poll for mbufs as well + * as event device parameters such as the event queue identifier, event + * priority and scheduling type that the adapter should use when constructing + * events. The rte_event_eth_rx_adapter_queue_add() function is provided for + * this purpose. + * The servicing weight parameter in the rte_event_eth_rx_adapter_queue_conf + * is applicable when the Rx adapter uses a service core function and is + * intended to provide application control of the frequency of polling ethernet + * device receive queues, for example, the application may want to poll higher + * priority queues with a higher frequency but at the same time not starve + * lower priority queues completely. If this parameter is zero and the receive + * interrupt is enabled when configuring the device, the receive queue is + * interrupt driven; else, the queue is assigned a servicing weight of one. + * + * The application can start/stop the adapter using the + * rte_event_eth_rx_adapter_start() and the rte_event_eth_rx_adapter_stop() + * functions. If the adapter uses a rte_service function, then the application + * is also required to assign a core to the service function and control the + * service core using the rte_service APIs. The + * rte_event_eth_rx_adapter_service_id_get() function can be used to retrieve + * the service function ID of the adapter in this case. + * + * Note: Interrupt driven receive queues are currently unimplemented. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +#include <rte_service.h> + +#include "rte_eventdev.h" + +#define RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE 32 + +/* struct rte_event_eth_rx_adapter_queue_conf flags definitions */ +#define RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID 0x1 +/**< This flag indicates the flow identifier is valid + * @see rte_event_eth_rx_adapter_queue_conf::rx_queue_flags + */ + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Adapter configuration structure that the adapter configuration callback + * function is expected to fill out + * @see rte_event_eth_rx_adapter_conf_cb + */ +struct rte_event_eth_rx_adapter_conf { + uint8_t event_port_id; + /**< Event port identifier, the adapter enqueues mbuf events to this + * port. + */ + uint32_t max_nb_rx; + /**< The adapter can return early if it has processed at least + * max_nb_rx mbufs. This isn't treated as a requirement; batching may + * cause the adapter to process more than max_nb_rx mbufs. + */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Function type used for adapter configuration callback. The callback is + * used to fill in members of the struct rte_event_eth_rx_adapter_conf, this + * callback is invoked when creating a SW service for packet transfer from + * ethdev queues to the event device. The SW service is created within the + * rte_event_eth_rx_adapter_queue_add() function if SW based packet transfers + * from ethdev queues to the event device are required. + * + * @param id + * Adapter identifier. + * + * @param dev_id + * Event device identifier. + * + * @param [out] conf + * Structure that needs to be populated by this callback. + * + * @param arg + * Argument to the callback. This is the same as the conf_arg passed to the + * rte_event_eth_rx_adapter_create_ext(). 
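[Editorial note: a minimal sketch of such a configuration callback; it assumes the application has already reserved event port 3 for the adapter, and both the port id and the max_nb_rx value are arbitrary here:]

#include <rte_common.h>
#include <rte_event_eth_rx_adapter.h>

static int
my_rx_adapter_conf_cb(uint8_t id, uint8_t dev_id,
		      struct rte_event_eth_rx_adapter_conf *conf, void *arg)
{
	RTE_SET_USED(id);
	RTE_SET_USED(dev_id);
	RTE_SET_USED(arg);

	conf->event_port_id = 3;	/* event port reserved by the app (assumption) */
	conf->max_nb_rx = 128;		/* upper bound on mbufs per service run */
	return 0;
}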
+ */ +typedef int (*rte_event_eth_rx_adapter_conf_cb) (uint8_t id, uint8_t dev_id, + struct rte_event_eth_rx_adapter_conf *conf, + void *arg); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Rx queue configuration structure + */ +struct rte_event_eth_rx_adapter_queue_conf { + uint32_t rx_queue_flags; + /**< Flags for handling received packets + * @see RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID + */ + uint16_t servicing_weight; + /**< Relative polling frequency of ethernet receive queue when the + * adapter uses a service core function for ethernet to event device + * transfers. If it is set to zero, the Rx queue is interrupt driven + * (unless rx queue interrupts are not enabled for the ethernet + * device). + */ + struct rte_event ev; + /**< + * The values from the following event fields will be used when + * queuing mbuf events: + * - event_queue_id: Targeted event queue ID for received packets. + * - event_priority: Event priority of packets from this Rx queue in + * the event queue relative to other events. + * - sched_type: Scheduling type for packets from this Rx queue. + * - flow_id: If the RTE_ETH_RX_EVENT_ADAPTER_QUEUE_FLOW_ID_VALID bit + * is set in rx_queue_flags, this flow_id is used for all + * packets received from this queue. Otherwise the flow ID + * is set to the RSS hash of the src and dst IPv4/6 + * addresses. + * + * The event adapter sets ev.event_type to RTE_EVENT_TYPE_ETHDEV in the + * enqueued event. + */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * A structure used to retrieve statistics for an eth rx adapter instance. + */ +struct rte_event_eth_rx_adapter_stats { + uint64_t rx_poll_count; + /**< Receive queue poll count */ + uint64_t rx_packets; + /**< Received packet count */ + uint64_t rx_enq_count; + /**< Eventdev enqueue count */ + uint64_t rx_enq_retry; + /**< Eventdev enqueue retry count */ + uint64_t rx_enq_start_ts; + /**< Rx enqueue start timestamp */ + uint64_t rx_enq_block_cycles; + /**< Cycles for which the service is blocked by the event device, + * i.e, the service fails to enqueue to the event device. + */ + uint64_t rx_enq_end_ts; + /**< Latest timestamp at which the service is unblocked + * by the event device. The start, end timestamps and + * block cycles can be used to compute the percentage of + * cycles the service is blocked by the event device. + */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Create a new ethernet Rx event adapter with the specified identifier. + * + * @param id + * The identifier of the ethernet Rx event adapter. + * + * @param dev_id + * The identifier of the device to configure. + * + * @param conf_cb + * Callback function that fills in members of a + * struct rte_event_eth_rx_adapter_conf struct passed into + * it. + * + * @param conf_arg + * Argument that is passed to the conf_cb function. + * + * @return + * - 0: Success + * - <0: Error code on failure + */ +int rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id, + rte_event_eth_rx_adapter_conf_cb conf_cb, + void *conf_arg); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Create a new ethernet Rx event adapter with the specified identifier. + * This function uses an internal configuration function that creates an event + * port. 
This default function reconfigures the event device with an + * additional event port and setups up the event port using the port_config + * parameter passed into this function. In case the application needs more + * control in configuration of the service, it should use the + * rte_event_eth_rx_adapter_create_ext() version. + * + * @param id + * The identifier of the ethernet Rx event adapter. + * + * @param dev_id + * The identifier of the device to configure. + * + * @param port_config + * Argument of type *rte_event_port_conf* that is passed to the conf_cb + * function. + * + * @return + * - 0: Success + * - <0: Error code on failure + */ +int rte_event_eth_rx_adapter_create(uint8_t id, uint8_t dev_id, + struct rte_event_port_conf *port_config); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Free an event adapter + * + * @param id + * Adapter identifier. + * + * @return + * - 0: Success + * - <0: Error code on failure, If the adapter still has Rx queues + * added to it, the function returns -EBUSY. + */ +int rte_event_eth_rx_adapter_free(uint8_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Add receive queue to an event adapter. After a queue has been + * added to the event adapter, the result of the application calling + * rte_eth_rx_burst(eth_dev_id, rx_queue_id, ..) is undefined. + * + * @param id + * Adapter identifier. + * + * @param eth_dev_id + * Port identifier of Ethernet device. + * + * @param rx_queue_id + * Ethernet device receive queue index. + * If rx_queue_id is -1, then all Rx queues configured for + * the device are added. If the ethdev Rx queues can only be + * connected to a single event queue then rx_queue_id is + * required to be -1. + * @see RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ + * + * @param conf + * Additional configuration structure of type *rte_event_eth_rx_adapter_conf* + * + * @return + * - 0: Success, Receive queue added correctly. + * - <0: Error code on failure. + */ +int rte_event_eth_rx_adapter_queue_add(uint8_t id, + uint8_t eth_dev_id, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *conf); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Delete receive queue from an event adapter. + * + * @param id + * Adapter identifier. + * + * @param eth_dev_id + * Port identifier of Ethernet device. + * + * @param rx_queue_id + * Ethernet device receive queue index. + * If rx_queue_id is -1, then all Rx queues configured for + * the device are deleted. If the ethdev Rx queues can only be + * connected to a single event queue then rx_queue_id is + * required to be -1. + * @see RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ + * + * @return + * - 0: Success, Receive queue deleted correctly. + * - <0: Error code on failure. + */ +int rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id, + int32_t rx_queue_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Start ethernet Rx event adapter + * + * @param id + * Adapter identifier. + * + * @return + * - 0: Success, Adapter started correctly. + * - <0: Error code on failure. + */ +int rte_event_eth_rx_adapter_start(uint8_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Stop ethernet Rx event adapter + * + * @param id + * Adapter identifier. + * + * @return + * - 0: Success, Adapter started correctly. + * - <0: Error code on failure. 
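[Editorial note: putting the calls above together, a minimal usage sketch for a SW-transfer setup might look as follows; adapter id 0, event device 0, ethernet port 0 and service lcore 1 are assumptions, and error handling is trimmed:]

#include <rte_event_eth_rx_adapter.h>
#include <rte_service.h>

static int
setup_rx_adapter(void)
{
	struct rte_event_port_conf port_conf = {
		.new_event_threshold = 1024,
		.dequeue_depth = 32,
		.enqueue_depth = 32,
	};
	struct rte_event_eth_rx_adapter_queue_conf queue_conf = {
		.rx_queue_flags = 0,
		.servicing_weight = 1,
		.ev = {
			.queue_id = 0,
			.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
			.sched_type = RTE_SCHED_TYPE_ATOMIC,
		},
	};
	uint32_t service_id;
	int ret;

	/* The default conf callback adds one extra event port for the adapter. */
	ret = rte_event_eth_rx_adapter_create(0, 0, &port_conf);
	if (ret)
		return ret;

	/* rx_queue_id of -1 adds every configured Rx queue of ethernet port 0. */
	ret = rte_event_eth_rx_adapter_queue_add(0, 0, -1, &queue_conf);
	if (ret)
		return ret;

	/* SW transfer: map the adapter service to an lcore that was previously
	 * registered as a service core (lcore 1 is an assumption).
	 */
	if (rte_event_eth_rx_adapter_service_id_get(0, &service_id) == 0)
		rte_service_map_lcore_set(service_id, 1, 1);

	return rte_event_eth_rx_adapter_start(0);
}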
+ */ +int rte_event_eth_rx_adapter_stop(uint8_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve statistics for an adapter + * + * @param id + * Adapter identifier. + * + * @param [out] stats + * A pointer to structure used to retrieve statistics for an adapter. + * + * @return + * - 0: Success, retrieved successfully. + * - <0: Error code on failure. + */ +int rte_event_eth_rx_adapter_stats_get(uint8_t id, + struct rte_event_eth_rx_adapter_stats *stats); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Reset statistics for an adapter. + * + * @param id + * Adapter identifier. + * + * @return + * - 0: Success, statistics reset successfully. + * - <0: Error code on failure. + */ +int rte_event_eth_rx_adapter_stats_reset(uint8_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve the service ID of an adapter. If the adapter doesn't use + * a rte_service function, this function returns -ESRCH. + * + * @param id + * Adapter identifier. + * + * @param [out] service_id + * A pointer to a uint32_t, to be filled in with the service id. + * + * @return + * - 0: Success + * - <0: Error code on failure, if the adapter doesn't use a rte_service + * function, this function returns -ESRCH. + */ +int rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id); + +#ifdef __cplusplus +} +#endif +#endif /* _RTE_EVENT_ETH_RX_ADAPTER_ */ diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c index bbb38050..ce6a5dc1 100644 --- a/lib/librte_eventdev/rte_eventdev.c +++ b/lib/librte_eventdev/rte_eventdev.c @@ -56,6 +56,7 @@ #include <rte_common.h> #include <rte_malloc.h> #include <rte_errno.h> +#include <rte_ethdev.h> #include "rte_eventdev.h" #include "rte_eventdev_pmd.h" @@ -128,55 +129,77 @@ rte_event_dev_info_get(uint8_t dev_id, struct rte_event_dev_info *dev_info) return 0; } +int +rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint8_t eth_port_id, + uint32_t *caps) +{ + struct rte_eventdev *dev; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_port_id, -EINVAL); + + dev = &rte_eventdevs[dev_id]; + + if (caps == NULL) + return -EINVAL; + *caps = 0; + + return dev->dev_ops->eth_rx_adapter_caps_get ? 
+ (*dev->dev_ops->eth_rx_adapter_caps_get)(dev, + &rte_eth_devices[eth_port_id], + caps) + : 0; +} + static inline int rte_event_dev_queue_config(struct rte_eventdev *dev, uint8_t nb_queues) { uint8_t old_nb_queues = dev->data->nb_queues; - uint8_t *queues_prio; + struct rte_event_queue_conf *queues_cfg; unsigned int i; RTE_EDEV_LOG_DEBUG("Setup %d queues on device %u", nb_queues, dev->data->dev_id); /* First time configuration */ - if (dev->data->queues_prio == NULL && nb_queues != 0) { - /* Allocate memory to store queue priority */ - dev->data->queues_prio = rte_zmalloc_socket( - "eventdev->data->queues_prio", - sizeof(dev->data->queues_prio[0]) * nb_queues, + if (dev->data->queues_cfg == NULL && nb_queues != 0) { + /* Allocate memory to store queue configuration */ + dev->data->queues_cfg = rte_zmalloc_socket( + "eventdev->data->queues_cfg", + sizeof(dev->data->queues_cfg[0]) * nb_queues, RTE_CACHE_LINE_SIZE, dev->data->socket_id); - if (dev->data->queues_prio == NULL) { + if (dev->data->queues_cfg == NULL) { dev->data->nb_queues = 0; - RTE_EDEV_LOG_ERR("failed to get mem for queue priority," + RTE_EDEV_LOG_ERR("failed to get mem for queue cfg," "nb_queues %u", nb_queues); return -(ENOMEM); } /* Re-configure */ - } else if (dev->data->queues_prio != NULL && nb_queues != 0) { + } else if (dev->data->queues_cfg != NULL && nb_queues != 0) { RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_release, -ENOTSUP); for (i = nb_queues; i < old_nb_queues; i++) (*dev->dev_ops->queue_release)(dev, i); - /* Re allocate memory to store queue priority */ - queues_prio = dev->data->queues_prio; - queues_prio = rte_realloc(queues_prio, - sizeof(queues_prio[0]) * nb_queues, + /* Re allocate memory to store queue configuration */ + queues_cfg = dev->data->queues_cfg; + queues_cfg = rte_realloc(queues_cfg, + sizeof(queues_cfg[0]) * nb_queues, RTE_CACHE_LINE_SIZE); - if (queues_prio == NULL) { - RTE_EDEV_LOG_ERR("failed to realloc queue priority," + if (queues_cfg == NULL) { + RTE_EDEV_LOG_ERR("failed to realloc queue cfg memory," " nb_queues %u", nb_queues); return -(ENOMEM); } - dev->data->queues_prio = queues_prio; + dev->data->queues_cfg = queues_cfg; if (nb_queues > old_nb_queues) { uint8_t new_qs = nb_queues - old_nb_queues; - memset(queues_prio + old_nb_queues, 0, - sizeof(queues_prio[0]) * new_qs); + memset(queues_cfg + old_nb_queues, 0, + sizeof(queues_cfg[0]) * new_qs); } - } else if (dev->data->queues_prio != NULL && nb_queues == 0) { + } else if (dev->data->queues_cfg != NULL && nb_queues == 0) { RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_release, -ENOTSUP); for (i = nb_queues; i < old_nb_queues; i++) @@ -195,8 +218,7 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports) uint8_t old_nb_ports = dev->data->nb_ports; void **ports; uint16_t *links_map; - uint8_t *ports_dequeue_depth; - uint8_t *ports_enqueue_depth; + struct rte_event_port_conf *ports_cfg; unsigned int i; RTE_EDEV_LOG_DEBUG("Setup %d ports on device %u", nb_ports, @@ -214,26 +236,14 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports) return -(ENOMEM); } - /* Allocate memory to store ports dequeue depth */ - dev->data->ports_dequeue_depth = - rte_zmalloc_socket("eventdev->ports_dequeue_depth", - sizeof(dev->data->ports_dequeue_depth[0]) * nb_ports, + /* Allocate memory to store port configurations */ + dev->data->ports_cfg = + rte_zmalloc_socket("eventdev->ports_cfg", + sizeof(dev->data->ports_cfg[0]) * nb_ports, RTE_CACHE_LINE_SIZE, dev->data->socket_id); - if (dev->data->ports_dequeue_depth == 
NULL) { + if (dev->data->ports_cfg == NULL) { dev->data->nb_ports = 0; - RTE_EDEV_LOG_ERR("failed to get mem for port deq meta," - "nb_ports %u", nb_ports); - return -(ENOMEM); - } - - /* Allocate memory to store ports enqueue depth */ - dev->data->ports_enqueue_depth = - rte_zmalloc_socket("eventdev->ports_enqueue_depth", - sizeof(dev->data->ports_enqueue_depth[0]) * nb_ports, - RTE_CACHE_LINE_SIZE, dev->data->socket_id); - if (dev->data->ports_enqueue_depth == NULL) { - dev->data->nb_ports = 0; - RTE_EDEV_LOG_ERR("failed to get mem for port enq meta," + RTE_EDEV_LOG_ERR("failed to get mem for port cfg," "nb_ports %u", nb_ports); return -(ENOMEM); } @@ -257,8 +267,7 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports) RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_release, -ENOTSUP); ports = dev->data->ports; - ports_dequeue_depth = dev->data->ports_dequeue_depth; - ports_enqueue_depth = dev->data->ports_enqueue_depth; + ports_cfg = dev->data->ports_cfg; links_map = dev->data->links_map; for (i = nb_ports; i < old_nb_ports; i++) @@ -273,22 +282,12 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports) return -(ENOMEM); } - /* Realloc memory for ports_dequeue_depth */ - ports_dequeue_depth = rte_realloc(ports_dequeue_depth, - sizeof(ports_dequeue_depth[0]) * nb_ports, + /* Realloc memory for ports_cfg */ + ports_cfg = rte_realloc(ports_cfg, + sizeof(ports_cfg[0]) * nb_ports, RTE_CACHE_LINE_SIZE); - if (ports_dequeue_depth == NULL) { - RTE_EDEV_LOG_ERR("failed to realloc port dequeue meta," - " nb_ports %u", nb_ports); - return -(ENOMEM); - } - - /* Realloc memory for ports_enqueue_depth */ - ports_enqueue_depth = rte_realloc(ports_enqueue_depth, - sizeof(ports_enqueue_depth[0]) * nb_ports, - RTE_CACHE_LINE_SIZE); - if (ports_enqueue_depth == NULL) { - RTE_EDEV_LOG_ERR("failed to realloc port enqueue meta," + if (ports_cfg == NULL) { + RTE_EDEV_LOG_ERR("failed to realloc port cfg mem," " nb_ports %u", nb_ports); return -(ENOMEM); } @@ -314,18 +313,15 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports) memset(ports + old_nb_ports, 0, sizeof(ports[0]) * new_ps); - memset(ports_dequeue_depth + old_nb_ports, 0, - sizeof(ports_dequeue_depth[0]) * new_ps); - memset(ports_enqueue_depth + old_nb_ports, 0, - sizeof(ports_enqueue_depth[0]) * new_ps); + memset(ports_cfg + old_nb_ports, 0, + sizeof(ports_cfg[0]) * new_ps); for (i = old_links_map_end; i < links_map_end; i++) links_map[i] = EVENT_QUEUE_SERVICE_PRIORITY_INVALID; } dev->data->ports = ports; - dev->data->ports_dequeue_depth = ports_dequeue_depth; - dev->data->ports_enqueue_depth = ports_enqueue_depth; + dev->data->ports_cfg = ports_cfg; dev->data->links_map = links_map; } else if (dev->data->ports != NULL && nb_ports == 0) { RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_release, -ENOTSUP); @@ -519,13 +515,13 @@ rte_event_queue_default_conf_get(uint8_t dev_id, uint8_t queue_id, static inline int is_valid_atomic_queue_conf(const struct rte_event_queue_conf *queue_conf) { - if (queue_conf && ( - ((queue_conf->event_queue_cfg & - RTE_EVENT_QUEUE_CFG_TYPE_MASK) - == RTE_EVENT_QUEUE_CFG_ALL_TYPES) || + if (queue_conf && + !(queue_conf->event_queue_cfg & + RTE_EVENT_QUEUE_CFG_SINGLE_LINK) && ((queue_conf->event_queue_cfg & - RTE_EVENT_QUEUE_CFG_TYPE_MASK) - == RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY) + RTE_EVENT_QUEUE_CFG_ALL_TYPES) || + (queue_conf->schedule_type + == RTE_SCHED_TYPE_ATOMIC) )) return 1; else @@ -535,13 +531,13 @@ is_valid_atomic_queue_conf(const struct rte_event_queue_conf 
*queue_conf) static inline int is_valid_ordered_queue_conf(const struct rte_event_queue_conf *queue_conf) { - if (queue_conf && ( - ((queue_conf->event_queue_cfg & - RTE_EVENT_QUEUE_CFG_TYPE_MASK) - == RTE_EVENT_QUEUE_CFG_ALL_TYPES) || + if (queue_conf && + !(queue_conf->event_queue_cfg & + RTE_EVENT_QUEUE_CFG_SINGLE_LINK) && ((queue_conf->event_queue_cfg & - RTE_EVENT_QUEUE_CFG_TYPE_MASK) - == RTE_EVENT_QUEUE_CFG_ORDERED_ONLY) + RTE_EVENT_QUEUE_CFG_ALL_TYPES) || + (queue_conf->schedule_type + == RTE_SCHED_TYPE_ORDERED) )) return 1; else @@ -605,31 +601,10 @@ rte_event_queue_setup(uint8_t dev_id, uint8_t queue_id, queue_conf = &def_conf; } - dev->data->queues_prio[queue_id] = queue_conf->priority; + dev->data->queues_cfg[queue_id] = *queue_conf; return (*dev->dev_ops->queue_setup)(dev, queue_id, queue_conf); } -uint8_t -rte_event_queue_count(uint8_t dev_id) -{ - struct rte_eventdev *dev; - - dev = &rte_eventdevs[dev_id]; - return dev->data->nb_queues; -} - -uint8_t -rte_event_queue_priority(uint8_t dev_id, uint8_t queue_id) -{ - struct rte_eventdev *dev; - - dev = &rte_eventdevs[dev_id]; - if (dev->data->event_dev_cap & RTE_EVENT_DEV_CAP_QUEUE_QOS) - return dev->data->queues_prio[queue_id]; - else - return RTE_EVENT_DEV_PRIORITY_NORMAL; -} - static inline int is_valid_port(struct rte_eventdev *dev, uint8_t port_id) { @@ -726,10 +701,7 @@ rte_event_port_setup(uint8_t dev_id, uint8_t port_id, port_conf = &def_conf; } - dev->data->ports_dequeue_depth[port_id] = - port_conf->dequeue_depth; - dev->data->ports_enqueue_depth[port_id] = - port_conf->enqueue_depth; + dev->data->ports_cfg[port_id] = *port_conf; diag = (*dev->dev_ops->port_setup)(dev, port_id, port_conf); @@ -743,31 +715,110 @@ rte_event_port_setup(uint8_t dev_id, uint8_t port_id, return 0; } -uint8_t -rte_event_port_dequeue_depth(uint8_t dev_id, uint8_t port_id) +int +rte_event_dev_attr_get(uint8_t dev_id, uint32_t attr_id, + uint32_t *attr_value) { struct rte_eventdev *dev; + if (!attr_value) + return -EINVAL; + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); dev = &rte_eventdevs[dev_id]; - return dev->data->ports_dequeue_depth[port_id]; + + switch (attr_id) { + case RTE_EVENT_DEV_ATTR_PORT_COUNT: + *attr_value = dev->data->nb_ports; + break; + case RTE_EVENT_DEV_ATTR_QUEUE_COUNT: + *attr_value = dev->data->nb_queues; + break; + case RTE_EVENT_DEV_ATTR_STARTED: + *attr_value = dev->data->dev_started; + break; + default: + return -EINVAL; + } + + return 0; } -uint8_t -rte_event_port_enqueue_depth(uint8_t dev_id, uint8_t port_id) +int +rte_event_port_attr_get(uint8_t dev_id, uint8_t port_id, uint32_t attr_id, + uint32_t *attr_value) { struct rte_eventdev *dev; + if (!attr_value) + return -EINVAL; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); dev = &rte_eventdevs[dev_id]; - return dev->data->ports_enqueue_depth[port_id]; + if (!is_valid_port(dev, port_id)) { + RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id); + return -EINVAL; + } + + switch (attr_id) { + case RTE_EVENT_PORT_ATTR_ENQ_DEPTH: + *attr_value = dev->data->ports_cfg[port_id].enqueue_depth; + break; + case RTE_EVENT_PORT_ATTR_DEQ_DEPTH: + *attr_value = dev->data->ports_cfg[port_id].dequeue_depth; + break; + case RTE_EVENT_PORT_ATTR_NEW_EVENT_THRESHOLD: + *attr_value = dev->data->ports_cfg[port_id].new_event_threshold; + break; + default: + return -EINVAL; + }; + return 0; } -uint8_t -rte_event_port_count(uint8_t dev_id) +int +rte_event_queue_attr_get(uint8_t dev_id, uint8_t queue_id, uint32_t attr_id, + uint32_t *attr_value) { + struct 
rte_event_queue_conf *conf; struct rte_eventdev *dev; + if (!attr_value) + return -EINVAL; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); dev = &rte_eventdevs[dev_id]; - return dev->data->nb_ports; + if (!is_valid_queue(dev, queue_id)) { + RTE_EDEV_LOG_ERR("Invalid queue_id=%" PRIu8, queue_id); + return -EINVAL; + } + + conf = &dev->data->queues_cfg[queue_id]; + + switch (attr_id) { + case RTE_EVENT_QUEUE_ATTR_PRIORITY: + *attr_value = RTE_EVENT_DEV_PRIORITY_NORMAL; + if (dev->data->event_dev_cap & RTE_EVENT_DEV_CAP_QUEUE_QOS) + *attr_value = conf->priority; + break; + case RTE_EVENT_QUEUE_ATTR_NB_ATOMIC_FLOWS: + *attr_value = conf->nb_atomic_flows; + break; + case RTE_EVENT_QUEUE_ATTR_NB_ATOMIC_ORDER_SEQUENCES: + *attr_value = conf->nb_atomic_order_sequences; + break; + case RTE_EVENT_QUEUE_ATTR_EVENT_QUEUE_CFG: + *attr_value = conf->event_queue_cfg; + break; + case RTE_EVENT_QUEUE_ATTR_SCHEDULE_TYPE: + if (conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) + return -EOVERFLOW; + + *attr_value = conf->schedule_type; + break; + default: + return -EINVAL; + }; + return 0; } int @@ -912,6 +963,23 @@ rte_event_dequeue_timeout_ticks(uint8_t dev_id, uint64_t ns, } int +rte_event_dev_service_id_get(uint8_t dev_id, uint32_t *service_id) +{ + struct rte_eventdev *dev; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + + if (service_id == NULL) + return -EINVAL; + + if (dev->data->service_inited) + *service_id = dev->data->service_id; + + return dev->data->service_inited ? 0 : -ESRCH; +} + +int rte_event_dev_dump(uint8_t dev_id, FILE *f) { struct rte_eventdev *dev; diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h index 128bc522..f1949ff7 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -218,10 +218,10 @@ * (each worker thread schedules events to its own port) or centralized * (a dedicated thread schedules to all ports). Distributed software schedulers * perform the scheduling in rte_event_dequeue_burst(), whereas centralized - * scheduler logic is located in rte_event_schedule(). + * scheduler logic need a dedicated service core for scheduling. * The RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED capability flag is not set * indicates the device is centralized and thus needs a dedicated scheduling - * thread that repeatedly calls rte_event_schedule(). + * thread that repeatedly calls software specific scheduling function. * * An event driven worker thread has following typical workflow on fastpath: * \code{.c} @@ -263,16 +263,16 @@ struct rte_mbuf; /* we just use mbuf pointers; no need to include rte_mbuf.h */ * In distributed scheduling mode, event scheduling happens in HW or * rte_event_dequeue_burst() or the combination of these two. * If the flag is not set then eventdev is centralized and thus needs a - * dedicated scheduling thread that repeatedly calls rte_event_schedule(). + * dedicated service core that acts as a scheduling thread . * - * @see rte_event_schedule(), rte_event_dequeue_burst() + * @see rte_event_dequeue_burst() */ #define RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES (1ULL << 3) /**< Event device is capable of enqueuing events of any type to any queue. * If this capability is not set, the queue only supports events of the - * *RTE_EVENT_QUEUE_CFG_* type that it was created with. + * *RTE_SCHED_TYPE_* type that it was created with. 
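[Editorial note: the rte_event_dev_attr_get(), rte_event_port_attr_get() and rte_event_queue_attr_get() functions added above replace the removed per-field getters such as rte_event_queue_count(), rte_event_queue_priority(), rte_event_port_count() and the port depth queries. A short sketch of the new style, with the device and port ids assumed:]

#include <stdio.h>
#include <rte_eventdev.h>

static void
print_dev_dimensions(uint8_t dev_id)
{
	uint32_t nb_queues = 0, nb_ports = 0, deq_depth = 0;

	/* Formerly rte_event_queue_count(dev_id) */
	rte_event_dev_attr_get(dev_id, RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &nb_queues);
	/* Formerly rte_event_port_count(dev_id) */
	rte_event_dev_attr_get(dev_id, RTE_EVENT_DEV_ATTR_PORT_COUNT, &nb_ports);
	/* Formerly rte_event_port_dequeue_depth(dev_id, 0) */
	rte_event_port_attr_get(dev_id, 0, RTE_EVENT_PORT_ATTR_DEQ_DEPTH, &deq_depth);

	printf("queues=%u ports=%u port0 deq depth=%u\n",
	       nb_queues, nb_ports, deq_depth);
}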
* - * @see RTE_EVENT_QUEUE_CFG_* values + * @see RTE_SCHED_TYPE_* values */ #define RTE_EVENT_DEV_CAP_BURST_MODE (1ULL << 4) /**< Event device is capable of operating in burst mode for enqueue(forward, @@ -399,6 +399,36 @@ struct rte_event_dev_info { int rte_event_dev_info_get(uint8_t dev_id, struct rte_event_dev_info *dev_info); +/** + * The count of ports. + */ +#define RTE_EVENT_DEV_ATTR_PORT_COUNT 0 +/** + * The count of queues. + */ +#define RTE_EVENT_DEV_ATTR_QUEUE_COUNT 1 +/** + * The status of the device, zero for stopped, non-zero for started. + */ +#define RTE_EVENT_DEV_ATTR_STARTED 2 + +/** + * Get an attribute from a device. + * + * @param dev_id Eventdev id + * @param attr_id The attribute ID to retrieve + * @param[out] attr_value A pointer that will be filled in with the attribute + * value if successful. + * + * @retval 0 Successfully retrieved attribute value + * -EINVAL Invalid device or *attr_id* provided, or *attr_value* + * is NULL + */ +int +rte_event_dev_attr_get(uint8_t dev_id, uint32_t attr_id, + uint32_t *attr_value); + + /* Event device configuration bitmap flags */ #define RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT (1ULL << 0) /**< Override the global *dequeue_timeout_ns* and use per dequeue timeout in ns. @@ -485,39 +515,13 @@ rte_event_dev_configure(uint8_t dev_id, /* Event queue specific APIs */ /* Event queue configuration bitmap flags */ -#define RTE_EVENT_QUEUE_CFG_TYPE_MASK (3ULL << 0) -/**< Mask for event queue schedule type configuration request */ -#define RTE_EVENT_QUEUE_CFG_ALL_TYPES (0ULL << 0) +#define RTE_EVENT_QUEUE_CFG_ALL_TYPES (1ULL << 0) /**< Allow ATOMIC,ORDERED,PARALLEL schedule type enqueue * * @see RTE_SCHED_TYPE_ORDERED, RTE_SCHED_TYPE_ATOMIC, RTE_SCHED_TYPE_PARALLEL * @see rte_event_enqueue_burst() */ -#define RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY (1ULL << 0) -/**< Allow only ATOMIC schedule type enqueue - * - * The rte_event_enqueue_burst() result is undefined if the queue configured - * with ATOMIC only and sched_type != RTE_SCHED_TYPE_ATOMIC - * - * @see RTE_SCHED_TYPE_ATOMIC, rte_event_enqueue_burst() - */ -#define RTE_EVENT_QUEUE_CFG_ORDERED_ONLY (2ULL << 0) -/**< Allow only ORDERED schedule type enqueue - * - * The rte_event_enqueue_burst() result is undefined if the queue configured - * with ORDERED only and sched_type != RTE_SCHED_TYPE_ORDERED - * - * @see RTE_SCHED_TYPE_ORDERED, rte_event_enqueue_burst() - */ -#define RTE_EVENT_QUEUE_CFG_PARALLEL_ONLY (3ULL << 0) -/**< Allow only PARALLEL schedule type enqueue - * - * The rte_event_enqueue_burst() result is undefined if the queue configured - * with PARALLEL only and sched_type != RTE_SCHED_TYPE_PARALLEL - * - * @see RTE_SCHED_TYPE_PARALLEL, rte_event_enqueue_burst() - */ -#define RTE_EVENT_QUEUE_CFG_SINGLE_LINK (1ULL << 2) +#define RTE_EVENT_QUEUE_CFG_SINGLE_LINK (1ULL << 1) /**< This event queue links only to a single event port. * * @see rte_event_port_setup(), rte_event_port_link() @@ -528,8 +532,8 @@ struct rte_event_queue_conf { uint32_t nb_atomic_flows; /**< The maximum number of active flows this queue can track at any * given time. If the queue is configured for atomic scheduling (by - * applying the RTE_EVENT_QUEUE_CFG_ALL_TYPES or - * RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY flags to event_queue_cfg), then the + * applying the RTE_EVENT_QUEUE_CFG_ALL_TYPES flag to event_queue_cfg + * or RTE_SCHED_TYPE_ATOMIC flag to schedule_type), then the * value must be in the range of [1, nb_event_queue_flows], which was * previously provided in rte_event_dev_configure(). 
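/*
 * Usage sketch (illustrative, not part of the diff): with rte_event_queue_count()
 * and rte_event_port_count() removed by this patch, applications read the same
 * information through the generic attribute API declared above. dev_id is
 * assumed to name a configured event device.
 */
#include <stdio.h>
#include <rte_eventdev.h>

static void
show_eventdev_dimensions(uint8_t dev_id)
{
	uint32_t nb_queues = 0, nb_ports = 0, started = 0;

	/* Each call returns 0 on success, -EINVAL on a bad id or NULL pointer. */
	rte_event_dev_attr_get(dev_id, RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &nb_queues);
	rte_event_dev_attr_get(dev_id, RTE_EVENT_DEV_ATTR_PORT_COUNT, &nb_ports);
	rte_event_dev_attr_get(dev_id, RTE_EVENT_DEV_ATTR_STARTED, &started);

	printf("eventdev %u: %u queues, %u ports, %s\n",
	       (unsigned int)dev_id, (unsigned int)nb_queues,
	       (unsigned int)nb_ports, started ? "started" : "stopped");
}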
*/ @@ -542,12 +546,18 @@ struct rte_event_queue_conf { * event will be returned from dequeue until one or more entries are * freed up/released. * If the queue is configured for ordered scheduling (by applying the - * RTE_EVENT_QUEUE_CFG_ALL_TYPES or RTE_EVENT_QUEUE_CFG_ORDERED_ONLY - * flags to event_queue_cfg), then the value must be in the range of - * [1, nb_event_queue_flows], which was previously supplied to - * rte_event_dev_configure(). + * RTE_EVENT_QUEUE_CFG_ALL_TYPES flag to event_queue_cfg or + * RTE_SCHED_TYPE_ORDERED flag to schedule_type), then the value must + * be in the range of [1, nb_event_queue_flows], which was + * previously supplied to rte_event_dev_configure(). + */ + uint32_t event_queue_cfg; + /**< Queue cfg flags(EVENT_QUEUE_CFG_) */ + uint8_t schedule_type; + /**< Queue schedule type(RTE_SCHED_TYPE_*). + * Valid when RTE_EVENT_QUEUE_CFG_ALL_TYPES bit is not set in + * event_queue_cfg. */ - uint32_t event_queue_cfg; /**< Queue cfg flags(EVENT_QUEUE_CFG_) */ uint8_t priority; /**< Priority for this event queue relative to other event queues. * The requested priority should in the range of @@ -607,31 +617,45 @@ rte_event_queue_setup(uint8_t dev_id, uint8_t queue_id, const struct rte_event_queue_conf *queue_conf); /** - * Get the number of event queues on a specific event device - * - * @param dev_id - * Event device identifier. - * @return - * - The number of configured event queues + * The priority of the queue. */ -uint8_t -rte_event_queue_count(uint8_t dev_id); +#define RTE_EVENT_QUEUE_ATTR_PRIORITY 0 +/** + * The number of atomic flows configured for the queue. + */ +#define RTE_EVENT_QUEUE_ATTR_NB_ATOMIC_FLOWS 1 +/** + * The number of atomic order sequences configured for the queue. + */ +#define RTE_EVENT_QUEUE_ATTR_NB_ATOMIC_ORDER_SEQUENCES 2 +/** + * The cfg flags for the queue. + */ +#define RTE_EVENT_QUEUE_ATTR_EVENT_QUEUE_CFG 3 +/** + * The schedule type of the queue. + */ +#define RTE_EVENT_QUEUE_ATTR_SCHEDULE_TYPE 4 /** - * Get the priority of the event queue on a specific event device - * - * @param dev_id - * Event device identifier. - * @param queue_id - * Event queue identifier. - * @return - * - If the device has RTE_EVENT_DEV_CAP_QUEUE_QOS capability then the - * configured priority of the event queue in - * [RTE_EVENT_DEV_PRIORITY_HIGHEST, RTE_EVENT_DEV_PRIORITY_LOWEST] range - * else the value RTE_EVENT_DEV_PRIORITY_NORMAL + * Get an attribute from a queue. + * + * @param dev_id Eventdev id + * @param queue_id Eventdev queue id + * @param attr_id The attribute ID to retrieve + * @param[out] attr_value A pointer that will be filled in with the attribute + * value if successful + * + * @retval 0 Successfully returned value + * -EINVAL invalid device, queue or attr_id provided, or attr_value + * was NULL + * -EOVERFLOW returned when attr_id is set to + * RTE_EVENT_QUEUE_ATTR_SCHEDULE_TYPE and event_queue_cfg is set to + * RTE_EVENT_QUEUE_CFG_ALL_TYPES */ -uint8_t -rte_event_queue_priority(uint8_t dev_id, uint8_t queue_id); +int +rte_event_queue_attr_get(uint8_t dev_id, uint8_t queue_id, uint32_t attr_id, + uint32_t *attr_value); /* Event port specific APIs */ @@ -715,47 +739,33 @@ rte_event_port_setup(uint8_t dev_id, uint8_t port_id, const struct rte_event_port_conf *port_conf); /** - * Get the number of dequeue queue depth configured for event port designated - * by its *port_id* on a specific event device - * - * @param dev_id - * Event device identifier. - * @param port_id - * Event port identifier. 
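/*
 * Usage sketch (illustrative, not part of the diff): with the *_ONLY cfg flags
 * removed, the scheduling mode is now requested through the new schedule_type
 * member of rte_event_queue_conf, while RTE_EVENT_QUEUE_CFG_ALL_TYPES keeps its
 * "any type" meaning. dev_id and queue_id are assumed to come from a prior
 * rte_event_dev_configure().
 */
#include <rte_eventdev.h>

static int
setup_atomic_queue(uint8_t dev_id, uint8_t queue_id)
{
	struct rte_event_queue_conf conf;

	/* Start from the driver defaults, then pin the queue to atomic mode. */
	if (rte_event_queue_default_conf_get(dev_id, queue_id, &conf) < 0)
		return -1;

	conf.event_queue_cfg = 0;                   /* neither ALL_TYPES nor SINGLE_LINK */
	conf.schedule_type = RTE_SCHED_TYPE_ATOMIC; /* replaces RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY */

	return rte_event_queue_setup(dev_id, queue_id, &conf);
}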
- * @return - * - The number of configured dequeue queue depth - * - * @see rte_event_dequeue_burst() + * The queue depth of the port on the enqueue side */ -uint8_t -rte_event_port_dequeue_depth(uint8_t dev_id, uint8_t port_id); - +#define RTE_EVENT_PORT_ATTR_ENQ_DEPTH 0 /** - * Get the number of enqueue queue depth configured for event port designated - * by its *port_id* on a specific event device - * - * @param dev_id - * Event device identifier. - * @param port_id - * Event port identifier. - * @return - * - The number of configured enqueue queue depth - * - * @see rte_event_enqueue_burst() + * The queue depth of the port on the dequeue side */ -uint8_t -rte_event_port_enqueue_depth(uint8_t dev_id, uint8_t port_id); +#define RTE_EVENT_PORT_ATTR_DEQ_DEPTH 1 +/** + * The new event threshold of the port + */ +#define RTE_EVENT_PORT_ATTR_NEW_EVENT_THRESHOLD 2 /** - * Get the number of ports on a specific event device + * Get an attribute from a port. * - * @param dev_id - * Event device identifier. - * @return - * - The number of configured ports + * @param dev_id Eventdev id + * @param port_id Eventdev port id + * @param attr_id The attribute ID to retrieve + * @param[out] attr_value A pointer that will be filled in with the attribute + * value if successful + * + * @retval 0 Successfully returned value + * -EINVAL Invalid device, port or attr_id, or attr_value was NULL */ -uint8_t -rte_event_port_count(uint8_t dev_id); +int +rte_event_port_attr_get(uint8_t dev_id, uint8_t port_id, uint32_t attr_id, + uint32_t *attr_value); /** * Start an event device. @@ -871,6 +881,8 @@ rte_event_dev_close(uint8_t dev_id); /**< The event generated from cpu for pipelining. * Application may use *sub_event_type* to further classify the event */ +#define RTE_EVENT_TYPE_ETH_RX_ADAPTER 0x4 +/**< The event generated from event eth Rx adapter */ #define RTE_EVENT_TYPE_MAX 0x10 /**< Maximum number of event types */ @@ -882,7 +894,10 @@ rte_event_dev_close(uint8_t dev_id); #define RTE_EVENT_OP_FORWARD 1 /**< The CPU use this operation to forward the event to different event queue or * change to new application specific flow or schedule type to enable - * pipelining + * pipelining. + * + * This operation must only be enqueued to the same port that the + * event to be forwarded was dequeued from. */ #define RTE_EVENT_OP_RELEASE 2 /**< Release the flow context associated with the schedule type. @@ -912,6 +927,9 @@ rte_event_dev_close(uint8_t dev_id); * or no scheduling context is held then this function may be an NOOP, * depending on the implementation. * + * This operation must only be enqueued to the same port that the + * event to be released was dequeued from. + * */ /** @@ -990,14 +1008,50 @@ struct rte_event { }; }; +/* Ethdev Rx adapter capability bitmap flags */ +#define RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT 0x1 +/**< This flag is sent when the packet transfer mechanism is in HW. + * Ethdev can send packets to the event device using internal event port. + */ +#define RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ 0x2 +/**< Adapter supports multiple event queues per ethdev. Every ethdev + * Rx queue can be connected to a unique event queue. + */ +#define RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID 0x4 +/**< The application can override the adapter generated flow ID in the + * event. 
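/*
 * Usage sketch (illustrative, not part of the diff): the per-port depth getters
 * and per-queue priority getter removed by this patch map onto the new
 * rte_event_port_attr_get()/rte_event_queue_attr_get() calls. The -EOVERFLOW
 * case is the one documented above for RTE_EVENT_QUEUE_CFG_ALL_TYPES queues.
 * dev_id, port_id and queue_id are assumed to name configured objects.
 */
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <rte_eventdev.h>

static void
show_port_and_queue_attrs(uint8_t dev_id, uint8_t port_id, uint8_t queue_id)
{
	uint32_t deq = 0, enq = 0, prio = 0, sched = 0;

	rte_event_port_attr_get(dev_id, port_id,
				RTE_EVENT_PORT_ATTR_DEQ_DEPTH, &deq);
	rte_event_port_attr_get(dev_id, port_id,
				RTE_EVENT_PORT_ATTR_ENQ_DEPTH, &enq);
	rte_event_queue_attr_get(dev_id, queue_id,
				 RTE_EVENT_QUEUE_ATTR_PRIORITY, &prio);
	printf("port %u: deq depth %" PRIu32 ", enq depth %" PRIu32
	       "; queue %u priority %" PRIu32 "\n",
	       (unsigned int)port_id, deq, enq, (unsigned int)queue_id, prio);

	/* -EOVERFLOW means the queue was created with
	 * RTE_EVENT_QUEUE_CFG_ALL_TYPES, so each event carries its own
	 * schedule type instead of the queue.
	 */
	if (rte_event_queue_attr_get(dev_id, queue_id,
			RTE_EVENT_QUEUE_ATTR_SCHEDULE_TYPE, &sched) == -EOVERFLOW)
		printf("queue %u accepts all schedule types\n",
		       (unsigned int)queue_id);
}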
This flow ID can be specified when adding an ethdev Rx queue + * to the adapter using the ev member of struct rte_event_eth_rx_adapter + * @see struct rte_event_eth_rx_adapter_queue_conf::ev + * @see struct rte_event_eth_rx_adapter_queue_conf::rx_queue_flags + */ + +/** + * Retrieve the event device's ethdev Rx adapter capabilities for the + * specified ethernet port + * + * @param dev_id + * The identifier of the device. + * + * @param eth_port_id + * The identifier of the ethernet device. + * + * @param[out] caps + * A pointer to memory filled with Rx event adapter capabilities. + * + * @return + * - 0: Success, driver provides Rx event adapter capabilities for the + * ethernet device. + * - <0: Error code returned by the driver function. + * + */ +int +rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint8_t eth_port_id, + uint32_t *caps); struct rte_eventdev_driver; struct rte_eventdev_ops; struct rte_eventdev; -typedef void (*event_schedule_t)(struct rte_eventdev *dev); -/**< @internal Schedule one or more events in the event dev. */ - typedef uint16_t (*event_enqueue_t)(void *port, const struct rte_event *ev); /**< @internal Enqueue event on port of a device */ @@ -1034,12 +1088,10 @@ struct rte_eventdev_data { /**< Number of event ports. */ void **ports; /**< Array of pointers to ports. */ - uint8_t *ports_dequeue_depth; - /**< Array of port dequeue depth. */ - uint8_t *ports_enqueue_depth; - /**< Array of port enqueue depth. */ - uint8_t *queues_prio; - /**< Array of queue priority. */ + struct rte_event_port_conf *ports_cfg; + /**< Array of port configuration structures. */ + struct rte_event_queue_conf *queues_cfg; + /**< Array of queue configuration structures. */ uint16_t *links_map; /**< Memory to store queues to port connections. */ void *dev_private; @@ -1048,6 +1100,10 @@ struct rte_eventdev_data { /**< Event device capabilities(RTE_EVENT_DEV_CAP_)*/ struct rte_event_dev_config dev_conf; /**< Configuration applied to device. */ + uint8_t service_inited; + /* Service initialization state */ + uint32_t service_id; + /* Service ID*/ RTE_STD_C11 uint8_t dev_started : 1; @@ -1059,8 +1115,6 @@ struct rte_eventdev_data { /** @internal The data structure associated with each event device. */ struct rte_eventdev { - event_schedule_t schedule; - /**< Pointer to PMD schedule function. */ event_enqueue_t enqueue; /**< Pointer to PMD enqueue function. */ event_enqueue_burst_t enqueue_burst; @@ -1089,24 +1143,6 @@ struct rte_eventdev { extern struct rte_eventdev *rte_eventdevs; /** @internal The pool of rte_eventdev structures. */ - -/** - * Schedule one or more events in the event dev. - * - * An event dev implementation may define this is a NOOP, for instance if - * the event dev performs its scheduling in hardware. - * - * @param dev_id - * The identifier of the device. - */ -static inline void -rte_event_schedule(uint8_t dev_id) -{ - struct rte_eventdev *dev = &rte_eventdevs[dev_id]; - if (*dev->schedule) - (*dev->schedule)(dev); -} - static __rte_always_inline uint16_t __rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id, const struct rte_event ev[], uint16_t nb_events, @@ -1144,6 +1180,9 @@ __rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id, * The *nb_events* parameter is the number of event objects to enqueue which are * supplied in the *ev* array of *rte_event* structure. * + * Event operations RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE must only be + * enqueued to the same port that their associated events were dequeued from. 
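/*
 * Usage sketch (illustrative, not part of the diff): how an application might
 * use the new capability query above to learn whether packet transfer from an
 * ethdev to this eventdev happens through an internal HW port or needs the SW
 * Rx adapter service. dev_id and eth_port_id are assumed valid.
 */
#include <rte_eventdev.h>

static int
rx_transfer_is_hw(uint8_t dev_id, uint8_t eth_port_id)
{
	uint32_t caps = 0;

	if (rte_event_eth_rx_adapter_caps_get(dev_id, eth_port_id, &caps) < 0)
		return -1;

	return !!(caps & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT);
}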
+ * * The rte_event_enqueue_burst() function returns the number of * events objects it actually enqueued. A return value equal to *nb_events* * means that all event objects have been enqueued. @@ -1346,6 +1385,9 @@ rte_event_dequeue_timeout_ticks(uint8_t dev_id, uint64_t ns, * with RTE_EVENT_OP_RELEASE operation can be used to release the * contexts early. * + * Event operations RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE must only be + * enqueued to the same port that their associated events were dequeued from. + * * @param dev_id * The identifier of the device. * @param port_id @@ -1545,6 +1587,24 @@ rte_event_port_links_get(uint8_t dev_id, uint8_t port_id, uint8_t queues[], uint8_t priorities[]); /** + * Retrieve the service ID of the event dev. If the adapter doesn't use + * a rte_service function, this function returns -ESRCH. + * + * @param dev_id + * The identifier of the device. + * + * @param [out] service_id + * A pointer to a uint32_t, to be filled in with the service id. + * + * @return + * - 0: Success + * - <0: Error code on failure, if the event dev doesn't use a rte_service + * function, this function returns -ESRCH. + */ +int +rte_event_dev_service_id_get(uint8_t dev_id, uint32_t *service_id); + +/** * Dump internal information about *dev_id* to the FILE* provided in *f*. * * @param dev_id diff --git a/lib/librte_eventdev/rte_eventdev_pmd.h b/lib/librte_eventdev/rte_eventdev_pmd.h index 3d72acf3..7a206c56 100644 --- a/lib/librte_eventdev/rte_eventdev_pmd.h +++ b/lib/librte_eventdev/rte_eventdev_pmd.h @@ -83,9 +83,19 @@ extern "C" { } \ } while (0) +#define RTE_EVENT_ETH_RX_ADAPTER_SW_CAP \ + ((RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID) | \ + (RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ)) + +/**< Ethernet Rx adapter cap to return If the packet transfers from + * the ethdev to eventdev use a SW service function + */ + #define RTE_EVENTDEV_DETACHED (0) #define RTE_EVENTDEV_ATTACHED (1) +struct rte_eth_dev; + /** Global structure used for maintaining state of allocated event devices */ struct rte_eventdev_global { uint8_t nb_devs; /**< Number of devices found */ @@ -429,6 +439,163 @@ typedef int (*eventdev_xstats_get_names_t)(const struct rte_eventdev *dev, typedef uint64_t (*eventdev_xstats_get_by_name)(const struct rte_eventdev *dev, const char *name, unsigned int *id); + +/** + * Retrieve the event device's ethdev Rx adapter capabilities for the + * specified ethernet port + * + * @param dev + * Event device pointer + * + * @param eth_dev + * Ethernet device pointer + * + * @param[out] caps + * A pointer to memory filled with Rx event adapter capabilities. + * + * @return + * - 0: Success, driver provides Rx event adapter capabilities for the + * ethernet device. + * - <0: Error code returned by the driver function. + * + */ +typedef int (*eventdev_eth_rx_adapter_caps_get_t) + (const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + uint32_t *caps); + +struct rte_event_eth_rx_adapter_queue_conf *queue_conf; + +/** + * Add ethernet Rx queues to event device. This callback is invoked if + * the caps returned from rte_eventdev_eth_rx_adapter_caps_get(, eth_port_id) + * has RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT set. + * + * @param dev + * Event device pointer + * + * @param eth_dev + * Ethernet device pointer + * + * @param rx_queue_id + * Ethernet device receive queue index + * + * @param queue_conf + * Additional configuration structure + + * @return + * - 0: Success, ethernet receive queue added successfully. 
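/*
 * Usage sketch (illustrative, not part of the diff): with rte_event_schedule()
 * removed, a centralized PMD exposes its scheduling loop as an rte_service
 * instance, discovered via the new rte_event_dev_service_id_get(). The
 * rte_service.h calls below reflect the service-core API of this release and
 * should be treated as an assumed, representative sequence.
 */
#include <errno.h>
#include <rte_eventdev.h>
#include <rte_service.h>

static int
setup_eventdev_scheduling(uint8_t dev_id, uint32_t service_lcore)
{
	uint32_t service_id;
	int ret;

	ret = rte_event_dev_service_id_get(dev_id, &service_id);
	if (ret == -ESRCH)
		return 0;       /* distributed scheduler: nothing to set up */
	if (ret < 0)
		return ret;

	/* Dedicate a core to run the PMD's scheduling service. */
	rte_service_lcore_add(service_lcore);
	rte_service_map_lcore_set(service_id, service_lcore, 1);
	rte_service_runstate_set(service_id, 1);
	return rte_service_lcore_start(service_lcore);
}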
+ * - <0: Error code returned by the driver function. + * + */ +typedef int (*eventdev_eth_rx_adapter_queue_add_t)( + const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf); + +/** + * Delete ethernet Rx queues from event device. This callback is invoked if + * the caps returned from eventdev_eth_rx_adapter_caps_get(, eth_port_id) + * has RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT set. + * + * @param dev + * Event device pointer + * + * @param eth_dev + * Ethernet device pointer + * + * @param rx_queue_id + * Ethernet device receive queue index + * + * @return + * - 0: Success, ethernet receive queue deleted successfully. + * - <0: Error code returned by the driver function. + * + */ +typedef int (*eventdev_eth_rx_adapter_queue_del_t) + (const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id); + +/** + * Start ethernet Rx adapter. This callback is invoked if + * the caps returned from eventdev_eth_rx_adapter_caps_get(.., eth_port_id) + * has RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT set and Rx queues + * from eth_port_id have been added to the event device. + * + * @param dev + * Event device pointer + * + * @param eth_dev + * Ethernet device pointer + * + * @return + * - 0: Success, ethernet Rx adapter started successfully. + * - <0: Error code returned by the driver function. + */ +typedef int (*eventdev_eth_rx_adapter_start_t) + (const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev); + +/** + * Stop ethernet Rx adapter. This callback is invoked if + * the caps returned from eventdev_eth_rx_adapter_caps_get(..,eth_port_id) + * has RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT set and Rx queues + * from eth_port_id have been added to the event device. + * + * @param dev + * Event device pointer + * + * @param eth_dev + * Ethernet device pointer + * + * @return + * - 0: Success, ethernet Rx adapter stopped successfully. + * - <0: Error code returned by the driver function. + */ +typedef int (*eventdev_eth_rx_adapter_stop_t) + (const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev); + +struct rte_event_eth_rx_adapter_stats *stats; + +/** + * Retrieve ethernet Rx adapter statistics. + * + * @param dev + * Event device pointer + * + * @param eth_dev + * Ethernet device pointer + * + * @param[out] stats + * Pointer to stats structure + * + * @return + * Return 0 on success. + */ + +typedef int (*eventdev_eth_rx_adapter_stats_get) + (const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + struct rte_event_eth_rx_adapter_stats *stats); +/** + * Reset ethernet Rx adapter statistics. + * + * @param dev + * Event device pointer + * + * @param eth_dev + * Ethernet device pointer + * + * @return + * Return 0 on success. + */ +typedef int (*eventdev_eth_rx_adapter_stats_reset) + (const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev); + /** Event device operations function pointer table */ struct rte_eventdev_ops { eventdev_info_get_t dev_infos_get; /**< Get device info. */ @@ -468,6 +635,21 @@ struct rte_eventdev_ops { /**< Get one value by name. */ eventdev_xstats_reset_t xstats_reset; /**< Reset the statistics values in xstats. 
*/ + + eventdev_eth_rx_adapter_caps_get_t eth_rx_adapter_caps_get; + /**< Get ethernet Rx adapter capabilities */ + eventdev_eth_rx_adapter_queue_add_t eth_rx_adapter_queue_add; + /**< Add Rx queues to ethernet Rx adapter */ + eventdev_eth_rx_adapter_queue_del_t eth_rx_adapter_queue_del; + /**< Delete Rx queues from ethernet Rx adapter */ + eventdev_eth_rx_adapter_start_t eth_rx_adapter_start; + /**< Start ethernet Rx adapter */ + eventdev_eth_rx_adapter_stop_t eth_rx_adapter_stop; + /**< Stop ethernet Rx adapter */ + eventdev_eth_rx_adapter_stats_get eth_rx_adapter_stats_get; + /**< Get ethernet Rx stats */ + eventdev_eth_rx_adapter_stats_reset eth_rx_adapter_stats_reset; + /**< Reset ethernet Rx stats */ }; /** diff --git a/lib/librte_eventdev/rte_eventdev_pmd_pci.h b/lib/librte_eventdev/rte_eventdev_pmd_pci.h index b6bd7319..ade32b5d 100644 --- a/lib/librte_eventdev/rte_eventdev_pmd_pci.h +++ b/lib/librte_eventdev/rte_eventdev_pmd_pci.h @@ -50,6 +50,7 @@ extern "C" { #include <rte_eal.h> #include <rte_lcore.h> #include <rte_pci.h> +#include <rte_bus_pci.h> #include "rte_eventdev_pmd.h" diff --git a/lib/librte_eventdev/rte_eventdev_pmd_vdev.h b/lib/librte_eventdev/rte_eventdev_pmd_vdev.h index 135e8b80..56232dec 100644 --- a/lib/librte_eventdev/rte_eventdev_pmd_vdev.h +++ b/lib/librte_eventdev/rte_eventdev_pmd_vdev.h @@ -48,7 +48,7 @@ extern "C" { #include <rte_debug.h> #include <rte_eal.h> -#include <rte_vdev.h> +#include <rte_bus_vdev.h> #include "rte_eventdev_pmd.h" diff --git a/lib/librte_eventdev/rte_eventdev_version.map b/lib/librte_eventdev/rte_eventdev_version.map index 4c48e5f0..108ae61f 100644 --- a/lib/librte_eventdev/rte_eventdev_version.map +++ b/lib/librte_eventdev/rte_eventdev_version.map @@ -19,17 +19,12 @@ DPDK_17.05 { rte_event_port_default_conf_get; rte_event_port_setup; - rte_event_port_dequeue_depth; - rte_event_port_enqueue_depth; - rte_event_port_count; rte_event_port_link; rte_event_port_unlink; rte_event_port_links_get; rte_event_queue_default_conf_get; rte_event_queue_setup; - rte_event_queue_count; - rte_event_queue_priority; rte_event_dequeue_timeout_ticks; @@ -51,3 +46,25 @@ DPDK_17.08 { rte_event_ring_init; rte_event_ring_lookup; } DPDK_17.05; + +DPDK_17.11 { + global: + + rte_event_dev_attr_get; + rte_event_dev_service_id_get; + rte_event_port_attr_get; + rte_event_queue_attr_get; + + rte_event_eth_rx_adapter_caps_get; + rte_event_eth_rx_adapter_create; + rte_event_eth_rx_adapter_create_ext; + rte_event_eth_rx_adapter_free; + rte_event_eth_rx_adapter_queue_add; + rte_event_eth_rx_adapter_queue_del; + rte_event_eth_rx_adapter_service_id_get; + rte_event_eth_rx_adapter_start; + rte_event_eth_rx_adapter_stats_get; + rte_event_eth_rx_adapter_stats_reset; + rte_event_eth_rx_adapter_stop; + +} DPDK_17.08; diff --git a/lib/librte_flow_classify/Makefile b/lib/librte_flow_classify/Makefile new file mode 100644 index 00000000..ea792f5d --- /dev/null +++ b/lib/librte_flow_classify/Makefile @@ -0,0 +1,53 @@ +# BSD LICENSE +# +# Copyright(c) 2017 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_flow_classify.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := rte_flow_classify_version.map + +LIBABIVER := 1 + +LDLIBS += -lrte_eal -lrte_ethdev -lrte_net -lrte_table -lrte_acl + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += rte_flow_classify.c +SRCS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += rte_flow_classify_parse.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY)-include := rte_flow_classify.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_flow_classify/rte_flow_classify.c b/lib/librte_flow_classify/rte_flow_classify.c new file mode 100644 index 00000000..e6f44864 --- /dev/null +++ b/lib/librte_flow_classify/rte_flow_classify.c @@ -0,0 +1,691 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_flow_classify.h> +#include "rte_flow_classify_parse.h" +#include <rte_flow_driver.h> +#include <rte_table_acl.h> +#include <stdbool.h> + +int librte_flow_classify_logtype; + +static struct rte_eth_ntuple_filter ntuple_filter; +static uint32_t unique_id = 1; + + +struct rte_flow_classify_table_entry { + /* meta-data for classify rule */ + uint32_t rule_id; +}; + +struct rte_table { + /* Input parameters */ + struct rte_table_ops ops; + uint32_t entry_size; + enum rte_flow_classify_table_type type; + + /* Handle to the low-level table object */ + void *h_table; +}; + +#define RTE_FLOW_CLASSIFIER_MAX_NAME_SZ 256 + +struct rte_flow_classifier { + /* Input parameters */ + char name[RTE_FLOW_CLASSIFIER_MAX_NAME_SZ]; + int socket_id; + enum rte_flow_classify_table_type type; + + /* Internal tables */ + struct rte_table tables[RTE_FLOW_CLASSIFY_TABLE_MAX]; + uint32_t num_tables; + uint16_t nb_pkts; + struct rte_flow_classify_table_entry + *entries[RTE_PORT_IN_BURST_SIZE_MAX]; +} __rte_cache_aligned; + +enum { + PROTO_FIELD_IPV4, + SRC_FIELD_IPV4, + DST_FIELD_IPV4, + SRCP_FIELD_IPV4, + DSTP_FIELD_IPV4, + NUM_FIELDS_IPV4 +}; + +struct acl_keys { + struct rte_table_acl_rule_add_params key_add; /* add key */ + struct rte_table_acl_rule_delete_params key_del; /* delete key */ +}; + +struct classify_rules { + enum rte_flow_classify_rule_type type; + union { + struct rte_flow_classify_ipv4_5tuple ipv4_5tuple; + } u; +}; + +struct rte_flow_classify_rule { + uint32_t id; /* unique ID of classify rule */ + struct rte_flow_action action; /* action when match found */ + struct classify_rules rules; /* union of rules */ + union { + struct acl_keys key; + } u; + int key_found; /* rule key found in table */ + void *entry; /* pointer to buffer to hold rule meta data */ + void *entry_ptr; /* handle to the table entry for rule meta data */ +}; + +static int +flow_classify_parse_flow( + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_flow_item *items; + parse_filter_t parse_filter; + uint32_t item_num = 0; + uint32_t i = 0; + int ret; + + memset(&ntuple_filter, 0, sizeof(ntuple_filter)); + + /* Get the non-void item number of pattern */ + while ((pattern + i)->type != RTE_FLOW_ITEM_TYPE_END) { + if ((pattern + i)->type != RTE_FLOW_ITEM_TYPE_VOID) + item_num++; + i++; + } + item_num++; + + items = malloc(item_num * sizeof(struct rte_flow_item)); + if (!items) { + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "No memory for pattern items."); + return -ENOMEM; + } + + memset(items, 0, item_num * sizeof(struct rte_flow_item)); + classify_pattern_skip_void_item(items, pattern); + + parse_filter = classify_find_parse_filter_func(items); + if (!parse_filter) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + pattern, "Unsupported pattern"); + free(items); + return -EINVAL; + } + + ret = parse_filter(attr, items, 
actions, &ntuple_filter, error); + free(items); + return ret; +} + + +#define uint32_t_to_char(ip, a, b, c, d) do {\ + *a = (unsigned char)(ip >> 24 & 0xff);\ + *b = (unsigned char)(ip >> 16 & 0xff);\ + *c = (unsigned char)(ip >> 8 & 0xff);\ + *d = (unsigned char)(ip & 0xff);\ + } while (0) + +static inline void +print_acl_ipv4_key_add(struct rte_table_acl_rule_add_params *key) +{ + unsigned char a, b, c, d; + + printf("%s: 0x%02hhx/0x%hhx ", __func__, + key->field_value[PROTO_FIELD_IPV4].value.u8, + key->field_value[PROTO_FIELD_IPV4].mask_range.u8); + + uint32_t_to_char(key->field_value[SRC_FIELD_IPV4].value.u32, + &a, &b, &c, &d); + printf(" %hhu.%hhu.%hhu.%hhu/0x%x ", a, b, c, d, + key->field_value[SRC_FIELD_IPV4].mask_range.u32); + + uint32_t_to_char(key->field_value[DST_FIELD_IPV4].value.u32, + &a, &b, &c, &d); + printf("%hhu.%hhu.%hhu.%hhu/0x%x ", a, b, c, d, + key->field_value[DST_FIELD_IPV4].mask_range.u32); + + printf("%hu : 0x%x %hu : 0x%x", + key->field_value[SRCP_FIELD_IPV4].value.u16, + key->field_value[SRCP_FIELD_IPV4].mask_range.u16, + key->field_value[DSTP_FIELD_IPV4].value.u16, + key->field_value[DSTP_FIELD_IPV4].mask_range.u16); + + printf(" priority: 0x%x\n", key->priority); +} + +static inline void +print_acl_ipv4_key_delete(struct rte_table_acl_rule_delete_params *key) +{ + unsigned char a, b, c, d; + + printf("%s: 0x%02hhx/0x%hhx ", __func__, + key->field_value[PROTO_FIELD_IPV4].value.u8, + key->field_value[PROTO_FIELD_IPV4].mask_range.u8); + + uint32_t_to_char(key->field_value[SRC_FIELD_IPV4].value.u32, + &a, &b, &c, &d); + printf(" %hhu.%hhu.%hhu.%hhu/0x%x ", a, b, c, d, + key->field_value[SRC_FIELD_IPV4].mask_range.u32); + + uint32_t_to_char(key->field_value[DST_FIELD_IPV4].value.u32, + &a, &b, &c, &d); + printf("%hhu.%hhu.%hhu.%hhu/0x%x ", a, b, c, d, + key->field_value[DST_FIELD_IPV4].mask_range.u32); + + printf("%hu : 0x%x %hu : 0x%x\n", + key->field_value[SRCP_FIELD_IPV4].value.u16, + key->field_value[SRCP_FIELD_IPV4].mask_range.u16, + key->field_value[DSTP_FIELD_IPV4].value.u16, + key->field_value[DSTP_FIELD_IPV4].mask_range.u16); +} + +static int +rte_flow_classifier_check_params(struct rte_flow_classifier_params *params) +{ + if (params == NULL) { + RTE_FLOW_CLASSIFY_LOG(ERR, + "%s: Incorrect value for parameter params\n", __func__); + return -EINVAL; + } + + /* name */ + if (params->name == NULL) { + RTE_FLOW_CLASSIFY_LOG(ERR, + "%s: Incorrect value for parameter name\n", __func__); + return -EINVAL; + } + + /* socket */ + if ((params->socket_id < 0) || + (params->socket_id >= RTE_MAX_NUMA_NODES)) { + RTE_FLOW_CLASSIFY_LOG(ERR, + "%s: Incorrect value for parameter socket_id\n", + __func__); + return -EINVAL; + } + + return 0; +} + +struct rte_flow_classifier * +rte_flow_classifier_create(struct rte_flow_classifier_params *params) +{ + struct rte_flow_classifier *cls; + int ret; + + /* Check input parameters */ + ret = rte_flow_classifier_check_params(params); + if (ret != 0) { + RTE_FLOW_CLASSIFY_LOG(ERR, + "%s: flow classifier params check failed (%d)\n", + __func__, ret); + return NULL; + } + + /* Allocate memory for the flow classifier */ + cls = rte_zmalloc_socket("FLOW_CLASSIFIER", + sizeof(struct rte_flow_classifier), + RTE_CACHE_LINE_SIZE, params->socket_id); + + if (cls == NULL) { + RTE_FLOW_CLASSIFY_LOG(ERR, + "%s: flow classifier memory allocation failed\n", + __func__); + return NULL; + } + + /* Save input parameters */ + snprintf(cls->name, RTE_FLOW_CLASSIFIER_MAX_NAME_SZ, "%s", + params->name); + cls->socket_id = params->socket_id; + cls->type 
= params->type; + + /* Initialize flow classifier internal data structure */ + cls->num_tables = 0; + + return cls; +} + +static void +rte_flow_classify_table_free(struct rte_table *table) +{ + if (table->ops.f_free != NULL) + table->ops.f_free(table->h_table); +} + +int +rte_flow_classifier_free(struct rte_flow_classifier *cls) +{ + uint32_t i; + + /* Check input parameters */ + if (cls == NULL) { + RTE_FLOW_CLASSIFY_LOG(ERR, + "%s: rte_flow_classifier parameter is NULL\n", + __func__); + return -EINVAL; + } + + /* Free tables */ + for (i = 0; i < cls->num_tables; i++) { + struct rte_table *table = &cls->tables[i]; + + rte_flow_classify_table_free(table); + } + + /* Free flow classifier memory */ + rte_free(cls); + + return 0; +} + +static int +rte_table_check_params(struct rte_flow_classifier *cls, + struct rte_flow_classify_table_params *params, + uint32_t *table_id) +{ + if (cls == NULL) { + RTE_FLOW_CLASSIFY_LOG(ERR, + "%s: flow classifier parameter is NULL\n", + __func__); + return -EINVAL; + } + if (params == NULL) { + RTE_FLOW_CLASSIFY_LOG(ERR, "%s: params parameter is NULL\n", + __func__); + return -EINVAL; + } + if (table_id == NULL) { + RTE_FLOW_CLASSIFY_LOG(ERR, "%s: table_id parameter is NULL\n", + __func__); + return -EINVAL; + } + + /* ops */ + if (params->ops == NULL) { + RTE_FLOW_CLASSIFY_LOG(ERR, "%s: params->ops is NULL\n", + __func__); + return -EINVAL; + } + + if (params->ops->f_create == NULL) { + RTE_FLOW_CLASSIFY_LOG(ERR, + "%s: f_create function pointer is NULL\n", __func__); + return -EINVAL; + } + + if (params->ops->f_lookup == NULL) { + RTE_FLOW_CLASSIFY_LOG(ERR, + "%s: f_lookup function pointer is NULL\n", __func__); + return -EINVAL; + } + + /* De we have room for one more table? */ + if (cls->num_tables == RTE_FLOW_CLASSIFY_TABLE_MAX) { + RTE_FLOW_CLASSIFY_LOG(ERR, + "%s: Incorrect value for num_tables parameter\n", + __func__); + return -EINVAL; + } + + return 0; +} + +int +rte_flow_classify_table_create(struct rte_flow_classifier *cls, + struct rte_flow_classify_table_params *params, + uint32_t *table_id) +{ + struct rte_table *table; + void *h_table; + uint32_t entry_size, id; + int ret; + + /* Check input arguments */ + ret = rte_table_check_params(cls, params, table_id); + if (ret != 0) + return ret; + + id = cls->num_tables; + table = &cls->tables[id]; + + /* calculate table entry size */ + entry_size = sizeof(struct rte_flow_classify_table_entry); + + /* Create the table */ + h_table = params->ops->f_create(params->arg_create, cls->socket_id, + entry_size); + if (h_table == NULL) { + RTE_FLOW_CLASSIFY_LOG(ERR, "%s: Table creation failed\n", + __func__); + return -EINVAL; + } + + /* Commit current table to the classifier */ + cls->num_tables++; + *table_id = id; + + /* Save input parameters */ + memcpy(&table->ops, params->ops, sizeof(struct rte_table_ops)); + + /* Initialize table internal data structure */ + table->entry_size = entry_size; + table->h_table = h_table; + + return 0; +} + +static struct rte_flow_classify_rule * +allocate_acl_ipv4_5tuple_rule(void) +{ + struct rte_flow_classify_rule *rule; + int log_level; + + rule = malloc(sizeof(struct rte_flow_classify_rule)); + if (!rule) + return rule; + + memset(rule, 0, sizeof(struct rte_flow_classify_rule)); + rule->id = unique_id++; + rule->rules.type = RTE_FLOW_CLASSIFY_RULE_TYPE_IPV4_5TUPLE; + + memcpy(&rule->action, classify_get_flow_action(), + sizeof(struct rte_flow_action)); + + /* key add values */ + rule->u.key.key_add.priority = ntuple_filter.priority; + 
rule->u.key.key_add.field_value[PROTO_FIELD_IPV4].mask_range.u8 = + ntuple_filter.proto_mask; + rule->u.key.key_add.field_value[PROTO_FIELD_IPV4].value.u8 = + ntuple_filter.proto; + rule->rules.u.ipv4_5tuple.proto = ntuple_filter.proto; + rule->rules.u.ipv4_5tuple.proto_mask = ntuple_filter.proto_mask; + + rule->u.key.key_add.field_value[SRC_FIELD_IPV4].mask_range.u32 = + ntuple_filter.src_ip_mask; + rule->u.key.key_add.field_value[SRC_FIELD_IPV4].value.u32 = + ntuple_filter.src_ip; + rule->rules.u.ipv4_5tuple.src_ip_mask = ntuple_filter.src_ip_mask; + rule->rules.u.ipv4_5tuple.src_ip = ntuple_filter.src_ip; + + rule->u.key.key_add.field_value[DST_FIELD_IPV4].mask_range.u32 = + ntuple_filter.dst_ip_mask; + rule->u.key.key_add.field_value[DST_FIELD_IPV4].value.u32 = + ntuple_filter.dst_ip; + rule->rules.u.ipv4_5tuple.dst_ip_mask = ntuple_filter.dst_ip_mask; + rule->rules.u.ipv4_5tuple.dst_ip = ntuple_filter.dst_ip; + + rule->u.key.key_add.field_value[SRCP_FIELD_IPV4].mask_range.u16 = + ntuple_filter.src_port_mask; + rule->u.key.key_add.field_value[SRCP_FIELD_IPV4].value.u16 = + ntuple_filter.src_port; + rule->rules.u.ipv4_5tuple.src_port_mask = ntuple_filter.src_port_mask; + rule->rules.u.ipv4_5tuple.src_port = ntuple_filter.src_port; + + rule->u.key.key_add.field_value[DSTP_FIELD_IPV4].mask_range.u16 = + ntuple_filter.dst_port_mask; + rule->u.key.key_add.field_value[DSTP_FIELD_IPV4].value.u16 = + ntuple_filter.dst_port; + rule->rules.u.ipv4_5tuple.dst_port_mask = ntuple_filter.dst_port_mask; + rule->rules.u.ipv4_5tuple.dst_port = ntuple_filter.dst_port; + + log_level = rte_log_get_level(librte_flow_classify_logtype); + + if (log_level == RTE_LOG_DEBUG) + print_acl_ipv4_key_add(&rule->u.key.key_add); + + /* key delete values */ + memcpy(&rule->u.key.key_del.field_value[PROTO_FIELD_IPV4], + &rule->u.key.key_add.field_value[PROTO_FIELD_IPV4], + NUM_FIELDS_IPV4 * sizeof(struct rte_acl_field)); + + if (log_level == RTE_LOG_DEBUG) + print_acl_ipv4_key_delete(&rule->u.key.key_del); + + return rule; +} + +struct rte_flow_classify_rule * +rte_flow_classify_table_entry_add(struct rte_flow_classifier *cls, + uint32_t table_id, + int *key_found, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_flow_classify_rule *rule; + struct rte_flow_classify_table_entry *table_entry; + int ret; + + if (!error) + return NULL; + + if (!cls) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "NULL classifier."); + return NULL; + } + + if (table_id >= cls->num_tables) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "invalid table_id."); + return NULL; + } + + if (key_found == NULL) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "NULL key_found."); + return NULL; + } + + if (!pattern) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return NULL; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return NULL; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return NULL; + } + + /* parse attr, pattern and actions */ + ret = flow_classify_parse_flow(attr, pattern, actions, error); + if (ret < 0) + return NULL; + + switch (cls->type) { + case RTE_FLOW_CLASSIFY_TABLE_TYPE_ACL: + rule = allocate_acl_ipv4_5tuple_rule(); + if (!rule) + 
return NULL; + break; + default: + return NULL; + } + + rule->entry = malloc(sizeof(struct rte_flow_classify_table_entry)); + if (!rule->entry) { + free(rule); + return NULL; + } + + table_entry = rule->entry; + table_entry->rule_id = rule->id; + + if (cls->tables[table_id].ops.f_add != NULL) { + ret = cls->tables[table_id].ops.f_add( + cls->tables[table_id].h_table, + &rule->u.key.key_add, + rule->entry, + &rule->key_found, + &rule->entry_ptr); + if (ret) { + free(rule->entry); + free(rule); + return NULL; + } + *key_found = rule->key_found; + } + return rule; +} + +int +rte_flow_classify_table_entry_delete(struct rte_flow_classifier *cls, + uint32_t table_id, + struct rte_flow_classify_rule *rule) +{ + int ret = -EINVAL; + + if (!cls || !rule || table_id >= cls->num_tables) + return ret; + + if (cls->tables[table_id].ops.f_delete != NULL) + ret = cls->tables[table_id].ops.f_delete( + cls->tables[table_id].h_table, + &rule->u.key.key_del, + &rule->key_found, + &rule->entry); + + return ret; +} + +static int +flow_classifier_lookup(struct rte_flow_classifier *cls, + uint32_t table_id, + struct rte_mbuf **pkts, + const uint16_t nb_pkts) +{ + int ret = -EINVAL; + uint64_t pkts_mask; + uint64_t lookup_hit_mask; + + pkts_mask = RTE_LEN2MASK(nb_pkts, uint64_t); + ret = cls->tables[table_id].ops.f_lookup( + cls->tables[table_id].h_table, + pkts, pkts_mask, &lookup_hit_mask, + (void **)cls->entries); + + if (!ret && lookup_hit_mask) + cls->nb_pkts = nb_pkts; + else + cls->nb_pkts = 0; + + return ret; +} + +static int +action_apply(struct rte_flow_classifier *cls, + struct rte_flow_classify_rule *rule, + struct rte_flow_classify_stats *stats) +{ + struct rte_flow_classify_ipv4_5tuple_stats *ntuple_stats; + uint64_t count = 0; + int i; + int ret = -EINVAL; + + switch (rule->action.type) { + case RTE_FLOW_ACTION_TYPE_COUNT: + for (i = 0; i < cls->nb_pkts; i++) { + if (rule->id == cls->entries[i]->rule_id) + count++; + } + if (count) { + ret = 0; + ntuple_stats = + (struct rte_flow_classify_ipv4_5tuple_stats *) + stats->stats; + ntuple_stats->counter1 = count; + ntuple_stats->ipv4_5tuple = rule->rules.u.ipv4_5tuple; + } + break; + default: + ret = -ENOTSUP; + break; + } + + return ret; +} + +int +rte_flow_classifier_query(struct rte_flow_classifier *cls, + uint32_t table_id, + struct rte_mbuf **pkts, + const uint16_t nb_pkts, + struct rte_flow_classify_rule *rule, + struct rte_flow_classify_stats *stats) +{ + int ret = -EINVAL; + + if (!cls || !rule || !stats || !pkts || nb_pkts == 0 || + table_id >= cls->num_tables) + return ret; + + ret = flow_classifier_lookup(cls, table_id, pkts, nb_pkts); + if (!ret) + ret = action_apply(cls, rule, stats); + return ret; +} + +RTE_INIT(librte_flow_classify_init_log); + +static void +librte_flow_classify_init_log(void) +{ + librte_flow_classify_logtype = + rte_log_register("librte.flow_classify"); + if (librte_flow_classify_logtype >= 0) + rte_log_set_level(librte_flow_classify_logtype, RTE_LOG_INFO); +} diff --git a/lib/librte_flow_classify/rte_flow_classify.h b/lib/librte_flow_classify/rte_flow_classify.h new file mode 100644 index 00000000..1211873a --- /dev/null +++ b/lib/librte_flow_classify/rte_flow_classify.h @@ -0,0 +1,289 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_FLOW_CLASSIFY_H_ +#define _RTE_FLOW_CLASSIFY_H_ + +/** + * @file + * + * RTE Flow Classify Library + * + * @b EXPERIMENTAL: this API may change without prior notice + * + * This library provides flow record information with some measured properties. + * + * Application should define the flow and measurement criteria (action) for it. + * + * The Library doesn't maintain any flow records itself, instead flow + * information is returned to upper layer only for given packets. + * + * It is application's responsibility to call rte_flow_classifier_query() + * for a burst of packets, just after receiving them or before transmitting + * them. + * Application should provide the flow type interested in, measurement to apply + * to that flow in rte_flow_classify_table_entry_add() API, and should provide + * the rte_flow_classifier object and storage to put results in for the + * rte_flow_classifier_query() API. + * + * Usage: + * - application calls rte_flow_classifier_create() to create an + * rte_flow_classifier object. + * - application calls rte_flow_classify_table_create() to create a table + * in the rte_flow_classifier object. + * - application calls rte_flow_classify_table_entry_add() to add a rule to + * the table in the rte_flow_classifier object. + * - application calls rte_flow_classifier_query() in a polling manner, + * preferably after rte_eth_rx_burst(). This will cause the library to + * match packet information to flow information with some measurements. + * - rte_flow_classifier object can be destroyed when it is no longer needed + * with rte_flow_classifier_free() + */ + +#include <rte_ethdev.h> +#include <rte_ether.h> +#include <rte_flow.h> +#include <rte_acl.h> +#include <rte_table_acl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern int librte_flow_classify_logtype; + +#define RTE_FLOW_CLASSIFY_LOG(level, fmt, args...) 
\ +rte_log(RTE_LOG_ ## level, librte_flow_classify_logtype, "%s(): " fmt, \ + __func__, ## args) + +/** Opaque data type for flow classifier */ +struct rte_flow_classifier; + +/** Opaque data type for flow classify rule */ +struct rte_flow_classify_rule; + +/** Flow classify rule type */ +enum rte_flow_classify_rule_type { + /** no type */ + RTE_FLOW_CLASSIFY_RULE_TYPE_NONE, + /** IPv4 5tuple type */ + RTE_FLOW_CLASSIFY_RULE_TYPE_IPV4_5TUPLE, +}; + +/** Flow classify table type */ +enum rte_flow_classify_table_type { + /** no type */ + RTE_FLOW_CLASSIFY_TABLE_TYPE_NONE, + /** ACL type */ + RTE_FLOW_CLASSIFY_TABLE_TYPE_ACL, +}; + +/** + * Maximum number of tables allowed for any Flow Classifier instance. + * The value of this parameter cannot be changed. + */ +#define RTE_FLOW_CLASSIFY_TABLE_MAX 64 + +/** Parameters for flow classifier creation */ +struct rte_flow_classifier_params { + /** flow classifier name */ + const char *name; + + /** CPU socket ID where memory for the flow classifier and its */ + /** elements (tables) should be allocated */ + int socket_id; + + /** Table type */ + enum rte_flow_classify_table_type type; +}; + +/** Parameters for table creation */ +struct rte_flow_classify_table_params { + /** Table operations (specific to each table type) */ + struct rte_table_ops *ops; + + /** Opaque param to be passed to the table create operation */ + void *arg_create; +}; + +/** IPv4 5-tuple data */ +struct rte_flow_classify_ipv4_5tuple { + uint32_t dst_ip; /**< Destination IP address in big endian. */ + uint32_t dst_ip_mask; /**< Mask of destination IP address. */ + uint32_t src_ip; /**< Source IP address in big endian. */ + uint32_t src_ip_mask; /**< Mask of destination IP address. */ + uint16_t dst_port; /**< Destination port in big endian. */ + uint16_t dst_port_mask; /**< Mask of destination port. */ + uint16_t src_port; /**< Source Port in big endian. */ + uint16_t src_port_mask; /**< Mask of source port. */ + uint8_t proto; /**< L4 protocol. */ + uint8_t proto_mask; /**< Mask of L4 protocol. */ +}; + +/** + * Flow stats + * + * For the count action, stats can be returned by the query API. + * + * Storage for stats is provided by application. + */ +struct rte_flow_classify_stats { + void *stats; +}; + +struct rte_flow_classify_ipv4_5tuple_stats { + /** count of packets that match IPv4 5tuple pattern */ + uint64_t counter1; + /** IPv4 5tuple data */ + struct rte_flow_classify_ipv4_5tuple ipv4_5tuple; +}; + +/** + * Flow classifier create + * + * @param params + * Parameters for flow classifier creation + * @return + * Handle to flow classifier instance on success or NULL otherwise + */ +struct rte_flow_classifier * +rte_flow_classifier_create(struct rte_flow_classifier_params *params); + +/** + * Flow classifier free + * + * @param cls + * Handle to flow classifier instance + * @return + * 0 on success, error code otherwise + */ +int +rte_flow_classifier_free(struct rte_flow_classifier *cls); + +/** + * Flow classify table create + * + * @param cls + * Handle to flow classifier instance + * @param params + * Parameters for flow_classify table creation + * @param table_id + * Table ID. Valid only within the scope of table IDs of the current + * classifier. Only returned after a successful invocation. + * @return + * 0 on success, error code otherwise + */ +int +rte_flow_classify_table_create(struct rte_flow_classifier *cls, + struct rte_flow_classify_table_params *params, + uint32_t *table_id); + +/** + * Add a flow classify rule to the flow_classifer table. 
+ * + * @param[in] cls + * Flow classifier handle + * @param[in] table_id + * id of table + * @param[out] key_found + * returns 1 if key present already, 0 otherwise. + * @param[in] attr + * Flow rule attributes + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END pattern item). + * @param[out] error + * Perform verbose error reporting if not NULL. Structure + * initialised in case of error only. + * @return + * A valid handle in case of success, NULL otherwise. + */ +struct rte_flow_classify_rule * +rte_flow_classify_table_entry_add(struct rte_flow_classifier *cls, + uint32_t table_id, + int *key_found, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); + +/** + * Delete a flow classify rule from the flow_classifer table. + * + * @param[in] cls + * Flow classifier handle + * @param[in] table_id + * id of table + * @param[in] rule + * Flow classify rule + * @return + * 0 on success, error code otherwise. + */ +int +rte_flow_classify_table_entry_delete(struct rte_flow_classifier *cls, + uint32_t table_id, + struct rte_flow_classify_rule *rule); + +/** + * Query flow classifier for given rule. + * + * @param[in] cls + * Flow classifier handle + * @param[in] table_id + * id of table + * @param[in] pkts + * Pointer to packets to process + * @param[in] nb_pkts + * Number of packets to process + * @param[in] rule + * Flow classify rule + * @param[in] stats + * Flow classify stats + * + * @return + * 0 on success, error code otherwise. + */ +int +rte_flow_classifier_query(struct rte_flow_classifier *cls, + uint32_t table_id, + struct rte_mbuf **pkts, + const uint16_t nb_pkts, + struct rte_flow_classify_rule *rule, + struct rte_flow_classify_stats *stats); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_FLOW_CLASSIFY_H_ */ diff --git a/lib/librte_flow_classify/rte_flow_classify_parse.c b/lib/librte_flow_classify/rte_flow_classify_parse.c new file mode 100644 index 00000000..dbfa1115 --- /dev/null +++ b/lib/librte_flow_classify/rte_flow_classify_parse.c @@ -0,0 +1,546 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
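/*
 * Usage sketch (illustrative, not part of the diff): the polling pattern the
 * header above describes, run on a burst obtained from rte_eth_rx_burst().
 * The classifier, table and rule are assumed to have been created with
 * rte_flow_classifier_create(), rte_flow_classify_table_create() and
 * rte_flow_classify_table_entry_add() as documented above.
 */
#include <inttypes.h>
#include <stdio.h>
#include <rte_flow_classify.h>

static void
count_rule_hits(struct rte_flow_classifier *cls, uint32_t table_id,
		struct rte_flow_classify_rule *rule,
		struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	struct rte_flow_classify_ipv4_5tuple_stats ntuple_stats;
	struct rte_flow_classify_stats stats = { .stats = &ntuple_stats };

	/* Returns 0 only when the lookup succeeded and the count action hit. */
	if (rte_flow_classifier_query(cls, table_id, pkts, nb_pkts,
			rule, &stats) == 0)
		printf("rule matched %" PRIu64 " packets in this burst\n",
		       ntuple_stats.counter1);
}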
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_flow_classify.h> +#include "rte_flow_classify_parse.h" +#include <rte_flow_driver.h> + +struct classify_valid_pattern { + enum rte_flow_item_type *items; + parse_filter_t parse_filter; +}; + +static struct rte_flow_action action; + +/* Pattern for IPv4 5-tuple UDP filter */ +static enum rte_flow_item_type pattern_ntuple_1[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, +}; + +/* Pattern for IPv4 5-tuple TCP filter */ +static enum rte_flow_item_type pattern_ntuple_2[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_TCP, + RTE_FLOW_ITEM_TYPE_END, +}; + +/* Pattern for IPv4 5-tuple SCTP filter */ +static enum rte_flow_item_type pattern_ntuple_3[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_SCTP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static int +classify_parse_ntuple_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ntuple_filter *filter, + struct rte_flow_error *error); + +static struct classify_valid_pattern classify_supported_patterns[] = { + /* ntuple */ + { pattern_ntuple_1, classify_parse_ntuple_filter }, + { pattern_ntuple_2, classify_parse_ntuple_filter }, + { pattern_ntuple_3, classify_parse_ntuple_filter }, +}; + +struct rte_flow_action * +classify_get_flow_action(void) +{ + return &action; +} + +/* Find the first VOID or non-VOID item pointer */ +const struct rte_flow_item * +classify_find_first_item(const struct rte_flow_item *item, bool is_void) +{ + bool is_find; + + while (item->type != RTE_FLOW_ITEM_TYPE_END) { + if (is_void) + is_find = item->type == RTE_FLOW_ITEM_TYPE_VOID; + else + is_find = item->type != RTE_FLOW_ITEM_TYPE_VOID; + if (is_find) + break; + item++; + } + return item; +} + +/* Skip all VOID items of the pattern */ +void +classify_pattern_skip_void_item(struct rte_flow_item *items, + const struct rte_flow_item *pattern) +{ + uint32_t cpy_count = 0; + const struct rte_flow_item *pb = pattern, *pe = pattern; + + for (;;) { + /* Find a non-void item first */ + pb = classify_find_first_item(pb, false); + if (pb->type == RTE_FLOW_ITEM_TYPE_END) { + pe = pb; + break; + } + + /* Find a void item */ + pe = classify_find_first_item(pb + 1, true); + + cpy_count = pe - pb; + rte_memcpy(items, pb, sizeof(struct rte_flow_item) * cpy_count); + + items += cpy_count; + + if (pe->type == RTE_FLOW_ITEM_TYPE_END) { + pb = pe; + break; + } + + pb = pe + 1; + } + /* Copy the END item. 
*/ + rte_memcpy(items, pe, sizeof(struct rte_flow_item)); +} + +/* Check if the pattern matches a supported item type array */ +static bool +classify_match_pattern(enum rte_flow_item_type *item_array, + struct rte_flow_item *pattern) +{ + struct rte_flow_item *item = pattern; + + while ((*item_array == item->type) && + (*item_array != RTE_FLOW_ITEM_TYPE_END)) { + item_array++; + item++; + } + + return (*item_array == RTE_FLOW_ITEM_TYPE_END && + item->type == RTE_FLOW_ITEM_TYPE_END); +} + +/* Find a parse filter function that matches the pattern */ +parse_filter_t +classify_find_parse_filter_func(struct rte_flow_item *pattern) +{ + parse_filter_t parse_filter = NULL; + uint8_t i = 0; + + for (; i < RTE_DIM(classify_supported_patterns); i++) { + if (classify_match_pattern(classify_supported_patterns[i].items, + pattern)) { + parse_filter = + classify_supported_patterns[i].parse_filter; + break; + } + } + + return parse_filter; +} + +#define FLOW_RULE_MIN_PRIORITY 8 +#define FLOW_RULE_MAX_PRIORITY 0 + +#define NEXT_ITEM_OF_PATTERN(item, pattern, index)\ + do {\ + item = pattern + index;\ + while (item->type == RTE_FLOW_ITEM_TYPE_VOID) {\ + index++;\ + item = pattern + index;\ + } \ + } while (0) + +#define NEXT_ITEM_OF_ACTION(act, actions, index)\ + do {\ + act = actions + index;\ + while (act->type == RTE_FLOW_ACTION_TYPE_VOID) {\ + index++;\ + act = actions + index;\ + } \ + } while (0) + +/** + * Please be aware that all the parsers share an assumption: + * rte_flow_item uses big endian, while rte_flow_attr and + * rte_flow_action use CPU order. + * Because the pattern is used to describe packets, + * the packets should normally use network order. + */ + +/** + * Parse the rule to see if it is an n-tuple rule, + * and fill in the n-tuple filter info along the way. + * pattern: + * The first not void item can be ETH or IPV4. + * The second not void item must be IPV4 if the first one is ETH. + * The third not void item must be UDP, TCP or SCTP. + * The next not void item must be END. + * action: + * The first not void action should be COUNT. + * The next not void action should be END. + * pattern example: + * ITEM Spec Mask + * ETH NULL NULL + * IPV4 src_addr 192.168.1.20 0xFFFFFFFF + * dst_addr 192.167.3.50 0xFFFFFFFF + * next_proto_id 17 0xFF + * UDP/TCP/ src_port 80 0xFFFF + * SCTP dst_port 80 0xFFFF + * END + * other members in mask and spec should be set to 0x00. + * item->last should be NULL.
+ */ +static int +classify_parse_ntuple_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ntuple_filter *filter, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_action *act; + const struct rte_flow_item_ipv4 *ipv4_spec; + const struct rte_flow_item_ipv4 *ipv4_mask; + const struct rte_flow_item_tcp *tcp_spec; + const struct rte_flow_item_tcp *tcp_mask; + const struct rte_flow_item_udp *udp_spec; + const struct rte_flow_item_udp *udp_mask; + const struct rte_flow_item_sctp *sctp_spec; + const struct rte_flow_item_sctp *sctp_mask; + uint32_t index; + + if (!pattern) { + rte_flow_error_set(error, + EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -EINVAL; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -EINVAL; + } + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -EINVAL; + } + + /* parse pattern */ + index = 0; + + /* the first not void item can be MAC or IPv4 */ + NEXT_ITEM_OF_PATTERN(item, pattern, index); + + if (item->type != RTE_FLOW_ITEM_TYPE_ETH && + item->type != RTE_FLOW_ITEM_TYPE_IPV4) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -EINVAL; + } + /* Skip Ethernet */ + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, + "Not supported last point for range"); + return -EINVAL; + + } + /* if the first item is MAC, the content should be NULL */ + if (item->spec || item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Not supported by ntuple filter"); + return -EINVAL; + } + /* check if the next not void item is IPv4 */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_IPV4) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Not supported by ntuple filter"); + return -EINVAL; + } + } + + /* get the IPv4 info */ + if (!item->spec || !item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid ntuple mask"); + return -EINVAL; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -EINVAL; + + } + + ipv4_mask = (const struct rte_flow_item_ipv4 *)item->mask; + /** + * Only support src & dst addresses, protocol, + * others should be masked. 
+ */ + if (ipv4_mask->hdr.version_ihl || + ipv4_mask->hdr.type_of_service || + ipv4_mask->hdr.total_length || + ipv4_mask->hdr.packet_id || + ipv4_mask->hdr.fragment_offset || + ipv4_mask->hdr.time_to_live || + ipv4_mask->hdr.hdr_checksum) { + rte_flow_error_set(error, + EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -EINVAL; + } + + filter->dst_ip_mask = ipv4_mask->hdr.dst_addr; + filter->src_ip_mask = ipv4_mask->hdr.src_addr; + filter->proto_mask = ipv4_mask->hdr.next_proto_id; + + ipv4_spec = (const struct rte_flow_item_ipv4 *)item->spec; + filter->dst_ip = ipv4_spec->hdr.dst_addr; + filter->src_ip = ipv4_spec->hdr.src_addr; + filter->proto = ipv4_spec->hdr.next_proto_id; + + /* check if the next not void item is TCP or UDP or SCTP */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_TCP && + item->type != RTE_FLOW_ITEM_TYPE_UDP && + item->type != RTE_FLOW_ITEM_TYPE_SCTP) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -EINVAL; + } + + /* get the TCP/UDP info */ + if (!item->spec || !item->mask) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid ntuple mask"); + return -EINVAL; + } + + /*Not supported last point for range*/ + if (item->last) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -EINVAL; + + } + + if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { + tcp_mask = (const struct rte_flow_item_tcp *)item->mask; + + /** + * Only support src & dst ports, tcp flags, + * others should be masked. + */ + if (tcp_mask->hdr.sent_seq || + tcp_mask->hdr.recv_ack || + tcp_mask->hdr.data_off || + tcp_mask->hdr.rx_win || + tcp_mask->hdr.cksum || + tcp_mask->hdr.tcp_urp) { + memset(filter, 0, + sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -EINVAL; + } + + filter->dst_port_mask = tcp_mask->hdr.dst_port; + filter->src_port_mask = tcp_mask->hdr.src_port; + if (tcp_mask->hdr.tcp_flags == 0xFF) { + filter->flags |= RTE_NTUPLE_FLAGS_TCP_FLAG; + } else if (!tcp_mask->hdr.tcp_flags) { + filter->flags &= ~RTE_NTUPLE_FLAGS_TCP_FLAG; + } else { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -EINVAL; + } + + tcp_spec = (const struct rte_flow_item_tcp *)item->spec; + filter->dst_port = tcp_spec->hdr.dst_port; + filter->src_port = tcp_spec->hdr.src_port; + filter->tcp_flags = tcp_spec->hdr.tcp_flags; + } else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { + udp_mask = (const struct rte_flow_item_udp *)item->mask; + + /** + * Only support src & dst ports, + * others should be masked. 
+ */ + if (udp_mask->hdr.dgram_len || + udp_mask->hdr.dgram_cksum) { + memset(filter, 0, + sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -EINVAL; + } + + filter->dst_port_mask = udp_mask->hdr.dst_port; + filter->src_port_mask = udp_mask->hdr.src_port; + + udp_spec = (const struct rte_flow_item_udp *)item->spec; + filter->dst_port = udp_spec->hdr.dst_port; + filter->src_port = udp_spec->hdr.src_port; + } else { + sctp_mask = (const struct rte_flow_item_sctp *)item->mask; + + /** + * Only support src & dst ports, + * others should be masked. + */ + if (sctp_mask->hdr.tag || + sctp_mask->hdr.cksum) { + memset(filter, 0, + sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -EINVAL; + } + + filter->dst_port_mask = sctp_mask->hdr.dst_port; + filter->src_port_mask = sctp_mask->hdr.src_port; + + sctp_spec = (const struct rte_flow_item_sctp *)item->spec; + filter->dst_port = sctp_spec->hdr.dst_port; + filter->src_port = sctp_spec->hdr.src_port; + } + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -EINVAL; + } + + /* parse action */ + index = 0; + + /** + * n-tuple only supports count, + * check if the first not void action is COUNT. + */ + memset(&action, 0, sizeof(action)); + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_COUNT) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -EINVAL; + } + action.type = RTE_FLOW_ACTION_TYPE_COUNT; + + /* check if the next not void action is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -EINVAL; + } + + /* parse attr */ + /* must be input direction */ + if (!attr->ingress) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only ingress is supported."); + return -EINVAL; + } + + /* not supported */ + if (attr->egress) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Egress is not supported."); + return -EINVAL; + } + + if (attr->priority > 0xFFFF) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Invalid priority."); + return -EINVAL; + } + filter->priority = (uint16_t)attr->priority; + /* clamp out-of-range values to the highest (numerically lowest) priority */ + if (attr->priority > FLOW_RULE_MIN_PRIORITY) + filter->priority = FLOW_RULE_MAX_PRIORITY; + + return 0; +} diff --git a/lib/librte_flow_classify/rte_flow_classify_parse.h b/lib/librte_flow_classify/rte_flow_classify_parse.h new file mode 100644 index 00000000..1d4708a7 --- /dev/null +++ b/lib/librte_flow_classify/rte_flow_classify_parse.h @@ -0,0 +1,74 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_FLOW_CLASSIFY_PARSE_H_ +#define _RTE_FLOW_CLASSIFY_PARSE_H_ + +#include <rte_ethdev.h> +#include <rte_ether.h> +#include <rte_flow.h> +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int (*parse_filter_t)(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ntuple_filter *filter, + struct rte_flow_error *error); + +/* Skip all VOID items of the pattern */ +void +classify_pattern_skip_void_item(struct rte_flow_item *items, + const struct rte_flow_item *pattern); + +/* Find the first VOID or non-VOID item pointer */ +const struct rte_flow_item * +classify_find_first_item(const struct rte_flow_item *item, bool is_void); + + +/* Find if there's parse filter function matched */ +parse_filter_t +classify_find_parse_filter_func(struct rte_flow_item *pattern); + +/* get action data */ +struct rte_flow_action * +classify_get_flow_action(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_FLOW_CLASSIFY_PARSE_H_ */ diff --git a/lib/librte_flow_classify/rte_flow_classify_version.map b/lib/librte_flow_classify/rte_flow_classify_version.map new file mode 100644 index 00000000..f7695cbf --- /dev/null +++ b/lib/librte_flow_classify/rte_flow_classify_version.map @@ -0,0 +1,12 @@ +EXPERIMENTAL { + global: + + rte_flow_classifier_create; + rte_flow_classifier_free; + rte_flow_classifier_query; + rte_flow_classify_table_create; + rte_flow_classify_table_entry_add; + rte_flow_classify_table_entry_delete; + + local: *; +}; diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile index 747eeec9..eb423ccb 100644 --- a/lib/librte_gro/Makefile +++ b/lib/librte_gro/Makefile @@ -36,6 +36,7 @@ LIB = librte_gro.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +LDLIBS += -lrte_eal -lrte_mbuf -lrte_ethdev -lrte_net EXPORT_MAP := rte_gro_version.map diff --git a/lib/librte_gro/rte_gro_version.map b/lib/librte_gro/rte_gro_version.map index bb40bb41..1606b6dc 100644 --- 
a/lib/librte_gro/rte_gro_version.map +++ b/lib/librte_gro/rte_gro_version.map @@ -1,8 +1,8 @@ DPDK_17.08 { global: - rte_gro_ctrl_create; - rte_gro_ctrl_destroy; + rte_gro_ctx_create; + rte_gro_ctx_destroy; rte_gro_get_pkt_count; rte_gro_reassemble; rte_gro_reassemble_burst; diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile new file mode 100644 index 00000000..ea5ad742 --- /dev/null +++ b/lib/librte_gso/Makefile @@ -0,0 +1,54 @@ +# BSD LICENSE +# +# Copyright(c) 2017 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_gso.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 +LDLIBS += -lrte_eal -lrte_mbuf -lrte_ethdev -lrte_net +LDLIBS += -lrte_mempool + +EXPORT_MAP := rte_gso_version.map + +LIBABIVER := 1 + +#source files +SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c +SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c +SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c +SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_gso/gso_common.c b/lib/librte_gso/gso_common.c new file mode 100644 index 00000000..ee75d4cd --- /dev/null +++ b/lib/librte_gso/gso_common.c @@ -0,0 +1,153 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdbool.h> +#include <errno.h> + +#include <rte_memcpy.h> +#include <rte_mempool.h> + +#include "gso_common.h" + +static inline void +hdr_segment_init(struct rte_mbuf *hdr_segment, struct rte_mbuf *pkt, + uint16_t pkt_hdr_offset) +{ + /* Copy MBUF metadata */ + hdr_segment->nb_segs = 1; + hdr_segment->port = pkt->port; + hdr_segment->ol_flags = pkt->ol_flags; + hdr_segment->packet_type = pkt->packet_type; + hdr_segment->pkt_len = pkt_hdr_offset; + hdr_segment->data_len = pkt_hdr_offset; + hdr_segment->tx_offload = pkt->tx_offload; + + /* Copy the packet header */ + rte_memcpy(rte_pktmbuf_mtod(hdr_segment, char *), + rte_pktmbuf_mtod(pkt, char *), + pkt_hdr_offset); +} + +static inline void +free_gso_segment(struct rte_mbuf **pkts, uint16_t nb_pkts) +{ + uint16_t i; + + for (i = 0; i < nb_pkts; i++) + rte_pktmbuf_free(pkts[i]); +} + +int +gso_do_segment(struct rte_mbuf *pkt, + uint16_t pkt_hdr_offset, + uint16_t pyld_unit_size, + struct rte_mempool *direct_pool, + struct rte_mempool *indirect_pool, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out) +{ + struct rte_mbuf *pkt_in; + struct rte_mbuf *hdr_segment, *pyld_segment, *prev_segment; + uint16_t pkt_in_data_pos, segment_bytes_remaining; + uint16_t pyld_len, nb_segs; + bool more_in_pkt, more_out_segs; + + pkt_in = pkt; + nb_segs = 0; + more_in_pkt = 1; + pkt_in_data_pos = pkt_hdr_offset; + + while (more_in_pkt) { + if (unlikely(nb_segs >= nb_pkts_out)) { + free_gso_segment(pkts_out, nb_segs); + return -EINVAL; + } + + /* Allocate a direct MBUF */ + hdr_segment = rte_pktmbuf_alloc(direct_pool); + if (unlikely(hdr_segment == NULL)) { + free_gso_segment(pkts_out, nb_segs); + return -ENOMEM; + } + /* Fill the packet header */ + hdr_segment_init(hdr_segment, pkt, pkt_hdr_offset); + + prev_segment = hdr_segment; + segment_bytes_remaining = pyld_unit_size; + more_out_segs = 1; + + while (more_out_segs && more_in_pkt) { + /* Allocate an indirect MBUF */ + pyld_segment = rte_pktmbuf_alloc(indirect_pool); + if (unlikely(pyld_segment == NULL)) { + rte_pktmbuf_free(hdr_segment); + free_gso_segment(pkts_out, nb_segs); + return -ENOMEM; + } + /* Attach to current MBUF segment of pkt */ + rte_pktmbuf_attach(pyld_segment, pkt_in); + + prev_segment->next = pyld_segment; + prev_segment = pyld_segment; + + pyld_len = segment_bytes_remaining; + if (pyld_len + pkt_in_data_pos > pkt_in->data_len) + pyld_len = pkt_in->data_len - pkt_in_data_pos; + + pyld_segment->data_off = pkt_in_data_pos + + pkt_in->data_off; + pyld_segment->data_len = pyld_len; + + /* 
Update header segment */ + hdr_segment->pkt_len += pyld_len; + hdr_segment->nb_segs++; + + pkt_in_data_pos += pyld_len; + segment_bytes_remaining -= pyld_len; + + /* Finish processing a MBUF segment of pkt */ + if (pkt_in_data_pos == pkt_in->data_len) { + pkt_in = pkt_in->next; + pkt_in_data_pos = 0; + if (pkt_in == NULL) + more_in_pkt = 0; + } + + /* Finish generating a GSO segment */ + if (segment_bytes_remaining == 0) + more_out_segs = 0; + } + pkts_out[nb_segs++] = hdr_segment; + } + return nb_segs; +} diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h new file mode 100644 index 00000000..145ea495 --- /dev/null +++ b/lib/librte_gso/gso_common.h @@ -0,0 +1,171 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _GSO_COMMON_H_ +#define _GSO_COMMON_H_ + +#include <stdint.h> + +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> + +#define IS_FRAGMENTED(frag_off) (((frag_off) & IPV4_HDR_OFFSET_MASK) != 0 \ + || ((frag_off) & IPV4_HDR_MF_FLAG) == IPV4_HDR_MF_FLAG) + +#define TCP_HDR_PSH_MASK ((uint8_t)0x08) +#define TCP_HDR_FIN_MASK ((uint8_t)0x01) + +#define IS_IPV4_TCP(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4)) == \ + (PKT_TX_TCP_SEG | PKT_TX_IPV4)) + +#define IS_IPV4_VXLAN_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \ + PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_VXLAN)) == \ + (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \ + PKT_TX_TUNNEL_VXLAN)) + +#define IS_IPV4_GRE_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \ + PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_GRE)) == \ + (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \ + PKT_TX_TUNNEL_GRE)) + +/** + * Internal function which updates the UDP header of a packet, following + * segmentation. This is required to update the header's datagram length field. + * + * @param pkt + * The packet containing the UDP header. + * @param udp_offset + * The offset of the UDP header from the start of the packet. 
+ */ +static inline void +update_udp_header(struct rte_mbuf *pkt, uint16_t udp_offset) +{ + struct udp_hdr *udp_hdr; + + udp_hdr = (struct udp_hdr *)(rte_pktmbuf_mtod(pkt, char *) + + udp_offset); + udp_hdr->dgram_len = rte_cpu_to_be_16(pkt->pkt_len - udp_offset); +} + +/** + * Internal function which updates the TCP header of a packet, following + * segmentation. This is required to update the header's 'sent' sequence + * number, and also to clear 'PSH' and 'FIN' flags for non-tail segments. + * + * @param pkt + * The packet containing the TCP header. + * @param l4_offset + * The offset of the TCP header from the start of the packet. + * @param sent_seq + * The sent sequence number. + * @param non_tail + * Set to a non-zero value for every segment except the tail segment. + */ +static inline void +update_tcp_header(struct rte_mbuf *pkt, uint16_t l4_offset, uint32_t sent_seq, + uint8_t non_tail) +{ + struct tcp_hdr *tcp_hdr; + + tcp_hdr = (struct tcp_hdr *)(rte_pktmbuf_mtod(pkt, char *) + + l4_offset); + tcp_hdr->sent_seq = rte_cpu_to_be_32(sent_seq); + if (likely(non_tail)) + tcp_hdr->tcp_flags &= (~(TCP_HDR_PSH_MASK | + TCP_HDR_FIN_MASK)); +} + +/** + * Internal function which updates the IPv4 header of a packet, following + * segmentation. This is required to update the header's 'total_length' field, + * to reflect the reduced length of the now-segmented packet. Furthermore, the + * header's 'packet_id' field must be updated to reflect the new ID of the + * now-segmented packet. + * + * @param pkt + * The packet containing the IPv4 header. + * @param l3_offset + * The offset of the IPv4 header from the start of the packet. + * @param id + * The new ID of the packet. + */ +static inline void +update_ipv4_header(struct rte_mbuf *pkt, uint16_t l3_offset, uint16_t id) +{ + struct ipv4_hdr *ipv4_hdr; + + ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + + l3_offset); + ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len - l3_offset); + ipv4_hdr->packet_id = rte_cpu_to_be_16(id); +} + +/** + * Internal function which divides the input packet into small segments. + * Each of the newly-created segments is organized as a two-segment MBUF, + * where the first segment is a standard mbuf, which stores a copy of + * the packet header, and the second is an indirect mbuf which points to a + * section of data in the input packet. + * + * @param pkt + * Packet to segment. + * @param pkt_hdr_offset + * Packet header offset, measured in bytes. + * @param pyld_unit_size + * The max payload length of a GSO segment. + * @param direct_pool + * MBUF pool used for allocating direct buffers for output segments. + * @param indirect_pool + * MBUF pool used for allocating indirect buffers for output segments. + * @param pkts_out + * Pointer array used to keep the mbuf addresses of output segments. If + * the memory space in pkts_out is insufficient, gso_do_segment() fails + * and returns -EINVAL. + * @param nb_pkts_out + * The max number of items that pkts_out can keep. + * + * @return + * - The number of segments created in the event of success. + * - Return -ENOMEM if it runs out of memory in the MBUF pools. + * - Return -EINVAL for invalid parameters.
+ */ +int gso_do_segment(struct rte_mbuf *pkt, + uint16_t pkt_hdr_offset, + uint16_t pyld_unit_size, + struct rte_mempool *direct_pool, + struct rte_mempool *indirect_pool, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out); +#endif diff --git a/lib/librte_gso/gso_tcp4.c b/lib/librte_gso/gso_tcp4.c new file mode 100644 index 00000000..0c628cb1 --- /dev/null +++ b/lib/librte_gso/gso_tcp4.c @@ -0,0 +1,102 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "gso_common.h" +#include "gso_tcp4.h" + +static void +update_ipv4_tcp_headers(struct rte_mbuf *pkt, uint8_t ipid_delta, + struct rte_mbuf **segs, uint16_t nb_segs) +{ + struct ipv4_hdr *ipv4_hdr; + struct tcp_hdr *tcp_hdr; + uint32_t sent_seq; + uint16_t id, tail_idx, i; + uint16_t l3_offset = pkt->l2_len; + uint16_t l4_offset = l3_offset + pkt->l3_len; + + ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char*) + + l3_offset); + tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len); + id = rte_be_to_cpu_16(ipv4_hdr->packet_id); + sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); + tail_idx = nb_segs - 1; + + for (i = 0; i < nb_segs; i++) { + update_ipv4_header(segs[i], l3_offset, id); + update_tcp_header(segs[i], l4_offset, sent_seq, i < tail_idx); + id += ipid_delta; + sent_seq += (segs[i]->pkt_len - segs[i]->data_len); + } +} + +int +gso_tcp4_segment(struct rte_mbuf *pkt, + uint16_t gso_size, + uint8_t ipid_delta, + struct rte_mempool *direct_pool, + struct rte_mempool *indirect_pool, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out) +{ + struct ipv4_hdr *ipv4_hdr; + uint16_t pyld_unit_size, hdr_offset; + uint16_t frag_off; + int ret; + + /* Don't process the fragmented packet */ + ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + + pkt->l2_len); + frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset); + if (unlikely(IS_FRAGMENTED(frag_off))) { + pkts_out[0] = pkt; + return 1; + } + + /* Don't process the packet without data */ + hdr_offset = pkt->l2_len + pkt->l3_len + pkt->l4_len; + if (unlikely(hdr_offset >= pkt->pkt_len)) { + pkts_out[0] = pkt; + return 1; + } + + pyld_unit_size = gso_size - hdr_offset; + + /* Segment the payload */ + ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool, + indirect_pool, pkts_out, nb_pkts_out); + if (ret > 1) + update_ipv4_tcp_headers(pkt, ipid_delta, pkts_out, ret); + + return ret; +} diff --git a/lib/librte_gso/gso_tcp4.h b/lib/librte_gso/gso_tcp4.h new file mode 100644 index 00000000..1c574412 --- /dev/null +++ b/lib/librte_gso/gso_tcp4.h @@ -0,0 +1,74 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
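In gso_tcp4_segment() above, each output segment carries at most pyld_unit_size = gso_size - (l2_len + l3_len + l4_len) bytes of payload, and gso_do_segment() fails with -EINVAL if pkts_out cannot hold all the resulting segments. A small helper sketch (hypothetical, not part of the patch) that an application could use to budget the pkts_out array:

#include <rte_mbuf.h>

/* Hypothetical helper: number of output segments a TCP/IPv4 packet will
 * produce for a given gso_size, mirroring the pyld_unit_size arithmetic
 * in gso_tcp4_segment() above.
 */
static inline uint32_t
gso_tcp4_nb_segments(const struct rte_mbuf *pkt, uint16_t gso_size)
{
	uint16_t hdr_offset = pkt->l2_len + pkt->l3_len + pkt->l4_len;
	uint32_t pyld_unit_size, pyld_len;

	if (gso_size <= hdr_offset || pkt->pkt_len <= hdr_offset)
		return 1;	/* packet is passed through unsegmented */

	pyld_unit_size = gso_size - hdr_offset;
	pyld_len = pkt->pkt_len - hdr_offset;
	/* Round up: with 14 + 20 + 20 bytes of headers and gso_size 1518,
	 * pyld_unit_size is 1464, so a 4000-byte payload needs 3 segments. */
	return (pyld_len + pyld_unit_size - 1) / pyld_unit_size;
}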
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _GSO_TCP4_H_ +#define _GSO_TCP4_H_ + +#include <stdint.h> +#include <rte_mbuf.h> + +/** + * Segment an IPv4/TCP packet. This function doesn't check if the input + * packet has correct checksums, and doesn't update checksums for output + * GSO segments. Furthermore, it doesn't process IP fragment packets. + * + * @param pkt + * The packet mbuf to segment. + * @param gso_size + * The max length of a GSO segment, measured in bytes. + * @param ipid_delta + * The increment applied to the IPv4 ID of each successive output segment. + * @param direct_pool + * MBUF pool used for allocating direct buffers for output segments. + * @param indirect_pool + * MBUF pool used for allocating indirect buffers for output segments. + * @param pkts_out + * Pointer array used to store the MBUF addresses of output GSO + * segments, when the function succeeds. If the memory space in + * pkts_out is insufficient, it fails and returns -EINVAL. + * @param nb_pkts_out + * The max number of items that 'pkts_out' can keep. + * + * @return + * - The number of GSO segments filled in pkts_out on success. + * - Return -ENOMEM if it runs out of memory in the MBUF pools. + * - Return -EINVAL for invalid parameters. + */ +int gso_tcp4_segment(struct rte_mbuf *pkt, + uint16_t gso_size, + uint8_t ipid_delta, + struct rte_mempool *direct_pool, + struct rte_mempool *indirect_pool, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out); +#endif diff --git a/lib/librte_gso/gso_tunnel_tcp4.c b/lib/librte_gso/gso_tunnel_tcp4.c new file mode 100644 index 00000000..8d0cfd7a --- /dev/null +++ b/lib/librte_gso/gso_tunnel_tcp4.c @@ -0,0 +1,126 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "gso_common.h" +#include "gso_tunnel_tcp4.h" + +static void +update_tunnel_ipv4_tcp_headers(struct rte_mbuf *pkt, uint8_t ipid_delta, + struct rte_mbuf **segs, uint16_t nb_segs) +{ + struct ipv4_hdr *ipv4_hdr; + struct tcp_hdr *tcp_hdr; + uint32_t sent_seq; + uint16_t outer_id, inner_id, tail_idx, i; + uint16_t outer_ipv4_offset, inner_ipv4_offset; + uint16_t udp_gre_offset, tcp_offset; + uint8_t update_udp_hdr; + + outer_ipv4_offset = pkt->outer_l2_len; + udp_gre_offset = outer_ipv4_offset + pkt->outer_l3_len; + inner_ipv4_offset = udp_gre_offset + pkt->l2_len; + tcp_offset = inner_ipv4_offset + pkt->l3_len; + + /* Outer IPv4 header. */ + ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + + outer_ipv4_offset); + outer_id = rte_be_to_cpu_16(ipv4_hdr->packet_id); + + /* Inner IPv4 header. */ + ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + + inner_ipv4_offset); + inner_id = rte_be_to_cpu_16(ipv4_hdr->packet_id); + + tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len); + sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); + tail_idx = nb_segs - 1; + + /* Only update UDP header for VxLAN packets. */ + update_udp_hdr = (pkt->ol_flags & PKT_TX_TUNNEL_VXLAN) ? 1 : 0; + + for (i = 0; i < nb_segs; i++) { + update_ipv4_header(segs[i], outer_ipv4_offset, outer_id); + if (update_udp_hdr) + update_udp_header(segs[i], udp_gre_offset); + update_ipv4_header(segs[i], inner_ipv4_offset, inner_id); + update_tcp_header(segs[i], tcp_offset, sent_seq, i < tail_idx); + outer_id++; + inner_id += ipid_delta; + sent_seq += (segs[i]->pkt_len - segs[i]->data_len); + } +} + +int +gso_tunnel_tcp4_segment(struct rte_mbuf *pkt, + uint16_t gso_size, + uint8_t ipid_delta, + struct rte_mempool *direct_pool, + struct rte_mempool *indirect_pool, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out) +{ + struct ipv4_hdr *inner_ipv4_hdr; + uint16_t pyld_unit_size, hdr_offset, frag_off; + int ret = 1; + + hdr_offset = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len; + inner_ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + + hdr_offset); + /* + * Don't process the packet whose MF bit or offset in the inner + * IPv4 header are non-zero. 
+ */ + frag_off = rte_be_to_cpu_16(inner_ipv4_hdr->fragment_offset); + if (unlikely(IS_FRAGMENTED(frag_off))) { + pkts_out[0] = pkt; + return 1; + } + + hdr_offset += pkt->l3_len + pkt->l4_len; + /* Don't process the packet without data */ + if (hdr_offset >= pkt->pkt_len) { + pkts_out[0] = pkt; + return 1; + } + pyld_unit_size = gso_size - hdr_offset; + + /* Segment the payload */ + ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool, + indirect_pool, pkts_out, nb_pkts_out); + if (ret <= 1) + return ret; + + update_tunnel_ipv4_tcp_headers(pkt, ipid_delta, pkts_out, ret); + + return ret; +} diff --git a/lib/librte_gso/gso_tunnel_tcp4.h b/lib/librte_gso/gso_tunnel_tcp4.h new file mode 100644 index 00000000..3c67f0cd --- /dev/null +++ b/lib/librte_gso/gso_tunnel_tcp4.h @@ -0,0 +1,75 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _GSO_TUNNEL_TCP4_H_ +#define _GSO_TUNNEL_TCP4_H_ + +#include <stdint.h> +#include <rte_mbuf.h> + +/** + * Segment a tunneling packet with inner TCP/IPv4 headers. This function + * doesn't check if the input packet has correct checksums, and doesn't + * update checksums for output GSO segments. Furthermore, it doesn't + * process IP fragment packets. + * + * @param pkt + * The packet mbuf to segment. + * @param gso_size + * The max length of a GSO segment, measured in bytes. + * @param ipid_delta + * The increasing unit of IP ids. + * @param direct_pool + * MBUF pool used for allocating direct buffers for output segments. + * @param indirect_pool + * MBUF pool used for allocating indirect buffers for output segments. + * @param pkts_out + * Pointer array used to store the MBUF addresses of output GSO + * segments, when it succeeds. If the memory space in pkts_out is + * insufficient, it fails and returns -EINVAL. + * @param nb_pkts_out + * The max number of items that 'pkts_out' can keep. + * + * @return + * - The number of GSO segments filled in pkts_out on success. 
+ * - Return -ENOMEM if run out of memory in MBUF pools. + * - Return -EINVAL for invalid parameters. + */ +int gso_tunnel_tcp4_segment(struct rte_mbuf *pkt, + uint16_t gso_size, + uint8_t ipid_delta, + struct rte_mempool *direct_pool, + struct rte_mempool *indirect_pool, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out); +#endif diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c new file mode 100644 index 00000000..f86e6541 --- /dev/null +++ b/lib/librte_gso/rte_gso.c @@ -0,0 +1,110 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
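Worked example (illustrative, not part of the patch) of the mbuf length fields gso_tunnel_tcp4_segment() expects, following the offset arithmetic in update_tunnel_ipv4_tcp_headers() above; the header sizes assume a plain VXLAN-in-IPv4 encapsulation with no options:

/*
 * For a VXLAN-encapsulated TCP/IPv4 packet the length fields are set as
 * for hardware tunnel TSO, i.e. l2_len covers the tunnel headers plus the
 * inner Ethernet header:
 *
 *   outer_l2_len = 14                 outer Ethernet
 *   outer_l3_len = 20                 outer IPv4
 *   l2_len       = 8 + 8 + 14 = 30    outer UDP + VXLAN + inner Ethernet
 *   l3_len       = 20                 inner IPv4
 *   l4_len       = 20                 inner TCP
 *
 * update_tunnel_ipv4_tcp_headers() then finds the outer IPv4 header at
 * offset 14, the UDP header at 34, the inner IPv4 header at 64 and the
 * inner TCP header at 84, and gso_tunnel_tcp4_segment() leaves
 * gso_size - 104 bytes of payload for each output segment.
 */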
+ */ + +#include <errno.h> + +#include <rte_log.h> +#include <rte_ethdev.h> + +#include "rte_gso.h" +#include "gso_common.h" +#include "gso_tcp4.h" +#include "gso_tunnel_tcp4.h" + +int +rte_gso_segment(struct rte_mbuf *pkt, + const struct rte_gso_ctx *gso_ctx, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out) +{ + struct rte_mempool *direct_pool, *indirect_pool; + struct rte_mbuf *pkt_seg; + uint64_t ol_flags; + uint16_t gso_size; + uint8_t ipid_delta; + int ret = 1; + + if (pkt == NULL || pkts_out == NULL || gso_ctx == NULL || + nb_pkts_out < 1 || + gso_ctx->gso_size < RTE_GSO_SEG_SIZE_MIN || + ((gso_ctx->gso_types & (DEV_TX_OFFLOAD_TCP_TSO | + DEV_TX_OFFLOAD_VXLAN_TNL_TSO | + DEV_TX_OFFLOAD_GRE_TNL_TSO)) == 0)) + return -EINVAL; + + if (gso_ctx->gso_size >= pkt->pkt_len) { + pkt->ol_flags &= (~PKT_TX_TCP_SEG); + pkts_out[0] = pkt; + return 1; + } + + direct_pool = gso_ctx->direct_pool; + indirect_pool = gso_ctx->indirect_pool; + gso_size = gso_ctx->gso_size; + ipid_delta = (gso_ctx->flag != RTE_GSO_FLAG_IPID_FIXED); + ol_flags = pkt->ol_flags; + + if ((IS_IPV4_VXLAN_TCP4(pkt->ol_flags) && + (gso_ctx->gso_types & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)) || + ((IS_IPV4_GRE_TCP4(pkt->ol_flags) && + (gso_ctx->gso_types & DEV_TX_OFFLOAD_GRE_TNL_TSO)))) { + pkt->ol_flags &= (~PKT_TX_TCP_SEG); + ret = gso_tunnel_tcp4_segment(pkt, gso_size, ipid_delta, + direct_pool, indirect_pool, + pkts_out, nb_pkts_out); + } else if (IS_IPV4_TCP(pkt->ol_flags) && + (gso_ctx->gso_types & DEV_TX_OFFLOAD_TCP_TSO)) { + pkt->ol_flags &= (~PKT_TX_TCP_SEG); + ret = gso_tcp4_segment(pkt, gso_size, ipid_delta, + direct_pool, indirect_pool, + pkts_out, nb_pkts_out); + } else { + /* unsupported packet, skip */ + pkts_out[0] = pkt; + RTE_LOG(DEBUG, GSO, "Unsupported packet type\n"); + return 1; + } + + if (ret > 1) { + pkt_seg = pkt; + while (pkt_seg) { + rte_mbuf_refcnt_update(pkt_seg, -1); + pkt_seg = pkt_seg->next; + } + } else if (ret < 0) { + /* Revert the ol_flags in the event of failure. */ + pkt->ol_flags = ol_flags; + } + + return ret; +} diff --git a/lib/librte_gso/rte_gso.h b/lib/librte_gso/rte_gso.h new file mode 100644 index 00000000..4b77176f --- /dev/null +++ b/lib/librte_gso/rte_gso.h @@ -0,0 +1,148 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
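Usage sketch (illustrative, not part of the patch) of driving rte_gso_segment() above from an application, using the rte_gso_ctx structure declared in rte_gso.h below. Pool sizes, names, the port/queue handling and the 1500-byte gso_size are arbitrary choices; creating the indirect pool with a zero data room is an assumption of this sketch, since indirect buffers only reference payload held in the input packet.

#include <rte_common.h>
#include <rte_ethdev.h>
#include <rte_gso.h>
#include <rte_mbuf.h>

#define GSO_MAX_SEGS 64

static struct rte_gso_ctx gso_ctx;

static int
gso_ctx_setup(int socket_id)
{
	/* Direct buffers hold per-segment copies of the packet headers. */
	gso_ctx.direct_pool = rte_pktmbuf_pool_create("gso_direct", 8191, 256,
			0, RTE_MBUF_DEFAULT_BUF_SIZE, socket_id);
	/* Indirect buffers carry no data of their own (assumption: zero data room). */
	gso_ctx.indirect_pool = rte_pktmbuf_pool_create("gso_indirect", 8191,
			256, 0, 0, socket_id);
	if (gso_ctx.direct_pool == NULL || gso_ctx.indirect_pool == NULL)
		return -1;
	gso_ctx.gso_types = DEV_TX_OFFLOAD_TCP_TSO;
	gso_ctx.gso_size = 1500;	/* max length of each output segment */
	gso_ctx.flag = 0;		/* incremental IP ids */
	return 0;
}

static uint16_t
gso_xmit(uint16_t port_id, uint16_t queue_id, struct rte_mbuf *pkt)
{
	struct rte_mbuf *segs[GSO_MAX_SEGS];
	int nb_segs;
	uint16_t sent;

	/* As for hardware TSO, pkt->ol_flags must carry PKT_TX_TCP_SEG and
	 * PKT_TX_IPV4, and the l2/l3/l4 length fields must be filled in. */
	nb_segs = rte_gso_segment(pkt, &gso_ctx, segs, RTE_DIM(segs));
	if (nb_segs < 0) {
		rte_pktmbuf_free(pkt);
		return 0;
	}
	sent = rte_eth_tx_burst(port_id, queue_id, segs, nb_segs);
	while (sent < (uint16_t)nb_segs)	/* free what the port refused */
		rte_pktmbuf_free(segs[sent++]);
	return sent;
}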
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_GSO_H_ +#define _RTE_GSO_H_ + +/** + * @file + * Interface to GSO library + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> +#include <rte_mbuf.h> + +/* Minimum GSO segment size. */ +#define RTE_GSO_SEG_SIZE_MIN (sizeof(struct ether_hdr) + \ + sizeof(struct ipv4_hdr) + sizeof(struct tcp_hdr) + 1) + +/* GSO flags for rte_gso_ctx. */ +#define RTE_GSO_FLAG_IPID_FIXED (1ULL << 0) +/**< Use fixed IP ids for output GSO segments. If this flag is + * not set, incremental IP ids are used. + */ + +/** + * GSO context structure. + */ +struct rte_gso_ctx { + struct rte_mempool *direct_pool; + /**< MBUF pool for allocating direct buffers, which are used + * to store packet headers for GSO segments. + */ + struct rte_mempool *indirect_pool; + /**< MBUF pool for allocating indirect buffers, which are used + * to locate packet payloads for GSO segments. The indirect + * buffer doesn't contain any data, but simply points to an + * offset within the packet to segment. + */ + uint64_t flag; + /**< flag that controls specific attributes of output segments, + * such as the type of IP ID generated (i.e. fixed or incremental). + */ + uint32_t gso_types; + /**< the bit mask of required GSO types. The GSO library + * uses the same macros that describe device TX offloading + * capabilities (i.e. DEV_TX_OFFLOAD_*_TSO) for gso_types. + * + * For example, if applications want to segment TCP/IPv4 + * packets, set DEV_TX_OFFLOAD_TCP_TSO in gso_types. + */ + uint16_t gso_size; + /**< maximum size of an output GSO segment, including packet + * header and payload, measured in bytes. Must not be less + * than RTE_GSO_SEG_SIZE_MIN. + */ +}; + +/** + * Segmentation function, which supports processing of both single- and + * multi-MBUF packets. + * + * Note that we refer to the packets that are segmented from the input + * packet as 'GSO segments'. rte_gso_segment() doesn't check if the + * input packet has correct checksums, and doesn't update checksums for + * output GSO segments. Additionally, it doesn't process IP fragment + * packets. + * + * Before calling rte_gso_segment(), applications must set proper ol_flags + * for the packet. The GSO library uses the same ol_flags macros as TSO. + * For example, set PKT_TX_TCP_SEG and PKT_TX_IPV4 in ol_flags to segment + * a TCP/IPv4 packet. If rte_gso_segment() succeeds, the PKT_TX_TCP_SEG + * flag is removed for all GSO segments and the input packet. + * + * Each of the newly-created GSO segments is organized as a two-segment + * MBUF, where the first segment is a standard MBUF, which stores a copy + * of the packet header, and the second is an indirect MBUF which points to + * a section of data in the input packet. Since each GSO segment has + * multiple MBUFs (i.e. typically 2 MBUFs), the driver of the interface to + * which the GSO segments are sent should support transmission of + * multi-segment packets. + * + * If the input packet is GSO'd, its mbuf refcnt reduces by 1.
Therefore, + * when all GSO segments are freed, the input packet is freed automatically. + * + * If the memory space in pkts_out or MBUF pools is insufficient, this + * function fails, and it returns (-1) * errno. Otherwise, GSO succeeds, + * and this function returns the number of output GSO segments filled in + * pkts_out. + * + * @param pkt + * The packet mbuf to segment. + * @param ctx + * GSO context object pointer. + * @param pkts_out + * Pointer array used to store the MBUF addresses of output GSO + * segments, when rte_gso_segment() succeeds. + * @param nb_pkts_out + * The max number of items that pkts_out can keep. + * + * @return + * - The number of GSO segments filled in pkts_out on success. + * - Return -ENOMEM if run out of memory in MBUF pools. + * - Return -EINVAL for invalid parameters. + */ +int rte_gso_segment(struct rte_mbuf *pkt, + const struct rte_gso_ctx *ctx, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out); +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_GSO_H_ */ diff --git a/lib/librte_gso/rte_gso_version.map b/lib/librte_gso/rte_gso_version.map new file mode 100644 index 00000000..e1fd453e --- /dev/null +++ b/lib/librte_gso/rte_gso_version.map @@ -0,0 +1,7 @@ +DPDK_17.11 { + global: + + rte_gso_segment; + + local: *; +}; diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile index 9cf13a04..1655b601 100644 --- a/lib/librte_hash/Makefile +++ b/lib/librte_hash/Makefile @@ -36,6 +36,7 @@ LIB = librte_hash.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +LDLIBS += -lrte_eal -lrte_ring EXPORT_MAP := rte_hash_version.map diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h index 774428be..a3c216bb 100644 --- a/lib/librte_hash/rte_crc_arm64.h +++ b/lib/librte_hash/rte_crc_arm64.h @@ -116,8 +116,7 @@ rte_hash_crc_set_alg(uint8_t alg) } /* Setting the best available algorithm */ -static inline void __attribute__((constructor)) -rte_hash_crc_init_alg(void) +RTE_INIT(rte_hash_crc_init_alg) { rte_hash_crc_set_alg(CRC32_ARM64); } diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 87b25c01..55fd7bdc 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -44,7 +44,6 @@ #include <rte_memcpy.h> #include <rte_prefetch.h> #include <rte_branch_prediction.h> -#include <rte_memzone.h> #include <rte_malloc.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> @@ -417,9 +416,9 @@ rte_hash_reset(struct rte_hash *h) /* Search for an entry that can be pushed to its alternative location */ static inline int -make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) +make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt, + unsigned int *nr_pushes) { - static unsigned int nr_pushes; unsigned i, j; int ret; uint32_t next_bucket_idx; @@ -456,15 +455,14 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) break; /* All entries have been pushed, so entry cannot be added */ - if (i == RTE_HASH_BUCKET_ENTRIES || nr_pushes > RTE_HASH_MAX_PUSHES) + if (i == RTE_HASH_BUCKET_ENTRIES || ++(*nr_pushes) > RTE_HASH_MAX_PUSHES) return -ENOSPC; /* Set flag to indicate that this entry is going to be pushed */ bkt->flag[i] = 1; - nr_pushes++; /* Need room in alternative bucket to insert the pushed entry */ - ret = make_space_bucket(h, next_bkt[i]); + ret = make_space_bucket(h, next_bkt[i], nr_pushes); /* * After recursive function. 
* Clear flags and insert the pushed entry @@ -472,7 +470,6 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) * or return error */ bkt->flag[i] = 0; - nr_pushes = 0; if (ret >= 0) { next_bkt[i]->sig_alt[ret] = bkt->sig_current[i]; next_bkt[i]->sig_current[ret] = bkt->sig_alt[i]; @@ -515,6 +512,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, unsigned n_slots; unsigned lcore_id; struct lcore_cache *cached_free_slots = NULL; + unsigned int nr_pushes = 0; if (h->add_key == ADD_KEY_MULTIWRITER) rte_spinlock_lock(h->multiwriter_lock); @@ -648,7 +646,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, * if successful or return error and * store the new slot back in the ring */ - ret = make_space_bucket(h, prim_bkt); + ret = make_space_bucket(h, prim_bkt, &nr_pushes); if (ret >= 0) { prim_bkt->sig_current[ret] = sig; prim_bkt->sig_alt[ret] = alt_hash; diff --git a/lib/librte_hash/rte_fbk_hash.c b/lib/librte_hash/rte_fbk_hash.c index 55c9f358..c87719fb 100644 --- a/lib/librte_hash/rte_fbk_hash.c +++ b/lib/librte_hash/rte_fbk_hash.c @@ -39,7 +39,6 @@ #include <sys/queue.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> #include <rte_malloc.h> diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h index ea6be522..4f815aea 100644 --- a/lib/librte_hash/rte_hash_crc.h +++ b/lib/librte_hash/rte_hash_crc.h @@ -480,8 +480,7 @@ rte_hash_crc_set_alg(uint8_t alg) } /* Setting the best available algorithm */ -static inline void __attribute__((constructor)) -rte_hash_crc_init_alg(void) +RTE_INIT(rte_hash_crc_init_alg) { rte_hash_crc_set_alg(CRC32_SSE42_x64); } diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h index 207478c2..3eca1385 100644 --- a/lib/librte_hash/rte_jhash.h +++ b/lib/librte_hash/rte_jhash.h @@ -290,7 +290,10 @@ rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t /** * The most generic version, hashes an arbitrary sequence * of bytes. No alignment or length assumptions are made about - * the input key. + * the input key. For keys not aligned to four byte boundaries + * or a multiple of four bytes in length, the memory region + * just after may be read (but not used in the computation). + * This may cross a page boundary. * * @param key * Key to calculate hash of. 
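The reworded rte_jhash() comment above warns that hashing a key which is unaligned or not a multiple of four bytes long may read (but not use) a few bytes past the key, possibly crossing a page boundary. A caller that wants to rule out touching unmapped memory can keep its keys in aligned, padded storage; a minimal sketch of that mitigation (hypothetical, not part of the patch):

#include <string.h>
#include <rte_common.h>
#include <rte_jhash.h>
#include <rte_malloc.h>

/* Copy the key into 4-byte aligned storage padded to a multiple of 4 bytes,
 * so the trailing read described above can never leave the allocation. The
 * hash value is unchanged because the padding bytes are not used in the
 * computation. Real code would keep keys in such storage up front instead
 * of copying on every hash.
 */
static uint32_t
jhash_padded(const void *key, uint32_t key_len, uint32_t init_val)
{
	uint32_t padded_len = RTE_ALIGN_CEIL(key_len, sizeof(uint32_t));
	uint32_t hash;
	void *buf;

	buf = rte_zmalloc(NULL, padded_len, sizeof(uint32_t));
	if (buf == NULL)
		return 0;	/* error handling elided */
	memcpy(buf, key, key_len);
	hash = rte_jhash(buf, key_len, init_val);
	rte_free(buf);
	return hash;
}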
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index 2fffd61d..4fa5e07a 100644 --- a/lib/librte_hash/rte_thash.h +++ b/lib/librte_hash/rte_thash.h @@ -207,15 +207,14 @@ static inline uint32_t rte_softrss(uint32_t *input_tuple, uint32_t input_len, const uint8_t *rss_key) { - uint32_t i, j, ret = 0; + uint32_t i, j, map, ret = 0; for (j = 0; j < input_len; j++) { - for (i = 0; i < 32; i++) { - if (input_tuple[j] & (1 << (31 - i))) { - ret ^= rte_cpu_to_be_32(((const uint32_t *)rss_key)[j]) << i | + for (map = input_tuple[j]; map; map &= (map - 1)) { + i = rte_bsf32(map); + ret ^= rte_cpu_to_be_32(((const uint32_t *)rss_key)[j]) << (31 - i) | (uint32_t)((uint64_t)(rte_cpu_to_be_32(((const uint32_t *)rss_key)[j + 1])) >> - (32 - i)); - } + (i + 1)); } } return ret; @@ -238,14 +237,13 @@ static inline uint32_t rte_softrss_be(uint32_t *input_tuple, uint32_t input_len, const uint8_t *rss_key) { - uint32_t i, j, ret = 0; + uint32_t i, j, map, ret = 0; for (j = 0; j < input_len; j++) { - for (i = 0; i < 32; i++) { - if (input_tuple[j] & (1 << (31 - i))) { - ret ^= ((const uint32_t *)rss_key)[j] << i | - (uint32_t)((uint64_t)(((const uint32_t *)rss_key)[j + 1]) >> (32 - i)); - } + for (map = input_tuple[j]; map; map &= (map - 1)) { + i = rte_bsf32(map); + ret ^= ((const uint32_t *)rss_key)[j] << (31 - i) | + (uint32_t)((uint64_t)(((const uint32_t *)rss_key)[j + 1]) >> (i + 1)); } } return ret; diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile index 4e693bf8..aff94b8c 100644 --- a/lib/librte_ip_frag/Makefile +++ b/lib/librte_ip_frag/Makefile @@ -36,8 +36,10 @@ LIB = librte_ip_frag.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev +LDLIBS += -lrte_hash -EXPORT_MAP := rte_ipfrag_version.map +EXPORT_MAP := rte_ip_frag_version.map LIBABIVER := 1 diff --git a/lib/librte_ip_frag/rte_ipfrag_version.map b/lib/librte_ip_frag/rte_ip_frag_version.map index d1acf07c..d1acf07c 100644 --- a/lib/librte_ip_frag/rte_ipfrag_version.map +++ b/lib/librte_ip_frag/rte_ip_frag_version.map diff --git a/lib/librte_jobstats/Makefile b/lib/librte_jobstats/Makefile index 561a0678..d0bddd12 100644 --- a/lib/librte_jobstats/Makefile +++ b/lib/librte_jobstats/Makefile @@ -36,6 +36,7 @@ LIB = librte_jobstats.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +LDLIBS += -lrte_eal EXPORT_MAP := rte_jobstats_version.map diff --git a/lib/librte_jobstats/rte_jobstats.h b/lib/librte_jobstats/rte_jobstats.h index 7e76fd50..70e034ca 100644 --- a/lib/librte_jobstats/rte_jobstats.h +++ b/lib/librte_jobstats/rte_jobstats.h @@ -117,7 +117,7 @@ struct rte_jobstats_context { /**< Minimum loop execute time. */ uint64_t max_exec_time; - /**< Minimum loop execute time. */ + /**< Maximum loop execute time. 
*/ /** * Sum of time that is not the execute time (ex: from job finish to next diff --git a/lib/librte_kni/Makefile b/lib/librte_kni/Makefile index 70f1ca8f..56b19760 100644 --- a/lib/librte_kni/Makefile +++ b/lib/librte_kni/Makefile @@ -35,6 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_kni.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev EXPORT_MAP := rte_kni_version.map diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c index 8c483c1f..5ee38e9a 100644 --- a/lib/librte_kni/rte_kni.c +++ b/lib/librte_kni/rte_kni.c @@ -456,7 +456,7 @@ va2pa(struct rte_mbuf *m) { return (void *)((unsigned long)m - ((unsigned long)m->buf_addr - - (unsigned long)m->buf_physaddr)); + (unsigned long)m->buf_iova)); } static void diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h index 37deb472..d1950791 100644 --- a/lib/librte_kni/rte_kni.h +++ b/lib/librte_kni/rte_kni.h @@ -63,13 +63,13 @@ struct rte_mbuf; * Structure which has the function pointers for KNI interface. */ struct rte_kni_ops { - uint8_t port_id; /* Port ID */ + uint16_t port_id; /* Port ID */ /* Pointer to function of changing MTU */ - int (*change_mtu)(uint8_t port_id, unsigned new_mtu); + int (*change_mtu)(uint16_t port_id, unsigned int new_mtu); /* Pointer to function of configuring network interface */ - int (*config_network_if)(uint8_t port_id, uint8_t if_up); + int (*config_network_if)(uint16_t port_id, uint8_t if_up); }; /** @@ -118,7 +118,7 @@ void rte_kni_init(unsigned int max_kni_ifaces); * elements for each KNI interface allocated. * * @param pktmbuf_pool - * The mempool for allocting mbufs for packets. + * The mempool for allocating mbufs for packets. * @param conf * The pointer to the configurations of the KNI device. 
* @param ops diff --git a/lib/librte_kvargs/Makefile b/lib/librte_kvargs/Makefile index 564dd310..4eaa9334 100644 --- a/lib/librte_kvargs/Makefile +++ b/lib/librte_kvargs/Makefile @@ -37,6 +37,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_kvargs.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 +LDLIBS += -lrte_eal EXPORT_MAP := rte_kvargs_version.map diff --git a/lib/librte_latencystats/Makefile b/lib/librte_latencystats/Makefile index eaacbb73..665c7b41 100644 --- a/lib/librte_latencystats/Makefile +++ b/lib/librte_latencystats/Makefile @@ -36,6 +36,7 @@ LIB = librte_latencystats.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 LDLIBS += -lm LDLIBS += -lpthread +LDLIBS += -lrte_eal -lrte_metrics -lrte_ethdev -lrte_mbuf EXPORT_MAP := rte_latencystats_version.map diff --git a/lib/librte_latencystats/rte_latencystats.c b/lib/librte_latencystats/rte_latencystats.c index ce029a12..d6ad13c4 100644 --- a/lib/librte_latencystats/rte_latencystats.c +++ b/lib/librte_latencystats/rte_latencystats.c @@ -135,7 +135,7 @@ rte_latencystats_fill_values(struct rte_metric_value *values) } static uint16_t -add_time_stamps(uint8_t pid __rte_unused, +add_time_stamps(uint16_t pid __rte_unused, uint16_t qid __rte_unused, struct rte_mbuf **pkts, uint16_t nb_pkts, @@ -165,7 +165,7 @@ add_time_stamps(uint8_t pid __rte_unused, } static uint16_t -calc_latency(uint8_t pid __rte_unused, +calc_latency(uint16_t pid __rte_unused, uint16_t qid __rte_unused, struct rte_mbuf **pkts, uint16_t nb_pkts, @@ -226,10 +226,10 @@ rte_latencystats_init(uint64_t app_samp_intvl, rte_latency_stats_flow_type_fn user_cb) { unsigned int i; - uint8_t pid; + uint16_t pid; uint16_t qid; struct rxtx_cbs *cbs = NULL; - const uint8_t nb_ports = rte_eth_dev_count(); + const uint16_t nb_ports = rte_eth_dev_count(); const char *ptr_strings[NUM_LATENCY_STATS] = {0}; const struct rte_memzone *mz = NULL; const unsigned int flags = 0; @@ -290,11 +290,11 @@ rte_latencystats_init(uint64_t app_samp_intvl, int rte_latencystats_uninit(void) { - uint8_t pid; + uint16_t pid; uint16_t qid; int ret = 0; struct rxtx_cbs *cbs = NULL; - const uint8_t nb_ports = rte_eth_dev_count(); + const uint16_t nb_ports = rte_eth_dev_count(); /** De register Rx/Tx callbacks */ for (pid = 0; pid < nb_ports; pid++) { diff --git a/lib/librte_lpm/Makefile b/lib/librte_lpm/Makefile index 32be46b3..2e8749e8 100644 --- a/lib/librte_lpm/Makefile +++ b/lib/librte_lpm/Makefile @@ -36,6 +36,7 @@ LIB = librte_lpm.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +LDLIBS += -lrte_eal EXPORT_MAP := rte_lpm_version.map diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c index 64c074e9..e1f1fad5 100644 --- a/lib/librte_lpm/rte_lpm.c +++ b/lib/librte_lpm/rte_lpm.c @@ -43,7 +43,6 @@ #include <rte_common.h> #include <rte_memory.h> /* for definition of RTE_CACHE_LINE_SIZE */ #include <rte_malloc.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> #include <rte_per_lcore.h> @@ -218,6 +217,7 @@ rte_lpm_create_v20(const char *name, int socket_id, int max_rules, te = rte_zmalloc("LPM_TAILQ_ENTRY", sizeof(*te), 0); if (te == NULL) { RTE_LOG(ERR, LPM, "Failed to allocate tailq entry\n"); + rte_errno = ENOMEM; goto exit; } @@ -227,6 +227,7 @@ rte_lpm_create_v20(const char *name, int socket_id, int max_rules, if (lpm == NULL) { RTE_LOG(ERR, LPM, "LPM memory allocation failed\n"); rte_free(te); + rte_errno = ENOMEM; goto exit; } @@ -292,6 +293,7 @@ rte_lpm_create_v1604(const char *name, int socket_id, te = rte_zmalloc("LPM_TAILQ_ENTRY", sizeof(*te), 0); if (te == 
NULL) { RTE_LOG(ERR, LPM, "Failed to allocate tailq entry\n"); + rte_errno = ENOMEM; goto exit; } @@ -301,6 +303,7 @@ rte_lpm_create_v1604(const char *name, int socket_id, if (lpm == NULL) { RTE_LOG(ERR, LPM, "LPM memory allocation failed\n"); rte_free(te); + rte_errno = ENOMEM; goto exit; } @@ -312,6 +315,7 @@ rte_lpm_create_v1604(const char *name, int socket_id, rte_free(lpm); lpm = NULL; rte_free(te); + rte_errno = ENOMEM; goto exit; } @@ -324,6 +328,7 @@ rte_lpm_create_v1604(const char *name, int socket_id, rte_free(lpm); lpm = NULL; rte_free(te); + rte_errno = ENOMEM; goto exit; } diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c index b4a7df34..03668d9e 100644 --- a/lib/librte_lpm/rte_lpm6.c +++ b/lib/librte_lpm/rte_lpm6.c @@ -42,7 +42,6 @@ #include <rte_common.h> #include <rte_memory.h> #include <rte_malloc.h> -#include <rte_memzone.h> #include <rte_memcpy.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> @@ -191,6 +190,7 @@ rte_lpm6_create(const char *name, int socket_id, te = rte_zmalloc("LPM6_TAILQ_ENTRY", sizeof(*te), 0); if (te == NULL) { RTE_LOG(ERR, LPM, "Failed to allocate tailq entry!\n"); + rte_errno = ENOMEM; goto exit; } @@ -201,6 +201,7 @@ rte_lpm6_create(const char *name, int socket_id, if (lpm == NULL) { RTE_LOG(ERR, LPM, "LPM memory allocation failed\n"); rte_free(te); + rte_errno = ENOMEM; goto exit; } @@ -212,6 +213,7 @@ rte_lpm6_create(const char *name, int socket_id, rte_free(lpm); lpm = NULL; rte_free(te); + rte_errno = ENOMEM; goto exit; } @@ -518,7 +520,7 @@ rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, uint32_t next_hop) { struct rte_lpm6_tbl_entry *tbl; - struct rte_lpm6_tbl_entry *tbl_next; + struct rte_lpm6_tbl_entry *tbl_next = NULL; int32_t rule_index; int status; uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE]; diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile index 54827305..f6be3536 100644 --- a/lib/librte_mbuf/Makefile +++ b/lib/librte_mbuf/Makefile @@ -35,6 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_mbuf.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 +LDLIBS += -lrte_eal -lrte_mempool EXPORT_MAP := rte_mbuf_version.map diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c index 26a62b8e..2e08b9e9 100644 --- a/lib/librte_mbuf/rte_mbuf.c +++ b/lib/librte_mbuf/rte_mbuf.c @@ -46,7 +46,6 @@ #include <rte_common.h> #include <rte_log.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_per_lcore.h> @@ -135,7 +134,7 @@ rte_pktmbuf_init(struct rte_mempool *mp, /* start of buffer is after mbuf structure and priv data */ m->priv_size = priv_size; m->buf_addr = (char *)m + mbuf_size; - m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size; + m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size; m->buf_len = (uint16_t)buf_len; /* keep some headroom between start of buffer and data */ @@ -144,7 +143,7 @@ rte_pktmbuf_init(struct rte_mempool *mp, /* init some constant fields */ m->pool = mp; m->nb_segs = 1; - m->port = 0xff; + m->port = MBUF_INVALID_PORT; rte_mbuf_refcnt_set(m, 1); m->next = NULL; } @@ -157,6 +156,7 @@ rte_pktmbuf_pool_create(const char *name, unsigned n, { struct rte_mempool *mp; struct rte_pktmbuf_pool_private mbp_priv; + const char *mp_ops_name; unsigned elt_size; int ret; @@ -176,8 +176,8 @@ rte_pktmbuf_pool_create(const char *name, unsigned n, if (mp == NULL) return NULL; - ret = rte_mempool_set_ops_byname(mp, - RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL); + mp_ops_name = rte_eal_mbuf_default_mempool_ops(); + ret 
= rte_mempool_set_ops_byname(mp, mp_ops_name, NULL); if (ret != 0) { RTE_LOG(ERR, MBUF, "error setting mempool handler\n"); rte_mempool_free(mp); @@ -211,8 +211,8 @@ rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header) /* generic checks */ if (m->pool == NULL) rte_panic("bad mbuf pool\n"); - if (m->buf_physaddr == 0) - rte_panic("bad phys addr\n"); + if (m->buf_iova == 0) + rte_panic("bad IO addr\n"); if (m->buf_addr == NULL) rte_panic("bad virt addr\n"); @@ -243,8 +243,8 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len) __rte_mbuf_sanity_check(m, 1); - fprintf(f, "dump mbuf at %p, phys=%"PRIx64", buf_len=%u\n", - m, (uint64_t)m->buf_physaddr, (unsigned)m->buf_len); + fprintf(f, "dump mbuf at %p, iova=%"PRIx64", buf_len=%u\n", + m, (uint64_t)m->buf_iova, (unsigned)m->buf_len); fprintf(f, " pkt_len=%"PRIu32", ol_flags=%"PRIx64", nb_segs=%u, " "in_port=%u\n", m->pkt_len, m->ol_flags, (unsigned)m->nb_segs, (unsigned)m->port); @@ -307,7 +307,7 @@ const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off, const char *rte_get_rx_ol_flag_name(uint64_t mask) { switch (mask) { - case PKT_RX_VLAN_PKT: return "PKT_RX_VLAN_PKT"; + case PKT_RX_VLAN: return "PKT_RX_VLAN"; case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH"; case PKT_RX_FDIR: return "PKT_RX_FDIR"; case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD"; @@ -323,6 +323,8 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask) case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED"; case PKT_RX_LRO: return "PKT_RX_LRO"; case PKT_RX_TIMESTAMP: return "PKT_RX_TIMESTAMP"; + case PKT_RX_SEC_OFFLOAD: return "PKT_RX_SEC_OFFLOAD"; + case PKT_RX_SEC_OFFLOAD_FAILED: return "PKT_RX_SEC_OFFLOAD_FAILED"; default: return NULL; } } @@ -338,7 +340,7 @@ int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) { const struct flag_mask rx_flags[] = { - { PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, NULL }, + { PKT_RX_VLAN, PKT_RX_VLAN, NULL }, { PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, NULL }, { PKT_RX_FDIR, PKT_RX_FDIR, NULL }, { PKT_RX_L4_CKSUM_BAD, PKT_RX_L4_CKSUM_MASK, NULL }, @@ -358,6 +360,9 @@ rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) { PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL }, { PKT_RX_LRO, PKT_RX_LRO, NULL }, { PKT_RX_TIMESTAMP, PKT_RX_TIMESTAMP, NULL }, + { PKT_RX_SEC_OFFLOAD, PKT_RX_SEC_OFFLOAD, NULL }, + { PKT_RX_SEC_OFFLOAD_FAILED, PKT_RX_SEC_OFFLOAD_FAILED, NULL }, + { PKT_RX_QINQ, PKT_RX_QINQ, NULL }, }; const char *name; unsigned int i; @@ -410,6 +415,7 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask) case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE"; case PKT_TX_TUNNEL_MPLSINUDP: return "PKT_TX_TUNNEL_MPLSINUDP"; case PKT_TX_MACSEC: return "PKT_TX_MACSEC"; + case PKT_TX_SEC_OFFLOAD: return "PKT_TX_SEC_OFFLOAD"; default: return NULL; } } @@ -443,6 +449,7 @@ rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) { PKT_TX_TUNNEL_MPLSINUDP, PKT_TX_TUNNEL_MASK, "PKT_TX_TUNNEL_NONE" }, { PKT_TX_MACSEC, PKT_TX_MACSEC, NULL }, + { PKT_TX_SEC_OFFLOAD, PKT_TX_SEC_OFFLOAD, NULL }, }; const char *name; unsigned int i; diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index eaed7eee..6d91f7d3 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -89,12 +89,13 @@ extern "C" { */ /** - * RX packet is a 802.1q VLAN packet. This flag was set by PMDs when - * the packet is recognized as a VLAN, but the behavior between PMDs - * was not the same. 
This flag is kept for some time to avoid breaking - * applications and should be replaced by PKT_RX_VLAN_STRIPPED. + * The RX packet is a 802.1q VLAN packet, and the tci has been + * saved in in mbuf->vlan_tci. + * If the flag PKT_RX_VLAN_STRIPPED is also present, the VLAN + * header has been stripped from mbuf data, else it is still + * present. */ -#define PKT_RX_VLAN_PKT (1ULL << 0) +#define PKT_RX_VLAN (1ULL << 0) #define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */ #define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */ @@ -123,6 +124,7 @@ extern "C" { * A vlan has been stripped by the hardware and its tci is saved in * mbuf->vlan_tci. This can only happen if vlan stripping is enabled * in the RX configuration of the PMD. + * When PKT_RX_VLAN_STRIPPED is set, PKT_RX_VLAN must also be set. */ #define PKT_RX_VLAN_STRIPPED (1ULL << 6) @@ -165,19 +167,13 @@ extern "C" { * The 2 vlans have been stripped by the hardware and their tci are * saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer). * This can only happen if vlan stripping is enabled in the RX - * configuration of the PMD. If this flag is set, PKT_RX_VLAN_STRIPPED - * must also be set. + * configuration of the PMD. If this flag is set, + * When PKT_RX_QINQ_STRIPPED is set, the flags (PKT_RX_VLAN | + * PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ) must also be set. */ #define PKT_RX_QINQ_STRIPPED (1ULL << 15) /** - * Deprecated. - * RX packet with double VLAN stripped. - * This flag is replaced by PKT_RX_QINQ_STRIPPED. - */ -#define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED - -/** * When packets are coalesced by a hardware or virtual driver, this flag * can be set in the RX mbuf, meaning that the m->tso_segsz field is * valid and is set to the segment size of original packets. @@ -189,11 +185,35 @@ extern "C" { */ #define PKT_RX_TIMESTAMP (1ULL << 17) +/** + * Indicate that security offload processing was applied on the RX packet. + */ +#define PKT_RX_SEC_OFFLOAD (1ULL << 18) + +/** + * Indicate that security offload processing failed on the RX packet. + */ +#define PKT_RX_SEC_OFFLOAD_FAILED (1ULL << 19) + +/** + * The RX packet is a double VLAN, and the outer tci has been + * saved in in mbuf->vlan_tci_outer. + * If the flag PKT_RX_QINQ_STRIPPED is also present, both VLANs + * headers have been stripped from mbuf data, else they are still + * present. + */ +#define PKT_RX_QINQ (1ULL << 20) + /* add new RX flags here */ /* add new TX flags here */ /** + * Request security offload processing on the TX packet. + */ +#define PKT_TX_SEC_OFFLOAD (1ULL << 43) + +/** * Offload the MACsec. This flag must be set by the application to enable * this offload feature for a packet to be transmitted. */ @@ -316,7 +336,8 @@ extern "C" { PKT_TX_QINQ_PKT | \ PKT_TX_VLAN_PKT | \ PKT_TX_TUNNEL_MASK | \ - PKT_TX_MACSEC) + PKT_TX_MACSEC | \ + PKT_TX_SEC_OFFLOAD) #define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */ @@ -411,7 +432,11 @@ struct rte_mbuf { * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes * working on vector drivers easier. */ - phys_addr_t buf_physaddr __rte_aligned(sizeof(phys_addr_t)); + RTE_STD_C11 + union { + rte_iova_t buf_iova; + rte_iova_t buf_physaddr; /**< deprecated */ + } __rte_aligned(sizeof(rte_iova_t)); /* next 8 bytes are initialised on RX descriptor rearm */ MARKER64 rearm_data; @@ -456,8 +481,21 @@ struct rte_mbuf { uint32_t l3_type:4; /**< (Outer) L3 type. */ uint32_t l4_type:4; /**< (Outer) L4 type. */ uint32_t tun_type:4; /**< Tunnel type. 
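Since PKT_RX_VLAN_PKT is renamed to PKT_RX_VLAN earlier in this hunk and its meaning is tightened, an application-side sketch of how the two VLAN flags are intended to be read (illustrative only, not part of the patch):

	/* PKT_RX_VLAN means m->vlan_tci is valid; PKT_RX_VLAN_STRIPPED further
	 * indicates whether the VLAN header was removed from the packet data. */
	#include <rte_mbuf.h>

	static uint16_t
	rx_vlan_tci(const struct rte_mbuf *m)
	{
		if (!(m->ol_flags & PKT_RX_VLAN))
			return 0;	/* no VLAN recognized on this packet */
		/* TCI is valid here whether or not the header was stripped */
		return m->vlan_tci;
	}
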
*/ - uint32_t inner_l2_type:4; /**< Inner L2 type. */ - uint32_t inner_l3_type:4; /**< Inner L3 type. */ + RTE_STD_C11 + union { + uint8_t inner_esp_next_proto; + /**< ESP next protocol type, valid if + * RTE_PTYPE_TUNNEL_ESP tunnel type is set + * on both Tx and Rx. + */ + __extension__ + struct { + uint8_t inner_l2_type:4; + /**< Inner L2 type. */ + uint8_t inner_l3_type:4; + /**< Inner L3 type. */ + }; + }; uint32_t inner_l4_type:4; /**< Inner L4 type. */ }; }; @@ -587,21 +625,28 @@ rte_mbuf_prefetch_part2(struct rte_mbuf *m) static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp); /** - * Return the DMA address of the beginning of the mbuf data + * Return the IO address of the beginning of the mbuf data * * @param mb * The pointer to the mbuf. * @return - * The physical address of the beginning of the mbuf data + * The IO address of the beginning of the mbuf data */ +static inline rte_iova_t +rte_mbuf_data_iova(const struct rte_mbuf *mb) +{ + return mb->buf_iova + mb->data_off; +} + +__rte_deprecated static inline phys_addr_t rte_mbuf_data_dma_addr(const struct rte_mbuf *mb) { - return mb->buf_physaddr + mb->data_off; + return rte_mbuf_data_iova(mb); } /** - * Return the default DMA address of the beginning of the mbuf data + * Return the default IO address of the beginning of the mbuf data * * This function is used by drivers in their receive function, as it * returns the location where data should be written by the NIC, taking @@ -610,12 +655,19 @@ rte_mbuf_data_dma_addr(const struct rte_mbuf *mb) * @param mb * The pointer to the mbuf. * @return - * The physical address of the beginning of the mbuf data + * The IO address of the beginning of the mbuf data */ +static inline rte_iova_t +rte_mbuf_data_iova_default(const struct rte_mbuf *mb) +{ + return mb->buf_iova + RTE_PKTMBUF_HEADROOM; +} + +__rte_deprecated static inline phys_addr_t rte_mbuf_data_dma_addr_default(const struct rte_mbuf *mb) { - return mb->buf_physaddr + RTE_PKTMBUF_HEADROOM; + return rte_mbuf_data_iova_default(mb); } /** @@ -806,7 +858,7 @@ rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header); * For standard needs, prefer rte_pktmbuf_alloc(). * * The caller can expect that the following fields of the mbuf structure - * are initialized: buf_addr, buf_physaddr, buf_len, refcnt=1, nb_segs=1, + * are initialized: buf_addr, buf_iova, buf_len, refcnt=1, nb_segs=1, * next=NULL, pool, priv_size. The other fields must be initialized * by the caller. * @@ -1087,6 +1139,8 @@ static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m) * @param m * The packet mbuf to be resetted. 
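The deprecation of the physical-address helpers above amounts to a one-line change on the driver side. A hedged sketch, where rearm_descriptor() and the raw 64-bit descriptor write stand in for a hypothetical PMD, not a real one:

	/* Switch from the deprecated DMA-address helper to the IO-address helper. */
	#include <rte_mbuf.h>

	static inline void
	rearm_descriptor(volatile uint64_t *rxd, struct rte_mbuf *m)
	{
		/* before: *rxd = rte_mbuf_data_dma_addr_default(m);  (deprecated) */
		*rxd = rte_mbuf_data_iova_default(m);
	}
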
*/ +#define MBUF_INVALID_PORT UINT16_MAX + static inline void rte_pktmbuf_reset(struct rte_mbuf *m) { m->next = NULL; @@ -1095,7 +1149,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m) m->vlan_tci = 0; m->vlan_tci_outer = 0; m->nb_segs = 1; - m->port = 0xff; + m->port = MBUF_INVALID_PORT; m->ol_flags = 0; m->packet_type = 0; @@ -1214,7 +1268,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m) rte_mbuf_refcnt_update(md, 1); mi->priv_size = m->priv_size; - mi->buf_physaddr = m->buf_physaddr; + mi->buf_iova = m->buf_iova; mi->buf_addr = m->buf_addr; mi->buf_len = m->buf_len; @@ -1262,7 +1316,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m) m->priv_size = priv_size; m->buf_addr = (char *)m + mbuf_size; - m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size; + m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size; m->buf_len = (uint16_t)buf_len; rte_pktmbuf_reset_headroom(m); m->data_len = 0; @@ -1524,7 +1578,7 @@ static inline struct rte_mbuf *rte_pktmbuf_lastseg(struct rte_mbuf *m) #define rte_pktmbuf_mtod(m, t) rte_pktmbuf_mtod_offset(m, t, 0) /** - * A macro that returns the physical address that points to an offset of the + * A macro that returns the IO address that points to an offset of the * start of the data in the mbuf * * @param m @@ -1532,17 +1586,24 @@ static inline struct rte_mbuf *rte_pktmbuf_lastseg(struct rte_mbuf *m) * @param o * The offset into the data to calculate address from. */ +#define rte_pktmbuf_iova_offset(m, o) \ + (rte_iova_t)((m)->buf_iova + (m)->data_off + (o)) + +/* deprecated */ #define rte_pktmbuf_mtophys_offset(m, o) \ - (phys_addr_t)((m)->buf_physaddr + (m)->data_off + (o)) + rte_pktmbuf_iova_offset(m, o) /** - * A macro that returns the physical address that points to the start of the + * A macro that returns the IO address that points to the start of the * data in the mbuf * * @param m * The packet mbuf. */ -#define rte_pktmbuf_mtophys(m) rte_pktmbuf_mtophys_offset(m, 0) +#define rte_pktmbuf_iova(m) rte_pktmbuf_iova_offset(m, 0) + +/* deprecated */ +#define rte_pktmbuf_mtophys(m) rte_pktmbuf_iova(m) /** * A macro that returns the length of the packet. diff --git a/lib/librte_mbuf/rte_mbuf_ptype.c b/lib/librte_mbuf/rte_mbuf_ptype.c index e5c4fae3..a623226c 100644 --- a/lib/librte_mbuf/rte_mbuf_ptype.c +++ b/lib/librte_mbuf/rte_mbuf_ptype.c @@ -89,6 +89,9 @@ const char *rte_get_ptype_tunnel_name(uint32_t ptype) case RTE_PTYPE_TUNNEL_NVGRE: return "TUNNEL_NVGRE"; case RTE_PTYPE_TUNNEL_GENEVE: return "TUNNEL_GENEVE"; case RTE_PTYPE_TUNNEL_GRENAT: return "TUNNEL_GRENAT"; + case RTE_PTYPE_TUNNEL_GTPC: return "TUNNEL_GTPC"; + case RTE_PTYPE_TUNNEL_GTPU: return "TUNNEL_GTPU"; + case RTE_PTYPE_TUNNEL_ESP: return "TUNNEL_ESP"; default: return "TUNNEL_UNKNOWN"; } } diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h index acd70bb6..5c62435c 100644 --- a/lib/librte_mbuf/rte_mbuf_ptype.h +++ b/lib/librte_mbuf/rte_mbuf_ptype.h @@ -383,6 +383,49 @@ extern "C" { */ #define RTE_PTYPE_TUNNEL_GRENAT 0x00006000 /** + * GTP-C (GPRS Tunnelling Protocol) control tunneling packet type. 
+ * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'destination port'=2123> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'destination port'=2123> + * or, + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'source port'=2123> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'source port'=2123> + */ +#define RTE_PTYPE_TUNNEL_GTPC 0x00007000 +/** + * GTP-U (GPRS Tunnelling Protocol) user data tunneling packet type. + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'destination port'=2152> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'destination port'=2152> + */ +#define RTE_PTYPE_TUNNEL_GTPU 0x00008000 +/** + * ESP (IP Encapsulating Security Payload) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=51> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=51> + */ +#define RTE_PTYPE_TUNNEL_ESP 0x00009000 +/** * Mask of tunneling packet types. */ #define RTE_PTYPE_TUNNEL_MASK 0x0000f000 diff --git a/lib/librte_eal/linuxapp/xen_dom0/Makefile b/lib/librte_member/Makefile index be51a82a..f4cf101e 100644 --- a/lib/librte_eal/linuxapp/xen_dom0/Makefile +++ b/lib/librte_member/Makefile @@ -1,6 +1,6 @@ # BSD LICENSE # -# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# Copyright(c) 2017 Intel Corporation. All rights reserved. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -31,23 +31,22 @@ include $(RTE_SDK)/mk/rte.vars.mk -# -# module name and path -# -MODULE = rte_dom0_mm +# library name +LIB = librte_member.a -# -# CFLAGS -# -MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50 -MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h -MODULE_CFLAGS += -Wall -Werror +CFLAGS := -I$(SRCDIR) $(CFLAGS) +CFLAGS += $(WERROR_FLAGS) -O3 -# -# all source are stored in SRCS-y -# +LDLIBS += -lm +LDLIBS += -lrte_eal -lrte_hash + +EXPORT_MAP := rte_member_version.map -SRCS-y += dom0_mm_misc.c +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_MEMBER) += rte_member.c rte_member_ht.c rte_member_vbf.c +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_MEMBER)-include := rte_member.h -include $(RTE_SDK)/mk/rte.module.mk +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_member/rte_member.c b/lib/librte_member/rte_member.c new file mode 100644 index 00000000..cc9ea84a --- /dev/null +++ b/lib/librte_member/rte_member.c @@ -0,0 +1,336 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string.h> + +#include <rte_eal.h> +#include <rte_eal_memconfig.h> +#include <rte_memory.h> +#include <rte_malloc.h> +#include <rte_errno.h> + +#include "rte_member.h" +#include "rte_member_ht.h" +#include "rte_member_vbf.h" + +int librte_member_logtype; + +TAILQ_HEAD(rte_member_list, rte_tailq_entry); +static struct rte_tailq_elem rte_member_tailq = { + .name = "RTE_MEMBER", +}; +EAL_REGISTER_TAILQ(rte_member_tailq) + +struct rte_member_setsum * +rte_member_find_existing(const char *name) +{ + struct rte_member_setsum *setsum = NULL; + struct rte_tailq_entry *te; + struct rte_member_list *member_list; + + member_list = RTE_TAILQ_CAST(rte_member_tailq.head, rte_member_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, member_list, next) { + setsum = (struct rte_member_setsum *) te->data; + if (strncmp(name, setsum->name, RTE_MEMBER_NAMESIZE) == 0) + break; + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + return setsum; +} + +void +rte_member_free(struct rte_member_setsum *setsum) +{ + struct rte_member_list *member_list; + struct rte_tailq_entry *te; + + if (setsum == NULL) + return; + member_list = RTE_TAILQ_CAST(rte_member_tailq.head, rte_member_list); + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, member_list, next) { + if (te->data == (void *)setsum) + break; + } + if (te == NULL) { + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return; + } + TAILQ_REMOVE(member_list, te, next); + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + rte_member_free_ht(setsum); + break; + case RTE_MEMBER_TYPE_VBF: + rte_member_free_vbf(setsum); + break; + default: + break; + } + rte_free(setsum); + rte_free(te); +} + +struct rte_member_setsum * +rte_member_create(const struct rte_member_parameters *params) +{ + struct rte_tailq_entry *te; + struct rte_member_list *member_list; + struct rte_member_setsum *setsum; + int ret; + + if (params == NULL) { + rte_errno = EINVAL; + return NULL; + } + + if (params->key_len == 0 || + params->prim_hash_seed == params->sec_hash_seed) { + rte_errno = EINVAL; + RTE_MEMBER_LOG(ERR, "Create setsummary with " + "invalid parameters\n"); + return NULL; + } + + member_list = RTE_TAILQ_CAST(rte_member_tailq.head, rte_member_list); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + TAILQ_FOREACH(te, member_list, next) { + setsum = (struct rte_member_setsum *) te->data; + if (strncmp(params->name, setsum->name, + RTE_MEMBER_NAMESIZE) == 0) + break; + } + setsum = NULL; + if (te != NULL) { + rte_errno = EEXIST; + te = NULL; + goto error_unlock_exit; + } + te = rte_zmalloc("MEMBER_TAILQ_ENTRY", sizeof(*te), 
0); + if (te == NULL) { + RTE_MEMBER_LOG(ERR, "tailq entry allocation failed\n"); + goto error_unlock_exit; + } + + /* Create a new setsum structure */ + setsum = (struct rte_member_setsum *) rte_zmalloc_socket(params->name, + sizeof(struct rte_member_setsum), RTE_CACHE_LINE_SIZE, + params->socket_id); + if (setsum == NULL) { + RTE_MEMBER_LOG(ERR, "Create setsummary failed\n"); + goto error_unlock_exit; + } + snprintf(setsum->name, sizeof(setsum->name), "%s", params->name); + setsum->type = params->type; + setsum->socket_id = params->socket_id; + setsum->key_len = params->key_len; + setsum->num_set = params->num_set; + setsum->prim_hash_seed = params->prim_hash_seed; + setsum->sec_hash_seed = params->sec_hash_seed; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + ret = rte_member_create_ht(setsum, params); + break; + case RTE_MEMBER_TYPE_VBF: + ret = rte_member_create_vbf(setsum, params); + break; + default: + goto error_unlock_exit; + } + if (ret < 0) + goto error_unlock_exit; + + RTE_MEMBER_LOG(DEBUG, "Creating a setsummary table with " + "mode %u\n", setsum->type); + + te->data = (void *)setsum; + TAILQ_INSERT_TAIL(member_list, te, next); + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return setsum; + +error_unlock_exit: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + rte_member_free(setsum); + return NULL; +} + +int +rte_member_add(const struct rte_member_setsum *setsum, const void *key, + member_set_t set_id) +{ + if (setsum == NULL || key == NULL) + return -EINVAL; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + return rte_member_add_ht(setsum, key, set_id); + case RTE_MEMBER_TYPE_VBF: + return rte_member_add_vbf(setsum, key, set_id); + default: + return -EINVAL; + } +} + +int +rte_member_lookup(const struct rte_member_setsum *setsum, const void *key, + member_set_t *set_id) +{ + if (setsum == NULL || key == NULL || set_id == NULL) + return -EINVAL; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + return rte_member_lookup_ht(setsum, key, set_id); + case RTE_MEMBER_TYPE_VBF: + return rte_member_lookup_vbf(setsum, key, set_id); + default: + return -EINVAL; + } +} + +int +rte_member_lookup_bulk(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, + member_set_t *set_ids) +{ + if (setsum == NULL || keys == NULL || set_ids == NULL) + return -EINVAL; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + return rte_member_lookup_bulk_ht(setsum, keys, num_keys, + set_ids); + case RTE_MEMBER_TYPE_VBF: + return rte_member_lookup_bulk_vbf(setsum, keys, num_keys, + set_ids); + default: + return -EINVAL; + } +} + +int +rte_member_lookup_multi(const struct rte_member_setsum *setsum, const void *key, + uint32_t match_per_key, member_set_t *set_id) +{ + if (setsum == NULL || key == NULL || set_id == NULL) + return -EINVAL; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + return rte_member_lookup_multi_ht(setsum, key, match_per_key, + set_id); + case RTE_MEMBER_TYPE_VBF: + return rte_member_lookup_multi_vbf(setsum, key, match_per_key, + set_id); + default: + return -EINVAL; + } +} + +int +rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, + uint32_t max_match_per_key, uint32_t *match_count, + member_set_t *set_ids) +{ + if (setsum == NULL || keys == NULL || set_ids == NULL || + match_count == NULL) + return -EINVAL; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + return rte_member_lookup_multi_bulk_ht(setsum, keys, num_keys, + max_match_per_key, match_count, 
set_ids); + case RTE_MEMBER_TYPE_VBF: + return rte_member_lookup_multi_bulk_vbf(setsum, keys, num_keys, + max_match_per_key, match_count, set_ids); + default: + return -EINVAL; + } +} + +int +rte_member_delete(const struct rte_member_setsum *setsum, const void *key, + member_set_t set_id) +{ + if (setsum == NULL || key == NULL) + return -EINVAL; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + return rte_member_delete_ht(setsum, key, set_id); + /* current vBF implementation does not support delete function */ + case RTE_MEMBER_TYPE_VBF: + default: + return -EINVAL; + } +} + +void +rte_member_reset(const struct rte_member_setsum *setsum) +{ + if (setsum == NULL) + return; + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + rte_member_reset_ht(setsum); + return; + case RTE_MEMBER_TYPE_VBF: + rte_member_reset_vbf(setsum); + return; + default: + return; + } +} + +RTE_INIT(librte_member_init_log); + +static void +librte_member_init_log(void) +{ + librte_member_logtype = rte_log_register("librte.member"); + if (librte_member_logtype >= 0) + rte_log_set_level(librte_member_logtype, RTE_LOG_DEBUG); +} diff --git a/lib/librte_member/rte_member.h b/lib/librte_member/rte_member.h new file mode 100644 index 00000000..9b0c8f99 --- /dev/null +++ b/lib/librte_member/rte_member.h @@ -0,0 +1,513 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * + * RTE Membership Library + * + * The Membership Library is an extension and generalization of a traditional + * filter (for example Bloom Filter and cuckoo filter) structure that has + * multiple usages in a variety of workloads and applications. The library is + * used to test if a key belongs to certain sets. Two types of such + * "set-summary" structures are implemented: hash-table based (HT) and vector + * bloom filter (vBF). For HT setsummary, two subtypes or modes are available, + * cache and non-cache modes. 
The table below summarize some properties of + * the different implementations. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + */ + +/** + * <!-- + * +==========+=====================+================+=========================+ + * | type | vbf | HT-cache | HT-non-cache | + * +==========+=====================+==========================================+ + * |structure | bloom-filter array | hash-table like without storing key | + * +----------+---------------------+------------------------------------------+ + * |set id | limited by bf count | [1, 0x7fff] | + * | | up to 32. | | + * +----------+---------------------+------------------------------------------+ + * |usages & | small set range, | can delete, | cache most recent keys, | + * |properties| user-specified | big set range, | have both false-positive| + * | | false-positive rate,| small false | and false-negative | + * | | no deletion support.| positive depend| depend on table size, | + * | | | on table size, | automatic overwritten. | + * | | | new key does | | + * | | | not overwrite | | + * | | | existing key. | | + * +----------+---------------------+----------------+-------------------------+ + * --> + */ + +#ifndef _RTE_MEMBER_H_ +#define _RTE_MEMBER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +/** The set ID type that stored internally in hash table based set summary. */ +typedef uint16_t member_set_t; +/** Invalid set ID used to mean no match found. */ +#define RTE_MEMBER_NO_MATCH 0 +/** Maximum size of hash table that can be created. */ +#define RTE_MEMBER_ENTRIES_MAX (1 << 30) +/** Maximum number of keys that can be searched as a bulk */ +#define RTE_MEMBER_LOOKUP_BULK_MAX 64 +/** Entry count per bucket in hash table based mode. */ +#define RTE_MEMBER_BUCKET_ENTRIES 16 +/** Maximum number of characters in setsum name. */ +#define RTE_MEMBER_NAMESIZE 32 + +/** @internal Hash function used by membership library. */ +#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#include <rte_hash_crc.h> +#define MEMBER_HASH_FUNC rte_hash_crc +#else +#include <rte_jhash.h> +#define MEMBER_HASH_FUNC rte_jhash +#endif + +extern int librte_member_logtype; + +#define RTE_MEMBER_LOG(level, fmt, args...) \ +rte_log(RTE_LOG_ ## level, librte_member_logtype, "%s(): " fmt, \ + __func__, ## args) + +/** @internal setsummary structure. */ +struct rte_member_setsum; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Parameter struct used to create set summary + */ +struct rte_member_parameters; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Define different set summary types + */ +enum rte_member_setsum_type { + RTE_MEMBER_TYPE_HT = 0, /**< Hash table based set summary. */ + RTE_MEMBER_TYPE_VBF, /**< Vector of bloom filters. */ + RTE_MEMBER_NUM_TYPE +}; + +/** @internal compare function for different arch. */ +enum rte_member_sig_compare_function { + RTE_MEMBER_COMPARE_SCALAR = 0, + RTE_MEMBER_COMPARE_AVX2, + RTE_MEMBER_COMPARE_NUM +}; + +/** @internal setsummary structure. */ +struct rte_member_setsum { + enum rte_member_setsum_type type; /* Type of the set summary. */ + uint32_t key_len; /* Length of key. */ + uint32_t prim_hash_seed; /* Primary hash function seed. */ + uint32_t sec_hash_seed; /* Secondary hash function seed. */ + + /* Hash table based. */ + uint32_t bucket_cnt; /* Number of buckets. */ + uint32_t bucket_mask; /* Bit mask to get bucket index. 
*/ + /* For runtime selecting AVX, scalar, etc for signature comparison. */ + enum rte_member_sig_compare_function sig_cmp_fn; + uint8_t cache; /* If it is cache mode for ht based. */ + + /* Vector bloom filter. */ + uint32_t num_set; /* Number of set (bf) in vbf. */ + uint32_t bits; /* Number of bits in each bf. */ + uint32_t bit_mask; /* Bit mask to get bit location in bf. */ + uint32_t num_hashes; /* Number of hash values to index bf. */ + + uint32_t mul_shift; /* vbf internal variable used during bit test. */ + uint32_t div_shift; /* vbf internal variable used during bit test. */ + + void *table; /* This is the handler of hash table or vBF array. */ + + + /* Second cache line should start here. */ + uint32_t socket_id; /* NUMA Socket ID for memory. */ + char name[RTE_MEMBER_NAMESIZE]; /* Name of this set summary. */ +} __rte_cache_aligned; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Parameters used when create the set summary table. Currently user can + * specify two types of setsummary: HT based and vBF. For HT based, user can + * specify cache or non-cache mode. Here is a table to describe some differences + * + */ +struct rte_member_parameters { + const char *name; /**< Name of the hash. */ + + /** + * User to specify the type of the setsummary from one of + * rte_member_setsum_type. + * + * HT based setsummary is implemented like a hash table. User should use + * this type when there are many sets. + * + * vBF setsummary is a vector of bloom filters. It is used when number + * of sets is not big (less than 32 for current implementation). + */ + enum rte_member_setsum_type type; + + /** + * is_cache is only used for HT based setsummary. + * + * If it is HT based setsummary, user to specify the subtype or mode + * of the setsummary. It could be cache, or non-cache mode. + * Set is_cache to be 1 if to use as cache mode. + * + * For cache mode, keys can be evicted out of the HT setsummary. Keys + * with the same signature and map to the same bucket + * will overwrite each other in the setsummary table. + * This mode is useful for the case that the set-summary only + * needs to keep record of the recently inserted keys. Both + * false-negative and false-positive could happen. + * + * For non-cache mode, keys cannot be evicted out of the cache. So for + * this mode the setsummary will become full eventually. Keys with the + * same signature but map to the same bucket will still occupy multiple + * entries. This mode does not give false-negative result. + */ + uint8_t is_cache; + + /** + * For HT setsummary, num_keys equals to the number of entries of the + * table. When the number of keys inserted in the HT setsummary + * approaches this number, eviction could happen. For cache mode, + * keys could be evicted out of the table. For non-cache mode, keys will + * be evicted to other buckets like cuckoo hash. The table will also + * likely to become full before the number of inserted keys equal to the + * total number of entries. + * + * For vBF, num_keys equal to the expected number of keys that will + * be inserted into the vBF. The implementation assumes the keys are + * evenly distributed to each BF in vBF. This is used to calculate the + * number of bits we need for each BF. User does not specify the size of + * each BF directly because the optimal size depends on the num_keys + * and false positive rate. + */ + uint32_t num_keys; + + /** + * The length of key is used for hash calculation. 
Since key is not + * stored in set-summary, large key does not require more memory space. + */ + uint32_t key_len; + + /** + * num_set is only used for vBF, but not used for HT setsummary. + * + * num_set is equal to the number of BFs in vBF. For current + * implementation, it only supports 1,2,4,8,16,32 BFs in one vBF set + * summary. If other number of sets are needed, for example 5, the user + * should allocate the minimum available value that larger than 5, + * which is 8. + */ + uint32_t num_set; + + /** + * false_positive_rate is only used for vBF, but not used for HT + * setsummary. + * + * For vBF, false_positive_rate is the user-defined false positive rate + * given expected number of inserted keys (num_keys). It is used to + * calculate the total number of bits for each BF, and the number of + * hash values used during lookup and insertion. For details please + * refer to vBF implementation and membership library documentation. + * + * For HT, This parameter is not directly set by users. + * HT setsummary's false positive rate is in the order of: + * false_pos = (1/bucket_count)*(1/2^16), since we use 16-bit signature. + * This is because two keys needs to map to same bucket and same + * signature to have a collision (false positive). bucket_count is equal + * to number of entries (num_keys) divided by entry count per bucket + * (RTE_MEMBER_BUCKET_ENTRIES). Thus, the false_positive_rate is not + * directly set by users for HT mode. + */ + float false_positive_rate; + + /** + * We use two seeds to calculate two independent hashes for each key. + * + * For HT type, one hash is used as signature, and the other is used + * for bucket location. + * For vBF type, these two hashes and their combinations are used as + * hash locations to index the bit array. + */ + uint32_t prim_hash_seed; + + /** + * The secondary seed should be a different value from the primary seed. + */ + uint32_t sec_hash_seed; + + int socket_id; /**< NUMA Socket ID for memory. */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Find an existing set-summary and return a pointer to it. + * + * @param name + * Name of the set-summary. + * @return + * Pointer to the set-summary or NULL if object not found + * with rte_errno set appropriately. Possible rte_errno values include: + * - ENOENT - value not available for return + */ +struct rte_member_setsum * +rte_member_find_existing(const char *name); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Create set-summary (SS). + * + * @param params + * Parameters to initialize the setsummary. + * @return + * Return the pointer to the setsummary. + * Return value is NULL if the creation failed. + */ +struct rte_member_setsum * +rte_member_create(const struct rte_member_parameters *params); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Lookup key in set-summary (SS). + * Single key lookup and return as soon as the first match found + * + * @param setsum + * Pointer of a setsummary. + * @param key + * Pointer of the key to be looked up. + * @param set_id + * Output the set id matches the key. + * @return + * Return 1 for found a match and 0 for not found a match. + */ +int +rte_member_lookup(const struct rte_member_setsum *setsum, const void *key, + member_set_t *set_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Lookup bulk of keys in set-summary (SS). 
+ * Each key lookup returns as soon as the first match found + * + * @param setsum + * Pointer of a setsummary. + * @param keys + * Pointer of the bulk of keys to be looked up. + * @param num_keys + * Number of keys that will be lookup. + * @param set_ids + * Output set ids for all the keys to this array. + * User should preallocate array that can contain all results, which size is + * the num_keys. + * @return + * The number of keys that found a match. + */ +int +rte_member_lookup_bulk(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, + member_set_t *set_ids); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Lookup a key in set-summary (SS) for multiple matches. + * The key lookup will find all matched entries (multiple match). + * Note that for cache mode of HT, each key can have at most one match. This is + * because keys with same signature that maps to same bucket will overwrite + * each other. So multi-match lookup should be used for vBF and non-cache HT. + * + * @param setsum + * Pointer of a set-summary. + * @param key + * Pointer of the key that to be looked up. + * @param max_match_per_key + * User specified maximum number of matches for each key. The function returns + * as soon as this number of matches found for the key. + * @param set_id + * Output set ids for all the matches of the key. User needs to preallocate + * the array that can contain max_match_per_key number of results. + * @return + * The number of matches that found for the key. + * For cache mode HT set-summary, the number should be at most 1. + */ +int +rte_member_lookup_multi(const struct rte_member_setsum *setsum, + const void *key, uint32_t max_match_per_key, + member_set_t *set_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Lookup a bulk of keys in set-summary (SS) for multiple matches each key. + * Each key lookup will find all matched entries (multiple match). + * Note that for cache mode HT, each key can have at most one match. So + * multi-match function is mainly used for vBF and non-cache mode HT. + * + * @param setsum + * Pointer of a setsummary. + * @param keys + * Pointer of the keys to be looked up. + * @param num_keys + * The number of keys that will be lookup. + * @param max_match_per_key + * The possible maximum number of matches for each key. + * @param match_count + * Output the number of matches for each key in an array. + * @param set_ids + * Return set ids for all the matches of all keys. Users pass in a + * preallocated 2D array with first dimension as key index and second + * dimension as match index. For example set_ids[bulk_size][max_match_per_key] + * @return + * The number of keys that found one or more matches in the set-summary. + */ +int +rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, + uint32_t max_match_per_key, + uint32_t *match_count, + member_set_t *set_ids); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Insert key into set-summary (SS). + * + * @param setsum + * Pointer of a set-summary. + * @param key + * Pointer of the key to be added. + * @param set_id + * The set id associated with the key that needs to be added. Different mode + * supports different set_id ranges. 0 cannot be used as set_id since + * RTE_MEMBER_NO_MATCH by default is set as 0. + * For HT mode, the set_id has range as [1, 0x7FFF], MSB is reserved. 
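To make the set_id rules and lookup semantics above concrete, here is a minimal end-to-end sketch of the new membership API in HT non-cache mode. All parameter values are illustrative examples, not recommendations:

	#include <rte_member.h>
	#include <rte_memory.h>		/* SOCKET_ID_ANY */

	static void
	member_example(void)
	{
		struct rte_member_parameters params = {
			.name = "example_ss",
			.type = RTE_MEMBER_TYPE_HT,
			.is_cache = 0,
			.num_keys = 1 << 16,
			.key_len = sizeof(uint32_t),
			.prim_hash_seed = 0x12345678,
			.sec_hash_seed = 0x87654321,	/* must differ from primary */
			.socket_id = SOCKET_ID_ANY,
		};
		struct rte_member_setsum *ss = rte_member_create(&params);
		uint32_t key = 42;
		member_set_t set = RTE_MEMBER_NO_MATCH;

		if (ss == NULL)
			return;

		/* HT mode: valid set ids are [1, 0x7fff]; 0 means "no match" */
		rte_member_add(ss, &key, 1);

		if (rte_member_lookup(ss, &key, &set) == 1 && set == 1) {
			/* key found and reported as a member of set 1 */
		}

		rte_member_free(ss);
	}
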
+ * For vBF mode the set id is limited by the num_set parameter when create + * the set-summary. + * @return + * HT (cache mode) and vBF should never fail unless the set_id is not in the + * valid range. In such case -EINVAL is returned. + * For HT (non-cache mode) it could fail with -ENOSPC error code when table is + * full. + * For success it returns different values for different modes to provide + * extra information for users. + * Return 0 for HT (cache mode) if the add does not cause + * eviction, return 1 otherwise. Return 0 for non-cache mode if success, + * -ENOSPC for full, and 1 if cuckoo eviction happens. + * Always returns 0 for vBF mode. + */ +int +rte_member_add(const struct rte_member_setsum *setsum, const void *key, + member_set_t set_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * De-allocate memory used by set-summary. + * + * @param setsum + * Pointer to the set summary. + */ +void +rte_member_free(struct rte_member_setsum *setsum); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Reset the set-summary tables. E.g. reset bits to be 0 in BF, + * reset set_id in each entry to be RTE_MEMBER_NO_MATCH in HT based SS. + * + * @param setsum + * Pointer to the set-summary. + */ +void +rte_member_reset(const struct rte_member_setsum *setsum); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Delete items from the set-summary. Note that vBF does not support deletion + * in current implementation. For vBF, error code of -EINVAL will be returned. + * + * @param setsum + * Pointer to the set-summary. + * @param key + * Pointer of the key to be deleted. + * @param set_id + * For HT mode, we need both key and its corresponding set_id to + * properly delete the key. Without set_id, we may delete other keys with the + * same signature. + * @return + * If no entry found to delete, an error code of -ENOENT could be returned. + */ +int +rte_member_delete(const struct rte_member_setsum *setsum, const void *key, + member_set_t set_id); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMBER_H_ */ diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c new file mode 100644 index 00000000..59332d56 --- /dev/null +++ b/lib/librte_member/rte_member_ht.c @@ -0,0 +1,586 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_errno.h> +#include <rte_malloc.h> +#include <rte_prefetch.h> +#include <rte_random.h> +#include <rte_log.h> + +#include "rte_member.h" +#include "rte_member_ht.h" + +#if defined(RTE_ARCH_X86) +#include "rte_member_x86.h" +#endif + +/* Search bucket for entry with tmp_sig and update set_id */ +static inline int +update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + member_set_t set_id) +{ + uint32_t i; + + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) { + if (buckets[bucket_id].sigs[i] == tmp_sig) { + buckets[bucket_id].sets[i] = set_id; + return 1; + } + } + return 0; +} + +static inline int +search_bucket_single(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + member_set_t *set_id) +{ + uint32_t iter; + + for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) { + if (tmp_sig == buckets[bucket_id].sigs[iter] && + buckets[bucket_id].sets[iter] != + RTE_MEMBER_NO_MATCH) { + *set_id = buckets[bucket_id].sets[iter]; + return 1; + } + } + return 0; +} + +static inline void +search_bucket_multi(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + uint32_t *counter, + uint32_t matches_per_key, + member_set_t *set_id) +{ + uint32_t iter; + + for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) { + if (tmp_sig == buckets[bucket_id].sigs[iter] && + buckets[bucket_id].sets[iter] != + RTE_MEMBER_NO_MATCH) { + set_id[*counter] = buckets[bucket_id].sets[iter]; + (*counter)++; + if (*counter >= matches_per_key) + return; + } + } +} + +int +rte_member_create_ht(struct rte_member_setsum *ss, + const struct rte_member_parameters *params) +{ + uint32_t i, j; + uint32_t size_bucket_t; + uint32_t num_entries = rte_align32pow2(params->num_keys); + + if ((num_entries > RTE_MEMBER_ENTRIES_MAX) || + !rte_is_power_of_2(RTE_MEMBER_BUCKET_ENTRIES) || + num_entries < RTE_MEMBER_BUCKET_ENTRIES) { + rte_errno = EINVAL; + RTE_MEMBER_LOG(ERR, + "Membership HT create with invalid parameters\n"); + return -EINVAL; + } + + uint32_t num_buckets = num_entries / RTE_MEMBER_BUCKET_ENTRIES; + + size_bucket_t = sizeof(struct member_ht_bucket); + + struct member_ht_bucket *buckets = rte_zmalloc_socket(NULL, + num_buckets * size_bucket_t, + RTE_CACHE_LINE_SIZE, ss->socket_id); + + if (buckets == NULL) { + RTE_MEMBER_LOG(ERR, "memory allocation failed for HT " + "setsummary\n"); + return -ENOMEM; + } + + ss->table = buckets; + ss->bucket_cnt = num_buckets; + ss->bucket_mask = num_buckets - 1; + ss->cache = params->is_cache; + + for (i = 0; i < num_buckets; i++) { + for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) + buckets[i].sets[j] = RTE_MEMBER_NO_MATCH; + } +#if defined(RTE_ARCH_X86) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) && + RTE_MEMBER_BUCKET_ENTRIES == 16) + ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2; + else +#endif + ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR; + + RTE_MEMBER_LOG(DEBUG, "Hash table based filter created, " + "the table has %u entries, %u 
buckets\n", + num_entries, num_buckets); + return 0; +} + +static inline void +get_buckets_index(const struct rte_member_setsum *ss, const void *key, + uint32_t *prim_bkt, uint32_t *sec_bkt, member_sig_t *sig) +{ + uint32_t first_hash = MEMBER_HASH_FUNC(key, ss->key_len, + ss->prim_hash_seed); + uint32_t sec_hash = MEMBER_HASH_FUNC(&first_hash, sizeof(uint32_t), + ss->sec_hash_seed); + /* + * We use the first hash value for the signature, and the second hash + * value to derive the primary and secondary bucket locations. + * + * For non-cache mode, we use the lower bits for the primary bucket + * location. Then we xor primary bucket location and the signature + * to get the secondary bucket location. This is called "partial-key + * cuckoo hashing" proposed by B. Fan, et al's paper + * "Cuckoo Filter: Practically Better Than Bloom". The benefit to use + * xor is that one could derive the alternative bucket location + * by only using the current bucket location and the signature. This is + * generally required by non-cache mode's eviction and deletion + * process without the need to store alternative hash value nor the full + * key. + * + * For cache mode, we use the lower bits for the primary bucket + * location and the higher bits for the secondary bucket location. In + * cache mode, keys are simply overwritten if bucket is full. We do not + * use xor since lower/higher bits are more independent hash values thus + * should provide slightly better table load. + */ + *sig = first_hash; + if (ss->cache) { + *prim_bkt = sec_hash & ss->bucket_mask; + *sec_bkt = (sec_hash >> 16) & ss->bucket_mask; + } else { + *prim_bkt = sec_hash & ss->bucket_mask; + *sec_bkt = (*prim_bkt ^ *sig) & ss->bucket_mask; + } +} + +int +rte_member_lookup_ht(const struct rte_member_setsum *ss, + const void *key, member_set_t *set_id) +{ + uint32_t prim_bucket, sec_bucket; + member_sig_t tmp_sig; + struct member_ht_bucket *buckets = ss->table; + + *set_id = RTE_MEMBER_NO_MATCH; + get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); + + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets, + set_id) || + search_bucket_single_avx(sec_bucket, tmp_sig, + buckets, set_id)) + return 1; + break; +#endif + default: + if (search_bucket_single(prim_bucket, tmp_sig, buckets, + set_id) || + search_bucket_single(sec_bucket, tmp_sig, + buckets, set_id)) + return 1; + } + + return 0; +} + +uint32_t +rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss, + const void **keys, uint32_t num_keys, member_set_t *set_id) +{ + uint32_t i; + uint32_t num_matches = 0; + struct member_ht_bucket *buckets = ss->table; + member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX]; + + for (i = 0; i < num_keys; i++) { + get_buckets_index(ss, keys[i], &prim_buckets[i], + &sec_buckets[i], &tmp_sig[i]); + rte_prefetch0(&buckets[prim_buckets[i]]); + rte_prefetch0(&buckets[sec_buckets[i]]); + } + + for (i = 0; i < num_keys; i++) { + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (search_bucket_single_avx(prim_buckets[i], + tmp_sig[i], buckets, &set_id[i]) || + search_bucket_single_avx(sec_buckets[i], + tmp_sig[i], buckets, &set_id[i])) + num_matches++; + else + set_id[i] = RTE_MEMBER_NO_MATCH; + break; +#endif + default: + if 
(search_bucket_single(prim_buckets[i], tmp_sig[i], + buckets, &set_id[i]) || + search_bucket_single(sec_buckets[i], + tmp_sig[i], buckets, &set_id[i])) + num_matches++; + else + set_id[i] = RTE_MEMBER_NO_MATCH; + } + } + return num_matches; +} + +uint32_t +rte_member_lookup_multi_ht(const struct rte_member_setsum *ss, + const void *key, uint32_t match_per_key, + member_set_t *set_id) +{ + uint32_t num_matches = 0; + uint32_t prim_bucket, sec_bucket; + member_sig_t tmp_sig; + struct member_ht_bucket *buckets = ss->table; + + get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); + + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + search_bucket_multi_avx(prim_bucket, tmp_sig, buckets, + &num_matches, match_per_key, set_id); + if (num_matches < match_per_key) + search_bucket_multi_avx(sec_bucket, tmp_sig, + buckets, &num_matches, match_per_key, set_id); + return num_matches; +#endif + default: + search_bucket_multi(prim_bucket, tmp_sig, buckets, &num_matches, + match_per_key, set_id); + if (num_matches < match_per_key) + search_bucket_multi(sec_bucket, tmp_sig, + buckets, &num_matches, match_per_key, set_id); + return num_matches; + } +} + +uint32_t +rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss, + const void **keys, uint32_t num_keys, uint32_t match_per_key, + uint32_t *match_count, + member_set_t *set_ids) +{ + uint32_t i; + uint32_t num_matches = 0; + struct member_ht_bucket *buckets = ss->table; + uint32_t match_cnt_tmp; + member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX]; + + for (i = 0; i < num_keys; i++) { + get_buckets_index(ss, keys[i], &prim_buckets[i], + &sec_buckets[i], &tmp_sig[i]); + rte_prefetch0(&buckets[prim_buckets[i]]); + rte_prefetch0(&buckets[sec_buckets[i]]); + } + for (i = 0; i < num_keys; i++) { + match_cnt_tmp = 0; + + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + search_bucket_multi_avx(prim_buckets[i], tmp_sig[i], + buckets, &match_cnt_tmp, match_per_key, + &set_ids[i*match_per_key]); + if (match_cnt_tmp < match_per_key) + search_bucket_multi_avx(sec_buckets[i], + tmp_sig[i], buckets, &match_cnt_tmp, + match_per_key, + &set_ids[i*match_per_key]); + match_count[i] = match_cnt_tmp; + if (match_cnt_tmp != 0) + num_matches++; + break; +#endif + default: + search_bucket_multi(prim_buckets[i], tmp_sig[i], + buckets, &match_cnt_tmp, match_per_key, + &set_ids[i*match_per_key]); + if (match_cnt_tmp < match_per_key) + search_bucket_multi(sec_buckets[i], tmp_sig[i], + buckets, &match_cnt_tmp, match_per_key, + &set_ids[i*match_per_key]); + match_count[i] = match_cnt_tmp; + if (match_cnt_tmp != 0) + num_matches++; + } + } + return num_matches; +} + +static inline int +try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, + member_sig_t sig, member_set_t set_id) +{ + int i; + /* If not full then insert into one slot */ + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) { + if (buckets[prim].sets[i] == RTE_MEMBER_NO_MATCH) { + buckets[prim].sigs[i] = sig; + buckets[prim].sets[i] = set_id; + return 0; + } + } + /* If prim failed, we need to access second bucket */ + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) { + if (buckets[sec].sets[i] == RTE_MEMBER_NO_MATCH) { + buckets[sec].sigs[i] = sig; + buckets[sec].sets[i] = set_id; + return 0; + } + } + return -1; +} + +static 
inline int +try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, + member_sig_t sig, member_set_t set_id, + enum rte_member_sig_compare_function cmp_fn) +{ + switch (cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (update_entry_search_avx(prim, sig, buckets, set_id) || + update_entry_search_avx(sec, sig, buckets, + set_id)) + return 0; + break; +#endif + default: + if (update_entry_search(prim, sig, buckets, set_id) || + update_entry_search(sec, sig, buckets, + set_id)) + return 0; + } + return -1; +} + +static inline int +evict_from_bucket(void) +{ + /* For now, we randomly pick one entry to evict */ + return rte_rand() & (RTE_MEMBER_BUCKET_ENTRIES - 1); +} + +/* + * This function is similar to the cuckoo hash make_space function in hash + * library + */ +static inline int +make_space_bucket(const struct rte_member_setsum *ss, uint32_t bkt_idx, + unsigned int *nr_pushes) +{ + unsigned int i, j; + int ret; + struct member_ht_bucket *buckets = ss->table; + uint32_t next_bucket_idx; + struct member_ht_bucket *next_bkt[RTE_MEMBER_BUCKET_ENTRIES]; + struct member_ht_bucket *bkt = &buckets[bkt_idx]; + /* MSB is set to indicate if an entry has been already pushed */ + member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1); + + /* + * Push existing item (search for bucket with space in + * alternative locations) to its alternative location + */ + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) { + /* Search for space in alternative locations */ + next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask; + next_bkt[i] = &buckets[next_bucket_idx]; + for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) { + if (next_bkt[i]->sets[j] == RTE_MEMBER_NO_MATCH) + break; + } + + if (j != RTE_MEMBER_BUCKET_ENTRIES) + break; + } + + /* Alternative location has spare room (end of recursive function) */ + if (i != RTE_MEMBER_BUCKET_ENTRIES) { + next_bkt[i]->sigs[j] = bkt->sigs[i]; + next_bkt[i]->sets[j] = bkt->sets[i]; + return i; + } + + /* Pick entry that has not been pushed yet */ + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) + if ((bkt->sets[i] & flag_mask) == 0) + break; + + /* All entries have been pushed, so entry cannot be added */ + if (i == RTE_MEMBER_BUCKET_ENTRIES || + ++(*nr_pushes) > RTE_MEMBER_MAX_PUSHES) + return -ENOSPC; + + next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask; + /* Set flag to indicate that this entry is going to be pushed */ + bkt->sets[i] |= flag_mask; + + /* Need room in alternative bucket to insert the pushed entry */ + ret = make_space_bucket(ss, next_bucket_idx, nr_pushes); + /* + * After recursive function. 
+ * Clear flags and insert the pushed entry + * in its alternative location if successful, + * or return error + */ + bkt->sets[i] &= ~flag_mask; + if (ret >= 0) { + next_bkt[i]->sigs[ret] = bkt->sigs[i]; + next_bkt[i]->sets[ret] = bkt->sets[i]; + return i; + } else + return ret; +} + +int +rte_member_add_ht(const struct rte_member_setsum *ss, + const void *key, member_set_t set_id) +{ + int ret; + unsigned int nr_pushes = 0; + uint32_t prim_bucket, sec_bucket; + member_sig_t tmp_sig; + struct member_ht_bucket *buckets = ss->table; + member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1); + + if (set_id == RTE_MEMBER_NO_MATCH || (set_id & flag_mask) != 0) + return -EINVAL; + + get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); + + /* + * If it is cache based setsummary, we try overwriting (updating) + * existing entry with the same signature first. In cache mode, we allow + * false negatives and only cache the most recent keys. + * + * For non-cache mode, we do not update existing entry with the same + * signature. This is because if two keys with same signature update + * each other, false negative may happen, which is not the expected + * behavior for non-cache setsummary. + */ + if (ss->cache) { + ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig, + set_id, ss->sig_cmp_fn); + if (ret != -1) + return ret; + } + /* If not full then insert into one slot */ + ret = try_insert(buckets, prim_bucket, sec_bucket, tmp_sig, set_id); + if (ret != -1) + return ret; + + /* Random pick prim or sec for recursive displacement */ + uint32_t select_bucket = (tmp_sig && 1U) ? prim_bucket : sec_bucket; + if (ss->cache) { + ret = evict_from_bucket(); + buckets[select_bucket].sigs[ret] = tmp_sig; + buckets[select_bucket].sets[ret] = set_id; + return 1; + } + + ret = make_space_bucket(ss, select_bucket, &nr_pushes); + if (ret >= 0) { + buckets[select_bucket].sigs[ret] = tmp_sig; + buckets[select_bucket].sets[ret] = set_id; + ret = 1; + } + + return ret; +} + +void +rte_member_free_ht(struct rte_member_setsum *ss) +{ + rte_free(ss->table); +} + +int +rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key, + member_set_t set_id) +{ + int i; + uint32_t prim_bucket, sec_bucket; + member_sig_t tmp_sig; + struct member_ht_bucket *buckets = ss->table; + + get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); + + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) { + if (tmp_sig == buckets[prim_bucket].sigs[i] && + set_id == buckets[prim_bucket].sets[i]) { + buckets[prim_bucket].sets[i] = RTE_MEMBER_NO_MATCH; + return 0; + } + } + + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) { + if (tmp_sig == buckets[sec_bucket].sigs[i] && + set_id == buckets[sec_bucket].sets[i]) { + buckets[sec_bucket].sets[i] = RTE_MEMBER_NO_MATCH; + return 0; + } + } + return -ENOENT; +} + +void +rte_member_reset_ht(const struct rte_member_setsum *ss) +{ + uint32_t i, j; + struct member_ht_bucket *buckets = ss->table; + + for (i = 0; i < ss->bucket_cnt; i++) { + for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) + buckets[i].sets[j] = RTE_MEMBER_NO_MATCH; + } +} diff --git a/lib/librte_member/rte_member_ht.h b/lib/librte_member/rte_member_ht.h new file mode 100644 index 00000000..3148a492 --- /dev/null +++ b/lib/librte_member/rte_member_ht.h @@ -0,0 +1,94 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMBER_HT_H_ +#define _RTE_MEMBER_HT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Maximum number of pushes for cuckoo path in HT mode. */ +#define RTE_MEMBER_MAX_PUSHES 50 + +typedef uint16_t member_sig_t; /* signature size is 16 bit */ + +/* The bucket struct for ht setsum */ +struct member_ht_bucket { + member_sig_t sigs[RTE_MEMBER_BUCKET_ENTRIES]; /* 2-byte signature */ + member_set_t sets[RTE_MEMBER_BUCKET_ENTRIES]; /* 2-byte set */ +} __rte_cache_aligned; + +int +rte_member_create_ht(struct rte_member_setsum *ss, + const struct rte_member_parameters *params); + +int +rte_member_lookup_ht(const struct rte_member_setsum *setsum, + const void *key, member_set_t *set_id); + +uint32_t +rte_member_lookup_bulk_ht(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, + member_set_t *set_ids); + +uint32_t +rte_member_lookup_multi_ht(const struct rte_member_setsum *setsum, + const void *key, uint32_t match_per_key, + member_set_t *set_id); + +uint32_t +rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, uint32_t match_per_key, + uint32_t *match_count, + member_set_t *set_ids); + +int +rte_member_add_ht(const struct rte_member_setsum *setsum, + const void *key, member_set_t set_id); + +void +rte_member_free_ht(struct rte_member_setsum *setsum); + +int +rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key, + member_set_t set_id); + +void +rte_member_reset_ht(const struct rte_member_setsum *setsum); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMBER_HT_H_ */ diff --git a/lib/librte_member/rte_member_vbf.c b/lib/librte_member/rte_member_vbf.c new file mode 100644 index 00000000..1a98ac84 --- /dev/null +++ b/lib/librte_member/rte_member_vbf.c @@ -0,0 +1,350 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <math.h> +#include <string.h> + +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_errno.h> +#include <rte_log.h> + +#include "rte_member.h" +#include "rte_member_vbf.h" + +/* + * vBF currently implemented as a big array. + * The BFs have a vertical layout. Bits in same location of all bfs will stay + * in the same cache line. + * For example, if we have 32 bloom filters, we use a uint32_t array to + * represent all of them. array[0] represent the first location of all the + * bloom filters, array[1] represents the second location of all the + * bloom filters, etc. The advantage of this layout is to minimize the average + * number of memory accesses to test all bloom filters. + * + * Currently the implementation supports vBF containing 1,2,4,8,16,32 BFs. + */ +int +rte_member_create_vbf(struct rte_member_setsum *ss, + const struct rte_member_parameters *params) +{ + + if (params->num_set > RTE_MEMBER_MAX_BF || + !rte_is_power_of_2(params->num_set) || + params->num_keys == 0 || + params->false_positive_rate == 0 || + params->false_positive_rate > 1) { + rte_errno = EINVAL; + RTE_MEMBER_LOG(ERR, "Membership vBF create with invalid parameters\n"); + return -EINVAL; + } + + /* We assume expected keys evenly distribute to all BFs */ + uint32_t num_keys_per_bf = 1 + (params->num_keys - 1) / ss->num_set; + + /* + * Note that the false positive rate is for all BFs in the vBF + * such that the single BF's false positive rate needs to be + * calculated. + * Assume each BF's False positive rate is fp_one_bf. The total false + * positive rate is fp = 1-(1-fp_one_bf)^n. 
+ * => fp_one_bf = 1 - (1-fp)^(1/n) + */ + + float fp_one_bf = 1 - pow((1 - params->false_positive_rate), + 1.0 / ss->num_set); + + if (fp_one_bf == 0) { + rte_errno = EINVAL; + RTE_MEMBER_LOG(ERR, "Membership BF false positive rate is too small\n"); + return -EINVAL; + } + + uint32_t bits = ceil((num_keys_per_bf * + log(fp_one_bf)) / + log(1.0 / (pow(2.0, log(2.0))))); + + /* We round to power of 2 for performance during lookup */ + ss->bits = rte_align32pow2(bits); + + ss->num_hashes = (uint32_t)(log(2.0) * bits / num_keys_per_bf); + ss->bit_mask = ss->bits - 1; + + /* + * Since we round the bits to power of 2, the final false positive + * rate will probably not be same as the user specified. We log the + * new value as debug message. + */ + float new_fp = pow((1 - pow((1 - 1.0 / ss->bits), num_keys_per_bf * + ss->num_hashes)), ss->num_hashes); + new_fp = 1 - pow((1 - new_fp), ss->num_set); + + /* + * Reduce hash function count, until we approach the user specified + * false-positive rate. Otherwise it is too conservative + */ + int tmp_num_hash = ss->num_hashes; + + while (tmp_num_hash > 1) { + float tmp_fp = new_fp; + + tmp_num_hash--; + new_fp = pow((1 - pow((1 - 1.0 / ss->bits), num_keys_per_bf * + tmp_num_hash)), tmp_num_hash); + new_fp = 1 - pow((1 - new_fp), ss->num_set); + + if (new_fp > params->false_positive_rate) { + new_fp = tmp_fp; + tmp_num_hash++; + break; + } + } + + ss->num_hashes = tmp_num_hash; + + /* + * To avoid multiplication and division: + * mul_shift is used for multiplication shift during bit test + * div_shift is used for division shift, to be divided by number of bits + * represented by a uint32_t variable + */ + ss->mul_shift = __builtin_ctzl(ss->num_set); + ss->div_shift = __builtin_ctzl(32 >> ss->mul_shift); + + RTE_MEMBER_LOG(DEBUG, "vector bloom filter created, " + "each bloom filter expects %u keys, needs %u bits, %u hashes, " + "with false positive rate set as %.5f, " + "The new calculated vBF false positive rate is %.5f\n", + num_keys_per_bf, ss->bits, ss->num_hashes, fp_one_bf, new_fp); + + ss->table = rte_zmalloc_socket(NULL, ss->num_set * (ss->bits >> 3), + RTE_CACHE_LINE_SIZE, ss->socket_id); + if (ss->table == NULL) + return -ENOMEM; + + return 0; +} + +static inline uint32_t +test_bit(uint32_t bit_loc, const struct rte_member_setsum *ss) +{ + uint32_t *vbf = ss->table; + uint32_t n = ss->num_set; + uint32_t div_shift = ss->div_shift; + uint32_t mul_shift = ss->mul_shift; + /* + * a is how many bits in one BF are represented by one 32bit + * variable. 
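 * Editor's illustration (not part of the patch): with num_set = 8 the create
 * path above sets mul_shift = ctz(8) = 3 and div_shift = ctz(32 >> 3) = 2,
 * so a = 32 >> 3 = 4 bit positions share one uint32_t word. For a given
 * bit_loc, bit_loc >> 2 then selects the word, (bit_loc & 3) << 3 selects
 * the 8-bit lane inside it, and the final mask (1 << 8) - 1 keeps only the
 * membership bits of the 8 bloom filters at that position.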
+ */ + uint32_t a = 32 >> mul_shift; + /* + * x>>b is the divide, x & (a-1) is the mod, & (1<<n-1) to mask out bits + * we do not need + */ + return (vbf[bit_loc >> div_shift] >> + ((bit_loc & (a - 1)) << mul_shift)) & ((1ULL << n) - 1); +} + +static inline void +set_bit(uint32_t bit_loc, const struct rte_member_setsum *ss, int32_t set) +{ + uint32_t *vbf = ss->table; + uint32_t div_shift = ss->div_shift; + uint32_t mul_shift = ss->mul_shift; + uint32_t a = 32 >> mul_shift; + + vbf[bit_loc >> div_shift] |= + 1UL << (((bit_loc & (a - 1)) << mul_shift) + set - 1); +} + +int +rte_member_lookup_vbf(const struct rte_member_setsum *ss, const void *key, + member_set_t *set_id) +{ + uint32_t j; + uint32_t h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed); + uint32_t h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t), + ss->sec_hash_seed); + uint32_t mask = ~0; + uint32_t bit_loc; + + for (j = 0; j < ss->num_hashes; j++) { + bit_loc = (h1 + j * h2) & ss->bit_mask; + mask &= test_bit(bit_loc, ss); + } + + if (mask) { + *set_id = __builtin_ctzl(mask) + 1; + return 1; + } + + *set_id = RTE_MEMBER_NO_MATCH; + return 0; +} + +uint32_t +rte_member_lookup_bulk_vbf(const struct rte_member_setsum *ss, + const void **keys, uint32_t num_keys, member_set_t *set_ids) +{ + uint32_t i, k; + uint32_t num_matches = 0; + uint32_t mask[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t h1[RTE_MEMBER_LOOKUP_BULK_MAX], h2[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t bit_loc; + + for (i = 0; i < num_keys; i++) + h1[i] = MEMBER_HASH_FUNC(keys[i], ss->key_len, + ss->prim_hash_seed); + for (i = 0; i < num_keys; i++) + h2[i] = MEMBER_HASH_FUNC(&h1[i], sizeof(uint32_t), + ss->sec_hash_seed); + for (i = 0; i < num_keys; i++) { + mask[i] = ~0; + for (k = 0; k < ss->num_hashes; k++) { + bit_loc = (h1[i] + k * h2[i]) & ss->bit_mask; + mask[i] &= test_bit(bit_loc, ss); + } + } + for (i = 0; i < num_keys; i++) { + if (mask[i]) { + set_ids[i] = __builtin_ctzl(mask[i]) + 1; + num_matches++; + } else + set_ids[i] = RTE_MEMBER_NO_MATCH; + } + return num_matches; +} + +uint32_t +rte_member_lookup_multi_vbf(const struct rte_member_setsum *ss, + const void *key, uint32_t match_per_key, + member_set_t *set_id) +{ + uint32_t num_matches = 0; + uint32_t j; + uint32_t h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed); + uint32_t h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t), + ss->sec_hash_seed); + uint32_t mask = ~0; + uint32_t bit_loc; + + for (j = 0; j < ss->num_hashes; j++) { + bit_loc = (h1 + j * h2) & ss->bit_mask; + mask &= test_bit(bit_loc, ss); + } + while (mask) { + uint32_t loc = __builtin_ctzl(mask); + set_id[num_matches] = loc + 1; + num_matches++; + if (num_matches >= match_per_key) + return num_matches; + mask &= ~(1UL << loc); + } + return num_matches; +} + +uint32_t +rte_member_lookup_multi_bulk_vbf(const struct rte_member_setsum *ss, + const void **keys, uint32_t num_keys, uint32_t match_per_key, + uint32_t *match_count, + member_set_t *set_ids) +{ + uint32_t i, k; + uint32_t num_matches = 0; + uint32_t match_cnt_t; + uint32_t mask[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t h1[RTE_MEMBER_LOOKUP_BULK_MAX], h2[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t bit_loc; + + for (i = 0; i < num_keys; i++) + h1[i] = MEMBER_HASH_FUNC(keys[i], ss->key_len, + ss->prim_hash_seed); + for (i = 0; i < num_keys; i++) + h2[i] = MEMBER_HASH_FUNC(&h1[i], sizeof(uint32_t), + ss->sec_hash_seed); + for (i = 0; i < num_keys; i++) { + mask[i] = ~0; + for (k = 0; k < ss->num_hashes; k++) { + bit_loc = (h1[i] + k * h2[i]) & ss->bit_mask; + mask[i] &= 
test_bit(bit_loc, ss); + } + } + for (i = 0; i < num_keys; i++) { + match_cnt_t = 0; + while (mask[i]) { + uint32_t loc = __builtin_ctzl(mask[i]); + set_ids[i * match_per_key + match_cnt_t] = loc + 1; + match_cnt_t++; + if (match_cnt_t >= match_per_key) + break; + mask[i] &= ~(1UL << loc); + } + match_count[i] = match_cnt_t; + if (match_cnt_t != 0) + num_matches++; + } + return num_matches; +} + +int +rte_member_add_vbf(const struct rte_member_setsum *ss, + const void *key, member_set_t set_id) +{ + uint32_t i, h1, h2; + uint32_t bit_loc; + + if (set_id > ss->num_set || set_id == RTE_MEMBER_NO_MATCH) + return -EINVAL; + + h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed); + h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t), ss->sec_hash_seed); + + for (i = 0; i < ss->num_hashes; i++) { + bit_loc = (h1 + i * h2) & ss->bit_mask; + set_bit(bit_loc, ss, set_id); + } + return 0; +} + +void +rte_member_free_vbf(struct rte_member_setsum *ss) +{ + rte_free(ss->table); +} + +void +rte_member_reset_vbf(const struct rte_member_setsum *ss) +{ + uint32_t *vbf = ss->table; + memset(vbf, 0, (ss->num_set * ss->bits) >> 3); +} diff --git a/lib/librte_member/rte_member_vbf.h b/lib/librte_member/rte_member_vbf.h new file mode 100644 index 00000000..5bc158b9 --- /dev/null +++ b/lib/librte_member/rte_member_vbf.h @@ -0,0 +1,82 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
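[Editor's note] As an illustration of the encoding used by set_bit() and the lookup functions above (not part of the patch): adding a key to set_id 3 sets bit (3 - 1) = 2 in every probed lane, and a lookup ANDs the lanes of all num_hashes probes, so a surviving mask of 0b100 yields __builtin_ctzl(0b100) + 1 = 3, reporting the key as a (possible) member of set 3.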
+ */ + +#ifndef _RTE_MEMBER_VBF_H_ +#define _RTE_MEMBER_VBF_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Currently we only support up to 32 sets in vBF */ +#define RTE_MEMBER_MAX_BF 32 + +int +rte_member_create_vbf(struct rte_member_setsum *ss, + const struct rte_member_parameters *params); + +int +rte_member_lookup_vbf(const struct rte_member_setsum *setsum, + const void *key, member_set_t *set_id); + +uint32_t +rte_member_lookup_bulk_vbf(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, + member_set_t *set_ids); + +uint32_t +rte_member_lookup_multi_vbf(const struct rte_member_setsum *setsum, + const void *key, uint32_t match_per_key, + member_set_t *set_id); + +uint32_t +rte_member_lookup_multi_bulk_vbf(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, uint32_t match_per_key, + uint32_t *match_count, + member_set_t *set_ids); + +int +rte_member_add_vbf(const struct rte_member_setsum *setsum, + const void *key, member_set_t set_id); + +void +rte_member_free_vbf(struct rte_member_setsum *ss); + +void +rte_member_reset_vbf(const struct rte_member_setsum *setsum); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMBER_VBF_H_ */ diff --git a/lib/librte_member/rte_member_version.map b/lib/librte_member/rte_member_version.map new file mode 100644 index 00000000..019e4cd9 --- /dev/null +++ b/lib/librte_member/rte_member_version.map @@ -0,0 +1,16 @@ +DPDK_17.11 { + global: + + rte_member_add; + rte_member_create; + rte_member_delete; + rte_member_find_existing; + rte_member_free; + rte_member_lookup; + rte_member_lookup_bulk; + rte_member_lookup_multi; + rte_member_lookup_multi_bulk; + rte_member_reset; + + local: *; +}; diff --git a/lib/librte_member/rte_member_x86.h b/lib/librte_member/rte_member_x86.h new file mode 100644 index 00000000..d29dd3fe --- /dev/null +++ b/lib/librte_member/rte_member_x86.h @@ -0,0 +1,107 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_MEMBER_X86_H_ +#define _RTE_MEMBER_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <x86intrin.h> + +#if defined(RTE_MACHINE_CPUFLAG_AVX2) + +static inline int +update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + member_set_t set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs), + _mm256_set1_epi16(tmp_sig))); + if (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1; + buckets[bucket_id].sets[hit_idx] = set_id; + return 1; + } + return 0; +} + +static inline int +search_bucket_single_avx(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + member_set_t *set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs), + _mm256_set1_epi16(tmp_sig))); + while (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1; + if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) { + *set_id = buckets[bucket_id].sets[hit_idx]; + return 1; + } + hitmask &= ~(3U << ((hit_idx) << 1)); + } + return 0; +} + +static inline void +search_bucket_multi_avx(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + uint32_t *counter, + uint32_t match_per_key, + member_set_t *set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs), + _mm256_set1_epi16(tmp_sig))); + while (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1; + if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) { + set_id[*counter] = buckets[bucket_id].sets[hit_idx]; + (*counter)++; + if (*counter >= match_per_key) + return; + } + hitmask &= ~(3U << ((hit_idx) << 1)); + } +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMBER_X86_H_ */ diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile index 7b5bdfee..46654e32 100644 --- a/lib/librte_mempool/Makefile +++ b/lib/librte_mempool/Makefile @@ -35,10 +35,11 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_mempool.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 +LDLIBS += -lrte_eal -lrte_ring EXPORT_MAP := rte_mempool_version.map -LIBABIVER := 2 +LIBABIVER := 3 # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index 6fc3c9c7..d50dba49 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -128,7 +128,7 @@ static unsigned optimize_object_size(unsigned obj_size) } static void -mempool_add_elem(struct rte_mempool *mp, void *obj, phys_addr_t physaddr) +mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova) { struct rte_mempool_objhdr *hdr; struct rte_mempool_objtlr *tlr __rte_unused; @@ -136,7 +136,7 @@ mempool_add_elem(struct rte_mempool *mp, void *obj, phys_addr_t physaddr) /* set mempool ptr in header */ hdr = RTE_PTR_SUB(obj, sizeof(*hdr)); hdr->mp = mp; - hdr->physaddr = physaddr; + hdr->iova = iova; STAILQ_INSERT_TAIL(&mp->elt_list, hdr, next); mp->populated_size++; @@ -238,9 +238,16 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags, * Calculate maximum amount of memory required to store given number of objects. 
*/ size_t -rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift) +rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift, + unsigned int flags) { size_t obj_per_page, pg_num, pg_sz; + unsigned int mask; + + mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG; + if ((flags & mask) == mask) + /* alignment need one additional object */ + elt_num += 1; if (total_elt_sz == 0) return 0; @@ -263,23 +270,29 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift) */ ssize_t rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num, - size_t total_elt_sz, const phys_addr_t paddr[], uint32_t pg_num, - uint32_t pg_shift) + size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num, + uint32_t pg_shift, unsigned int flags) { uint32_t elt_cnt = 0; - phys_addr_t start, end; - uint32_t paddr_idx; + rte_iova_t start, end; + uint32_t iova_idx; size_t pg_sz = (size_t)1 << pg_shift; + unsigned int mask; + + mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG; + if ((flags & mask) == mask) + /* alignment need one additional object */ + elt_num += 1; - /* if paddr is NULL, assume contiguous memory */ - if (paddr == NULL) { + /* if iova is NULL, assume contiguous memory */ + if (iova == NULL) { start = 0; end = pg_sz * pg_num; - paddr_idx = pg_num; + iova_idx = pg_num; } else { - start = paddr[0]; - end = paddr[0] + pg_sz; - paddr_idx = 1; + start = iova[0]; + end = iova[0] + pg_sz; + iova_idx = 1; } while (elt_cnt < elt_num) { @@ -287,15 +300,15 @@ rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num, /* enough contiguous memory, add an object */ start += total_elt_sz; elt_cnt++; - } else if (paddr_idx < pg_num) { + } else if (iova_idx < pg_num) { /* no room to store one obj, add a page */ - if (end == paddr[paddr_idx]) { + if (end == iova[iova_idx]) { end += pg_sz; } else { - start = paddr[paddr_idx]; - end = paddr[paddr_idx] + pg_sz; + start = iova[iova_idx]; + end = iova[iova_idx] + pg_sz; } - paddr_idx++; + iova_idx++; } else { /* no more page, return how many elements fit */ @@ -303,7 +316,7 @@ rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num, } } - return (size_t)paddr_idx << pg_shift; + return (size_t)iova_idx << pg_shift; } /* free a memchunk allocated with rte_memzone_reserve() */ @@ -344,8 +357,8 @@ rte_mempool_free_memchunks(struct rte_mempool *mp) * on error. 
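[Editor's note] For illustration of the sizing change above (not part of the patch): when a handler advertises both MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS and MEMPOOL_F_CAPA_PHYS_CONTIG, rte_mempool_xmem_size() and rte_mempool_xmem_usage() size for one extra object, so a request for 1024 objects of total_elt_sz 2048 bytes is computed as 1025 * 2048 bytes before page rounding; the spare object absorbs the later alignment of the first object to a multiple of total_elt_sz.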
*/ int -rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr, - phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, +rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, + rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, void *opaque) { unsigned total_elt_sz; @@ -354,6 +367,11 @@ rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr, struct rte_mempool_memhdr *memhdr; int ret; + /* Notify memory area to mempool */ + ret = rte_mempool_ops_register_memory_area(mp, vaddr, iova, len); + if (ret != -ENOTSUP && ret < 0) + return ret; + /* create the internal ring if not already done */ if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) { ret = rte_mempool_ops_alloc(mp); @@ -368,29 +386,42 @@ rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr, total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; + /* Detect pool area has sufficient space for elements */ + if (mp->flags & MEMPOOL_F_CAPA_PHYS_CONTIG) { + if (len < total_elt_sz * mp->size) { + RTE_LOG(ERR, MEMPOOL, + "pool area %" PRIx64 " not enough\n", + (uint64_t)len); + return -ENOSPC; + } + } + memhdr = rte_zmalloc("MEMPOOL_MEMHDR", sizeof(*memhdr), 0); if (memhdr == NULL) return -ENOMEM; memhdr->mp = mp; memhdr->addr = vaddr; - memhdr->phys_addr = paddr; + memhdr->iova = iova; memhdr->len = len; memhdr->free_cb = free_cb; memhdr->opaque = opaque; - if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN) + if (mp->flags & MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS) + /* align object start address to a multiple of total_elt_sz */ + off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz); + else if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN) off = RTE_PTR_ALIGN_CEIL(vaddr, 8) - vaddr; else off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr; while (off + total_elt_sz <= len && mp->populated_size < mp->size) { off += mp->header_size; - if (paddr == RTE_BAD_PHYS_ADDR) + if (iova == RTE_BAD_IOVA) mempool_add_elem(mp, (char *)vaddr + off, - RTE_BAD_PHYS_ADDR); + RTE_BAD_IOVA); else - mempool_add_elem(mp, (char *)vaddr + off, paddr + off); + mempool_add_elem(mp, (char *)vaddr + off, iova + off); off += mp->elt_size + mp->trailer_size; i++; } @@ -404,12 +435,20 @@ rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr, return i; } +int +rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr, + phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, + void *opaque) +{ + return rte_mempool_populate_iova(mp, vaddr, paddr, len, free_cb, opaque); +} + /* Add objects in the pool, using a table of physical pages. Return the * number of objects added, or a negative value on error. 
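[Editor's note] A minimal migration sketch for callers of the renamed populate API above (not part of this patch; mz is assumed to be the memzone backing the chunk):

	/* Before: physical-address naming, now kept as a deprecated wrapper. */
	ret = rte_mempool_populate_phys(mp, mz->addr, mz->phys_addr, mz->len,
			rte_mempool_memchunk_mz_free, (void *)(uintptr_t)mz);

	/* After: IO-address naming, identical behaviour. */
	ret = rte_mempool_populate_iova(mp, mz->addr, mz->iova, mz->len,
			rte_mempool_memchunk_mz_free, (void *)(uintptr_t)mz);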
*/ int -rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr, - const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift, +rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr, + const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift, rte_mempool_memchunk_free_cb_t *free_cb, void *opaque) { uint32_t i, n; @@ -421,18 +460,18 @@ rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr, return -EEXIST; if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG) - return rte_mempool_populate_phys(mp, vaddr, RTE_BAD_PHYS_ADDR, + return rte_mempool_populate_iova(mp, vaddr, RTE_BAD_IOVA, pg_num * pg_sz, free_cb, opaque); for (i = 0; i < pg_num && mp->populated_size < mp->size; i += n) { /* populate with the largest group of contiguous pages */ for (n = 1; (i + n) < pg_num && - paddr[i + n - 1] + pg_sz == paddr[i + n]; n++) + iova[i + n - 1] + pg_sz == iova[i + n]; n++) ; - ret = rte_mempool_populate_phys(mp, vaddr + i * pg_sz, - paddr[i], n * pg_sz, free_cb, opaque); + ret = rte_mempool_populate_iova(mp, vaddr + i * pg_sz, + iova[i], n * pg_sz, free_cb, opaque); if (ret < 0) { rte_mempool_free_memchunks(mp); return ret; @@ -444,6 +483,15 @@ rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr, return cnt; } +int +rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift, + rte_mempool_memchunk_free_cb_t *free_cb, void *opaque) +{ + return rte_mempool_populate_iova_tab(mp, vaddr, paddr, pg_num, pg_shift, + free_cb, opaque); +} + /* Populate the mempool with a virtual area. Return the number of * objects added, or a negative value on error. */ @@ -452,7 +500,7 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr, size_t len, size_t pg_sz, rte_mempool_memchunk_free_cb_t *free_cb, void *opaque) { - phys_addr_t paddr; + rte_iova_t iova; size_t off, phys_len; int ret, cnt = 0; @@ -466,33 +514,30 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr, return -EINVAL; if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG) - return rte_mempool_populate_phys(mp, addr, RTE_BAD_PHYS_ADDR, + return rte_mempool_populate_iova(mp, addr, RTE_BAD_IOVA, len, free_cb, opaque); for (off = 0; off + pg_sz <= len && mp->populated_size < mp->size; off += phys_len) { - paddr = rte_mem_virt2phy(addr + off); - /* required for xen_dom0 to get the machine address */ - paddr = rte_mem_phy2mch(-1, paddr); + iova = rte_mem_virt2iova(addr + off); - if (paddr == RTE_BAD_PHYS_ADDR && rte_eal_has_hugepages()) { + if (iova == RTE_BAD_IOVA && rte_eal_has_hugepages()) { ret = -EINVAL; goto fail; } /* populate with the largest group of contiguous pages */ for (phys_len = pg_sz; off + phys_len < len; phys_len += pg_sz) { - phys_addr_t paddr_tmp; + rte_iova_t iova_tmp; - paddr_tmp = rte_mem_virt2phy(addr + off + phys_len); - paddr_tmp = rte_mem_phy2mch(-1, paddr_tmp); + iova_tmp = rte_mem_virt2iova(addr + off + phys_len); - if (paddr_tmp != paddr + phys_len) + if (iova_tmp != iova + phys_len) break; } - ret = rte_mempool_populate_phys(mp, addr + off, paddr, + ret = rte_mempool_populate_iova(mp, addr + off, iova, phys_len, free_cb, opaque); if (ret < 0) goto fail; @@ -515,23 +560,29 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr, int rte_mempool_populate_default(struct rte_mempool *mp) { - int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; + unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; char mz_name[RTE_MEMZONE_NAMESIZE]; const struct rte_memzone *mz; size_t size, 
total_elt_sz, align, pg_sz, pg_shift; - phys_addr_t paddr; + rte_iova_t iova; unsigned mz_id, n; + unsigned int mp_flags; int ret; /* mempool must not be populated */ if (mp->nb_mem_chunks != 0) return -EEXIST; - if (rte_xen_dom0_supported()) { - pg_sz = RTE_PGSIZE_2M; - pg_shift = rte_bsf32(pg_sz); - align = pg_sz; - } else if (rte_eal_has_hugepages()) { + /* Get mempool capabilities */ + mp_flags = 0; + ret = rte_mempool_ops_get_capabilities(mp, &mp_flags); + if ((ret < 0) && (ret != -ENOTSUP)) + return ret; + + /* update mempool capabilities */ + mp->flags |= mp_flags; + + if (rte_eal_has_hugepages()) { pg_shift = 0; /* not needed, zone is physically contiguous */ pg_sz = 0; align = RTE_CACHE_LINE_SIZE; @@ -543,7 +594,8 @@ rte_mempool_populate_default(struct rte_mempool *mp) total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) { - size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift); + size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift, + mp->flags); ret = snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id); @@ -564,13 +616,13 @@ rte_mempool_populate_default(struct rte_mempool *mp) } if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG) - paddr = RTE_BAD_PHYS_ADDR; + iova = RTE_BAD_IOVA; else - paddr = mz->phys_addr; + iova = mz->iova; - if (rte_eal_has_hugepages() && !rte_xen_dom0_supported()) - ret = rte_mempool_populate_phys(mp, mz->addr, - paddr, mz->len, + if (rte_eal_has_hugepages()) + ret = rte_mempool_populate_iova(mp, mz->addr, + iova, mz->len, rte_mempool_memchunk_mz_free, (void *)(uintptr_t)mz); else @@ -600,7 +652,8 @@ get_anon_size(const struct rte_mempool *mp) pg_sz = getpagesize(); pg_shift = rte_bsf32(pg_sz); total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; - size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift); + size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift, + mp->flags); return size; } @@ -742,7 +795,7 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size, struct rte_tailq_entry *te = NULL; const struct rte_memzone *mz = NULL; size_t mempool_size; - int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; + unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; struct rte_mempool_objsz objsz; unsigned lcore_id; int ret; @@ -922,7 +975,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, rte_mempool_ctor_t *mp_init, void *mp_init_arg, rte_mempool_obj_cb_t *obj_init, void *obj_init_arg, int socket_id, unsigned flags, void *vaddr, - const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift) + const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift) { struct rte_mempool *mp = NULL; int ret; @@ -934,7 +987,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, obj_init, obj_init_arg, socket_id, flags); /* check that we have both VA and PA */ - if (paddr == NULL) { + if (iova == NULL) { rte_errno = EINVAL; return NULL; } @@ -954,7 +1007,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, if (mp_init) mp_init(mp, mp_init_arg); - ret = rte_mempool_populate_phys_tab(mp, vaddr, paddr, pg_num, pg_shift, + ret = rte_mempool_populate_iova_tab(mp, vaddr, iova, pg_num, pg_shift, NULL, NULL); if (ret < 0 || ret != (int)mp->size) goto fail; @@ -1177,7 +1230,7 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) fprintf(f, "mempool <%s>@%p\n", mp->name, mp); fprintf(f, " flags=%x\n", mp->flags); fprintf(f, " pool=%p\n", mp->pool_data); 
- fprintf(f, " phys_addr=0x%" PRIx64 "\n", mp->mz->phys_addr); + fprintf(f, " iova=0x%" PRIx64 "\n", mp->mz->iova); fprintf(f, " nb_mem_chunks=%u\n", mp->nb_mem_chunks); fprintf(f, " size=%"PRIu32"\n", mp->size); fprintf(f, " populated_size=%"PRIu32"\n", mp->populated_size); diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index 76b5b3b1..721227f6 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -157,7 +157,11 @@ struct rte_mempool_objsz { struct rte_mempool_objhdr { STAILQ_ENTRY(rte_mempool_objhdr) next; /**< Next in list. */ struct rte_mempool *mp; /**< The mempool owning the object. */ - phys_addr_t physaddr; /**< Physical address of the object. */ + RTE_STD_C11 + union { + rte_iova_t iova; /**< IO address of the object. */ + phys_addr_t physaddr; /**< deprecated - Physical address of the object. */ + }; #ifdef RTE_LIBRTE_MEMPOOL_DEBUG uint64_t cookie; /**< Debug cookie. */ #endif @@ -203,7 +207,11 @@ struct rte_mempool_memhdr { STAILQ_ENTRY(rte_mempool_memhdr) next; /**< Next in list. */ struct rte_mempool *mp; /**< The mempool owning the chunk */ void *addr; /**< Virtual address of the chunk */ - phys_addr_t phys_addr; /**< Physical address of the chunk */ + RTE_STD_C11 + union { + rte_iova_t iova; /**< IO address of the chunk */ + phys_addr_t phys_addr; /**< Physical address of the chunk */ + }; size_t len; /**< length of the chunk */ rte_mempool_memchunk_free_cb_t *free_cb; /**< Free callback */ void *opaque; /**< Argument passed to the free callback */ @@ -226,7 +234,7 @@ struct rte_mempool { }; void *pool_config; /**< optional args for ops alloc. */ const struct rte_memzone *mz; /**< Memzone where pool is alloc'd. */ - int flags; /**< Flags of the mempool. */ + unsigned int flags; /**< Flags of the mempool. */ int socket_id; /**< Socket id passed at create. */ uint32_t size; /**< Max size of the mempool. */ uint32_t cache_size; @@ -265,6 +273,24 @@ struct rte_mempool { #define MEMPOOL_F_SC_GET 0x0008 /**< Default get is "single-consumer".*/ #define MEMPOOL_F_POOL_CREATED 0x0010 /**< Internal: pool is created. */ #define MEMPOOL_F_NO_PHYS_CONTIG 0x0020 /**< Don't need physically contiguous objs. */ +/** + * This capability flag is advertised by a mempool handler, if the whole + * memory area containing the objects must be physically contiguous. + * Note: This flag should not be passed by application. + */ +#define MEMPOOL_F_CAPA_PHYS_CONTIG 0x0040 +/** + * This capability flag is advertised by a mempool handler. Used for a case + * where mempool driver wants object start address(vaddr) aligned to block + * size(/ total element size). + * + * Note: + * - This flag should not be passed by application. + * Flag used for mempool driver only. + * - Mempool driver must also set MEMPOOL_F_CAPA_PHYS_CONTIG flag along with + * MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS. + */ +#define MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS 0x0080 /** * @internal When debug is enabled, store some statistics. @@ -389,6 +415,18 @@ typedef int (*rte_mempool_dequeue_t)(struct rte_mempool *mp, */ typedef unsigned (*rte_mempool_get_count)(const struct rte_mempool *mp); +/** + * Get the mempool capabilities. + */ +typedef int (*rte_mempool_get_capabilities_t)(const struct rte_mempool *mp, + unsigned int *flags); + +/** + * Notify new memory area to mempool. 
+ */ +typedef int (*rte_mempool_ops_register_memory_area_t) +(const struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len); + /** Structure defining mempool operations structure */ struct rte_mempool_ops { char name[RTE_MEMPOOL_OPS_NAMESIZE]; /**< Name of mempool ops struct. */ @@ -397,6 +435,14 @@ struct rte_mempool_ops { rte_mempool_enqueue_t enqueue; /**< Enqueue an object. */ rte_mempool_dequeue_t dequeue; /**< Dequeue an object. */ rte_mempool_get_count get_count; /**< Get qty of available objs. */ + /** + * Get the mempool capabilities + */ + rte_mempool_get_capabilities_t get_capabilities; + /** + * Notify new memory area to mempool + */ + rte_mempool_ops_register_memory_area_t register_memory_area; } __rte_cache_aligned; #define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */ @@ -509,6 +555,43 @@ unsigned rte_mempool_ops_get_count(const struct rte_mempool *mp); /** + * @internal wrapper for mempool_ops get_capabilities callback. + * + * @param mp [in] + * Pointer to the memory pool. + * @param flags [out] + * Pointer to the mempool flags. + * @return + * - 0: Success; The mempool driver has advertised his pool capabilities in + * flags param. + * - -ENOTSUP - doesn't support get_capabilities ops (valid case). + * - Otherwise, pool create fails. + */ +int +rte_mempool_ops_get_capabilities(const struct rte_mempool *mp, + unsigned int *flags); +/** + * @internal wrapper for mempool_ops register_memory_area callback. + * API to notify the mempool handler when a new memory area is added to pool. + * + * @param mp + * Pointer to the memory pool. + * @param vaddr + * Pointer to the buffer virtual address. + * @param iova + * Pointer to the buffer IO address. + * @param len + * Pool size. + * @return + * - 0: Success; + * - -ENOTSUP - doesn't support register_memory_area ops (valid error case). + * - Otherwise, rte_mempool_populate_phys fails thus pool create fails. + */ +int +rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, + char *vaddr, rte_iova_t iova, size_t len); + +/** * @internal wrapper for mempool_ops free callback. * * @param mp @@ -722,11 +805,10 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, * @param vaddr * Virtual address of the externally allocated memory buffer. * Will be used to store mempool objects. - * @param paddr - * Array of physical addresses of the pages that comprises given memory - * buffer. + * @param iova + * Array of IO addresses of the pages that comprises given memory buffer. * @param pg_num - * Number of elements in the paddr array. + * Number of elements in the iova array. * @param pg_shift * LOG2 of the physical pages size. * @return @@ -739,7 +821,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, rte_mempool_ctor_t *mp_init, void *mp_init_arg, rte_mempool_obj_cb_t *obj_init, void *obj_init_arg, int socket_id, unsigned flags, void *vaddr, - const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift); + const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift); /** * Create an empty mempool @@ -798,7 +880,7 @@ rte_mempool_free(struct rte_mempool *mp); * Add a virtually and physically contiguous memory chunk in the pool * where objects can be instantiated. * - * If the given physical address is unknown (paddr = RTE_BAD_PHYS_ADDR), + * If the given IO address is unknown (iova = RTE_BAD_IOVA), * the chunk doesn't need to be physically contiguous (only virtually), * and allocated objects may span two pages. 
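[Editor's note] A hypothetical driver-side sketch of the two new ops wrapped above (names are illustrative, not part of this patch); the callbacks would sit in the driver's struct rte_mempool_ops next to the usual alloc/free/enqueue/dequeue/get_count entries:

static int
example_get_capabilities(const struct rte_mempool *mp __rte_unused,
		unsigned int *flags)
{
	/* Advertise that the pool area must be IOVA-contiguous and that
	 * objects must start at multiples of the total element size.
	 */
	*flags |= MEMPOOL_F_CAPA_PHYS_CONTIG |
			MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS;
	return 0;
}

static int
example_register_memory_area(const struct rte_mempool *mp __rte_unused,
		char *vaddr __rte_unused, rte_iova_t iova, size_t len)
{
	/* e.g. program a hardware buffer manager with the chunk's IO address. */
	RTE_SET_USED(iova);
	RTE_SET_USED(len);
	return 0;
}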
* @@ -806,8 +888,8 @@ rte_mempool_free(struct rte_mempool *mp); * A pointer to the mempool structure. * @param vaddr * The virtual address of memory that should be used to store objects. - * @param paddr - * The physical address + * @param iova + * The IO address * @param len * The length of memory in bytes. * @param free_cb @@ -819,6 +901,11 @@ rte_mempool_free(struct rte_mempool *mp); * On error, the chunk is not added in the memory list of the * mempool and a negative errno is returned. */ +int rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, + rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, + void *opaque); + +__rte_deprecated int rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr, phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, void *opaque); @@ -827,18 +914,17 @@ int rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr, * Add physical memory for objects in the pool at init * * Add a virtually contiguous memory chunk in the pool where objects can - * be instantiated. The physical addresses corresponding to the virtual - * area are described in paddr[], pg_num, pg_shift. + * be instantiated. The IO addresses corresponding to the virtual + * area are described in iova[], pg_num, pg_shift. * * @param mp * A pointer to the mempool structure. * @param vaddr * The virtual address of memory that should be used to store objects. - * @param paddr - * An array of physical addresses of each page composing the virtual - * area. + * @param iova + * An array of IO addresses of each page composing the virtual area. * @param pg_num - * Number of elements in the paddr array. + * Number of elements in the iova array. * @param pg_shift * LOG2 of the physical pages size. * @param free_cb @@ -850,6 +936,11 @@ int rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr, * On error, the chunks are not added in the memory list of the * mempool and a negative errno is returned. */ +int rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr, + const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift, + rte_mempool_memchunk_free_cb_t *free_cb, void *opaque); + +__rte_deprecated int rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr, const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift, rte_mempool_memchunk_free_cb_t *free_cb, void *opaque); @@ -1034,13 +1125,10 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id) * positive. * @param cache * A pointer to a mempool cache structure. May be NULL if not needed. - * @param flags - * The flags used for the mempool creation. - * Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers. */ static __rte_always_inline void __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, - unsigned n, struct rte_mempool_cache *cache) + unsigned int n, struct rte_mempool_cache *cache) { void **cache_objs; @@ -1096,14 +1184,10 @@ ring_enqueue: * The number of objects to add in the mempool from the obj_table. * @param cache * A pointer to a mempool cache structure. May be NULL if not needed. - * @param flags - * The flags used for the mempool creation. - * Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers. 
*/ static __rte_always_inline void rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, - unsigned n, struct rte_mempool_cache *cache, - __rte_unused int flags) + unsigned int n, struct rte_mempool_cache *cache) { __mempool_check_cookies(mp, obj_table, n, 0); __mempool_generic_put(mp, obj_table, n, cache); @@ -1125,11 +1209,11 @@ rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, */ static __rte_always_inline void rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table, - unsigned n) + unsigned int n) { struct rte_mempool_cache *cache; cache = rte_mempool_default_cache(mp, rte_lcore_id()); - rte_mempool_generic_put(mp, obj_table, n, cache, mp->flags); + rte_mempool_generic_put(mp, obj_table, n, cache); } /** @@ -1160,16 +1244,13 @@ rte_mempool_put(struct rte_mempool *mp, void *obj) * The number of objects to get, must be strictly positive. * @param cache * A pointer to a mempool cache structure. May be NULL if not needed. - * @param flags - * The flags used for the mempool creation. - * Single-consumer (MEMPOOL_F_SC_GET flag) or multi-consumers. * @return * - >=0: Success; number of objects supplied. * - <0: Error; code of ring dequeue function. */ static __rte_always_inline int __mempool_generic_get(struct rte_mempool *mp, void **obj_table, - unsigned n, struct rte_mempool_cache *cache) + unsigned int n, struct rte_mempool_cache *cache) { int ret; uint32_t index, len; @@ -1241,16 +1322,13 @@ ring_dequeue: * The number of objects to get from mempool to obj_table. * @param cache * A pointer to a mempool cache structure. May be NULL if not needed. - * @param flags - * The flags used for the mempool creation. - * Single-consumer (MEMPOOL_F_SC_GET flag) or multi-consumers. * @return * - 0: Success; objects taken. * - -ENOENT: Not enough entries in the mempool; no object is retrieved. */ static __rte_always_inline int -rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n, - struct rte_mempool_cache *cache, __rte_unused int flags) +rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, + unsigned int n, struct rte_mempool_cache *cache) { int ret; ret = __mempool_generic_get(mp, obj_table, n, cache); @@ -1282,11 +1360,11 @@ rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n, * - -ENOENT: Not enough entries in the mempool; no object is retrieved. */ static __rte_always_inline int -rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) +rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned int n) { struct rte_mempool_cache *cache; cache = rte_mempool_default_cache(mp, rte_lcore_id()); - return rte_mempool_generic_get(mp, obj_table, n, cache, mp->flags); + return rte_mempool_generic_get(mp, obj_table, n, cache); } /** @@ -1383,24 +1461,29 @@ rte_mempool_empty(const struct rte_mempool *mp) } /** - * Return the physical address of elt, which is an element of the pool mp. + * Return the IO address of elt, which is an element of the pool mp. * - * @param mp - * A pointer to the mempool structure. * @param elt * A pointer (virtual address) to the element of the pool. * @return - * The physical address of the elt element. + * The IO address of the elt element. * If the mempool was created with MEMPOOL_F_NO_PHYS_CONTIG, the - * returned value is RTE_BAD_PHYS_ADDR. + * returned value is RTE_BAD_IOVA. 
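With the flags argument dropped, callers now pass only the object table, count and the cache they obtained for their lcore. A sketch of the updated calling convention (names illustrative):

#include <rte_lcore.h>
#include <rte_mempool.h>

static void
burst_roundtrip(struct rte_mempool *mp, void **objs, unsigned int n)
{
	struct rte_mempool_cache *cache;

	cache = rte_mempool_default_cache(mp, rte_lcore_id());
	if (rte_mempool_generic_get(mp, objs, n, cache) != 0)
		return;		/* -ENOENT: not enough objects available */

	/* ... use the n objects ... */

	rte_mempool_generic_put(mp, objs, n, cache);
}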
*/ -static inline phys_addr_t -rte_mempool_virt2phy(__rte_unused const struct rte_mempool *mp, const void *elt) +static inline rte_iova_t +rte_mempool_virt2iova(const void *elt) { const struct rte_mempool_objhdr *hdr; hdr = (const struct rte_mempool_objhdr *)RTE_PTR_SUB(elt, sizeof(*hdr)); - return hdr->physaddr; + return hdr->iova; +} + +__rte_deprecated +static inline phys_addr_t +rte_mempool_virt2phy(__rte_unused const struct rte_mempool *mp, const void *elt) +{ + return rte_mempool_virt2iova(elt); } /** @@ -1489,11 +1572,13 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags, * by rte_mempool_calc_obj_size(). * @param pg_shift * LOG2 of the physical pages size. If set to 0, ignore page boundaries. + * @param flags + * The mempool flags. * @return * Required memory size aligned at page boundary. */ size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, - uint32_t pg_shift); + uint32_t pg_shift, unsigned int flags); /** * Get the size of memory required to store mempool elements. @@ -1509,13 +1594,14 @@ size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, * @param total_elt_sz * The size of each element, including header and trailer, as returned * by rte_mempool_calc_obj_size(). - * @param paddr - * Array of physical addresses of the pages that comprises given memory - * buffer. + * @param iova + * Array of IO addresses of the pages that comprises given memory buffer. * @param pg_num - * Number of elements in the paddr array. + * Number of elements in the iova array. * @param pg_shift * LOG2 of the physical pages size. + * @param flags + * The mempool flags. * @return * On success, the number of bytes needed to store given number of * objects, aligned to the given page size. If the provided memory @@ -1523,8 +1609,8 @@ size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, * is the actual number of elements that can be stored in that buffer. */ ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, - size_t total_elt_sz, const phys_addr_t paddr[], uint32_t pg_num, - uint32_t pg_shift); + size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num, + uint32_t pg_shift, unsigned int flags); /** * Walk list of all memory pools diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c index 5f24de25..92b9f90c 100644 --- a/lib/librte_mempool/rte_mempool_ops.c +++ b/lib/librte_mempool/rte_mempool_ops.c @@ -37,6 +37,7 @@ #include <rte_mempool.h> #include <rte_errno.h> +#include <rte_dev.h> /* indirect jump table to support external memory pools. */ struct rte_mempool_ops_table rte_mempool_ops_table = { @@ -85,6 +86,8 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h) ops->enqueue = h->enqueue; ops->dequeue = h->dequeue; ops->get_count = h->get_count; + ops->get_capabilities = h->get_capabilities; + ops->register_memory_area = h->register_memory_area; rte_spinlock_unlock(&rte_mempool_ops_table.sl); @@ -123,6 +126,32 @@ rte_mempool_ops_get_count(const struct rte_mempool *mp) return ops->get_count(mp); } +/* wrapper to get external mempool capabilities. 
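rte_mempool_virt2phy() survives only as a deprecated wrapper; its mp argument was already unused, so the replacement takes the element pointer alone. A small sketch:

#include <inttypes.h>
#include <stdio.h>
#include <rte_mempool.h>

static int
print_obj_iova(struct rte_mempool *mp)
{
	void *obj;
	rte_iova_t iova;

	if (rte_mempool_get(mp, &obj) != 0)
		return -1;

	/* RTE_BAD_IOVA for pools created with MEMPOOL_F_NO_PHYS_CONTIG */
	iova = rte_mempool_virt2iova(obj);
	printf("obj %p -> iova 0x%" PRIx64 "\n", obj, (uint64_t)iova);

	rte_mempool_put(mp, obj);
	return 0;
}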
*/ +int +rte_mempool_ops_get_capabilities(const struct rte_mempool *mp, + unsigned int *flags) +{ + struct rte_mempool_ops *ops; + + ops = rte_mempool_get_ops(mp->ops_index); + + RTE_FUNC_PTR_OR_ERR_RET(ops->get_capabilities, -ENOTSUP); + return ops->get_capabilities(mp, flags); +} + +/* wrapper to notify new memory area to external mempool */ +int +rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr, + rte_iova_t iova, size_t len) +{ + struct rte_mempool_ops *ops; + + ops = rte_mempool_get_ops(mp->ops_index); + + RTE_FUNC_PTR_OR_ERR_RET(ops->register_memory_area, -ENOTSUP); + return ops->register_memory_area(mp, vaddr, iova, len); +} + /* sets mempool ops previously registered by rte_mempool_register_ops. */ int rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name, diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map index f9c07944..62b76f91 100644 --- a/lib/librte_mempool/rte_mempool_version.map +++ b/lib/librte_mempool/rte_mempool_version.map @@ -41,3 +41,13 @@ DPDK_16.07 { rte_mempool_set_ops_byname; } DPDK_2.0; + +DPDK_17.11 { + global: + + rte_mempool_ops_get_capabilities; + rte_mempool_ops_register_memory_area; + rte_mempool_populate_iova; + rte_mempool_populate_iova_tab; + +} DPDK_16.07; diff --git a/lib/librte_meter/Makefile b/lib/librte_meter/Makefile index 539bfddd..bfeb5d60 100644 --- a/lib/librte_meter/Makefile +++ b/lib/librte_meter/Makefile @@ -40,6 +40,7 @@ CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) LDLIBS += -lm +LDLIBS += -lrte_eal EXPORT_MAP := rte_meter_version.map diff --git a/lib/librte_metrics/Makefile b/lib/librte_metrics/Makefile index d4990e83..a6efba4a 100644 --- a/lib/librte_metrics/Makefile +++ b/lib/librte_metrics/Makefile @@ -35,6 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_metrics.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 +LDLIBS += -lrte_eal EXPORT_MAP := rte_metrics_version.map diff --git a/lib/librte_metrics/rte_metrics.c b/lib/librte_metrics/rte_metrics.c index b66a72bb..d9404001 100644 --- a/lib/librte_metrics/rte_metrics.c +++ b/lib/librte_metrics/rte_metrics.c @@ -115,7 +115,7 @@ rte_metrics_reg_name(const char *name) int rte_metrics_reg_names(const char * const *names, uint16_t cnt_names) { - struct rte_metrics_meta_s *entry; + struct rte_metrics_meta_s *entry = NULL; struct rte_metrics_data_s *stats; const struct rte_memzone *memzone; uint16_t idx_name; diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile index 56727c4d..50c358e5 100644 --- a/lib/librte_net/Makefile +++ b/lib/librte_net/Makefile @@ -34,6 +34,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_net.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 +LDLIBS += -lrte_mbuf -lrte_eal EXPORT_MAP := rte_net_version.map LIBABIVER := 1 @@ -42,7 +43,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c # install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h +SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_sctp.h rte_icmp.h rte_arp.h SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_ether.h rte_gre.h rte_net.h SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc.h diff --git a/lib/librte_net/rte_esp.h b/lib/librte_net/rte_esp.h new file mode 100644 index 00000000..e228af09 --- /dev/null +++ b/lib/librte_net/rte_esp.h @@ -0,0 +1,60 @@ +/*- + * BSD LICENSE + * + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. 
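Both wrappers treat a missing callback as -ENOTSUP, which the mempool code regards as "no special behaviour" rather than a failure. Callers that only want the optional information should therefore filter that value, for example:

#include <errno.h>
#include <rte_mempool.h>

static int
pool_capability_flags(const struct rte_mempool *mp, unsigned int *caps)
{
	int ret;

	*caps = 0;
	ret = rte_mempool_ops_get_capabilities(mp, caps);
	if (ret < 0 && ret != -ENOTSUP)
		return ret;	/* a real error from the handler */
	return 0;		/* -ENOTSUP: handler has no extra capabilities */
}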
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ESP_H_ +#define _RTE_ESP_H_ + +/** + * @file + * + * ESP-related defines + */ + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * ESP Header + */ +struct esp_hdr { + uint32_t spi; /**< Security Parameters Index */ + uint32_t seq; /**< packet sequence number */ +} __attribute__((__packed__)); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_ESP_H_ */ diff --git a/lib/librte_net/rte_ether.h b/lib/librte_net/rte_ether.h index 917d42a1..06d7b486 100644 --- a/lib/librte_net/rte_ether.h +++ b/lib/librte_net/rte_ether.h @@ -358,7 +358,7 @@ static inline int rte_vlan_strip(struct rte_mbuf *m) return -1; struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1); - m->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED; + m->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci); /* Copy ether header over rather than moving whole packet */ diff --git a/lib/librte_net/rte_net.c b/lib/librte_net/rte_net.c index a8c7aff9..a3ca0403 100644 --- a/lib/librte_net/rte_net.c +++ b/lib/librte_net/rte_net.c @@ -396,6 +396,7 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m, if ((layers & RTE_PTYPE_INNER_L2_MASK) == 0) return pkt_type; + hdr_lens->inner_l2_len = 0; if (proto == rte_cpu_to_be_16(ETHER_TYPE_TEB)) { eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy); if (unlikely(eh == NULL)) diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index 661fe322..0c1bf51a 100644 --- a/lib/librte_net/rte_net_crc.c +++ b/lib/librte_net/rte_net_crc.c @@ -205,8 +205,7 @@ rte_net_crc_calc(const void *data, } /* Select highest available crc algorithm as default one */ -static inline void __attribute__((constructor)) -rte_net_crc_init(void) +RTE_INIT(rte_net_crc_init) { enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR; diff --git a/lib/librte_pci/Makefile b/lib/librte_pci/Makefile new file mode 100644 index 00000000..fe213ea6 --- /dev/null +++ b/lib/librte_pci/Makefile @@ -0,0 +1,49 @@ +# BSD LICENSE +# +# Copyright(c) 2017 6WIND S.A. 
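The new rte_esp.h only defines the fixed 8-byte ESP header; both fields are carried in network byte order on the wire. A sketch of reading the SPI from a received packet, assuming the L2/L3 lengths are already known (for example via rte_net_get_ptype()) and the packet is known to carry ESP:

#include <rte_byteorder.h>
#include <rte_esp.h>
#include <rte_mbuf.h>

static uint32_t
esp_spi(const struct rte_mbuf *m, uint32_t l2_len, uint32_t l3_len)
{
	const struct esp_hdr *esp;

	/* ESP immediately follows the (outer) IP header in this sketch. */
	esp = rte_pktmbuf_mtod_offset(m, const struct esp_hdr *,
			l2_len + l3_len);
	return rte_be_to_cpu_32(esp->spi);
}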
+# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of 6WIND nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_pci.a + +CFLAGS := -I$(SRCDIR) $(CFLAGS) +CFLAGS += $(WERROR_FLAGS) -O3 +LDLIBS += -lrte_eal + +EXPORT_MAP := rte_pci_version.map + +LIBABIVER := 1 + +SRCS-$(CONFIG_RTE_LIBRTE_PCI) += rte_pci.c + +SYMLINK-$(CONFIG_RTE_LIBRTE_PCI)-include += rte_pci.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c new file mode 100644 index 00000000..0160fc1e --- /dev/null +++ b/lib/librte_pci/rte_pci.c @@ -0,0 +1,212 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright 2013-2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string.h> +#include <inttypes.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/queue.h> +#include <sys/mman.h> + +#include <rte_errno.h> +#include <rte_interrupts.h> +#include <rte_log.h> +#include <rte_bus.h> +#include <rte_per_lcore.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_string_fns.h> +#include <rte_common.h> + +#include "rte_pci.h" + +static inline const char * +get_u8_pciaddr_field(const char *in, void *_u8, char dlm) +{ + unsigned long val; + uint8_t *u8 = _u8; + char *end; + + errno = 0; + val = strtoul(in, &end, 16); + if (errno != 0 || end[0] != dlm || val > UINT8_MAX) { + errno = errno ? errno : EINVAL; + return NULL; + } + *u8 = (uint8_t)val; + return end + 1; +} + +static int +pci_bdf_parse(const char *input, struct rte_pci_addr *dev_addr) +{ + const char *in = input; + + dev_addr->domain = 0; + in = get_u8_pciaddr_field(in, &dev_addr->bus, ':'); + if (in == NULL) + return -EINVAL; + in = get_u8_pciaddr_field(in, &dev_addr->devid, '.'); + if (in == NULL) + return -EINVAL; + in = get_u8_pciaddr_field(in, &dev_addr->function, '\0'); + if (in == NULL) + return -EINVAL; + return 0; +} + +static int +pci_dbdf_parse(const char *input, struct rte_pci_addr *dev_addr) +{ + const char *in = input; + unsigned long val; + char *end; + + errno = 0; + val = strtoul(in, &end, 16); + if (errno != 0 || end[0] != ':' || val > UINT16_MAX) + return -EINVAL; + dev_addr->domain = (uint16_t)val; + in = end + 1; + in = get_u8_pciaddr_field(in, &dev_addr->bus, ':'); + if (in == NULL) + return -EINVAL; + in = get_u8_pciaddr_field(in, &dev_addr->devid, '.'); + if (in == NULL) + return -EINVAL; + in = get_u8_pciaddr_field(in, &dev_addr->function, '\0'); + if (in == NULL) + return -EINVAL; + return 0; +} + +int +eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr) +{ + return pci_bdf_parse(input, dev_addr); +} + +int +eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr) +{ + return pci_dbdf_parse(input, dev_addr); +} + +void +rte_pci_device_name(const struct rte_pci_addr *addr, + char *output, size_t size) +{ + RTE_VERIFY(size >= PCI_PRI_STR_SIZE); + RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT, + addr->domain, addr->bus, + addr->devid, addr->function) >= 0); +} + +int +rte_eal_compare_pci_addr(const struct rte_pci_addr *addr, + const struct rte_pci_addr *addr2) +{ + return rte_pci_addr_cmp(addr, addr2); +} + +int +rte_pci_addr_cmp(const struct rte_pci_addr *addr, + const struct rte_pci_addr *addr2) +{ + uint64_t dev_addr, dev_addr2; + + if ((addr == NULL) || (addr2 == NULL)) + return -1; + + dev_addr = ((uint64_t)addr->domain << 24) | + (addr->bus << 16) | (addr->devid << 8) | addr->function; + dev_addr2 = ((uint64_t)addr2->domain << 24) | + (addr2->bus << 16) | (addr2->devid << 8) | addr2->function; + + if (dev_addr > dev_addr2) + return 1; + else if (dev_addr < dev_addr2) + return -1; + else + return 0; +} + +int +rte_pci_addr_parse(const char *str, struct 
rte_pci_addr *addr) +{ + if (pci_bdf_parse(str, addr) == 0 || + pci_dbdf_parse(str, addr) == 0) + return 0; + return -1; +} + + +/* map a particular resource from a file */ +void * +pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, + int additional_flags) +{ + void *mapaddr; + + /* Map the PCI memory resource of device */ + mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, + MAP_SHARED | additional_flags, fd, offset); + if (mapaddr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", + __func__, fd, requested_addr, + (unsigned long)size, (unsigned long)offset, + strerror(errno), mapaddr); + } else + RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); + + return mapaddr; +} + +/* unmap a particular resource */ +void +pci_unmap_resource(void *requested_addr, size_t size) +{ + if (requested_addr == NULL) + return; + + /* Unmap the PCI memory resource of device */ + if (munmap(requested_addr, size)) { + RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n", + __func__, requested_addr, (unsigned long)size, + strerror(errno)); + } else + RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n", + requested_addr); +} diff --git a/lib/librte_pci/rte_pci.h b/lib/librte_pci/rte_pci.h new file mode 100644 index 00000000..4f2cd187 --- /dev/null +++ b/lib/librte_pci/rte_pci.h @@ -0,0 +1,263 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright 2013-2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_PCI_H_ +#define _RTE_PCI_H_ + +/** + * @file + * + * RTE PCI Library + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <limits.h> +#include <errno.h> +#include <sys/queue.h> +#include <stdint.h> +#include <inttypes.h> + +#include <rte_debug.h> +#include <rte_interrupts.h> + +/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ +#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 +#define PCI_PRI_STR_SIZE sizeof("XXXXXXXX:XX:XX.X") + +/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */ +#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 + +/** Nb. of values in PCI device identifier format string. */ +#define PCI_FMT_NVAL 4 + +/** Nb. of values in PCI resource format. */ +#define PCI_RESOURCE_FMT_NVAL 3 + +/** Maximum number of PCI resources. */ +#define PCI_MAX_RESOURCE 6 + +/** + * A structure describing an ID for a PCI driver. Each driver provides a + * table of these IDs for each device that it supports. + */ +struct rte_pci_id { + uint32_t class_id; /**< Class ID or RTE_CLASS_ANY_ID. */ + uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */ + uint16_t device_id; /**< Device ID or PCI_ANY_ID. */ + uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */ + uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */ +}; + +/** + * A structure describing the location of a PCI device. + */ +struct rte_pci_addr { + uint32_t domain; /**< Device domain */ + uint8_t bus; /**< Device bus */ + uint8_t devid; /**< Device ID */ + uint8_t function; /**< Device function. */ +}; + +/** Any PCI device identifier (vendor, device, ...) */ +#define PCI_ANY_ID (0xffff) +#define RTE_CLASS_ANY_ID (0xffffff) + +/** + * A structure describing a PCI mapping. + */ +struct pci_map { + void *addr; + char *path; + uint64_t offset; + uint64_t size; + uint64_t phaddr; +}; + +struct pci_msix_table { + int bar_index; + uint32_t offset; + uint32_t size; +}; + +/** + * A structure describing a mapped PCI resource. + * For multi-process we need to reproduce all PCI mappings in secondary + * processes, so save them in a tailq. + */ +struct mapped_pci_resource { + TAILQ_ENTRY(mapped_pci_resource) next; + + struct rte_pci_addr pci_addr; + char path[PATH_MAX]; + int nb_maps; + struct pci_map maps[PCI_MAX_RESOURCE]; + struct pci_msix_table msix_table; +}; + + +/** mapped pci device list */ +TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); + +/** + * @deprecated + * Utility function to produce a PCI Bus-Device-Function value + * given a string representation. Assumes that the BDF is provided without + * a domain prefix (i.e. domain returned is always 0) + * + * @param input + * The input string to be parsed. Should have the format XX:XX.X + * @param dev_addr + * The PCI Bus-Device-Function address to be returned. + * Domain will always be returned as 0 + * @return + * 0 on success, negative on error. + */ +int eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr); + +/** + * @deprecated + * Utility function to produce a PCI Bus-Device-Function value + * given a string representation. Assumes that the BDF is provided including + * a domain prefix. + * + * @param input + * The input string to be parsed. Should have the format XXXX:XX:XX.X + * @param dev_addr + * The PCI Bus-Device-Function address to be returned + * @return + * 0 on success, negative on error. 
+ */ +int eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr); + +/** + * Utility function to write a pci device name, this device name can later be + * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_* + * BDF helpers. + * + * @param addr + * The PCI Bus-Device-Function address + * @param output + * The output buffer string + * @param size + * The output buffer size + */ +void rte_pci_device_name(const struct rte_pci_addr *addr, + char *output, size_t size); + +/** + * @deprecated + * Utility function to compare two PCI device addresses. + * + * @param addr + * The PCI Bus-Device-Function address to compare + * @param addr2 + * The PCI Bus-Device-Function address to compare + * @return + * 0 on equal PCI address. + * Positive on addr is greater than addr2. + * Negative on addr is less than addr2, or error. + */ +int rte_eal_compare_pci_addr(const struct rte_pci_addr *addr, + const struct rte_pci_addr *addr2); + +/** + * Utility function to compare two PCI device addresses. + * + * @param addr + * The PCI Bus-Device-Function address to compare + * @param addr2 + * The PCI Bus-Device-Function address to compare + * @return + * 0 on equal PCI address. + * Positive on addr is greater than addr2. + * Negative on addr is less than addr2, or error. + */ +int rte_pci_addr_cmp(const struct rte_pci_addr *addr, + const struct rte_pci_addr *addr2); + + +/** + * Utility function to parse a string into a PCI location. + * + * @param str + * The string to parse + * @param addr + * The reference to the structure where the location + * is stored. + * @return + * 0 on success + * <0 otherwise + */ +int rte_pci_addr_parse(const char *str, struct rte_pci_addr *addr); + +/** + * Map a particular resource from a file. + * + * @param requested_addr + * The starting address for the new mapping range. + * @param fd + * The file descriptor. + * @param offset + * The offset for the mapping range. + * @param size + * The size for the mapping range. + * @param additional_flags + * The additional flags for the mapping range. + * @return + * - On success, the function returns a pointer to the mapped area. + * - On error, the value MAP_FAILED is returned. + */ +void *pci_map_resource(void *requested_addr, int fd, off_t offset, + size_t size, int additional_flags); + +/** + * Unmap a particular resource. + * + * @param requested_addr + * The address for the unmapping range. + * @param size + * The size for the unmapping range. 
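rte_pci_addr_parse() accepts either form, with or without the domain, which is what lets it replace the two deprecated eal_parse_pci_* helpers; rte_pci_device_name() formats the address back into the canonical domain-qualified form. A round-trip sketch:

#include <stdio.h>
#include <rte_pci.h>

static int
print_canonical_bdf(const char *str)
{
	struct rte_pci_addr addr;
	char name[PCI_PRI_STR_SIZE];

	if (rte_pci_addr_parse(str, &addr) != 0)
		return -1;	/* neither "XX:XX.X" nor "XXXX:XX:XX.X" */

	rte_pci_device_name(&addr, name, sizeof(name));
	printf("%s\n", name);	/* e.g. "00:01.0" becomes "0000:00:01.0" */
	return 0;
}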
+ */ +void pci_unmap_resource(void *requested_addr, size_t size); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PCI_H_ */ diff --git a/lib/librte_pci/rte_pci_version.map b/lib/librte_pci/rte_pci_version.map new file mode 100644 index 00000000..15d93d95 --- /dev/null +++ b/lib/librte_pci/rte_pci_version.map @@ -0,0 +1,15 @@ +DPDK_17.11 { + global: + + eal_parse_pci_BDF; + eal_parse_pci_DomBDF; + rte_pci_addr_cmp; + rte_pci_addr_parse; + rte_pci_device_name; + pci_map_resource; + pci_unmap_resource; + rte_eal_compare_pci_addr; + rte_pci_device_name; + + local: *; +}; diff --git a/lib/librte_pdump/Makefile b/lib/librte_pdump/Makefile index 1c03bcbb..11c3e4e9 100644 --- a/lib/librte_pdump/Makefile +++ b/lib/librte_pdump/Makefile @@ -37,10 +37,11 @@ LIB = librte_pdump.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 CFLAGS += -D_GNU_SOURCE LDLIBS += -lpthread +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev EXPORT_MAP := rte_pdump_version.map -LIBABIVER := 1 +LIBABIVER := 2 # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_PDUMP) := rte_pdump.c diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c index 729e79a3..e6182d35 100644 --- a/lib/librte_pdump/rte_pdump.c +++ b/lib/librte_pdump/rte_pdump.c @@ -207,7 +207,7 @@ pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params) } static uint16_t -pdump_rx(uint8_t port __rte_unused, uint16_t qidx __rte_unused, +pdump_rx(uint16_t port __rte_unused, uint16_t qidx __rte_unused, struct rte_mbuf **pkts, uint16_t nb_pkts, uint16_t max_pkts __rte_unused, void *user_params) @@ -217,7 +217,7 @@ pdump_rx(uint8_t port __rte_unused, uint16_t qidx __rte_unused, } static uint16_t -pdump_tx(uint8_t port __rte_unused, uint16_t qidx __rte_unused, +pdump_tx(uint16_t port __rte_unused, uint16_t qidx __rte_unused, struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params) { pdump_copy(pkts, nb_pkts, user_params); @@ -225,7 +225,7 @@ pdump_tx(uint8_t port __rte_unused, uint16_t qidx __rte_unused, } static int -pdump_regitser_rx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue, +pdump_regitser_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, struct rte_ring *ring, struct rte_mempool *mp, uint16_t operation) { @@ -279,7 +279,7 @@ pdump_regitser_rx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue, } static int -pdump_regitser_tx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue, +pdump_regitser_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, struct rte_ring *ring, struct rte_mempool *mp, uint16_t operation) { @@ -337,7 +337,7 @@ static int set_pdump_rxtx_cbs(struct pdump_request *p) { uint16_t nb_rx_q = 0, nb_tx_q = 0, end_q, queue; - uint8_t port; + uint16_t port; int ret = 0; uint32_t flags; uint16_t operation; @@ -764,7 +764,7 @@ pdump_validate_flags(uint32_t flags) } static int -pdump_validate_port(uint8_t port, char *name) +pdump_validate_port(uint16_t port, char *name) { int ret = 0; @@ -828,7 +828,7 @@ pdump_prepare_client_request(char *device, uint16_t queue, } int -rte_pdump_enable(uint8_t port, uint16_t queue, uint32_t flags, +rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags, struct rte_ring *ring, struct rte_mempool *mp, void *filter) @@ -876,7 +876,7 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue, } int -rte_pdump_disable(uint8_t port, uint16_t queue, uint32_t flags) +rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags) { int ret = 0; char name[DEVICE_ID_SIZE]; diff --git a/lib/librte_pdump/rte_pdump.h 
b/lib/librte_pdump/rte_pdump.h index ba6e39b0..4ec0a106 100644 --- a/lib/librte_pdump/rte_pdump.h +++ b/lib/librte_pdump/rte_pdump.h @@ -113,7 +113,7 @@ rte_pdump_uninit(void); */ int -rte_pdump_enable(uint8_t port, uint16_t queue, uint32_t flags, +rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags, struct rte_ring *ring, struct rte_mempool *mp, void *filter); @@ -136,7 +136,7 @@ rte_pdump_enable(uint8_t port, uint16_t queue, uint32_t flags, */ int -rte_pdump_disable(uint8_t port, uint16_t queue, uint32_t flags); +rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags); /** * Enables packet capturing on given device id and queue. diff --git a/lib/librte_pipeline/Makefile b/lib/librte_pipeline/Makefile index 7a835fd5..a8285738 100644 --- a/lib/librte_pipeline/Makefile +++ b/lib/librte_pipeline/Makefile @@ -38,6 +38,8 @@ LIB = librte_pipeline.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_table +LDLIBS += -lrte_port EXPORT_MAP := rte_pipeline_version.map diff --git a/lib/librte_pipeline/rte_pipeline.c b/lib/librte_pipeline/rte_pipeline.c index 7f8fbac5..8611a88b 100644 --- a/lib/librte_pipeline/rte_pipeline.c +++ b/lib/librte_pipeline/rte_pipeline.c @@ -36,7 +36,6 @@ #include <rte_common.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_cycles.h> #include <rte_prefetch.h> #include <rte_branch_prediction.h> diff --git a/lib/librte_port/Makefile b/lib/librte_port/Makefile index 76629a13..139dc59a 100644 --- a/lib/librte_port/Makefile +++ b/lib/librte_port/Makefile @@ -38,6 +38,11 @@ LIB = librte_port.a ifeq ($(CONFIG_RTE_PORT_PCAP),y) LDLIBS += -lpcap endif +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev +LDLIBS += -lrte_ip_frag -lrte_sched +ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) +LDLIBS += -lrte_kni +endif CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) diff --git a/lib/librte_port/rte_port_ethdev.c b/lib/librte_port/rte_port_ethdev.c index d5c5fba5..4ed10f27 100644 --- a/lib/librte_port/rte_port_ethdev.c +++ b/lib/librte_port/rte_port_ethdev.c @@ -60,7 +60,7 @@ struct rte_port_ethdev_reader { struct rte_port_in_stats stats; uint16_t queue_id; - uint8_t port_id; + uint16_t port_id; }; static void * @@ -156,7 +156,7 @@ struct rte_port_ethdev_writer { uint16_t tx_buf_count; uint64_t bsz_mask; uint16_t queue_id; - uint8_t port_id; + uint16_t port_id; }; static void * @@ -337,7 +337,7 @@ struct rte_port_ethdev_writer_nodrop { uint64_t bsz_mask; uint64_t n_retries; uint16_t queue_id; - uint8_t port_id; + uint16_t port_id; }; static void * diff --git a/lib/librte_port/rte_port_ethdev.h b/lib/librte_port/rte_port_ethdev.h index 201a79e4..f5ed9ab2 100644 --- a/lib/librte_port/rte_port_ethdev.h +++ b/lib/librte_port/rte_port_ethdev.h @@ -54,7 +54,7 @@ extern "C" { /** ethdev_reader port parameters */ struct rte_port_ethdev_reader_params { /** NIC RX port ID */ - uint8_t port_id; + uint16_t port_id; /** NIC RX queue ID */ uint16_t queue_id; @@ -66,7 +66,7 @@ extern struct rte_port_in_ops rte_port_ethdev_reader_ops; /** ethdev_writer port parameters */ struct rte_port_ethdev_writer_params { /** NIC RX port ID */ - uint8_t port_id; + uint16_t port_id; /** NIC RX queue ID */ uint16_t queue_id; @@ -82,7 +82,7 @@ extern struct rte_port_out_ops rte_port_ethdev_writer_ops; /** ethdev_writer_nodrop port parameters */ struct rte_port_ethdev_writer_nodrop_params { /** NIC RX port ID */ - uint8_t port_id; + uint16_t port_id; /** NIC RX queue ID */ uint16_t queue_id; diff --git a/lib/librte_power/Makefile b/lib/librte_power/Makefile 
index 06cd10e8..1b1491d7 100644 --- a/lib/librte_power/Makefile +++ b/lib/librte_power/Makefile @@ -35,6 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_power.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing +LDLIBS += -lrte_eal EXPORT_MAP := rte_power_version.map diff --git a/lib/librte_power/channel_commands.h b/lib/librte_power/channel_commands.h index 383897bf..f0f5f0a2 100644 --- a/lib/librte_power/channel_commands.h +++ b/lib/librte_power/channel_commands.h @@ -39,6 +39,7 @@ extern "C" { #endif #include <stdint.h> +#include <stdbool.h> /* Maximum number of channels per VM */ #define CHANNEL_CMDS_MAX_VM_CHANNELS 64 @@ -46,17 +47,60 @@ extern "C" { /* Valid Commands */ #define CPU_POWER 1 #define CPU_POWER_CONNECT 2 +#define PKT_POLICY 3 /* CPU Power Command Scaling */ #define CPU_POWER_SCALE_UP 1 #define CPU_POWER_SCALE_DOWN 2 #define CPU_POWER_SCALE_MAX 3 #define CPU_POWER_SCALE_MIN 4 +#define CPU_POWER_ENABLE_TURBO 5 +#define CPU_POWER_DISABLE_TURBO 6 +#define HOURS 24 + +#define MAX_VFS 10 +#define VM_MAX_NAME_SZ 32 + +#define MAX_VCPU_PER_VM 8 + +struct t_boost_status { + bool tbEnabled; +}; + +struct timer_profile { + int busy_hours[HOURS]; + int quiet_hours[HOURS]; + int hours_to_use_traffic_profile[HOURS]; +}; + +enum workload {HIGH, MEDIUM, LOW}; +enum policy_to_use { + TRAFFIC, + TIME, + WORKLOAD +}; + +struct traffic { + uint32_t min_packet_thresh; + uint32_t avg_max_packet_thresh; + uint32_t max_max_packet_thresh; +}; struct channel_packet { uint64_t resource_id; /**< core_num, device */ uint32_t unit; /**< scale down/up/min/max */ uint32_t command; /**< Power, IO, etc */ + char vm_name[VM_MAX_NAME_SZ]; + + uint64_t vfid[MAX_VFS]; + int nb_mac_to_monitor; + struct traffic traffic_policy; + uint8_t vcpu_to_control[MAX_VCPU_PER_VM]; + uint8_t num_vcpu; + struct timer_profile timer_policy; + enum workload workload; + enum policy_to_use policy_to_use; + struct t_boost_status t_boost_status; }; diff --git a/lib/librte_power/guest_channel.c b/lib/librte_power/guest_channel.c index 85c92fab..fa5de0f5 100644 --- a/lib/librte_power/guest_channel.c +++ b/lib/librte_power/guest_channel.c @@ -148,6 +148,13 @@ guest_channel_send_msg(struct channel_packet *pkt, unsigned lcore_id) return 0; } +int rte_power_guest_channel_send_msg(struct channel_packet *pkt, + unsigned int lcore_id) +{ + return guest_channel_send_msg(pkt, lcore_id); +} + + void guest_channel_host_disconnect(unsigned lcore_id) { diff --git a/lib/librte_power/guest_channel.h b/lib/librte_power/guest_channel.h index 9e18af52..741339ca 100644 --- a/lib/librte_power/guest_channel.h +++ b/lib/librte_power/guest_channel.h @@ -81,6 +81,21 @@ void guest_channel_host_disconnect(unsigned lcore_id); */ int guest_channel_send_msg(struct channel_packet *pkt, unsigned lcore_id); +/** + * Send a message contained in pkt over the Virtio-Serial to the host endpoint. + * + * @param pkt + * Pointer to a populated struct channel_packet + * + * @param lcore_id + * lcore_id. + * + * @return + * - 0 on success. + * - Negative on error. 
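The exported rte_power_guest_channel_send_msg() is a thin public wrapper around guest_channel_send_msg(), so a guest application can hand a fully populated struct channel_packet to the host. The headers involved (channel_commands.h, guest_channel.h) are internal to librte_power, so this sketch assumes they are reachable on the include path; the field values are purely illustrative.

#include <stdio.h>
#include <string.h>

#include "channel_commands.h"
#include "guest_channel.h"

static int
send_workload_policy(unsigned int lcore_id)
{
	struct channel_packet pkt;

	memset(&pkt, 0, sizeof(pkt));
	pkt.command = PKT_POLICY;
	pkt.policy_to_use = WORKLOAD;
	pkt.workload = MEDIUM;
	snprintf(pkt.vm_name, VM_MAX_NAME_SZ, "%s", "vm0");

	return rte_power_guest_channel_send_msg(&pkt, lcore_id);
}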
+ */ +int rte_power_guest_channel_send_msg(struct channel_packet *pkt, + unsigned int lcore_id); #ifdef __cplusplus } diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c index 998ed1c9..b327a865 100644 --- a/lib/librte_power/rte_power.c +++ b/lib/librte_power/rte_power.c @@ -50,6 +50,9 @@ rte_power_freq_change_t rte_power_freq_up = NULL; rte_power_freq_change_t rte_power_freq_down = NULL; rte_power_freq_change_t rte_power_freq_max = NULL; rte_power_freq_change_t rte_power_freq_min = NULL; +rte_power_freq_change_t rte_power_turbo_status; +rte_power_freq_change_t rte_power_freq_enable_turbo; +rte_power_freq_change_t rte_power_freq_disable_turbo; int rte_power_set_env(enum power_management_env env) @@ -65,6 +68,9 @@ rte_power_set_env(enum power_management_env env) rte_power_freq_down = rte_power_acpi_cpufreq_freq_down; rte_power_freq_min = rte_power_acpi_cpufreq_freq_min; rte_power_freq_max = rte_power_acpi_cpufreq_freq_max; + rte_power_turbo_status = rte_power_acpi_turbo_status; + rte_power_freq_enable_turbo = rte_power_acpi_enable_turbo; + rte_power_freq_disable_turbo = rte_power_acpi_disable_turbo; } else if (env == PM_ENV_KVM_VM) { rte_power_freqs = rte_power_kvm_vm_freqs; rte_power_get_freq = rte_power_kvm_vm_get_freq; @@ -73,6 +79,9 @@ rte_power_set_env(enum power_management_env env) rte_power_freq_down = rte_power_kvm_vm_freq_down; rte_power_freq_min = rte_power_kvm_vm_freq_min; rte_power_freq_max = rte_power_kvm_vm_freq_max; + rte_power_turbo_status = rte_power_kvm_vm_turbo_status; + rte_power_freq_enable_turbo = rte_power_kvm_vm_enable_turbo; + rte_power_freq_disable_turbo = rte_power_kvm_vm_disable_turbo; } else { RTE_LOG(ERR, POWER, "Invalid Power Management Environment(%d) set\n", env); diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h index 67e0ec02..b17b7a53 100644 --- a/lib/librte_power/rte_power.h +++ b/lib/librte_power/rte_power.h @@ -236,6 +236,47 @@ extern rte_power_freq_change_t rte_power_freq_max; */ extern rte_power_freq_change_t rte_power_freq_min; +/** + * Query the Turbo Boost status of a specific lcore. + * Review each environments specific documentation for usage.. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 Turbo Boost is enabled for this lcore. + * - 0 Turbo Boost is disabled for this lcore. + * - Negative on error. + */ +extern rte_power_freq_change_t rte_power_turbo_status; + +/** + * Enable Turbo Boost for this lcore. + * Review each environments specific documentation for usage.. + * + * @param lcore_id + * lcore id. + * + * @return + * - 0 on success. + * - Negative on error. + */ +extern rte_power_freq_change_t rte_power_freq_enable_turbo; + +/** + * Disable Turbo Boost for this lcore. + * Review each environments specific documentation for usage.. + * + * @param lcore_id + * lcore id. + * + * @return + * - 0 on success. + * - Negative on error. 
+ */ +extern rte_power_freq_change_t rte_power_freq_disable_turbo; + + #ifdef __cplusplus } #endif diff --git a/lib/librte_power/rte_power_acpi_cpufreq.c b/lib/librte_power/rte_power_acpi_cpufreq.c index a56c9b59..01ac5acb 100644 --- a/lib/librte_power/rte_power_acpi_cpufreq.c +++ b/lib/librte_power/rte_power_acpi_cpufreq.c @@ -87,6 +87,14 @@ #define POWER_SYSFILE_SETSPEED \ "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed" +/* + * MSR related + */ +#define PLATFORM_INFO 0x0CE +#define TURBO_RATIO_LIMIT 0x1AD +#define IA32_PERF_CTL 0x199 +#define CORE_TURBO_DISABLE_BIT ((uint64_t)1<<32) + enum power_state { POWER_IDLE = 0, POWER_ONGOING, @@ -105,6 +113,8 @@ struct rte_power_info { char governor_ori[32]; /**< Original governor name */ uint32_t curr_idx; /**< Freq index in freqs array */ volatile uint32_t state; /**< Power in use state */ + uint16_t turbo_available; /**< Turbo Boost available */ + uint16_t turbo_enable; /**< Turbo Boost enable/disable */ } __rte_cache_aligned; static struct rte_power_info lcore_power_info[RTE_MAX_LCORE]; @@ -244,6 +254,18 @@ power_get_available_freqs(struct rte_power_info *pi) POWER_CONVERT_TO_DECIMAL); } + if ((pi->freqs[0]-1000) == pi->freqs[1]) { + pi->turbo_available = 1; + pi->turbo_enable = 1; + POWER_DEBUG_TRACE("Lcore %u Can do Turbo Boost\n", + pi->lcore_id); + } else { + pi->turbo_available = 0; + pi->turbo_enable = 0; + POWER_DEBUG_TRACE("Turbo Boost not available on Lcore %u\n", + pi->lcore_id); + } + ret = 0; POWER_DEBUG_TRACE("%d frequencie(s) of lcore %u are available\n", count, pi->lcore_id); @@ -525,7 +547,17 @@ rte_power_acpi_cpufreq_freq_max(unsigned lcore_id) } /* Frequencies in the array are from high to low. */ - return set_freq_internal(&lcore_power_info[lcore_id], 0); + if (lcore_power_info[lcore_id].turbo_available) { + if (lcore_power_info[lcore_id].turbo_enable) + /* Set to Turbo */ + return set_freq_internal( + &lcore_power_info[lcore_id], 0); + else + /* Set to max non-turbo */ + return set_freq_internal( + &lcore_power_info[lcore_id], 1); + } else + return set_freq_internal(&lcore_power_info[lcore_id], 0); } int @@ -543,3 +575,80 @@ rte_power_acpi_cpufreq_freq_min(unsigned lcore_id) /* Frequencies in the array are from high to low. 
*/ return set_freq_internal(pi, pi->nb_freqs - 1); } + + +int +rte_power_acpi_turbo_status(unsigned int lcore_id) +{ + struct rte_power_info *pi; + + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, POWER, "Invalid lcore ID\n"); + return -1; + } + + pi = &lcore_power_info[lcore_id]; + + return pi->turbo_enable; +} + + +int +rte_power_acpi_enable_turbo(unsigned int lcore_id) +{ + struct rte_power_info *pi; + + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, POWER, "Invalid lcore ID\n"); + return -1; + } + + pi = &lcore_power_info[lcore_id]; + + if (pi->turbo_available) + pi->turbo_enable = 1; + else { + pi->turbo_enable = 0; + RTE_LOG(ERR, POWER, + "Failed to enable turbo on lcore %u\n", + lcore_id); + return -1; + } + + /* Max may have changed, so call to max function */ + if (rte_power_acpi_cpufreq_freq_max(lcore_id) < 0) { + RTE_LOG(ERR, POWER, + "Failed to set frequency of lcore %u to max\n", + lcore_id); + return -1; + } + + return 0; +} + +int +rte_power_acpi_disable_turbo(unsigned int lcore_id) +{ + struct rte_power_info *pi; + + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, POWER, "Invalid lcore ID\n"); + return -1; + } + + pi = &lcore_power_info[lcore_id]; + + pi->turbo_enable = 0; + + if ((pi->turbo_available) && (pi->curr_idx <= 1)) { + /* Try to set freq to max by default coming out of turbo */ + if (rte_power_acpi_cpufreq_freq_max(lcore_id) < 0) { + RTE_LOG(ERR, POWER, + "Failed to set frequency of lcore %u to max\n", + lcore_id); + return -1; + } + } + + return 0; +} diff --git a/lib/librte_power/rte_power_acpi_cpufreq.h b/lib/librte_power/rte_power_acpi_cpufreq.h index 68578e9b..eee0ca0a 100644 --- a/lib/librte_power/rte_power_acpi_cpufreq.h +++ b/lib/librte_power/rte_power_acpi_cpufreq.h @@ -185,6 +185,46 @@ int rte_power_acpi_cpufreq_freq_max(unsigned lcore_id); */ int rte_power_acpi_cpufreq_freq_min(unsigned lcore_id); +/** + * Get the turbo status of a specific lcore. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 Turbo Boost is enabled on this lcore. + * - 0 Turbo Boost is disabled on this lcore. + * - Negative on error. + */ +int rte_power_acpi_turbo_status(unsigned int lcore_id); + +/** + * Enable Turbo Boost on a specific lcore. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 0 Turbo Boost is enabled successfully on this lcore. + * - Negative on error. + */ +int rte_power_acpi_enable_turbo(unsigned int lcore_id); + +/** + * Disable Turbo Boost on a specific lcore. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 0 Turbo Boost disabled successfully on this lcore. + * - Negative on error. 
+ */ +int rte_power_acpi_disable_turbo(unsigned int lcore_id); + #ifdef __cplusplus } #endif diff --git a/lib/librte_power/rte_power_kvm_vm.c b/lib/librte_power/rte_power_kvm_vm.c index a1badf34..99060625 100644 --- a/lib/librte_power/rte_power_kvm_vm.c +++ b/lib/librte_power/rte_power_kvm_vm.c @@ -134,3 +134,22 @@ rte_power_kvm_vm_freq_min(unsigned lcore_id) { return send_msg(lcore_id, CPU_POWER_SCALE_MIN); } + +int +rte_power_kvm_vm_turbo_status(__attribute__((unused)) unsigned int lcore_id) +{ + RTE_LOG(ERR, POWER, "rte_power_turbo_status is not implemented for Virtual Machine Power Management\n"); + return -ENOTSUP; +} + +int +rte_power_kvm_vm_enable_turbo(unsigned int lcore_id) +{ + return send_msg(lcore_id, CPU_POWER_ENABLE_TURBO); +} + +int +rte_power_kvm_vm_disable_turbo(unsigned int lcore_id) +{ + return send_msg(lcore_id, CPU_POWER_DISABLE_TURBO); +} diff --git a/lib/librte_power/rte_power_kvm_vm.h b/lib/librte_power/rte_power_kvm_vm.h index dcbc878a..9af41d64 100644 --- a/lib/librte_power/rte_power_kvm_vm.h +++ b/lib/librte_power/rte_power_kvm_vm.h @@ -172,8 +172,41 @@ int rte_power_kvm_vm_freq_max(unsigned lcore_id); */ int rte_power_kvm_vm_freq_min(unsigned lcore_id); +/** + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * -ENOTSUP + */ +int rte_power_kvm_vm_turbo_status(unsigned int lcore_id); + +/** + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int rte_power_kvm_vm_enable_turbo(unsigned int lcore_id); + +/** + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int rte_power_kvm_vm_disable_turbo(unsigned int lcore_id); #ifdef __cplusplus } #endif - #endif diff --git a/lib/librte_power/rte_power_version.map b/lib/librte_power/rte_power_version.map index db75ff3e..96dc42ec 100644 --- a/lib/librte_power/rte_power_version.map +++ b/lib/librte_power/rte_power_version.map @@ -16,3 +16,13 @@ DPDK_2.0 { local: *; }; + +DPDK_17.11 { + global: + + rte_power_guest_channel_send_msg; + rte_power_freq_disable_turbo; + rte_power_freq_enable_turbo; + rte_power_turbo_status; + +} DPDK_2.0;
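Applications reach the new Turbo Boost controls through the generic rte_power_* hooks, which rte_power_set_env() now points at the ACPI or KVM implementations shown above; the KVM path cannot report status and returns -ENOTSUP there. A sketch, assuming rte_power_init() has already been called for the lcore:

#include <rte_power.h>

static int
boost_lcore(unsigned int lcore_id)
{
	if (rte_power_freq_enable_turbo(lcore_id) < 0)
		return -1;	/* turbo not available on this lcore/environment */

	/* 1: enabled, 0: disabled, negative: error or -ENOTSUP (KVM VM) */
	return rte_power_turbo_status(lcore_id);
}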
\ No newline at end of file diff --git a/lib/librte_reorder/Makefile b/lib/librte_reorder/Makefile index 4e44e72f..5d38d712 100644 --- a/lib/librte_reorder/Makefile +++ b/lib/librte_reorder/Makefile @@ -36,6 +36,7 @@ LIB = librte_reorder.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf EXPORT_MAP := rte_reorder_version.map diff --git a/lib/librte_reorder/rte_reorder.c b/lib/librte_reorder/rte_reorder.c index 010dff68..867775da 100644 --- a/lib/librte_reorder/rte_reorder.c +++ b/lib/librte_reorder/rte_reorder.c @@ -36,7 +36,6 @@ #include <rte_log.h> #include <rte_mbuf.h> -#include <rte_memzone.h> #include <rte_eal_memconfig.h> #include <rte_errno.h> #include <rte_malloc.h> diff --git a/lib/librte_ring/Makefile b/lib/librte_ring/Makefile index 3e2f4b87..e34d9d95 100644 --- a/lib/librte_ring/Makefile +++ b/lib/librte_ring/Makefile @@ -35,6 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_ring.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 +LDLIBS += -lrte_eal EXPORT_MAP := rte_ring_version.map diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h index 8f5a4937..5e9b3b7b 100644 --- a/lib/librte_ring/rte_ring.h +++ b/lib/librte_ring/rte_ring.h @@ -174,7 +174,7 @@ struct rte_ring { * ring space will be wasted. */ #define RING_F_EXACT_SZ 0x0004 -#define RTE_RING_SZ_MASK (unsigned)(0x0fffffff) /**< Ring size mask */ +#define RTE_RING_SZ_MASK (0x7fffffffU) /**< Ring size mask */ /* @internal defines for passing to the enqueue dequeue worker functions */ #define __IS_SP 1 diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index 18274e73..04785f72 100644 --- a/lib/librte_sched/Makefile +++ b/lib/librte_sched/Makefile @@ -43,6 +43,8 @@ CFLAGS_rte_red.o := -D_GNU_SOURCE LDLIBS += -lm LDLIBS += -lrt +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_net +LDLIBS += -lrte_timer EXPORT_MAP := rte_sched_version.map @@ -55,7 +57,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_reciprocal.c # install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_sched_common.h rte_red.h rte_approx.h SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_reciprocal.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index b7cba110..a2d0d685 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -42,9 +42,9 @@ #include <rte_prefetch.h> #include <rte_branch_prediction.h> #include <rte_mbuf.h> +#include <rte_bitmap.h> #include "rte_sched.h" -#include "rte_bitmap.h" #include "rte_sched_common.h" #include "rte_approx.h" #include "rte_reciprocal.h" diff --git a/lib/librte_security/Makefile b/lib/librte_security/Makefile new file mode 100644 index 00000000..bb93ec33 --- /dev/null +++ b/lib/librte_security/Makefile @@ -0,0 +1,54 @@ +# BSD LICENSE +# +# Copyright(c) 2017 Intel Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_security.a + +# library version +LIBABIVER := 1 + +# build flags +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_eal -lrte_mempool + +# library source files +SRCS-y += rte_security.c + +# export include files +SYMLINK-y-include += rte_security.h +SYMLINK-y-include += rte_security_driver.h + +# versioning export map +EXPORT_MAP := rte_security_version.map + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_security/rte_security.c b/lib/librte_security/rte_security.c new file mode 100644 index 00000000..1227fca8 --- /dev/null +++ b/lib/librte_security/rte_security.c @@ -0,0 +1,149 @@ +/*- + * BSD LICENSE + * + * Copyright 2017 NXP. + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of NXP nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <rte_malloc.h> +#include <rte_dev.h> + +#include "rte_security.h" +#include "rte_security_driver.h" + +struct rte_security_session * +rte_security_session_create(struct rte_security_ctx *instance, + struct rte_security_session_conf *conf, + struct rte_mempool *mp) +{ + struct rte_security_session *sess = NULL; + + if (conf == NULL) + return NULL; + + RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_create, NULL); + + if (rte_mempool_get(mp, (void *)&sess)) + return NULL; + + if (instance->ops->session_create(instance->device, conf, sess, mp)) { + rte_mempool_put(mp, (void *)sess); + return NULL; + } + instance->sess_cnt++; + + return sess; +} + +int +rte_security_session_update(struct rte_security_ctx *instance, + struct rte_security_session *sess, + struct rte_security_session_conf *conf) +{ + RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_update, -ENOTSUP); + return instance->ops->session_update(instance->device, sess, conf); +} + +int +rte_security_session_stats_get(struct rte_security_ctx *instance, + struct rte_security_session *sess, + struct rte_security_stats *stats) +{ + RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_stats_get, -ENOTSUP); + return instance->ops->session_stats_get(instance->device, sess, stats); +} + +int +rte_security_session_destroy(struct rte_security_ctx *instance, + struct rte_security_session *sess) +{ + int ret; + struct rte_mempool *mp = rte_mempool_from_obj(sess); + + RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_destroy, -ENOTSUP); + + if (instance->sess_cnt) + instance->sess_cnt--; + + ret = instance->ops->session_destroy(instance->device, sess); + if (!ret) + rte_mempool_put(mp, (void *)sess); + + return ret; +} + +int +rte_security_set_pkt_metadata(struct rte_security_ctx *instance, + struct rte_security_session *sess, + struct rte_mbuf *m, void *params) +{ + RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->set_pkt_metadata, -ENOTSUP); + return instance->ops->set_pkt_metadata(instance->device, + sess, m, params); +} + +const struct rte_security_capability * +rte_security_capabilities_get(struct rte_security_ctx *instance) +{ + RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->capabilities_get, NULL); + return instance->ops->capabilities_get(instance->device); +} + +const struct rte_security_capability * +rte_security_capability_get(struct rte_security_ctx *instance, + struct rte_security_capability_idx *idx) +{ + const struct rte_security_capability *capabilities; + const struct rte_security_capability *capability; + uint16_t i = 0; + + RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->capabilities_get, NULL); + capabilities = instance->ops->capabilities_get(instance->device); + + if (capabilities == NULL) + return NULL; + + while ((capability = &capabilities[i++])->action + != RTE_SECURITY_ACTION_TYPE_NONE) { + if (capability->action == idx->action && + capability->protocol == idx->protocol) { + if (idx->protocol == RTE_SECURITY_PROTOCOL_IPSEC) { + if (capability->ipsec.proto == + idx->ipsec.proto && + capability->ipsec.mode == + idx->ipsec.mode && + capability->ipsec.direction == + idx->ipsec.direction) + return capability; + } + } + } + + return NULL; +} diff --git a/lib/librte_security/rte_security.h b/lib/librte_security/rte_security.h new file mode 100644 index 00000000..7e687d29 --- /dev/null +++ b/lib/librte_security/rte_security.h @@ -0,0 +1,529 @@ +/*- + * BSD LICENSE + * + * Copyright 2017 NXP. + * Copyright(c) 2017 Intel Corporation. All rights reserved. 
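rte_security_capability_get() walks the driver's capability array up to the RTE_SECURITY_ACTION_TYPE_NONE terminator and, for IPsec, also matches protocol, mode and direction. A lookup sketch against a device's security context (obtained from the crypto or ethernet device as described in the header below); the inline-crypto action constant is assumed here since the action enum is defined further down the header, and the layout of struct rte_security_capability_idx is inferred from the matching code above:

#include <rte_security.h>

static int
dev_supports_esp_tunnel_ingress(struct rte_security_ctx *ctx)
{
	struct rte_security_capability_idx idx = {
		.action = RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO, /* assumed */
		.protocol = RTE_SECURITY_PROTOCOL_IPSEC,
		.ipsec = {
			.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
			.mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
			.direction = RTE_SECURITY_IPSEC_SA_DIR_INGRESS,
		},
	};

	return rte_security_capability_get(ctx, &idx) != NULL;
}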
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of NXP nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_SECURITY_H_ +#define _RTE_SECURITY_H_ + +/** + * @file rte_security.h + * @b EXPERIMENTAL: this API may change without prior notice + * + * RTE Security Common Definitions + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> + +#include <rte_common.h> +#include <rte_crypto.h> +#include <rte_mbuf.h> +#include <rte_memory.h> +#include <rte_mempool.h> + +/** IPSec protocol mode */ +enum rte_security_ipsec_sa_mode { + RTE_SECURITY_IPSEC_SA_MODE_TRANSPORT, + /**< IPSec Transport mode */ + RTE_SECURITY_IPSEC_SA_MODE_TUNNEL, + /**< IPSec Tunnel mode */ +}; + +/** IPSec Protocol */ +enum rte_security_ipsec_sa_protocol { + RTE_SECURITY_IPSEC_SA_PROTO_AH, + /**< AH protocol */ + RTE_SECURITY_IPSEC_SA_PROTO_ESP, + /**< ESP protocol */ +}; + +/** IPSEC tunnel type */ +enum rte_security_ipsec_tunnel_type { + RTE_SECURITY_IPSEC_TUNNEL_IPV4, + /**< Outer header is IPv4 */ + RTE_SECURITY_IPSEC_TUNNEL_IPV6, + /**< Outer header is IPv6 */ +}; + +/** + * Security context for crypto/eth devices + * + * Security instance for each driver to register security operations. + * The application can get the security context from the crypto/eth device id + * using the APIs rte_cryptodev_get_sec_ctx()/rte_eth_dev_get_sec_ctx() + * This structure is used to identify the device(crypto/eth) for which the + * security operations need to be performed. + */ +struct rte_security_ctx { + void *device; + /**< Crypto/ethernet device attached */ + struct rte_security_ops *ops; + /**< Pointer to security ops for the device */ + uint16_t sess_cnt; + /**< Number of sessions attached to this context */ +}; + +/** + * IPSEC tunnel parameters + * + * These parameters are used to build outbound tunnel headers. 
+ */ +struct rte_security_ipsec_tunnel_param { + enum rte_security_ipsec_tunnel_type type; + /**< Tunnel type: IPv4 or IPv6 */ + RTE_STD_C11 + union { + struct { + struct in_addr src_ip; + /**< IPv4 source address */ + struct in_addr dst_ip; + /**< IPv4 destination address */ + uint8_t dscp; + /**< IPv4 Differentiated Services Code Point */ + uint8_t df; + /**< IPv4 Don't Fragment bit */ + uint8_t ttl; + /**< IPv4 Time To Live */ + } ipv4; + /**< IPv4 header parameters */ + struct { + struct in6_addr src_addr; + /**< IPv6 source address */ + struct in6_addr dst_addr; + /**< IPv6 destination address */ + uint8_t dscp; + /**< IPv6 Differentiated Services Code Point */ + uint32_t flabel; + /**< IPv6 flow label */ + uint8_t hlimit; + /**< IPv6 hop limit */ + } ipv6; + /**< IPv6 header parameters */ + }; +}; + +/** + * IPsec Security Association option flags + */ +struct rte_security_ipsec_sa_options { + /**< Extended Sequence Numbers (ESN) + * + * * 1: Use extended (64 bit) sequence numbers + * * 0: Use normal sequence numbers + */ + uint32_t esn : 1; + + /**< UDP encapsulation + * + * * 1: Do UDP encapsulation/decapsulation so that IPSEC packets can + * traverse through NAT boxes. + * * 0: No UDP encapsulation + */ + uint32_t udp_encap : 1; + + /**< Copy DSCP bits + * + * * 1: Copy IPv4 or IPv6 DSCP bits from inner IP header to + * the outer IP header in encapsulation, and vice versa in + * decapsulation. + * * 0: Do not change DSCP field. + */ + uint32_t copy_dscp : 1; + + /**< Copy IPv6 Flow Label + * + * * 1: Copy IPv6 flow label from inner IPv6 header to the + * outer IPv6 header. + * * 0: Outer header is not modified. + */ + uint32_t copy_flabel : 1; + + /**< Copy IPv4 Don't Fragment bit + * + * * 1: Copy the DF bit from the inner IPv4 header to the outer + * IPv4 header. + * * 0: Outer header is not modified. + */ + uint32_t copy_df : 1; + + /**< Decrement inner packet Time To Live (TTL) field + * + * * 1: In tunnel mode, decrement inner packet IPv4 TTL or + * IPv6 Hop Limit after tunnel decapsulation, or before tunnel + * encapsulation. + * * 0: Inner packet is not modified. + */ + uint32_t dec_ttl : 1; +}; + +/** IPSec security association direction */ +enum rte_security_ipsec_sa_direction { + RTE_SECURITY_IPSEC_SA_DIR_EGRESS, + /**< Encrypt and generate digest */ + RTE_SECURITY_IPSEC_SA_DIR_INGRESS, + /**< Verify digest and decrypt */ +}; + +/** + * IPsec security association configuration data. + * + * This structure contains data required to create an IPsec SA security session. + */ +struct rte_security_ipsec_xform { + uint32_t spi; + /**< SA security parameter index */ + uint32_t salt; + /**< SA salt */ + struct rte_security_ipsec_sa_options options; + /**< various SA options */ + enum rte_security_ipsec_sa_direction direction; + /**< IPSec SA Direction - Egress/Ingress */ + enum rte_security_ipsec_sa_protocol proto; + /**< IPsec SA Protocol - AH/ESP */ + enum rte_security_ipsec_sa_mode mode; + /**< IPsec SA Mode - transport/tunnel */ + struct rte_security_ipsec_tunnel_param tunnel; + /**< Tunnel parameters, NULL for transport mode */ +}; + +/** + * MACsec security session configuration + */ +struct rte_security_macsec_xform { + /** To be Filled */ +}; + +/** + * Security session action type. 
+ */ +enum rte_security_session_action_type { + RTE_SECURITY_ACTION_TYPE_NONE, + /**< No security actions */ + RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO, + /**< Crypto processing for security protocol is processed inline + * during transmission + */ + RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL, + /**< All security protocol processing is performed inline during + * transmission + */ + RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL + /**< All security protocol processing including crypto is performed + * on a lookaside accelerator + */ +}; + +/** Security session protocol definition */ +enum rte_security_session_protocol { + RTE_SECURITY_PROTOCOL_IPSEC, + /**< IPsec Protocol */ + RTE_SECURITY_PROTOCOL_MACSEC, + /**< MACSec Protocol */ +}; + +/** + * Security session configuration + */ +struct rte_security_session_conf { + enum rte_security_session_action_type action_type; + /**< Type of action to be performed on the session */ + enum rte_security_session_protocol protocol; + /**< Security protocol to be configured */ + union { + struct rte_security_ipsec_xform ipsec; + struct rte_security_macsec_xform macsec; + }; + /**< Configuration parameters for security session */ + struct rte_crypto_sym_xform *crypto_xform; + /**< Security Session Crypto Transformations */ +}; + +struct rte_security_session { + void *sess_private_data; + /**< Private session material */ +}; + +/** + * Create security session as specified by the session configuration + * + * @param instance security instance + * @param conf session configuration parameters + * @param mp mempool to allocate session objects from + * @return + * - On success, pointer to session + * - On failure, NULL + */ +struct rte_security_session * +rte_security_session_create(struct rte_security_ctx *instance, + struct rte_security_session_conf *conf, + struct rte_mempool *mp); + +/** + * Update security session as specified by the session configuration + * + * @param instance security instance + * @param sess session to update parameters + * @param conf update configuration parameters + * @return + * - On success returns 0 + * - On failure return errno + */ +int +rte_security_session_update(struct rte_security_ctx *instance, + struct rte_security_session *sess, + struct rte_security_session_conf *conf); + +/** + * Free security session header and the session private data and + * return it to its original mempool. + * + * @param instance security instance + * @param sess security session to freed + * + * @return + * - 0 if successful. + * - -EINVAL if session is NULL. + * - -EBUSY if not all device private data has been freed. + */ +int +rte_security_session_destroy(struct rte_security_ctx *instance, + struct rte_security_session *sess); + +/** + * Updates the buffer with device-specific defined metadata + * + * @param instance security instance + * @param sess security session + * @param mb packet mbuf to set metadata on. + * @param params device-specific defined parameters + * required for metadata + * + * @return + * - On success, zero. + * - On failure, a negative value. 
+ */ +int +rte_security_set_pkt_metadata(struct rte_security_ctx *instance, + struct rte_security_session *sess, + struct rte_mbuf *mb, void *params); + +/** + * Attach a session to a symmetric crypto operation + * + * @param sym_op crypto operation + * @param sess security session + */ +static inline int +__rte_security_attach_session(struct rte_crypto_sym_op *sym_op, + struct rte_security_session *sess) +{ + sym_op->sec_session = sess; + + return 0; +} + +static inline void * +get_sec_session_private_data(const struct rte_security_session *sess) +{ + return sess->sess_private_data; +} + +static inline void +set_sec_session_private_data(struct rte_security_session *sess, + void *private_data) +{ + sess->sess_private_data = private_data; +} + +/** + * Attach a session to a crypto operation. + * This API is needed only in case of RTE_SECURITY_SESS_CRYPTO_PROTO_OFFLOAD + * For other rte_security_session_action_type, ol_flags in rte_mbuf may be + * defined to perform security operations. + * + * @param op crypto operation + * @param sess security session + */ +static inline int +rte_security_attach_session(struct rte_crypto_op *op, + struct rte_security_session *sess) +{ + if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) + return -EINVAL; + + op->sess_type = RTE_CRYPTO_OP_SECURITY_SESSION; + + return __rte_security_attach_session(op->sym, sess); +} + +struct rte_security_macsec_stats { + uint64_t reserved; +}; + +struct rte_security_ipsec_stats { + uint64_t reserved; + +}; + +struct rte_security_stats { + enum rte_security_session_protocol protocol; + /**< Security protocol to be configured */ + + union { + struct rte_security_macsec_stats macsec; + struct rte_security_ipsec_stats ipsec; + }; +}; + +/** + * Get security session statistics + * + * @param instance security instance + * @param sess security session + * @param stats statistics + * @return + * - On success return 0 + * - On failure errno + */ +int +rte_security_session_stats_get(struct rte_security_ctx *instance, + struct rte_security_session *sess, + struct rte_security_stats *stats); + +/** + * Security capability definition + */ +struct rte_security_capability { + enum rte_security_session_action_type action; + /**< Security action type*/ + enum rte_security_session_protocol protocol; + /**< Security protocol */ + RTE_STD_C11 + union { + struct { + enum rte_security_ipsec_sa_protocol proto; + /**< IPsec SA protocol */ + enum rte_security_ipsec_sa_mode mode; + /**< IPsec SA mode */ + enum rte_security_ipsec_sa_direction direction; + /**< IPsec SA direction */ + struct rte_security_ipsec_sa_options options; + /**< IPsec SA supported options */ + } ipsec; + /**< IPsec capability */ + struct { + /* To be Filled */ + } macsec; + /**< MACsec capability */ + }; + + const struct rte_cryptodev_capabilities *crypto_capabilities; + /**< Corresponding crypto capabilities for security capability */ + + uint32_t ol_flags; + /**< Device offload flags */ +}; + +#define RTE_SECURITY_TX_OLOAD_NEED_MDATA 0x00000001 +/**< HW needs metadata update, see rte_security_set_pkt_metadata(). + */ + +#define RTE_SECURITY_TX_HW_TRAILER_OFFLOAD 0x00000002 +/**< HW constructs trailer of packets + * Transmitted packets will have the trailer added to them + * by hardawre. The next protocol field will be based on + * the mbuf->inner_esp_next_proto field. 
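
The two transmit-side hooks above serve different action types: rte_security_attach_session() links a session to a lookaside symmetric crypto operation, while rte_security_set_pkt_metadata() is applied to the mbuf itself when the device advertises RTE_SECURITY_TX_OLOAD_NEED_MDATA for inline offload. A hedged sketch of both paths; op, sess, sec_ctx and the mbuf are assumed to exist, and the NULL params argument is a simplification:

#include <rte_security.h>

/* Lookaside path: tie the session to a symmetric crypto op before enqueue.
 * Returns a negative value if op is not RTE_CRYPTO_OP_TYPE_SYMMETRIC. */
static inline int
prepare_lookaside_op(struct rte_crypto_op *op,
		     struct rte_security_session *sess)
{
	return rte_security_attach_session(op, sess);
}

/* Inline path: let the PMD stamp device-specific metadata on the mbuf when
 * the matching capability's ol_flags contain RTE_SECURITY_TX_OLOAD_NEED_MDATA. */
static inline int
prepare_inline_mbuf(struct rte_security_ctx *sec_ctx,
		    struct rte_security_session *sess,
		    struct rte_mbuf *m)
{
	return rte_security_set_pkt_metadata(sec_ctx, sess, m, NULL);
}
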
+ */ +#define RTE_SECURITY_RX_HW_TRAILER_OFFLOAD 0x00010000 +/**< HW removes trailer of packets + * Received packets have no trailer, the next protocol field + * is supplied in the mbuf->inner_esp_next_proto field. + * Inner packet is not modified. + */ + +/** + * Security capability index used to query a security instance for a specific + * security capability + */ +struct rte_security_capability_idx { + enum rte_security_session_action_type action; + enum rte_security_session_protocol protocol; + + union { + struct { + enum rte_security_ipsec_sa_protocol proto; + enum rte_security_ipsec_sa_mode mode; + enum rte_security_ipsec_sa_direction direction; + } ipsec; + }; +}; + +/** + * Returns array of security instance capabilities + * + * @param instance Security instance. + * + * @return + * - Returns array of security capabilities. + * - Return NULL if no capabilities available. + */ +const struct rte_security_capability * +rte_security_capabilities_get(struct rte_security_ctx *instance); + +/** + * Query if a specific capability is available on security instance + * + * @param instance security instance. + * @param idx security capability index to match against + * + * @return + * - Returns pointer to security capability on match of capability + * index criteria. + * - Return NULL if the capability not matched on security instance. + */ +const struct rte_security_capability * +rte_security_capability_get(struct rte_security_ctx *instance, + struct rte_security_capability_idx *idx); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_SECURITY_H_ */ diff --git a/lib/librte_security/rte_security_driver.h b/lib/librte_security/rte_security_driver.h new file mode 100644 index 00000000..997fbe79 --- /dev/null +++ b/lib/librte_security/rte_security_driver.h @@ -0,0 +1,156 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * Copyright 2017 NXP. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
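
The capability pair declared at the end of rte_security.h lets an application probe, before creating a session, whether a given action/protocol combination (and, for IPsec, a specific proto/mode/direction) is supported. A small sketch of such a probe, assuming the security context was fetched from the device as in the earlier session example:

#include <rte_security.h>

/* Returns 1 if the device behind sec_ctx advertises inline-crypto support
 * for ESP in tunnel mode on the egress side, 0 otherwise. Sketch only. */
static int
has_inline_esp_tunnel_egress(struct rte_security_ctx *sec_ctx)
{
	struct rte_security_capability_idx idx = {
		.action = RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO,
		.protocol = RTE_SECURITY_PROTOCOL_IPSEC,
		.ipsec = {
			.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
			.mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
			.direction = RTE_SECURITY_IPSEC_SA_DIR_EGRESS,
		},
	};

	return rte_security_capability_get(sec_ctx, &idx) != NULL;
}
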
+ */ + +#ifndef _RTE_SECURITY_DRIVER_H_ +#define _RTE_SECURITY_DRIVER_H_ + +/** + * @file rte_security_driver.h + * @b EXPERIMENTAL: this API may change without prior notice + * + * RTE Security Common Definitions + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "rte_security.h" + +/** + * Configure a security session on a device. + * + * @param device Crypto/eth device pointer + * @param conf Security session configuration + * @param sess Pointer to Security private session structure + * @param mp Mempool where the private session is allocated + * + * @return + * - Returns 0 if private session structure have been created successfully. + * - Returns -EINVAL if input parameters are invalid. + * - Returns -ENOTSUP if crypto device does not support the crypto transform. + * - Returns -ENOMEM if the private session could not be allocated. + */ +typedef int (*security_session_create_t)(void *device, + struct rte_security_session_conf *conf, + struct rte_security_session *sess, + struct rte_mempool *mp); + +/** + * Free driver private session data. + * + * @param dev Crypto/eth device pointer + * @param sess Security session structure + */ +typedef int (*security_session_destroy_t)(void *device, + struct rte_security_session *sess); + +/** + * Update driver private session data. + * + * @param device Crypto/eth device pointer + * @param sess Pointer to Security private session structure + * @param conf Security session configuration + * + * @return + * - Returns 0 if private session structure have been updated successfully. + * - Returns -EINVAL if input parameters are invalid. + * - Returns -ENOTSUP if crypto device does not support the crypto transform. + */ +typedef int (*security_session_update_t)(void *device, + struct rte_security_session *sess, + struct rte_security_session_conf *conf); +/** + * Get stats from the PMD. + * + * @param device Crypto/eth device pointer + * @param sess Pointer to Security private session structure + * @param stats Security stats of the driver + * + * @return + * - Returns 0 if private session structure have been updated successfully. + * - Returns -EINVAL if session parameters are invalid. + */ +typedef int (*security_session_stats_get_t)(void *device, + struct rte_security_session *sess, + struct rte_security_stats *stats); + +/** + * Update the mbuf with provided metadata. + * + * @param sess Security session structure + * @param mb Packet buffer + * @param mt Metadata + * + * @return + * - Returns 0 if metadata updated successfully. + * - Returns -ve value for errors. + */ +typedef int (*security_set_pkt_metadata_t)(void *device, + struct rte_security_session *sess, struct rte_mbuf *m, + void *params); + +/** + * Get security capabilities of the device. + * + * @param device crypto/eth device pointer + * + * @return + * - Returns rte_security_capability pointer on success. + * - Returns NULL on error. + */ +typedef const struct rte_security_capability *(*security_capabilities_get_t)( + void *device); + +/** Security operations function pointer table */ +struct rte_security_ops { + security_session_create_t session_create; + /**< Configure a security session. */ + security_session_update_t session_update; + /**< Update a security session. */ + security_session_stats_get_t session_stats_get; + /**< Get security session statistics. */ + security_session_destroy_t session_destroy; + /**< Clear a security sessions private data. */ + security_set_pkt_metadata_t set_pkt_metadata; + /**< Update mbuf metadata. 
*/ + security_capabilities_get_t capabilities_get; + /**< Get security capabilities. */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_SECURITY_DRIVER_H_ */ diff --git a/lib/librte_security/rte_security_version.map b/lib/librte_security/rte_security_version.map new file mode 100644 index 00000000..e12c04b2 --- /dev/null +++ b/lib/librte_security/rte_security_version.map @@ -0,0 +1,14 @@ +EXPERIMENTAL { + global: + + rte_security_attach_session; + rte_security_capabilities_get; + rte_security_capability_get; + rte_security_session_create; + rte_security_session_destroy; + rte_security_session_stats_get; + rte_security_session_update; + rte_security_set_pkt_metadata; + + local: *; +}; diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile index 8ddc8804..2e32fbf1 100644 --- a/lib/librte_table/Makefile +++ b/lib/librte_table/Makefile @@ -38,10 +38,15 @@ LIB = librte_table.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_port +LDLIBS += -lrte_lpm -lrte_hash +ifeq ($(CONFIG_RTE_LIBRTE_ACL),y) +LDLIBS += -lrte_acl +endif EXPORT_MAP := rte_table_version.map -LIBABIVER := 2 +LIBABIVER := 3 # # all source are stored in SRCS-y diff --git a/lib/librte_table/rte_table_hash.h b/lib/librte_table/rte_table_hash.h index 57505a6f..15f1902b 100644 --- a/lib/librte_table/rte_table_hash.h +++ b/lib/librte_table/rte_table_hash.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,8 +45,6 @@ extern "C" { * These tables use the exact match criterion to uniquely associate data to * lookup keys. * - * Use-cases: Flow classification table, Address Resolution Protocol (ARP) table - * * Hash table types: * 1. Entry add strategy on bucket full: * a. Least Recently Used (LRU): One of the existing keys in the bucket is @@ -59,7 +57,7 @@ extern "C" { * to the bucket, it also becomes the new MRU key. When a key needs to * be picked and dropped, the most likely candidate for drop, i.e. the * current LRU key, is always picked. The LRU logic requires maintaining - * specific data structures per each bucket. + * specific data structures per each bucket. Use-cases: flow cache, etc. * b. Extendible bucket (ext): The bucket is extended with space for 4 more * keys. This is done by allocating additional memory at table init time, * which is used to create a pool of free keys (the size of this pool is @@ -73,20 +71,8 @@ extern "C" { * first group of 4 keys, the search continues beyond the first group of * 4 keys, potentially until all keys in this bucket are examined. The * extendible bucket logic requires maintaining specific data structures - * per table and per each bucket. - * 2. Key signature computation: - * a. Pre-computed key signature: The key lookup operation is split between - * two CPU cores. The first CPU core (typically the CPU core performing - * packet RX) extracts the key from the input packet, computes the key - * signature and saves both the key and the key signature in the packet - * buffer as packet meta-data. The second CPU core reads both the key and - * the key signature from the packet meta-data and performs the bucket - * search step of the key lookup operation. - * b. 
Key signature computed on lookup (do-sig): The same CPU core reads - * the key from the packet meta-data, uses it to compute the key - * signature and also performs the bucket search step of the key lookup - * operation. - * 3. Key size: + * per table and per each bucket. Use-cases: flow table, etc. + * 2. Key size: * a. Configurable key size * b. Single key size (8-byte, 16-byte or 32-byte key size) * @@ -98,59 +84,28 @@ extern "C" { /** Hash function */ typedef uint64_t (*rte_table_hash_op_hash)( void *key, + void *key_mask, uint32_t key_size, uint64_t seed); -/** - * Hash tables with configurable key size - * - */ -/** Extendible bucket hash table parameters */ -struct rte_table_hash_ext_params { +/** Hash table parameters */ +struct rte_table_hash_params { + /** Name */ + const char *name; + /** Key size (number of bytes) */ uint32_t key_size; - /** Maximum number of keys */ - uint32_t n_keys; - - /** Number of hash table buckets. Each bucket stores up to 4 keys. */ - uint32_t n_buckets; - - /** Number of hash table bucket extensions. Each bucket extension has - space for 4 keys and each bucket can have 0, 1 or more extensions. */ - uint32_t n_buckets_ext; - - /** Hash function */ - rte_table_hash_op_hash f_hash; - - /** Seed value for the hash function */ - uint64_t seed; - - /** Byte offset within packet meta-data where the 4-byte key signature - is located. Valid for pre-computed key signature tables, ignored for - do-sig tables. */ - uint32_t signature_offset; - /** Byte offset within packet meta-data where the key is located */ uint32_t key_offset; -}; -/** Extendible bucket hash table operations for pre-computed key signature */ -extern struct rte_table_ops rte_table_hash_ext_ops; - -/** Extendible bucket hash table operations for key signature computed on - lookup ("do-sig") */ -extern struct rte_table_ops rte_table_hash_ext_dosig_ops; - -/** LRU hash table parameters */ -struct rte_table_hash_lru_params { - /** Key size (number of bytes) */ - uint32_t key_size; + /** Key mask */ + uint8_t *key_mask; - /** Maximum number of keys */ + /** Number of keys */ uint32_t n_keys; - /** Number of hash table buckets. Each bucket stores up to 4 keys. */ + /** Number of buckets */ uint32_t n_buckets; /** Hash function */ @@ -158,239 +113,23 @@ struct rte_table_hash_lru_params { /** Seed value for the hash function */ uint64_t seed; - - /** Byte offset within packet meta-data where the 4-byte key signature - is located. Valid for pre-computed key signature tables, ignored for - do-sig tables. */ - uint32_t signature_offset; - - /** Byte offset within packet meta-data where the key is located */ - uint32_t key_offset; -}; - -/** LRU hash table operations for pre-computed key signature */ -extern struct rte_table_ops rte_table_hash_lru_ops; - -/** LRU hash table operations for key signature computed on lookup ("do-sig") */ -extern struct rte_table_ops rte_table_hash_lru_dosig_ops; - -/** - * 8-byte key hash tables - * - */ -/** LRU hash table parameters */ -struct rte_table_hash_key8_lru_params { - /** Maximum number of entries (and keys) in the table */ - uint32_t n_entries; - - /** Hash function */ - rte_table_hash_op_hash f_hash; - - /** Seed for the hash function */ - uint64_t seed; - - /** Byte offset within packet meta-data where the 4-byte key signature - is located. Valid for pre-computed key signature tables, ignored for - do-sig tables. 
*/ - uint32_t signature_offset; - - /** Byte offset within packet meta-data where the key is located */ - uint32_t key_offset; - - /** Bit-mask to be AND-ed to the key on lookup */ - uint8_t *key_mask; }; -/** LRU hash table operations for pre-computed key signature */ -extern struct rte_table_ops rte_table_hash_key8_lru_ops; - -/** LRU hash table operations for key signature computed on lookup ("do-sig") */ -extern struct rte_table_ops rte_table_hash_key8_lru_dosig_ops; - -/** Extendible bucket hash table parameters */ -struct rte_table_hash_key8_ext_params { - /** Maximum number of entries (and keys) in the table */ - uint32_t n_entries; - - /** Number of entries (and keys) for hash table bucket extensions. Each - bucket is extended in increments of 4 keys. */ - uint32_t n_entries_ext; - - /** Hash function */ - rte_table_hash_op_hash f_hash; - - /** Seed for the hash function */ - uint64_t seed; - - /** Byte offset within packet meta-data where the 4-byte key signature - is located. Valid for pre-computed key signature tables, ignored for - do-sig tables. */ - uint32_t signature_offset; - - /** Byte offset within packet meta-data where the key is located */ - uint32_t key_offset; - - /** Bit-mask to be AND-ed to the key on lookup */ - uint8_t *key_mask; -}; - -/** Extendible bucket hash table operations for pre-computed key signature */ +/** Extendible bucket hash table operations */ +extern struct rte_table_ops rte_table_hash_ext_ops; extern struct rte_table_ops rte_table_hash_key8_ext_ops; - -/** Extendible bucket hash table operations for key signature computed on - lookup ("do-sig") */ -extern struct rte_table_ops rte_table_hash_key8_ext_dosig_ops; - -/** - * 16-byte key hash tables - * - */ -/** LRU hash table parameters */ -struct rte_table_hash_key16_lru_params { - /** Maximum number of entries (and keys) in the table */ - uint32_t n_entries; - - /** Hash function */ - rte_table_hash_op_hash f_hash; - - /** Seed for the hash function */ - uint64_t seed; - - /** Byte offset within packet meta-data where the 4-byte key signature - is located. Valid for pre-computed key signature tables, ignored for - do-sig tables. */ - uint32_t signature_offset; - - /** Byte offset within packet meta-data where the key is located */ - uint32_t key_offset; - - /** Bit-mask to be AND-ed to the key on lookup */ - uint8_t *key_mask; -}; - -/** LRU hash table operations for pre-computed key signature */ -extern struct rte_table_ops rte_table_hash_key16_lru_ops; - -/** LRU hash table operations for key signature computed on lookup - ("do-sig") */ -extern struct rte_table_ops rte_table_hash_key16_lru_dosig_ops; - -/** Extendible bucket hash table parameters */ -struct rte_table_hash_key16_ext_params { - /** Maximum number of entries (and keys) in the table */ - uint32_t n_entries; - - /** Number of entries (and keys) for hash table bucket extensions. Each - bucket is extended in increments of 4 keys. */ - uint32_t n_entries_ext; - - /** Hash function */ - rte_table_hash_op_hash f_hash; - - /** Seed for the hash function */ - uint64_t seed; - - /** Byte offset within packet meta-data where the 4-byte key signature - is located. Valid for pre-computed key signature tables, ignored for - do-sig tables. 
*/ - uint32_t signature_offset; - - /** Byte offset within packet meta-data where the key is located */ - uint32_t key_offset; - - /** Bit-mask to be AND-ed to the key on lookup */ - uint8_t *key_mask; -}; - -/** Extendible bucket operations for pre-computed key signature */ extern struct rte_table_ops rte_table_hash_key16_ext_ops; - -/** Extendible bucket hash table operations for key signature computed on - lookup ("do-sig") */ -extern struct rte_table_ops rte_table_hash_key16_ext_dosig_ops; - -/** - * 32-byte key hash tables - * - */ -/** LRU hash table parameters */ -struct rte_table_hash_key32_lru_params { - /** Maximum number of entries (and keys) in the table */ - uint32_t n_entries; - - /** Hash function */ - rte_table_hash_op_hash f_hash; - - /** Seed for the hash function */ - uint64_t seed; - - /** Byte offset within packet meta-data where the 4-byte key signature - is located. Valid for pre-computed key signature tables, ignored for - do-sig tables. */ - uint32_t signature_offset; - - /** Byte offset within packet meta-data where the key is located */ - uint32_t key_offset; -}; - -/** LRU hash table operations for pre-computed key signature */ -extern struct rte_table_ops rte_table_hash_key32_lru_ops; - -/** Extendible bucket hash table parameters */ -struct rte_table_hash_key32_ext_params { - /** Maximum number of entries (and keys) in the table */ - uint32_t n_entries; - - /** Number of entries (and keys) for hash table bucket extensions. Each - bucket is extended in increments of 4 keys. */ - uint32_t n_entries_ext; - - /** Hash function */ - rte_table_hash_op_hash f_hash; - - /** Seed for the hash function */ - uint64_t seed; - - /** Byte offset within packet meta-data where the 4-byte key signature - is located. Valid for pre-computed key signature tables, ignored for - do-sig tables. */ - uint32_t signature_offset; - - /** Byte offset within packet meta-data where the key is located */ - uint32_t key_offset; -}; - -/** Extendible bucket hash table operations */ extern struct rte_table_ops rte_table_hash_key32_ext_ops; -/** Cuckoo hash table parameters */ -struct rte_table_hash_cuckoo_params { - /** Key size (number of bytes */ - uint32_t key_size; - - /** Maximum number of hash table entries */ - uint32_t n_keys; - - /** Hash function used to calculate hash */ - rte_table_hash_op_hash f_hash; - - /** Seed value or Init value used by f_hash */ - uint32_t seed; - - /** Byte offset within packet meta-data where the 4-byte key signature - is located. Valid for pre-computed key signature tables, ignored for - do-sig tables. */ - uint32_t signature_offset; - - /** Byte offset within packet meta-data where the key is located */ - uint32_t key_offset; +/** LRU hash table operations */ +extern struct rte_table_ops rte_table_hash_lru_ops; - /** Hash table name */ - const char *name; -}; +extern struct rte_table_ops rte_table_hash_key8_lru_ops; +extern struct rte_table_ops rte_table_hash_key16_lru_ops; +extern struct rte_table_ops rte_table_hash_key32_lru_ops; /** Cuckoo hash table operations */ -extern struct rte_table_ops rte_table_hash_cuckoo_dosig_ops; +extern struct rte_table_ops rte_table_hash_cuckoo_ops; #ifdef __cplusplus } diff --git a/lib/librte_table/rte_table_hash_cuckoo.c b/lib/librte_table/rte_table_hash_cuckoo.c index da1597fa..f3845c75 100644 --- a/lib/librte_table/rte_table_hash_cuckoo.c +++ b/lib/librte_table/rte_table_hash_cuckoo.c @@ -1,34 +1,34 @@ /*- - * BSD LICENSE + * BSD LICENSE * - * Copyright(c) 2016 Intel Corporation. All rights reserved. 
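
With the per-key-size parameter structures removed, every hash table flavour declared in rte_table_hash.h is now created from the same rte_table_hash_params plus a mask-aware hash callback. A rough sketch of what a caller might provide; the hash below is a deliberately trivial XOR fold shown only to illustrate the new four-argument prototype, and the table name, sizes and key offset are made up:

#include <stdint.h>
#include <rte_table_hash.h>

/* Toy hash honouring the new (key, key_mask, key_size, seed) prototype. */
static uint64_t
toy_hash(void *key, void *key_mask, uint32_t key_size, uint64_t seed)
{
	uint64_t *k = key, *m = key_mask, h = seed;
	uint32_t i;

	for (i = 0; i < key_size / sizeof(uint64_t); i++)
		h ^= k[i] & m[i];

	return h;
}

static uint8_t flow_key_mask[16] = {
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, /* ignore last 4 bytes */
};

static void *
create_flow_table(int socket_id, uint32_t entry_size)
{
	struct rte_table_hash_params p = {
		.name = "flow_table",	/* hypothetical table name */
		.key_size = 16,
		.key_offset = 0,	/* hypothetical key offset in packet meta-data */
		.key_mask = flow_key_mask,
		.n_keys = 1 << 16,
		.n_buckets = 1 << 14,
		.f_hash = toy_hash,
		.seed = 0,
	};

	/* Any of the consolidated ops could be used; the 16-byte extendible
	 * bucket flavour is picked here only as an example. */
	return rte_table_hash_key16_ext_ops.f_create(&p, socket_id, entry_size);
}
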
- * All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include <string.h> #include <stdio.h> @@ -66,25 +66,28 @@ struct rte_table_hash { uint32_t n_keys; rte_table_hash_op_hash f_hash; uint32_t seed; - uint32_t signature_offset; uint32_t key_offset; - const char *name; /* cuckoo hash table object */ struct rte_hash *h_table; /* Lookup table */ - uint8_t memory[0] __rte_cache_aligned; }; + uint8_t memory[0] __rte_cache_aligned; +}; static int -check_params_create_hash_cuckoo(const struct -rte_table_hash_cuckoo_params *params) { - /* Check for valid parameters */ +check_params_create_hash_cuckoo(struct rte_table_hash_params *params) +{ if (params == NULL) { RTE_LOG(ERR, TABLE, "NULL Input Parameters.\n"); return -EINVAL; } + if (params->name == NULL) { + RTE_LOG(ERR, TABLE, "Table name is NULL.\n"); + return -EINVAL; + } + if (params->key_size == 0) { RTE_LOG(ERR, TABLE, "Invalid key_size.\n"); return -EINVAL; @@ -100,11 +103,6 @@ rte_table_hash_cuckoo_params *params) { return -EINVAL; } - if (params->name == NULL) { - RTE_LOG(ERR, TABLE, "Table name is NULL.\n"); - return -EINVAL; - } - return 0; } @@ -113,34 +111,24 @@ rte_table_hash_cuckoo_create(void *params, int socket_id, uint32_t entry_size) { - struct rte_hash *rte_hash_handle; + struct rte_table_hash_params *p = params; + struct rte_hash *h_table; struct rte_table_hash *t; - uint32_t total_size, total_cl_size; + uint32_t total_size; /* Check input parameters */ - struct rte_table_hash_cuckoo_params *p = - (struct rte_table_hash_cuckoo_params *) params; - if (check_params_create_hash_cuckoo(params)) return NULL; /* Memory allocation */ - total_cl_size = - (sizeof(struct rte_table_hash) + - RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE; - total_cl_size += (p->n_keys * entry_size + - RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE; - total_size = total_cl_size * RTE_CACHE_LINE_SIZE; - - t = rte_zmalloc_socket("TABLE", - total_size, - RTE_CACHE_LINE_SIZE, - socket_id); + total_size = sizeof(struct rte_table_hash) + + RTE_CACHE_LINE_ROUNDUP(p->n_keys * entry_size); + + t = rte_zmalloc_socket(p->name, total_size, RTE_CACHE_LINE_SIZE, socket_id); if (t == NULL) { RTE_LOG(ERR, TABLE, - "%s: Cannot allocate %u bytes for Cuckoo hash table\n", - __func__, - (uint32_t)sizeof(struct rte_table_hash)); + "%s: Cannot allocate %u bytes for cuckoo hash table %s\n", + __func__, total_size, p->name); return NULL; } @@ -154,13 +142,13 @@ rte_table_hash_cuckoo_create(void *params, .name = p->name }; - rte_hash_handle = rte_hash_find_existing(p->name); - if (rte_hash_handle == NULL) { - rte_hash_handle = rte_hash_create(&hash_cuckoo_params); - if (NULL == rte_hash_handle) { + h_table = rte_hash_find_existing(p->name); + if (h_table == NULL) { + h_table = rte_hash_create(&hash_cuckoo_params); + if (h_table == NULL) { RTE_LOG(ERR, TABLE, - "%s: failed to create cuckoo hash table. 
keysize: %u", - __func__, hash_cuckoo_params.key_len); + "%s: failed to create cuckoo hash table %s\n", + __func__, p->name); rte_free(t); return NULL; } @@ -172,26 +160,22 @@ rte_table_hash_cuckoo_create(void *params, t->n_keys = p->n_keys; t->f_hash = p->f_hash; t->seed = p->seed; - t->signature_offset = p->signature_offset; t->key_offset = p->key_offset; - t->name = p->name; - t->h_table = rte_hash_handle; + t->h_table = h_table; RTE_LOG(INFO, TABLE, - "%s: Cuckoo Hash table memory footprint is %u bytes\n", - __func__, total_size); + "%s: Cuckoo hash table %s memory footprint is %u bytes\n", + __func__, p->name, total_size); return t; } static int rte_table_hash_cuckoo_free(void *table) { - if (table == NULL) { - RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); - return -EINVAL; - } - struct rte_table_hash *t = table; + if (table == NULL) + return -EINVAL; + rte_hash_free(t->h_table); rte_free(t); @@ -200,25 +184,18 @@ rte_table_hash_cuckoo_free(void *table) { static int rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry, - int *key_found, void **entry_ptr) { + int *key_found, void **entry_ptr) +{ + struct rte_table_hash *t = table; int pos = 0; - if (table == NULL) { - RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); - return -EINVAL; - } - - if (key == NULL) { - RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__); - return -EINVAL; - } - - if (entry == NULL) { - RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__); + /* Check input parameters */ + if ((table == NULL) || + (key == NULL) || + (entry == NULL) || + (key_found == NULL) || + (entry_ptr == NULL)) return -EINVAL; - } - - struct rte_table_hash *t = table; /* Find Existing entries */ pos = rte_hash_lookup(t->h_table, key); @@ -231,17 +208,15 @@ rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry, *entry_ptr = existing_entry; return 0; -} else if (pos == -ENOENT) { - /* Entry not found. Adding new entry */ + } + + if (pos == -ENOENT) { + /* Entry not found. 
Adding new entry */ uint8_t *new_entry; pos = rte_hash_add_key(t->h_table, key); - if (pos < 0) { - RTE_LOG(ERR, TABLE, - "%s: Entry not added, status : %u\n", - __func__, pos); + if (pos < 0) return pos; - } new_entry = &t->memory[pos * t->entry_size]; memcpy(new_entry, entry, t->entry_size); @@ -250,25 +225,22 @@ rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry, *entry_ptr = new_entry; return 0; } + return pos; } static int rte_table_hash_cuckoo_entry_delete(void *table, void *key, - int *key_found, __rte_unused void *entry) { + int *key_found, void *entry) +{ + struct rte_table_hash *t = table; int pos = 0; - if (table == NULL) { - RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); - return -EINVAL; - } - - if (key == NULL) { - RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__); + /* Check input parameters */ + if ((table == NULL) || + (key == NULL) || + (key_found == NULL)) return -EINVAL; - } - - struct rte_table_hash *t = table; pos = rte_hash_del_key(t->h_table, key); if (pos >= 0) { @@ -279,20 +251,21 @@ rte_table_hash_cuckoo_entry_delete(void *table, void *key, memcpy(entry, entry_ptr, t->entry_size); memset(&t->memory[pos * t->entry_size], 0, t->entry_size); + return 0; } + *key_found = 0; return pos; } - static int -rte_table_hash_cuckoo_lookup_dosig(void *table, +rte_table_hash_cuckoo_lookup(void *table, struct rte_mbuf **pkts, uint64_t pkts_mask, uint64_t *lookup_hit_mask, void **entries) { - struct rte_table_hash *t = (struct rte_table_hash *)table; + struct rte_table_hash *t = table; uint64_t pkts_mask_out = 0; uint32_t i; @@ -301,20 +274,19 @@ rte_table_hash_cuckoo_lookup_dosig(void *table, RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(t, n_pkts_in); if ((pkts_mask & (pkts_mask + 1)) == 0) { - const uint8_t *keys[64]; - int32_t positions[64], status; + const uint8_t *keys[RTE_PORT_IN_BURST_SIZE_MAX]; + int32_t positions[RTE_PORT_IN_BURST_SIZE_MAX], status; /* Keys for bulk lookup */ for (i = 0; i < n_pkts_in; i++) keys[i] = RTE_MBUF_METADATA_UINT8_PTR(pkts[i], - t->key_offset); + t->key_offset); /* Bulk Lookup */ status = rte_hash_lookup_bulk(t->h_table, (const void **) keys, n_pkts_in, positions); - if (status == 0) { for (i = 0; i < n_pkts_in; i++) { if (likely(positions[i] >= 0)) { @@ -326,7 +298,7 @@ rte_table_hash_cuckoo_lookup_dosig(void *table, } } } - } else { + } else for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX - __builtin_clzll(pkts_mask)); i++) { uint64_t pkt_mask = 1LLU << i; @@ -345,7 +317,6 @@ rte_table_hash_cuckoo_lookup_dosig(void *table, } } } - } *lookup_hit_mask = pkts_mask_out; RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(t, @@ -370,13 +341,13 @@ rte_table_hash_cuckoo_stats_read(void *table, struct rte_table_stats *stats, return 0; } -struct rte_table_ops rte_table_hash_cuckoo_dosig_ops = { +struct rte_table_ops rte_table_hash_cuckoo_ops = { .f_create = rte_table_hash_cuckoo_create, .f_free = rte_table_hash_cuckoo_free, .f_add = rte_table_hash_cuckoo_entry_add, .f_delete = rte_table_hash_cuckoo_entry_delete, .f_add_bulk = NULL, .f_delete_bulk = NULL, - .f_lookup = rte_table_hash_cuckoo_lookup_dosig, + .f_lookup = rte_table_hash_cuckoo_lookup, .f_stats = rte_table_hash_cuckoo_stats_read, }; diff --git a/lib/librte_table/rte_table_hash_ext.c b/lib/librte_table/rte_table_hash_ext.c index e7181026..3af1bcab 100644 --- a/lib/librte_table/rte_table_hash_ext.c +++ b/lib/librte_table/rte_table_hash_ext.c @@ -1,34 +1,34 @@ /*- - * BSD LICENSE + * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. 
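
The reworked cuckoo add/delete paths above hand the stored entry back through their out-parameters, so a pipeline-independent caller can drive them directly through the consolidated ops table. A sketch under the assumption that the table was created with entry_size equal to sizeof(struct flow_entry); the entry layout and 16-byte key width are made up for illustration:

#include <stdint.h>
#include <rte_table_hash.h>

struct flow_entry {
	uint32_t port_out;	/* hypothetical per-flow data */
};

/* Insert one 16-byte key; *slot ends up pointing at the entry stored in the
 * table's own memory (the existing entry if the key was already present). */
static int
flow_add(void *tbl, uint8_t key[16], uint32_t port_out)
{
	struct flow_entry e = { .port_out = port_out };
	struct flow_entry *slot;
	int key_found;

	return rte_table_hash_cuckoo_ops.f_add(tbl, key, &e,
					       &key_found, (void **)&slot);
}

/* Delete one key; the previous entry contents are copied back into "old". */
static int
flow_del(void *tbl, uint8_t key[16])
{
	struct flow_entry old;
	int key_found;

	return rte_table_hash_cuckoo_ops.f_delete(tbl, key, &key_found, &old);
}
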
All rights reserved. - * All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include <string.h> @@ -106,7 +106,6 @@ struct rte_table_hash { uint32_t n_buckets_ext; rte_table_hash_op_hash f_hash; uint64_t seed; - uint32_t signature_offset; uint32_t key_offset; /* Internal */ @@ -120,6 +119,7 @@ struct rte_table_hash { struct grinder grinders[RTE_PORT_IN_BURST_SIZE_MAX]; /* Tables */ + uint64_t *key_mask; struct bucket *buckets; struct bucket *buckets_ext; uint8_t *key_mem; @@ -132,29 +132,53 @@ struct rte_table_hash { }; static int -check_params_create(struct rte_table_hash_ext_params *params) +keycmp(void *a, void *b, void *b_mask, uint32_t n_bytes) { - uint32_t n_buckets_min; + uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask; + uint32_t i; + + for (i = 0; i < n_bytes / sizeof(uint64_t); i++) + if (a64[i] != (b64[i] & b_mask64[i])) + return 1; + + return 0; +} + +static void +keycpy(void *dst, void *src, void *src_mask, uint32_t n_bytes) +{ + uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask; + uint32_t i; + + for (i = 0; i < n_bytes / sizeof(uint64_t); i++) + dst64[i] = src64[i] & src_mask64[i]; +} + +static int +check_params_create(struct rte_table_hash_params *params) +{ + /* name */ + if (params->name == NULL) { + RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__); + return -EINVAL; + } /* key_size */ - if ((params->key_size == 0) || + if ((params->key_size < sizeof(uint64_t)) || (!rte_is_power_of_2(params->key_size))) { RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__); return -EINVAL; } /* n_keys */ - if ((params->n_keys == 0) || - (!rte_is_power_of_2(params->n_keys))) { + if (params->n_keys == 0) { RTE_LOG(ERR, TABLE, "%s: n_keys invalid value\n", __func__); return -EINVAL; } /* n_buckets */ - n_buckets_min = (params->n_keys + KEYS_PER_BUCKET - 1) / params->n_keys; if ((params->n_buckets == 0) || - (!rte_is_power_of_2(params->n_keys)) || - (params->n_buckets < n_buckets_min)) { + (!rte_is_power_of_2(params->n_buckets))) { RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__); return -EINVAL; } @@ -171,15 +195,13 @@ check_params_create(struct rte_table_hash_ext_params *params) static void * rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size) { - struct rte_table_hash_ext_params *p = - params; + struct rte_table_hash_params *p = params; struct rte_table_hash *t; - uint32_t total_size, table_meta_sz; - uint32_t bucket_sz, bucket_ext_sz, key_sz; - uint32_t key_stack_sz, bkt_ext_stack_sz, data_sz; - uint32_t bucket_offset, bucket_ext_offset, key_offset; - uint32_t key_stack_offset, bkt_ext_stack_offset, data_offset; - uint32_t i; + uint64_t table_meta_sz, key_mask_sz, bucket_sz, bucket_ext_sz, key_sz; + uint64_t key_stack_sz, bkt_ext_stack_sz, data_sz, total_size; + uint64_t key_mask_offset, bucket_offset, bucket_ext_offset, key_offset; + uint64_t key_stack_offset, bkt_ext_stack_offset, data_offset; + uint32_t n_buckets_ext, i; /* Check input parameters */ if ((check_params_create(p) != 0) || @@ -188,38 +210,66 @@ rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size) (sizeof(struct bucket) != (RTE_CACHE_LINE_SIZE / 2))) return NULL; + /* + * Table dimensioning + * + * Objective: Pick the number of bucket extensions (n_buckets_ext) so that + * it is guaranteed that n_keys keys can be stored in the table at any time. + * + * The worst case scenario takes place when all the n_keys keys fall into + * the same bucket. 
Actually, due to the KEYS_PER_BUCKET scheme, the worst + * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the + * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall + * into a different bucket. This case defeats the purpose of the hash table. + * It indicates unsuitable f_hash or n_keys to n_buckets ratio. + * + * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1 + */ + n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1; + /* Memory allocation */ table_meta_sz = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_table_hash)); + key_mask_sz = RTE_CACHE_LINE_ROUNDUP(p->key_size); bucket_sz = RTE_CACHE_LINE_ROUNDUP(p->n_buckets * sizeof(struct bucket)); bucket_ext_sz = - RTE_CACHE_LINE_ROUNDUP(p->n_buckets_ext * sizeof(struct bucket)); + RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(struct bucket)); key_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * p->key_size); key_stack_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * sizeof(uint32_t)); bkt_ext_stack_sz = - RTE_CACHE_LINE_ROUNDUP(p->n_buckets_ext * sizeof(uint32_t)); + RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t)); data_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * entry_size); - total_size = table_meta_sz + bucket_sz + bucket_ext_sz + key_sz + - key_stack_sz + bkt_ext_stack_sz + data_sz; + total_size = table_meta_sz + key_mask_sz + bucket_sz + bucket_ext_sz + + key_sz + key_stack_sz + bkt_ext_stack_sz + data_sz; - t = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + if (total_size > SIZE_MAX) { + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes" + " for hash table %s\n", + __func__, total_size, p->name); + return NULL; + } + + t = rte_zmalloc_socket(p->name, + (size_t)total_size, + RTE_CACHE_LINE_SIZE, + socket_id); if (t == NULL) { - RTE_LOG(ERR, TABLE, - "%s: Cannot allocate %u bytes for hash table\n", - __func__, total_size); + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes" + " for hash table %s\n", + __func__, total_size, p->name); return NULL; } - RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table memory footprint is " - "%u bytes\n", __func__, p->key_size, total_size); + RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table %s memory " + "footprint is %" PRIu64 " bytes\n", + __func__, p->key_size, p->name, total_size); /* Memory initialization */ t->key_size = p->key_size; t->entry_size = entry_size; t->n_keys = p->n_keys; t->n_buckets = p->n_buckets; - t->n_buckets_ext = p->n_buckets_ext; + t->n_buckets_ext = n_buckets_ext; t->f_hash = p->f_hash; t->seed = p->seed; - t->signature_offset = p->signature_offset; t->key_offset = p->key_offset; /* Internal */ @@ -228,13 +278,15 @@ rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size) t->data_size_shl = __builtin_ctzl(entry_size); /* Tables */ - bucket_offset = 0; + key_mask_offset = 0; + bucket_offset = key_mask_offset + key_mask_sz; bucket_ext_offset = bucket_offset + bucket_sz; key_offset = bucket_ext_offset + bucket_ext_sz; key_stack_offset = key_offset + key_sz; bkt_ext_stack_offset = key_stack_offset + key_stack_sz; data_offset = bkt_ext_stack_offset + bkt_ext_stack_sz; + t->key_mask = (uint64_t *) &t->memory[key_mask_offset]; t->buckets = (struct bucket *) &t->memory[bucket_offset]; t->buckets_ext = (struct bucket *) &t->memory[bucket_ext_offset]; t->key_mem = &t->memory[key_offset]; @@ -242,6 +294,12 @@ rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size) t->bkt_ext_stack = (uint32_t *) &t->memory[bkt_ext_stack_offset]; t->data_mem = 
&t->memory[data_offset]; + /* Key mask */ + if (p->key_mask == NULL) + memset(t->key_mask, 0xFF, p->key_size); + else + memcpy(t->key_mask, p->key_mask, p->key_size); + /* Key stack */ for (i = 0; i < t->n_keys; i++) t->key_stack[i] = t->n_keys - 1 - i; @@ -277,7 +335,7 @@ rte_table_hash_ext_entry_add(void *table, void *key, void *entry, uint64_t sig; uint32_t bkt_index, i; - sig = t->f_hash(key, t->key_size, t->seed); + sig = t->f_hash(key, t->key_mask, t->key_size, t->seed); bkt_index = sig & t->bucket_mask; bkt0 = &t->buckets[bkt_index]; sig = (sig >> 16) | 1LLU; @@ -290,7 +348,7 @@ rte_table_hash_ext_entry_add(void *table, void *key, void *entry, uint8_t *bkt_key = &t->key_mem[bkt_key_index << t->key_size_shl]; - if ((sig == bkt_sig) && (memcmp(key, bkt_key, + if ((sig == bkt_sig) && (keycmp(bkt_key, key, t->key_mask, t->key_size) == 0)) { uint8_t *data = &t->data_mem[bkt_key_index << t->data_size_shl]; @@ -327,7 +385,7 @@ rte_table_hash_ext_entry_add(void *table, void *key, void *entry, bkt->sig[i] = (uint16_t) sig; bkt->key_pos[i] = bkt_key_index; - memcpy(bkt_key, key, t->key_size); + keycpy(bkt_key, key, t->key_mask, t->key_size); memcpy(data, entry, t->entry_size); *key_found = 0; @@ -358,7 +416,7 @@ rte_table_hash_ext_entry_add(void *table, void *key, void *entry, /* Install new key into bucket */ bkt->sig[0] = (uint16_t) sig; bkt->key_pos[0] = bkt_key_index; - memcpy(bkt_key, key, t->key_size); + keycpy(bkt_key, key, t->key_mask, t->key_size); memcpy(data, entry, t->entry_size); *key_found = 0; @@ -378,7 +436,7 @@ void *entry) uint64_t sig; uint32_t bkt_index, i; - sig = t->f_hash(key, t->key_size, t->seed); + sig = t->f_hash(key, t->key_mask, t->key_size, t->seed); bkt_index = sig & t->bucket_mask; bkt0 = &t->buckets[bkt_index]; sig = (sig >> 16) | 1LLU; @@ -392,7 +450,7 @@ void *entry) uint8_t *bkt_key = &t->key_mem[bkt_key_index << t->key_size_shl]; - if ((sig == bkt_sig) && (memcmp(key, bkt_key, + if ((sig == bkt_sig) && (keycmp(bkt_key, key, t->key_mask, t->key_size) == 0)) { uint8_t *data = &t->data_mem[bkt_key_index << t->data_size_shl]; @@ -437,8 +495,7 @@ static int rte_table_hash_ext_lookup_unoptimized( struct rte_mbuf **pkts, uint64_t pkts_mask, uint64_t *lookup_hit_mask, - void **entries, - int dosig) + void **entries) { struct rte_table_hash *t = (struct rte_table_hash *) table; uint64_t pkts_mask_out = 0; @@ -458,11 +515,7 @@ static int rte_table_hash_ext_lookup_unoptimized( pkt = pkts[pkt_index]; key = RTE_MBUF_METADATA_UINT8_PTR(pkt, t->key_offset); - if (dosig) - sig = (uint64_t) t->f_hash(key, t->key_size, t->seed); - else - sig = RTE_MBUF_METADATA_UINT32(pkt, - t->signature_offset); + sig = (uint64_t) t->f_hash(key, t->key_mask, t->key_size, t->seed); bkt_index = sig & t->bucket_mask; bkt0 = &t->buckets[bkt_index]; @@ -476,8 +529,8 @@ static int rte_table_hash_ext_lookup_unoptimized( uint8_t *bkt_key = &t->key_mem[bkt_key_index << t->key_size_shl]; - if ((sig == bkt_sig) && (memcmp(key, bkt_key, - t->key_size) == 0)) { + if ((sig == bkt_sig) && (keycmp(bkt_key, key, + t->key_mask, t->key_size) == 0)) { uint8_t *data = &t->data_mem[ bkt_key_index << t->data_size_shl]; @@ -576,11 +629,12 @@ static int rte_table_hash_ext_lookup_unoptimized( { \ uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(mbuf, f->key_offset);\ uint64_t *bkt_key = (uint64_t *) key; \ + uint64_t *key_mask = f->key_mask; \ \ switch (f->key_size) { \ case 8: \ { \ - uint64_t xor = pkt_key[0] ^ bkt_key[0]; \ + uint64_t xor = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \ match_key = 0; \ if (xor == 
0) \ match_key = 1; \ @@ -591,8 +645,8 @@ static int rte_table_hash_ext_lookup_unoptimized( { \ uint64_t xor[2], or; \ \ - xor[0] = pkt_key[0] ^ bkt_key[0]; \ - xor[1] = pkt_key[1] ^ bkt_key[1]; \ + xor[0] = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \ + xor[1] = (pkt_key[1] & key_mask[1]) ^ bkt_key[1]; \ or = xor[0] | xor[1]; \ match_key = 0; \ if (or == 0) \ @@ -604,10 +658,10 @@ static int rte_table_hash_ext_lookup_unoptimized( { \ uint64_t xor[4], or; \ \ - xor[0] = pkt_key[0] ^ bkt_key[0]; \ - xor[1] = pkt_key[1] ^ bkt_key[1]; \ - xor[2] = pkt_key[2] ^ bkt_key[2]; \ - xor[3] = pkt_key[3] ^ bkt_key[3]; \ + xor[0] = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \ + xor[1] = (pkt_key[1] & key_mask[1]) ^ bkt_key[1]; \ + xor[2] = (pkt_key[2] & key_mask[2]) ^ bkt_key[2]; \ + xor[3] = (pkt_key[3] & key_mask[3]) ^ bkt_key[3]; \ or = xor[0] | xor[1] | xor[2] | xor[3]; \ match_key = 0; \ if (or == 0) \ @@ -619,14 +673,14 @@ static int rte_table_hash_ext_lookup_unoptimized( { \ uint64_t xor[8], or; \ \ - xor[0] = pkt_key[0] ^ bkt_key[0]; \ - xor[1] = pkt_key[1] ^ bkt_key[1]; \ - xor[2] = pkt_key[2] ^ bkt_key[2]; \ - xor[3] = pkt_key[3] ^ bkt_key[3]; \ - xor[4] = pkt_key[4] ^ bkt_key[4]; \ - xor[5] = pkt_key[5] ^ bkt_key[5]; \ - xor[6] = pkt_key[6] ^ bkt_key[6]; \ - xor[7] = pkt_key[7] ^ bkt_key[7]; \ + xor[0] = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \ + xor[1] = (pkt_key[1] & key_mask[1]) ^ bkt_key[1]; \ + xor[2] = (pkt_key[2] & key_mask[2]) ^ bkt_key[2]; \ + xor[3] = (pkt_key[3] & key_mask[3]) ^ bkt_key[3]; \ + xor[4] = (pkt_key[4] & key_mask[4]) ^ bkt_key[4]; \ + xor[5] = (pkt_key[5] & key_mask[5]) ^ bkt_key[5]; \ + xor[6] = (pkt_key[6] & key_mask[6]) ^ bkt_key[6]; \ + xor[7] = (pkt_key[7] & key_mask[7]) ^ bkt_key[7]; \ or = xor[0] | xor[1] | xor[2] | xor[3] | \ xor[4] | xor[5] | xor[6] | xor[7]; \ match_key = 0; \ @@ -637,7 +691,7 @@ static int rte_table_hash_ext_lookup_unoptimized( \ default: \ match_key = 0; \ - if (memcmp(pkt_key, bkt_key, f->key_size) == 0) \ + if (keycmp(bkt_key, pkt_key, key_mask, f->key_size) == 0) \ match_key = 1; \ } \ } @@ -685,38 +739,7 @@ static int rte_table_hash_ext_lookup_unoptimized( rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\ } -#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index) \ -{ \ - struct grinder *g10, *g11; \ - uint64_t sig10, sig11, bkt10_index, bkt11_index; \ - struct rte_mbuf *mbuf10, *mbuf11; \ - struct bucket *bkt10, *bkt11, *buckets = t->buckets; \ - uint64_t bucket_mask = t->bucket_mask; \ - uint32_t signature_offset = t->signature_offset; \ - \ - mbuf10 = pkts[pkt10_index]; \ - sig10 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf10, signature_offset);\ - bkt10_index = sig10 & bucket_mask; \ - bkt10 = &buckets[bkt10_index]; \ - \ - mbuf11 = pkts[pkt11_index]; \ - sig11 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf11, signature_offset);\ - bkt11_index = sig11 & bucket_mask; \ - bkt11 = &buckets[bkt11_index]; \ - \ - rte_prefetch0(bkt10); \ - rte_prefetch0(bkt11); \ - \ - g10 = &g[pkt10_index]; \ - g10->sig = sig10; \ - g10->bkt = bkt10; \ - \ - g11 = &g[pkt11_index]; \ - g11->sig = sig11; \ - g11->bkt = bkt11; \ -} - -#define lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index) \ +#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index) \ { \ struct grinder *g10, *g11; \ uint64_t sig10, sig11, bkt10_index, bkt11_index; \ @@ -731,13 +754,13 @@ static int rte_table_hash_ext_lookup_unoptimized( \ mbuf10 = pkts[pkt10_index]; \ key10 = RTE_MBUF_METADATA_UINT8_PTR(mbuf10, key_offset); \ - sig10 = (uint64_t) f_hash(key10, 
key_size, seed); \ + sig10 = (uint64_t) f_hash(key10, t->key_mask, key_size, seed); \ bkt10_index = sig10 & bucket_mask; \ bkt10 = &buckets[bkt10_index]; \ \ mbuf11 = pkts[pkt11_index]; \ key11 = RTE_MBUF_METADATA_UINT8_PTR(mbuf11, key_offset); \ - sig11 = (uint64_t) f_hash(key11, key_size, seed); \ + sig11 = (uint64_t) f_hash(key11, t->key_mask, key_size, seed); \ bkt11_index = sig11 & bucket_mask; \ bkt11 = &buckets[bkt11_index]; \ \ @@ -874,7 +897,7 @@ static int rte_table_hash_ext_lookup( /* Cannot run the pipeline with less than 7 packets */ if (__builtin_popcountll(pkts_mask) < 7) { status = rte_table_hash_ext_lookup_unoptimized(table, pkts, - pkts_mask, lookup_hit_mask, entries, 0); + pkts_mask, lookup_hit_mask, entries); RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(*lookup_hit_mask)); return status; @@ -982,144 +1005,7 @@ static int rte_table_hash_ext_lookup( uint64_t pkts_mask_out_slow = 0; status = rte_table_hash_ext_lookup_unoptimized(table, pkts, - pkts_mask_match_many, &pkts_mask_out_slow, entries, 0); - pkts_mask_out |= pkts_mask_out_slow; - } - - *lookup_hit_mask = pkts_mask_out; - RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out)); - return status; -} - -static int rte_table_hash_ext_lookup_dosig( - void *table, - struct rte_mbuf **pkts, - uint64_t pkts_mask, - uint64_t *lookup_hit_mask, - void **entries) -{ - struct rte_table_hash *t = (struct rte_table_hash *) table; - struct grinder *g = t->grinders; - uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index; - uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index; - uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0; - int status = 0; - - __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); - RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(t, n_pkts_in); - - /* Cannot run the pipeline with less than 7 packets */ - if (__builtin_popcountll(pkts_mask) < 7) { - status = rte_table_hash_ext_lookup_unoptimized(table, pkts, - pkts_mask, lookup_hit_mask, entries, 1); - RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - - __builtin_popcountll(*lookup_hit_mask)); - return status; - } - - /* Pipeline stage 0 */ - lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); - - /* Pipeline feed */ - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); - - /* Pipeline feed */ - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); - - /* Pipeline stage 2 */ - lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); - - /* - * Pipeline run - * - */ - for ( ; pkts_mask; ) { - /* Pipeline feed */ - pkt30_index = pkt20_index; - pkt31_index = pkt21_index; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, - pkt00_index, pkt01_index); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); - - /* Pipeline stage 2 */ - lookup2_stage2(t, g, pkt20_index, pkt21_index, - pkts_mask_match_many); - - /* Pipeline stage 3 */ - 
lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, - pkts_mask_out, entries); - } - - /* Pipeline feed */ - pkt30_index = pkt20_index; - pkt31_index = pkt21_index; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); - - /* Pipeline stage 2 */ - lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); - - /* Pipeline stage 3 */ - lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, - entries); - - /* Pipeline feed */ - pkt30_index = pkt20_index; - pkt31_index = pkt21_index; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - - /* Pipeline stage 2 */ - lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); - - /* Pipeline stage 3 */ - lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, - entries); - - /* Pipeline feed */ - pkt30_index = pkt20_index; - pkt31_index = pkt21_index; - - /* Pipeline stage 3 */ - lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, - entries); - - /* Slow path */ - pkts_mask_match_many &= ~pkts_mask_out; - if (pkts_mask_match_many) { - uint64_t pkts_mask_out_slow = 0; - - status = rte_table_hash_ext_lookup_unoptimized(table, pkts, - pkts_mask_match_many, &pkts_mask_out_slow, entries, 1); + pkts_mask_match_many, &pkts_mask_out_slow, entries); pkts_mask_out |= pkts_mask_out_slow; } @@ -1152,14 +1038,3 @@ struct rte_table_ops rte_table_hash_ext_ops = { .f_lookup = rte_table_hash_ext_lookup, .f_stats = rte_table_hash_ext_stats_read, }; - -struct rte_table_ops rte_table_hash_ext_dosig_ops = { - .f_create = rte_table_hash_ext_create, - .f_free = rte_table_hash_ext_free, - .f_add = rte_table_hash_ext_entry_add, - .f_delete = rte_table_hash_ext_entry_delete, - .f_add_bulk = NULL, - .f_delete_bulk = NULL, - .f_lookup = rte_table_hash_ext_lookup_dosig, - .f_stats = rte_table_hash_ext_stats_read, -}; diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c index ce057b78..b541735c 100644 --- a/lib/librte_table/rte_table_hash_key16.c +++ b/lib/librte_table/rte_table_hash_key16.c @@ -1,34 +1,34 @@ /*- - * BSD LICENSE + * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include <string.h> #include <stdio.h> @@ -42,7 +42,9 @@ #include "rte_table_hash.h" #include "rte_lru.h" -#define RTE_TABLE_HASH_KEY_SIZE 16 +#define KEY_SIZE 16 + +#define KEYS_PER_BUCKET 4 #define RTE_BUCKET_ENTRY_VALID 0x1LLU @@ -79,11 +81,9 @@ struct rte_table_hash { /* Input parameters */ uint32_t n_buckets; - uint32_t n_entries_per_bucket; uint32_t key_size; uint32_t entry_size; uint32_t bucket_size; - uint32_t signature_offset; uint32_t key_offset; uint64_t key_mask[2]; rte_table_hash_op_hash f_hash; @@ -99,17 +99,55 @@ struct rte_table_hash { }; static int -check_params_create_lru(struct rte_table_hash_key16_lru_params *params) { - /* n_entries */ - if (params->n_entries == 0) { - RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__); +keycmp(void *a, void *b, void *b_mask) +{ + uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask; + + return (a64[0] != (b64[0] & b_mask64[0])) || + (a64[1] != (b64[1] & b_mask64[1])); +} + +static void +keycpy(void *dst, void *src, void *src_mask) +{ + uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask; + + dst64[0] = src64[0] & src_mask64[0]; + dst64[1] = src64[1] & src_mask64[1]; +} + +static int +check_params_create(struct rte_table_hash_params *params) +{ + /* name */ + if (params->name == NULL) { + RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__); + return -EINVAL; + } + + /* key_size */ + if (params->key_size != KEY_SIZE) { + RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__); + return -EINVAL; + } + + /* n_keys */ + if (params->n_keys == 0) { + RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__); + return -EINVAL; + } + + /* n_buckets */ + if ((params->n_buckets == 0) || + (!rte_is_power_of_2(params->n_buckets))) { + RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__); return -EINVAL; } /* f_hash */ if (params->f_hash == NULL) { - RTE_LOG(ERR, TABLE, - "%s: f_hash function pointer is NULL\n", __func__); + RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n", + __func__); return -EINVAL; } @@ -121,46 +159,67 @@ rte_table_hash_create_key16_lru(void *params, int socket_id, uint32_t entry_size) { - struct rte_table_hash_key16_lru_params *p = - (struct rte_table_hash_key16_lru_params *) params; + struct rte_table_hash_params *p = params; struct rte_table_hash *f; - uint32_t n_buckets, n_entries_per_bucket, - key_size, bucket_size_cl, total_size, i; + uint64_t bucket_size, total_size; + uint32_t n_buckets, i; /* Check input parameters */ - if ((check_params_create_lru(p) != 0) || + if ((check_params_create(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || ((sizeof(struct rte_bucket_4_16) % 64) != 0)) return NULL; - n_entries_per_bucket = 4; - key_size = 16; + + /* + * Table dimensioning + * + * Objective: Pick the number of buckets (n_buckets) so that there a chance + * to store n_keys keys in the table. + * + * Note: Since the buckets do not get extended, it is not possible to + * guarantee that n_keys keys can be stored in the table at any time. In the + * worst case scenario when all the n_keys fall into the same bucket, only + * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case + * defeats the purpose of the hash table. It indicates unsuitable f_hash or + * n_keys to n_buckets ratio. 
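/*
 * Illustrative sketch (editor's addition, not part of the patch): a worked
 * instance of the MIN(n_buckets) rule and power-of-two rounding applied just
 * below. The helper stands in for DPDK's rte_align32pow2(); the n_keys and
 * p->n_buckets values are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define KEYS_PER_BUCKET 4

/* Stand-in for rte_align32pow2(): smallest power of 2 >= x (for x > 0). */
static uint32_t align32pow2(uint32_t x)
{
	x--;
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
	return x + 1;
}

int main(void)
{
	uint32_t n_keys = 1000;        /* example only */
	uint32_t n_buckets_param = 64; /* example only: caller-provided p->n_buckets */

	/* ceil(1000 / 4) = 250, rounded up to the next power of 2 = 256 */
	uint32_t n_buckets = align32pow2((n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET);

	if (n_buckets < n_buckets_param)
		n_buckets = n_buckets_param;

	printf("n_buckets = %u\n", n_buckets); /* prints 256 */
	return 0;
}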
+ * + * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET + */ + n_buckets = rte_align32pow2( + (p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET); + n_buckets = RTE_MAX(n_buckets, p->n_buckets); /* Memory allocation */ - n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) / - n_entries_per_bucket); - bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket - * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; - total_size = sizeof(struct rte_table_hash) + n_buckets * - bucket_size_cl * RTE_CACHE_LINE_SIZE; - - f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_16) + + KEYS_PER_BUCKET * entry_size); + total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size; + + if (total_size > SIZE_MAX) { + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes " + "for hash table %s\n", + __func__, total_size, p->name); + return NULL; + } + + f = rte_zmalloc_socket(p->name, + (size_t)total_size, + RTE_CACHE_LINE_SIZE, + socket_id); if (f == NULL) { - RTE_LOG(ERR, TABLE, - "%s: Cannot allocate %u bytes for hash table\n", - __func__, total_size); + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes " + "for hash table %s\n", + __func__, total_size, p->name); return NULL; } - RTE_LOG(INFO, TABLE, - "%s: Hash table memory footprint is %u bytes\n", - __func__, total_size); + RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint " + "is %" PRIu64 " bytes\n", + __func__, p->name, total_size); /* Memory initialization */ f->n_buckets = n_buckets; - f->n_entries_per_bucket = n_entries_per_bucket; - f->key_size = key_size; + f->key_size = KEY_SIZE; f->entry_size = entry_size; - f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE; - f->signature_offset = p->signature_offset; + f->bucket_size = bucket_size; f->key_offset = p->key_offset; f->f_hash = p->f_hash; f->seed = p->seed; @@ -212,19 +271,19 @@ rte_table_hash_entry_add_key16_lru( uint64_t signature, pos; uint32_t bucket_index, i; - signature = f->f_hash(key, f->key_size, f->seed); + signature = f->f_hash(key, f->key_mask, f->key_size, f->seed); bucket_index = signature & (f->n_buckets - 1); bucket = (struct rte_bucket_4_16 *) - &f->memory[bucket_index * f->bucket_size]; + &f->memory[bucket_index * f->bucket_size]; signature |= RTE_BUCKET_ENTRY_VALID; /* Key is present in the bucket */ for (i = 0; i < 4; i++) { uint64_t bucket_signature = bucket->signature[i]; - uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + uint8_t *bucket_key = (uint8_t *) &bucket->key[i]; if ((bucket_signature == signature) && - (memcmp(key, bucket_key, f->key_size) == 0)) { + (keycmp(bucket_key, key, f->key_mask) == 0)) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; memcpy(bucket_data, entry, f->entry_size); @@ -238,13 +297,13 @@ rte_table_hash_entry_add_key16_lru( /* Key is not present in the bucket */ for (i = 0; i < 4; i++) { uint64_t bucket_signature = bucket->signature[i]; - uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + uint8_t *bucket_key = (uint8_t *) &bucket->key[i]; if (bucket_signature == 0) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; bucket->signature[i] = signature; - memcpy(bucket_key, key, f->key_size); + keycpy(bucket_key, key, f->key_mask); memcpy(bucket_data, entry, f->entry_size); lru_update(bucket, i); *key_found = 0; @@ -257,7 +316,7 @@ rte_table_hash_entry_add_key16_lru( /* Bucket full: replace LRU entry */ pos = lru_pos(bucket); bucket->signature[pos] 
= signature; - memcpy(bucket->key[pos], key, f->key_size); + keycpy(&bucket->key[pos], key, f->key_mask); memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size); lru_update(bucket, pos); *key_found = 0; @@ -278,19 +337,19 @@ rte_table_hash_entry_delete_key16_lru( uint64_t signature; uint32_t bucket_index, i; - signature = f->f_hash(key, f->key_size, f->seed); + signature = f->f_hash(key, f->key_mask, f->key_size, f->seed); bucket_index = signature & (f->n_buckets - 1); bucket = (struct rte_bucket_4_16 *) - &f->memory[bucket_index * f->bucket_size]; + &f->memory[bucket_index * f->bucket_size]; signature |= RTE_BUCKET_ENTRY_VALID; /* Key is present in the bucket */ for (i = 0; i < 4; i++) { uint64_t bucket_signature = bucket->signature[i]; - uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + uint8_t *bucket_key = (uint8_t *) &bucket->key[i]; if ((bucket_signature == signature) && - (memcmp(key, bucket_key, f->key_size) == 0)) { + (keycmp(bucket_key, key, f->key_mask) == 0)) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; bucket->signature[i] = 0; @@ -306,81 +365,71 @@ rte_table_hash_entry_delete_key16_lru( return 0; } -static int -check_params_create_ext(struct rte_table_hash_key16_ext_params *params) { - /* n_entries */ - if (params->n_entries == 0) { - RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__); - return -EINVAL; - } - - /* n_entries_ext */ - if (params->n_entries_ext == 0) { - RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__); - return -EINVAL; - } - - /* f_hash */ - if (params->f_hash == NULL) { - RTE_LOG(ERR, TABLE, - "%s: f_hash function pointer is NULL\n", __func__); - return -EINVAL; - } - - return 0; -} - static void * rte_table_hash_create_key16_ext(void *params, int socket_id, uint32_t entry_size) { - struct rte_table_hash_key16_ext_params *p = - (struct rte_table_hash_key16_ext_params *) params; + struct rte_table_hash_params *p = params; struct rte_table_hash *f; - uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size, - bucket_size_cl, stack_size_cl, total_size, i; + uint64_t bucket_size, stack_size, total_size; + uint32_t n_buckets_ext, i; /* Check input parameters */ - if ((check_params_create_ext(p) != 0) || + if ((check_params_create(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || ((sizeof(struct rte_bucket_4_16) % 64) != 0)) return NULL; - n_entries_per_bucket = 4; - key_size = 16; + /* + * Table dimensioning + * + * Objective: Pick the number of bucket extensions (n_buckets_ext) so that + * it is guaranteed that n_keys keys can be stored in the table at any time. + * + * The worst case scenario takes place when all the n_keys keys fall into + * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst + * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the + * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall + * into a different bucket. This case defeats the purpose of the hash table. + * It indicates unsuitable f_hash or n_keys to n_buckets ratio. 
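/*
 * Worked example (editor's addition, not part of the patch): with
 * KEYS_PER_BUCKET = 4 and, say, n_keys = 1000, the formula below reserves
 * 1000/4 + 3 = 253 extension buckets. Even in the worst case described above,
 * where 997 keys land in one bucket and the remaining 3 keys each land in
 * distinct buckets, the overloaded bucket stores 4 keys in place and needs
 * ceil((997 - 4) / 4) = 249 extensions, which is within the 253 reserved,
 * so all 1000 keys can always be stored.
 */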
+ * + * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1 + */ + n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1; /* Memory allocation */ - n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) / - n_entries_per_bucket); - n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) / - n_entries_per_bucket; - bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket - * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; - stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1) - / RTE_CACHE_LINE_SIZE; + bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_16) + + KEYS_PER_BUCKET * entry_size); + stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t)); total_size = sizeof(struct rte_table_hash) + - ((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) * - RTE_CACHE_LINE_SIZE; + (p->n_buckets + n_buckets_ext) * bucket_size + stack_size; + if (total_size > SIZE_MAX) { + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes " + "for hash table %s\n", + __func__, total_size, p->name); + return NULL; + } - f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + f = rte_zmalloc_socket(p->name, + (size_t)total_size, + RTE_CACHE_LINE_SIZE, + socket_id); if (f == NULL) { - RTE_LOG(ERR, TABLE, - "%s: Cannot allocate %u bytes for hash table\n", - __func__, total_size); + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes " + "for hash table %s\n", + __func__, total_size, p->name); return NULL; } - RTE_LOG(INFO, TABLE, - "%s: Hash table memory footprint is %u bytes\n", - __func__, total_size); + RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint " + "is %" PRIu64 " bytes\n", + __func__, p->name, total_size); /* Memory initialization */ - f->n_buckets = n_buckets; - f->n_entries_per_bucket = n_entries_per_bucket; - f->key_size = key_size; + f->n_buckets = p->n_buckets; + f->key_size = KEY_SIZE; f->entry_size = entry_size; - f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE; - f->signature_offset = p->signature_offset; + f->bucket_size = bucket_size; f->key_offset = p->key_offset; f->f_hash = p->f_hash; f->seed = p->seed; @@ -388,10 +437,7 @@ rte_table_hash_create_key16_ext(void *params, f->n_buckets_ext = n_buckets_ext; f->stack_pos = n_buckets_ext; f->stack = (uint32_t *) - &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size]; - - for (i = 0; i < n_buckets_ext; i++) - f->stack[i] = i; + &f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size]; if (p->key_mask != NULL) { f->key_mask[0] = (((uint64_t *)p->key_mask)[0]); @@ -401,6 +447,9 @@ rte_table_hash_create_key16_ext(void *params, f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU; } + for (i = 0; i < n_buckets_ext; i++) + f->stack[i] = i; + return f; } @@ -432,20 +481,20 @@ rte_table_hash_entry_add_key16_ext( uint64_t signature; uint32_t bucket_index, i; - signature = f->f_hash(key, f->key_size, f->seed); + signature = f->f_hash(key, f->key_mask, f->key_size, f->seed); bucket_index = signature & (f->n_buckets - 1); bucket0 = (struct rte_bucket_4_16 *) - &f->memory[bucket_index * f->bucket_size]; + &f->memory[bucket_index * f->bucket_size]; signature |= RTE_BUCKET_ENTRY_VALID; /* Key is present in the bucket */ for (bucket = bucket0; bucket != NULL; bucket = bucket->next) for (i = 0; i < 4; i++) { uint64_t bucket_signature = bucket->signature[i]; - uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + uint8_t *bucket_key = (uint8_t *) &bucket->key[i]; if ((bucket_signature == signature) 
&& - (memcmp(key, bucket_key, f->key_size) == 0)) { + (keycmp(bucket_key, key, f->key_mask) == 0)) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; @@ -458,17 +507,17 @@ rte_table_hash_entry_add_key16_ext( /* Key is not present in the bucket */ for (bucket_prev = NULL, bucket = bucket0; bucket != NULL; - bucket_prev = bucket, bucket = bucket->next) + bucket_prev = bucket, bucket = bucket->next) for (i = 0; i < 4; i++) { uint64_t bucket_signature = bucket->signature[i]; - uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + uint8_t *bucket_key = (uint8_t *) &bucket->key[i]; if (bucket_signature == 0) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; bucket->signature[i] = signature; - memcpy(bucket_key, key, f->key_size); + keycpy(bucket_key, key, f->key_mask); memcpy(bucket_data, entry, f->entry_size); *key_found = 0; *entry_ptr = (void *) bucket_data; @@ -487,7 +536,7 @@ rte_table_hash_entry_add_key16_ext( bucket_prev->next_valid = 1; bucket->signature[0] = signature; - memcpy(bucket->key[0], key, f->key_size); + keycpy(&bucket->key[0], key, f->key_mask); memcpy(&bucket->data[0], entry, f->entry_size); *key_found = 0; *entry_ptr = (void *) &bucket->data[0]; @@ -509,7 +558,7 @@ rte_table_hash_entry_delete_key16_ext( uint64_t signature; uint32_t bucket_index, i; - signature = f->f_hash(key, f->key_size, f->seed); + signature = f->f_hash(key, f->key_mask, f->key_size, f->seed); bucket_index = signature & (f->n_buckets - 1); bucket0 = (struct rte_bucket_4_16 *) &f->memory[bucket_index * f->bucket_size]; @@ -520,18 +569,17 @@ rte_table_hash_entry_delete_key16_ext( bucket_prev = bucket, bucket = bucket->next) for (i = 0; i < 4; i++) { uint64_t bucket_signature = bucket->signature[i]; - uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + uint8_t *bucket_key = (uint8_t *) &bucket->key[i]; if ((bucket_signature == signature) && - (memcmp(key, bucket_key, f->key_size) == 0)) { + (keycmp(bucket_key, key, f->key_mask) == 0)) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; bucket->signature[i] = 0; *key_found = 1; if (entry) - memcpy(entry, bucket_data, - f->entry_size); + memcpy(entry, bucket_data, f->entry_size); if ((bucket->signature[0] == 0) && (bucket->signature[1] == 0) && @@ -558,26 +606,28 @@ rte_table_hash_entry_delete_key16_ext( return 0; } -#define lookup_key16_cmp(key_in, bucket, pos) \ +#define lookup_key16_cmp(key_in, bucket, pos, f) \ { \ - uint64_t xor[4][2], or[4], signature[4]; \ + uint64_t xor[4][2], or[4], signature[4], k[2]; \ \ + k[0] = key_in[0] & f->key_mask[0]; \ + k[1] = key_in[1] & f->key_mask[1]; \ signature[0] = (~bucket->signature[0]) & 1; \ signature[1] = (~bucket->signature[1]) & 1; \ signature[2] = (~bucket->signature[2]) & 1; \ signature[3] = (~bucket->signature[3]) & 1; \ \ - xor[0][0] = key_in[0] ^ bucket->key[0][0]; \ - xor[0][1] = key_in[1] ^ bucket->key[0][1]; \ + xor[0][0] = k[0] ^ bucket->key[0][0]; \ + xor[0][1] = k[1] ^ bucket->key[0][1]; \ \ - xor[1][0] = key_in[0] ^ bucket->key[1][0]; \ - xor[1][1] = key_in[1] ^ bucket->key[1][1]; \ + xor[1][0] = k[0] ^ bucket->key[1][0]; \ + xor[1][1] = k[1] ^ bucket->key[1][1]; \ \ - xor[2][0] = key_in[0] ^ bucket->key[2][0]; \ - xor[2][1] = key_in[1] ^ bucket->key[2][1]; \ + xor[2][0] = k[0] ^ bucket->key[2][0]; \ + xor[2][1] = k[1] ^ bucket->key[2][1]; \ \ - xor[3][0] = key_in[0] ^ bucket->key[3][0]; \ - xor[3][1] = key_in[1] ^ bucket->key[3][1]; \ + xor[3][0] = k[0] ^ bucket->key[3][0]; \ + xor[3][1] = k[1] ^ bucket->key[3][1]; \ \ or[0] = xor[0][0] | xor[0][1] | signature[0]; \ or[1] = 
xor[1][0] | xor[1][1] | signature[1]; \ @@ -610,30 +660,12 @@ rte_table_hash_entry_delete_key16_ext( #define lookup1_stage1(mbuf1, bucket1, f) \ { \ - uint64_t signature; \ - uint32_t bucket_index; \ - \ - signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\ - bucket_index = signature & (f->n_buckets - 1); \ - bucket1 = (struct rte_bucket_4_16 *) \ - &f->memory[bucket_index * f->bucket_size]; \ - rte_prefetch0(bucket1); \ - rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\ -} - -#define lookup1_stage1_dosig(mbuf1, bucket1, f) \ -{ \ uint64_t *key; \ uint64_t signature = 0; \ uint32_t bucket_index; \ - uint64_t hash_key_buffer[2]; \ \ key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\ - \ - hash_key_buffer[0] = key[0] & f->key_mask[0]; \ - hash_key_buffer[1] = key[1] & f->key_mask[1]; \ - signature = f->f_hash(hash_key_buffer, \ - RTE_TABLE_HASH_KEY_SIZE, f->seed); \ + signature = f->f_hash(key, f->key_mask, KEY_SIZE, f->seed); \ \ bucket_index = signature & (f->n_buckets - 1); \ bucket1 = (struct rte_bucket_4_16 *) \ @@ -648,14 +680,10 @@ rte_table_hash_entry_delete_key16_ext( void *a; \ uint64_t pkt_mask; \ uint64_t *key; \ - uint64_t hash_key_buffer[2]; \ uint32_t pos; \ \ key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\ - hash_key_buffer[0] = key[0] & f->key_mask[0]; \ - hash_key_buffer[1] = key[1] & f->key_mask[1]; \ - \ - lookup_key16_cmp(hash_key_buffer, bucket2, pos); \ + lookup_key16_cmp(key, bucket2, pos, f); \ \ pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\ pkts_mask_out |= pkt_mask; \ @@ -673,14 +701,10 @@ rte_table_hash_entry_delete_key16_ext( void *a; \ uint64_t pkt_mask, bucket_mask; \ uint64_t *key; \ - uint64_t hash_key_buffer[2]; \ uint32_t pos; \ \ key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\ - hash_key_buffer[0] = key[0] & f->key_mask[0]; \ - hash_key_buffer[1] = key[1] & f->key_mask[1]; \ - \ - lookup_key16_cmp(hash_key_buffer, bucket2, pos); \ + lookup_key16_cmp(key, bucket2, pos, f); \ \ pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\ pkts_mask_out |= pkt_mask; \ @@ -703,15 +727,11 @@ rte_table_hash_entry_delete_key16_ext( void *a; \ uint64_t pkt_mask, bucket_mask; \ uint64_t *key; \ - uint64_t hash_key_buffer[2]; \ uint32_t pos; \ \ bucket = buckets[pkt_index]; \ key = keys[pkt_index]; \ - hash_key_buffer[0] = key[0] & f->key_mask[0]; \ - hash_key_buffer[1] = key[1] & f->key_mask[1]; \ - \ - lookup_key16_cmp(hash_key_buffer, bucket, pos); \ + lookup_key16_cmp(key, bucket, pos, f); \ \ pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;\ pkts_mask_out |= pkt_mask; \ @@ -775,36 +795,12 @@ rte_table_hash_entry_delete_key16_ext( #define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \ { \ - uint64_t signature10, signature11; \ - uint32_t bucket10_index, bucket11_index; \ - \ - signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\ - bucket10_index = signature10 & (f->n_buckets - 1); \ - bucket10 = (struct rte_bucket_4_16 *) \ - &f->memory[bucket10_index * f->bucket_size]; \ - rte_prefetch0(bucket10); \ - rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\ - \ - signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\ - bucket11_index = signature11 & (f->n_buckets - 1); \ - bucket11 = (struct rte_bucket_4_16 *) \ - &f->memory[bucket11_index * f->bucket_size]; \ - rte_prefetch0(bucket11); \ - rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\ -} - -#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, 
bucket11, f) \ -{ \ uint64_t *key10, *key11; \ - uint64_t hash_offset_buffer[2]; \ uint64_t signature10, signature11; \ uint32_t bucket10_index, bucket11_index; \ \ key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, f->key_offset);\ - hash_offset_buffer[0] = key10[0] & f->key_mask[0]; \ - hash_offset_buffer[1] = key10[1] & f->key_mask[1]; \ - signature10 = f->f_hash(hash_offset_buffer, \ - RTE_TABLE_HASH_KEY_SIZE, f->seed);\ + signature10 = f->f_hash(key10, f->key_mask, KEY_SIZE, f->seed);\ bucket10_index = signature10 & (f->n_buckets - 1); \ bucket10 = (struct rte_bucket_4_16 *) \ &f->memory[bucket10_index * f->bucket_size]; \ @@ -812,10 +808,7 @@ rte_table_hash_entry_delete_key16_ext( rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\ \ key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, f->key_offset);\ - hash_offset_buffer[0] = key11[0] & f->key_mask[0]; \ - hash_offset_buffer[1] = key11[1] & f->key_mask[1]; \ - signature11 = f->f_hash(hash_offset_buffer, \ - RTE_TABLE_HASH_KEY_SIZE, f->seed);\ + signature11 = f->f_hash(key11, f->key_mask, KEY_SIZE, f->seed);\ bucket11_index = signature11 & (f->n_buckets - 1); \ bucket11 = (struct rte_bucket_4_16 *) \ &f->memory[bucket11_index * f->bucket_size]; \ @@ -829,19 +822,13 @@ rte_table_hash_entry_delete_key16_ext( void *a20, *a21; \ uint64_t pkt20_mask, pkt21_mask; \ uint64_t *key20, *key21; \ - uint64_t hash_key_buffer20[2]; \ - uint64_t hash_key_buffer21[2]; \ uint32_t pos20, pos21; \ \ key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\ key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\ - hash_key_buffer20[0] = key20[0] & f->key_mask[0]; \ - hash_key_buffer20[1] = key20[1] & f->key_mask[1]; \ - hash_key_buffer21[0] = key21[0] & f->key_mask[0]; \ - hash_key_buffer21[1] = key21[1] & f->key_mask[1]; \ \ - lookup_key16_cmp(hash_key_buffer20, bucket20, pos20); \ - lookup_key16_cmp(hash_key_buffer21, bucket21, pos21); \ + lookup_key16_cmp(key20, bucket20, pos20, f); \ + lookup_key16_cmp(key21, bucket21, pos21, f); \ \ pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\ pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\ @@ -864,19 +851,13 @@ rte_table_hash_entry_delete_key16_ext( void *a20, *a21; \ uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\ uint64_t *key20, *key21; \ - uint64_t hash_key_buffer20[2]; \ - uint64_t hash_key_buffer21[2]; \ uint32_t pos20, pos21; \ \ key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\ key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\ - hash_key_buffer20[0] = key20[0] & f->key_mask[0]; \ - hash_key_buffer20[1] = key20[1] & f->key_mask[1]; \ - hash_key_buffer21[0] = key21[0] & f->key_mask[0]; \ - hash_key_buffer21[1] = key21[1] & f->key_mask[1]; \ \ - lookup_key16_cmp(hash_key_buffer20, bucket20, pos20); \ - lookup_key16_cmp(hash_key_buffer21, bucket21, pos21); \ + lookup_key16_cmp(key20, bucket20, pos20, f); \ + lookup_key16_cmp(key21, bucket21, pos21, f); \ \ pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\ pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\ @@ -916,6 +897,7 @@ rte_table_hash_lookup_key16_lru( uint64_t pkts_mask_out = 0; __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in); /* Cannot run the pipeline with less than 5 packets */ @@ -932,8 +914,8 @@ rte_table_hash_lookup_key16_lru( } *lookup_hit_mask = pkts_mask_out; - RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, - n_pkts_in - 
__builtin_popcountll(pkts_mask_out)); + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - + __builtin_popcountll(pkts_mask_out)); return 0; } @@ -1026,136 +1008,7 @@ rte_table_hash_lookup_key16_lru( RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); return 0; -} /* rte_table_hash_lookup_key16_lru() */ - -static int -rte_table_hash_lookup_key16_lru_dosig( - void *table, - struct rte_mbuf **pkts, - uint64_t pkts_mask, - uint64_t *lookup_hit_mask, - void **entries) -{ - struct rte_table_hash *f = (struct rte_table_hash *) table; - struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21; - struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; - uint32_t pkt00_index, pkt01_index, pkt10_index; - uint32_t pkt11_index, pkt20_index, pkt21_index; - uint64_t pkts_mask_out = 0; - - __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); - - RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in); - - /* Cannot run the pipeline with less than 5 packets */ - if (__builtin_popcountll(pkts_mask) < 5) { - for ( ; pkts_mask; ) { - struct rte_bucket_4_16 *bucket; - struct rte_mbuf *mbuf; - uint32_t pkt_index; - - lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); - lookup1_stage1_dosig(mbuf, bucket, f); - lookup1_stage2_lru(pkt_index, mbuf, bucket, - pkts_mask_out, entries, f); - } - - *lookup_hit_mask = pkts_mask_out; - RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - - __builtin_popcountll(pkts_mask_out)); - return 0; - } - - /* - * Pipeline fill - * - */ - /* Pipeline stage 0 */ - lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, - pkts_mask, f); - - /* Pipeline feed */ - mbuf10 = mbuf00; - mbuf11 = mbuf01; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, - pkts_mask, f); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); - - /* - * Pipeline run - * - */ - for ( ; pkts_mask; ) { - /* Pipeline feed */ - bucket20 = bucket10; - bucket21 = bucket11; - mbuf20 = mbuf10; - mbuf21 = mbuf11; - mbuf10 = mbuf00; - mbuf11 = mbuf01; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, - mbuf00, mbuf01, pkts, pkts_mask, f); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); - - /* Pipeline stage 2 */ - lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, - bucket20, bucket21, pkts_mask_out, entries, f); - } - - /* - * Pipeline flush - * - */ - /* Pipeline feed */ - bucket20 = bucket10; - bucket21 = bucket11; - mbuf20 = mbuf10; - mbuf21 = mbuf11; - mbuf10 = mbuf00; - mbuf11 = mbuf01; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); - - /* Pipeline stage 2 */ - lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, - bucket20, bucket21, pkts_mask_out, entries, f); - - /* Pipeline feed */ - bucket20 = bucket10; - bucket21 = bucket11; - mbuf20 = mbuf10; - mbuf21 = mbuf11; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - - /* Pipeline stage 2 */ - lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, - bucket20, bucket21, pkts_mask_out, entries, f); - - *lookup_hit_mask = pkts_mask_out; - 
RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - - __builtin_popcountll(pkts_mask_out)); - return 0; -} /* rte_table_hash_lookup_key16_lru_dosig() */ +} /* lookup LRU */ static int rte_table_hash_lookup_key16_ext( @@ -1175,6 +1028,7 @@ rte_table_hash_lookup_key16_ext( uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX]; __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in); /* Cannot run the pipeline with less than 5 packets */ @@ -1306,159 +1160,7 @@ grind_next_buckets: RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); return 0; -} /* rte_table_hash_lookup_key16_ext() */ - -static int -rte_table_hash_lookup_key16_ext_dosig( - void *table, - struct rte_mbuf **pkts, - uint64_t pkts_mask, - uint64_t *lookup_hit_mask, - void **entries) -{ - struct rte_table_hash *f = (struct rte_table_hash *) table; - struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21; - struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; - uint32_t pkt00_index, pkt01_index, pkt10_index; - uint32_t pkt11_index, pkt20_index, pkt21_index; - uint64_t pkts_mask_out = 0, buckets_mask = 0; - struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX]; - uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX]; - - __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); - - RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in); - - /* Cannot run the pipeline with less than 5 packets */ - if (__builtin_popcountll(pkts_mask) < 5) { - for ( ; pkts_mask; ) { - struct rte_bucket_4_16 *bucket; - struct rte_mbuf *mbuf; - uint32_t pkt_index; - - lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); - lookup1_stage1_dosig(mbuf, bucket, f); - lookup1_stage2_ext(pkt_index, mbuf, bucket, - pkts_mask_out, entries, buckets_mask, - buckets, keys, f); - } - - goto grind_next_buckets; - } - - /* - * Pipeline fill - * - */ - /* Pipeline stage 0 */ - lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, - pkts_mask, f); - - /* Pipeline feed */ - mbuf10 = mbuf00; - mbuf11 = mbuf01; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, - pkts_mask, f); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); - - /* - * Pipeline run - * - */ - for ( ; pkts_mask; ) { - /* Pipeline feed */ - bucket20 = bucket10; - bucket21 = bucket11; - mbuf20 = mbuf10; - mbuf21 = mbuf11; - mbuf10 = mbuf00; - mbuf11 = mbuf01; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, - mbuf00, mbuf01, pkts, pkts_mask, f); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); - - /* Pipeline stage 2 */ - lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, - bucket20, bucket21, pkts_mask_out, entries, - buckets_mask, buckets, keys, f); - } - - /* - * Pipeline flush - * - */ - /* Pipeline feed */ - bucket20 = bucket10; - bucket21 = bucket11; - mbuf20 = mbuf10; - mbuf21 = mbuf11; - mbuf10 = mbuf00; - mbuf11 = mbuf01; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); - - /* Pipeline stage 2 */ - lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, - 
bucket20, bucket21, pkts_mask_out, entries, - buckets_mask, buckets, keys, f); - - /* Pipeline feed */ - bucket20 = bucket10; - bucket21 = bucket11; - mbuf20 = mbuf10; - mbuf21 = mbuf11; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - - /* Pipeline stage 2 */ - lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, - bucket20, bucket21, pkts_mask_out, entries, - buckets_mask, buckets, keys, f); - -grind_next_buckets: - /* Grind next buckets */ - for ( ; buckets_mask; ) { - uint64_t buckets_mask_next = 0; - - for ( ; buckets_mask; ) { - uint64_t pkt_mask; - uint32_t pkt_index; - - pkt_index = __builtin_ctzll(buckets_mask); - pkt_mask = 1LLU << pkt_index; - buckets_mask &= ~pkt_mask; - - lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, - entries, buckets_mask_next, f); - } - - buckets_mask = buckets_mask_next; - } - - *lookup_hit_mask = pkts_mask_out; - RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - - __builtin_popcountll(pkts_mask_out)); - return 0; -} /* rte_table_hash_lookup_key16_ext_dosig() */ +} /* lookup EXT */ static int rte_table_hash_key16_stats_read(void *table, struct rte_table_stats *stats, int clear) @@ -1485,15 +1187,6 @@ struct rte_table_ops rte_table_hash_key16_lru_ops = { .f_stats = rte_table_hash_key16_stats_read, }; -struct rte_table_ops rte_table_hash_key16_lru_dosig_ops = { - .f_create = rte_table_hash_create_key16_lru, - .f_free = rte_table_hash_free_key16_lru, - .f_add = rte_table_hash_entry_add_key16_lru, - .f_delete = rte_table_hash_entry_delete_key16_lru, - .f_lookup = rte_table_hash_lookup_key16_lru_dosig, - .f_stats = rte_table_hash_key16_stats_read, -}; - struct rte_table_ops rte_table_hash_key16_ext_ops = { .f_create = rte_table_hash_create_key16_ext, .f_free = rte_table_hash_free_key16_ext, @@ -1504,12 +1197,3 @@ struct rte_table_ops rte_table_hash_key16_ext_ops = { .f_lookup = rte_table_hash_lookup_key16_ext, .f_stats = rte_table_hash_key16_stats_read, }; - -struct rte_table_ops rte_table_hash_key16_ext_dosig_ops = { - .f_create = rte_table_hash_create_key16_ext, - .f_free = rte_table_hash_free_key16_ext, - .f_add = rte_table_hash_entry_add_key16_ext, - .f_delete = rte_table_hash_entry_delete_key16_ext, - .f_lookup = rte_table_hash_lookup_key16_ext_dosig, - .f_stats = rte_table_hash_key16_stats_read, -}; diff --git a/lib/librte_table/rte_table_hash_key32.c b/lib/librte_table/rte_table_hash_key32.c index 31fe6fda..d4364d62 100644 --- a/lib/librte_table/rte_table_hash_key32.c +++ b/lib/librte_table/rte_table_hash_key32.c @@ -1,34 +1,34 @@ /*- - * BSD LICENSE + * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include <string.h> #include <stdio.h> @@ -42,7 +42,9 @@ #include "rte_table_hash.h" #include "rte_lru.h" -#define RTE_TABLE_HASH_KEY_SIZE 32 +#define KEY_SIZE 32 + +#define KEYS_PER_BUCKET 4 #define RTE_BUCKET_ENTRY_VALID 0x1LLU @@ -79,12 +81,11 @@ struct rte_table_hash { /* Input parameters */ uint32_t n_buckets; - uint32_t n_entries_per_bucket; uint32_t key_size; uint32_t entry_size; uint32_t bucket_size; - uint32_t signature_offset; uint32_t key_offset; + uint64_t key_mask[4]; rte_table_hash_op_hash f_hash; uint64_t seed; @@ -98,10 +99,52 @@ struct rte_table_hash { }; static int -check_params_create_lru(struct rte_table_hash_key32_lru_params *params) { - /* n_entries */ - if (params->n_entries == 0) { - RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__); +keycmp(void *a, void *b, void *b_mask) +{ + uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask; + + return (a64[0] != (b64[0] & b_mask64[0])) || + (a64[1] != (b64[1] & b_mask64[1])) || + (a64[2] != (b64[2] & b_mask64[2])) || + (a64[3] != (b64[3] & b_mask64[3])); +} + +static void +keycpy(void *dst, void *src, void *src_mask) +{ + uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask; + + dst64[0] = src64[0] & src_mask64[0]; + dst64[1] = src64[1] & src_mask64[1]; + dst64[2] = src64[2] & src_mask64[2]; + dst64[3] = src64[3] & src_mask64[3]; +} + +static int +check_params_create(struct rte_table_hash_params *params) +{ + /* name */ + if (params->name == NULL) { + RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__); + return -EINVAL; + } + + /* key_size */ + if (params->key_size != KEY_SIZE) { + RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__); + return -EINVAL; + } + + /* n_keys */ + if (params->n_keys == 0) { + RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__); + return -EINVAL; + } + + /* n_buckets */ + if ((params->n_buckets == 0) || + (!rte_is_power_of_2(params->n_buckets))) { + RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__); return -EINVAL; } @@ -120,51 +163,83 @@ rte_table_hash_create_key32_lru(void *params, int socket_id, uint32_t entry_size) { - struct rte_table_hash_key32_lru_params *p = - (struct rte_table_hash_key32_lru_params *) params; + struct rte_table_hash_params *p = params; struct rte_table_hash *f; - uint32_t n_buckets, n_entries_per_bucket, key_size, bucket_size_cl; - uint32_t total_size, i; + uint64_t bucket_size, total_size; + uint32_t n_buckets, i; /* Check input parameters */ - if ((check_params_create_lru(p) != 0) || + if ((check_params_create(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_32) % 64) != 0)) { + ((sizeof(struct rte_bucket_4_32) % 64) != 0)) return NULL; - } - n_entries_per_bucket = 4; - key_size = 32; + + /* + * Table dimensioning + * + * Objective: Pick the number of buckets (n_buckets) so that there a chance + * to store n_keys keys in the table. + * + * Note: Since the buckets do not get extended, it is not possible to + * guarantee that n_keys keys can be stored in the table at any time. In the + * worst case scenario when all the n_keys fall into the same bucket, only + * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case + * defeats the purpose of the hash table. It indicates unsuitable f_hash or + * n_keys to n_buckets ratio. 
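/*
 * Illustrative sketch (editor's addition, not part of the patch): how the
 * keycmp()/keycpy() helpers introduced earlier in this file make masked-out
 * key bytes behave as wildcards during lookup. The 32-byte key values below
 * are made up.
 */
#include <stdint.h>
#include <stdio.h>

/* Same masked compare as the keycmp() added for 32-byte keys: returns 0 when
 * the stored (already masked) key equals the lookup key under the mask. */
static int
keycmp32(const uint64_t *stored, const uint64_t *lookup, const uint64_t *mask)
{
	return (stored[0] != (lookup[0] & mask[0])) ||
		(stored[1] != (lookup[1] & mask[1])) ||
		(stored[2] != (lookup[2] & mask[2])) ||
		(stored[3] != (lookup[3] & mask[3]));
}

int main(void)
{
	/* Last 8-byte word is masked out, so it is ignored on lookup. */
	uint64_t mask[4]   = { UINT64_MAX, UINT64_MAX, UINT64_MAX, 0 };
	uint64_t stored[4] = { 0x1111, 0x2222, 0x3333, 0 };          /* as keycpy() stores it */
	uint64_t lookup[4] = { 0x1111, 0x2222, 0x3333, 0xdeadbeef }; /* differs only in masked word */

	printf("hit: %s\n", keycmp32(stored, lookup, mask) == 0 ? "yes" : "no");
	return 0;
}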
+ * + * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET + */ + n_buckets = rte_align32pow2( + (p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET); + n_buckets = RTE_MAX(n_buckets, p->n_buckets); /* Memory allocation */ - n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) / - n_entries_per_bucket); - bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket - * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; - total_size = sizeof(struct rte_table_hash) + n_buckets * - bucket_size_cl * RTE_CACHE_LINE_SIZE; - - f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_32) + + KEYS_PER_BUCKET * entry_size); + total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size; + if (total_size > SIZE_MAX) { + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes " + "for hash table %s\n", + __func__, total_size, p->name); + return NULL; + } + + f = rte_zmalloc_socket(p->name, + (size_t)total_size, + RTE_CACHE_LINE_SIZE, + socket_id); if (f == NULL) { - RTE_LOG(ERR, TABLE, - "%s: Cannot allocate %u bytes for hash table\n", - __func__, total_size); + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes " + "for hash table %s\n", + __func__, total_size, p->name); return NULL; } RTE_LOG(INFO, TABLE, - "%s: Hash table memory footprint is %u bytes\n", __func__, - total_size); + "%s: Hash table %s memory footprint " + "is %" PRIu64 " bytes\n", + __func__, p->name, total_size); /* Memory initialization */ f->n_buckets = n_buckets; - f->n_entries_per_bucket = n_entries_per_bucket; - f->key_size = key_size; + f->key_size = KEY_SIZE; f->entry_size = entry_size; - f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE; - f->signature_offset = p->signature_offset; + f->bucket_size = bucket_size; f->key_offset = p->key_offset; f->f_hash = p->f_hash; f->seed = p->seed; + if (p->key_mask != NULL) { + f->key_mask[0] = ((uint64_t *)p->key_mask)[0]; + f->key_mask[1] = ((uint64_t *)p->key_mask)[1]; + f->key_mask[2] = ((uint64_t *)p->key_mask)[2]; + f->key_mask[3] = ((uint64_t *)p->key_mask)[3]; + } else { + f->key_mask[0] = 0xFFFFFFFFFFFFFFFFLLU; + f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU; + f->key_mask[2] = 0xFFFFFFFFFFFFFFFFLLU; + f->key_mask[3] = 0xFFFFFFFFFFFFFFFFLLU; + } + for (i = 0; i < n_buckets; i++) { struct rte_bucket_4_32 *bucket; @@ -204,7 +279,7 @@ rte_table_hash_entry_add_key32_lru( uint64_t signature, pos; uint32_t bucket_index, i; - signature = f->f_hash(key, f->key_size, f->seed); + signature = f->f_hash(key, f->key_mask, f->key_size, f->seed); bucket_index = signature & (f->n_buckets - 1); bucket = (struct rte_bucket_4_32 *) &f->memory[bucket_index * f->bucket_size]; @@ -213,10 +288,10 @@ rte_table_hash_entry_add_key32_lru( /* Key is present in the bucket */ for (i = 0; i < 4; i++) { uint64_t bucket_signature = bucket->signature[i]; - uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + uint8_t *bucket_key = (uint8_t *) &bucket->key[i]; if ((bucket_signature == signature) && - (memcmp(key, bucket_key, f->key_size) == 0)) { + (keycmp(bucket_key, key, f->key_mask) == 0)) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; memcpy(bucket_data, entry, f->entry_size); @@ -230,13 +305,13 @@ rte_table_hash_entry_add_key32_lru( /* Key is not present in the bucket */ for (i = 0; i < 4; i++) { uint64_t bucket_signature = bucket->signature[i]; - uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + uint8_t *bucket_key = (uint8_t *) 
&bucket->key[i]; if (bucket_signature == 0) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; bucket->signature[i] = signature; - memcpy(bucket_key, key, f->key_size); + keycpy(bucket_key, key, f->key_mask); memcpy(bucket_data, entry, f->entry_size); lru_update(bucket, i); *key_found = 0; @@ -249,10 +324,10 @@ rte_table_hash_entry_add_key32_lru( /* Bucket full: replace LRU entry */ pos = lru_pos(bucket); bucket->signature[pos] = signature; - memcpy(bucket->key[pos], key, f->key_size); + keycpy(&bucket->key[pos], key, f->key_mask); memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size); lru_update(bucket, pos); - *key_found = 0; + *key_found = 0; *entry_ptr = (void *) &bucket->data[pos * f->entry_size]; return 0; @@ -270,7 +345,7 @@ rte_table_hash_entry_delete_key32_lru( uint64_t signature; uint32_t bucket_index, i; - signature = f->f_hash(key, f->key_size, f->seed); + signature = f->f_hash(key, f->key_mask, f->key_size, f->seed); bucket_index = signature & (f->n_buckets - 1); bucket = (struct rte_bucket_4_32 *) &f->memory[bucket_index * f->bucket_size]; @@ -279,10 +354,10 @@ rte_table_hash_entry_delete_key32_lru( /* Key is present in the bucket */ for (i = 0; i < 4; i++) { uint64_t bucket_signature = bucket->signature[i]; - uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + uint8_t *bucket_key = (uint8_t *) &bucket->key[i]; if ((bucket_signature == signature) && - (memcmp(key, bucket_key, f->key_size) == 0)) { + (keycmp(bucket_key, key, f->key_mask) == 0)) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; bucket->signature[i] = 0; @@ -299,81 +374,72 @@ rte_table_hash_entry_delete_key32_lru( return 0; } -static int -check_params_create_ext(struct rte_table_hash_key32_ext_params *params) { - /* n_entries */ - if (params->n_entries == 0) { - RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__); - return -EINVAL; - } - - /* n_entries_ext */ - if (params->n_entries_ext == 0) { - RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__); - return -EINVAL; - } - - /* f_hash */ - if (params->f_hash == NULL) { - RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n", - __func__); - return -EINVAL; - } - - return 0; -} - static void * rte_table_hash_create_key32_ext(void *params, int socket_id, uint32_t entry_size) { - struct rte_table_hash_key32_ext_params *p = - params; + struct rte_table_hash_params *p = params; struct rte_table_hash *f; - uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket; - uint32_t key_size, bucket_size_cl, stack_size_cl, total_size, i; + uint64_t bucket_size, stack_size, total_size; + uint32_t n_buckets_ext, i; /* Check input parameters */ - if ((check_params_create_ext(p) != 0) || + if ((check_params_create(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || ((sizeof(struct rte_bucket_4_32) % 64) != 0)) return NULL; - n_entries_per_bucket = 4; - key_size = 32; + /* + * Table dimensioning + * + * Objective: Pick the number of bucket extensions (n_buckets_ext) so that + * it is guaranteed that n_keys keys can be stored in the table at any time. + * + * The worst case scenario takes place when all the n_keys keys fall into + * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst + * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the + * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall + * into a different bucket. This case defeats the purpose of the hash table. + * It indicates unsuitable f_hash or n_keys to n_buckets ratio. 
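/*
 * Illustrative sketch (editor's addition, not part of the patch): the patch
 * changes every f_hash() call to the four-argument form
 * f_hash(key, key_mask, key_size, seed), so the mask is applied inside the
 * hash callback instead of by the caller. Below is a hypothetical callback of
 * that shape; the mixing step is a placeholder, not any DPDK-provided hash.
 */
#include <stdint.h>

static uint64_t
example_hash(void *key, void *key_mask, uint32_t key_size, uint64_t seed)
{
	uint64_t *k = key, *m = key_mask;
	uint64_t h = seed;
	uint32_t i;

	for (i = 0; i < key_size / sizeof(uint64_t); i++) {
		h ^= k[i] & m[i];      /* masked-out bytes never affect the hash */
		h *= 0x100000001b3ULL; /* FNV-1a style multiply, placeholder only */
	}

	return h;
}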
+ * + * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1 + */ + n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1; /* Memory allocation */ - n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) / - n_entries_per_bucket); - n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) / - n_entries_per_bucket; - bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket - * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; - stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1) - / RTE_CACHE_LINE_SIZE; + bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_32) + + KEYS_PER_BUCKET * entry_size); + stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t)); total_size = sizeof(struct rte_table_hash) + - ((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) * - RTE_CACHE_LINE_SIZE; + (p->n_buckets + n_buckets_ext) * bucket_size + stack_size; + if (total_size > SIZE_MAX) { + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes " + "for hash table %s\n", + __func__, total_size, p->name); + return NULL; + } - f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + f = rte_zmalloc_socket(p->name, + (size_t)total_size, + RTE_CACHE_LINE_SIZE, + socket_id); if (f == NULL) { - RTE_LOG(ERR, TABLE, - "%s: Cannot allocate %u bytes for hash table\n", - __func__, total_size); + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes " + "for hash table %s\n", + __func__, total_size, p->name); return NULL; } RTE_LOG(INFO, TABLE, - "%s: Hash table memory footprint is %u bytes\n", __func__, - total_size); + "%s: Hash table %s memory footprint " + "is %" PRIu64" bytes\n", + __func__, p->name, total_size); /* Memory initialization */ - f->n_buckets = n_buckets; - f->n_entries_per_bucket = n_entries_per_bucket; - f->key_size = key_size; + f->n_buckets = p->n_buckets; + f->key_size = KEY_SIZE; f->entry_size = entry_size; - f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE; - f->signature_offset = p->signature_offset; + f->bucket_size = bucket_size; f->key_offset = p->key_offset; f->f_hash = p->f_hash; f->seed = p->seed; @@ -381,7 +447,19 @@ rte_table_hash_create_key32_ext(void *params, f->n_buckets_ext = n_buckets_ext; f->stack_pos = n_buckets_ext; f->stack = (uint32_t *) - &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size]; + &f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size]; + + if (p->key_mask != NULL) { + f->key_mask[0] = (((uint64_t *)p->key_mask)[0]); + f->key_mask[1] = (((uint64_t *)p->key_mask)[1]); + f->key_mask[2] = (((uint64_t *)p->key_mask)[2]); + f->key_mask[3] = (((uint64_t *)p->key_mask)[3]); + } else { + f->key_mask[0] = 0xFFFFFFFFFFFFFFFFLLU; + f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU; + f->key_mask[2] = 0xFFFFFFFFFFFFFFFFLLU; + f->key_mask[3] = 0xFFFFFFFFFFFFFFFFLLU; + } for (i = 0; i < n_buckets_ext; i++) f->stack[i] = i; @@ -417,7 +495,7 @@ rte_table_hash_entry_add_key32_ext( uint64_t signature; uint32_t bucket_index, i; - signature = f->f_hash(key, f->key_size, f->seed); + signature = f->f_hash(key, f->key_mask, f->key_size, f->seed); bucket_index = signature & (f->n_buckets - 1); bucket0 = (struct rte_bucket_4_32 *) &f->memory[bucket_index * f->bucket_size]; @@ -427,10 +505,10 @@ rte_table_hash_entry_add_key32_ext( for (bucket = bucket0; bucket != NULL; bucket = bucket->next) { for (i = 0; i < 4; i++) { uint64_t bucket_signature = bucket->signature[i]; - uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + uint8_t 
*bucket_key = (uint8_t *) &bucket->key[i]; if ((bucket_signature == signature) && - (memcmp(key, bucket_key, f->key_size) == 0)) { + (keycmp(bucket_key, key, f->key_mask) == 0)) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; @@ -448,14 +526,14 @@ rte_table_hash_entry_add_key32_ext( bucket_prev = bucket, bucket = bucket->next) for (i = 0; i < 4; i++) { uint64_t bucket_signature = bucket->signature[i]; - uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + uint8_t *bucket_key = (uint8_t *) &bucket->key[i]; if (bucket_signature == 0) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; bucket->signature[i] = signature; - memcpy(bucket_key, key, f->key_size); + keycpy(bucket_key, key, f->key_mask); memcpy(bucket_data, entry, f->entry_size); *key_found = 0; *entry_ptr = (void *) bucket_data; @@ -475,7 +553,7 @@ rte_table_hash_entry_add_key32_ext( bucket_prev->next_valid = 1; bucket->signature[0] = signature; - memcpy(bucket->key[0], key, f->key_size); + keycpy(&bucket->key[0], key, f->key_mask); memcpy(&bucket->data[0], entry, f->entry_size); *key_found = 0; *entry_ptr = (void *) &bucket->data[0]; @@ -497,7 +575,7 @@ rte_table_hash_entry_delete_key32_ext( uint64_t signature; uint32_t bucket_index, i; - signature = f->f_hash(key, f->key_size, f->seed); + signature = f->f_hash(key, f->key_mask, f->key_size, f->seed); bucket_index = signature & (f->n_buckets - 1); bucket0 = (struct rte_bucket_4_32 *) &f->memory[bucket_index * f->bucket_size]; @@ -508,24 +586,23 @@ rte_table_hash_entry_delete_key32_ext( bucket_prev = bucket, bucket = bucket->next) for (i = 0; i < 4; i++) { uint64_t bucket_signature = bucket->signature[i]; - uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + uint8_t *bucket_key = (uint8_t *) &bucket->key[i]; if ((bucket_signature == signature) && - (memcmp(key, bucket_key, f->key_size) == 0)) { + (keycmp(bucket_key, key, f->key_mask) == 0)) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; bucket->signature[i] = 0; *key_found = 1; if (entry) - memcpy(entry, bucket_data, - f->entry_size); + memcpy(entry, bucket_data, f->entry_size); if ((bucket->signature[0] == 0) && - (bucket->signature[1] == 0) && - (bucket->signature[2] == 0) && - (bucket->signature[3] == 0) && - (bucket_prev != NULL)) { + (bucket->signature[1] == 0) && + (bucket->signature[2] == 0) && + (bucket->signature[3] == 0) && + (bucket_prev != NULL)) { bucket_prev->next = bucket->next; bucket_prev->next_valid = bucket->next_valid; @@ -546,34 +623,39 @@ rte_table_hash_entry_delete_key32_ext( return 0; } -#define lookup_key32_cmp(key_in, bucket, pos) \ +#define lookup_key32_cmp(key_in, bucket, pos, f) \ { \ - uint64_t xor[4][4], or[4], signature[4]; \ + uint64_t xor[4][4], or[4], signature[4], k[4]; \ + \ + k[0] = key_in[0] & f->key_mask[0]; \ + k[1] = key_in[1] & f->key_mask[1]; \ + k[2] = key_in[2] & f->key_mask[2]; \ + k[3] = key_in[3] & f->key_mask[3]; \ \ signature[0] = ((~bucket->signature[0]) & 1); \ signature[1] = ((~bucket->signature[1]) & 1); \ signature[2] = ((~bucket->signature[2]) & 1); \ signature[3] = ((~bucket->signature[3]) & 1); \ \ - xor[0][0] = key_in[0] ^ bucket->key[0][0]; \ - xor[0][1] = key_in[1] ^ bucket->key[0][1]; \ - xor[0][2] = key_in[2] ^ bucket->key[0][2]; \ - xor[0][3] = key_in[3] ^ bucket->key[0][3]; \ + xor[0][0] = k[0] ^ bucket->key[0][0]; \ + xor[0][1] = k[1] ^ bucket->key[0][1]; \ + xor[0][2] = k[2] ^ bucket->key[0][2]; \ + xor[0][3] = k[3] ^ bucket->key[0][3]; \ \ - xor[1][0] = key_in[0] ^ bucket->key[1][0]; \ - xor[1][1] = key_in[1] ^ bucket->key[1][1]; \ - 
xor[1][2] = key_in[2] ^ bucket->key[1][2]; \ - xor[1][3] = key_in[3] ^ bucket->key[1][3]; \ + xor[1][0] = k[0] ^ bucket->key[1][0]; \ + xor[1][1] = k[1] ^ bucket->key[1][1]; \ + xor[1][2] = k[2] ^ bucket->key[1][2]; \ + xor[1][3] = k[3] ^ bucket->key[1][3]; \ \ - xor[2][0] = key_in[0] ^ bucket->key[2][0]; \ - xor[2][1] = key_in[1] ^ bucket->key[2][1]; \ - xor[2][2] = key_in[2] ^ bucket->key[2][2]; \ - xor[2][3] = key_in[3] ^ bucket->key[2][3]; \ + xor[2][0] = k[0] ^ bucket->key[2][0]; \ + xor[2][1] = k[1] ^ bucket->key[2][1]; \ + xor[2][2] = k[2] ^ bucket->key[2][2]; \ + xor[2][3] = k[3] ^ bucket->key[2][3]; \ \ - xor[3][0] = key_in[0] ^ bucket->key[3][0]; \ - xor[3][1] = key_in[1] ^ bucket->key[3][1]; \ - xor[3][2] = key_in[2] ^ bucket->key[3][2]; \ - xor[3][3] = key_in[3] ^ bucket->key[3][3]; \ + xor[3][0] = k[0] ^ bucket->key[3][0]; \ + xor[3][1] = k[1] ^ bucket->key[3][1]; \ + xor[3][2] = k[2] ^ bucket->key[3][2]; \ + xor[3][3] = k[3] ^ bucket->key[3][3]; \ \ or[0] = xor[0][0] | xor[0][1] | xor[0][2] | xor[0][3] | signature[0];\ or[1] = xor[1][0] | xor[1][1] | xor[1][2] | xor[1][3] | signature[1];\ @@ -604,12 +686,15 @@ rte_table_hash_entry_delete_key32_ext( rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));\ } -#define lookup1_stage1(mbuf1, bucket1, f) \ +#define lookup1_stage1(mbuf1, bucket1, f) \ { \ + uint64_t *key; \ uint64_t signature; \ uint32_t bucket_index; \ \ - signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\ + key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset); \ + signature = f->f_hash(key, f->key_mask, KEY_SIZE, f->seed); \ + \ bucket_index = signature & (f->n_buckets - 1); \ bucket1 = (struct rte_bucket_4_32 *) \ &f->memory[bucket_index * f->bucket_size]; \ @@ -627,8 +712,7 @@ rte_table_hash_entry_delete_key32_ext( uint32_t pos; \ \ key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\ - \ - lookup_key32_cmp(key, bucket2, pos); \ + lookup_key32_cmp(key, bucket2, pos, f); \ \ pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\ pkts_mask_out |= pkt_mask; \ @@ -649,8 +733,7 @@ rte_table_hash_entry_delete_key32_ext( uint32_t pos; \ \ key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\ - \ - lookup_key32_cmp(key, bucket2, pos); \ + lookup_key32_cmp(key, bucket2, pos, f); \ \ pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\ pkts_mask_out |= pkt_mask; \ @@ -678,7 +761,7 @@ rte_table_hash_entry_delete_key32_ext( bucket = buckets[pkt_index]; \ key = keys[pkt_index]; \ \ - lookup_key32_cmp(key, bucket, pos); \ + lookup_key32_cmp(key, bucket, pos, f); \ \ pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;\ pkts_mask_out |= pkt_mask; \ @@ -745,22 +828,27 @@ rte_table_hash_entry_delete_key32_ext( #define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \ { \ - uint64_t signature10, signature11; \ - uint32_t bucket10_index, bucket11_index; \ + uint64_t *key10, *key11; \ + uint64_t signature10, signature11; \ + uint32_t bucket10_index, bucket11_index; \ \ - signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\ - bucket10_index = signature10 & (f->n_buckets - 1); \ + key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, f->key_offset); \ + signature10 = f->f_hash(key10, f->key_mask, KEY_SIZE, f->seed); \ + \ + bucket10_index = signature10 & (f->n_buckets - 1); \ bucket10 = (struct rte_bucket_4_32 *) \ &f->memory[bucket10_index * f->bucket_size]; \ - rte_prefetch0(bucket10); \ + rte_prefetch0(bucket10); \ rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\ rte_prefetch0((void 
*)(((uintptr_t) bucket10) + 2 * RTE_CACHE_LINE_SIZE));\ \ - signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\ - bucket11_index = signature11 & (f->n_buckets - 1); \ + key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, f->key_offset); \ + signature11 = f->f_hash(key11, f->key_mask, KEY_SIZE, f->seed);\ + \ + bucket11_index = signature11 & (f->n_buckets - 1); \ bucket11 = (struct rte_bucket_4_32 *) \ &f->memory[bucket11_index * f->bucket_size]; \ - rte_prefetch0(bucket11); \ + rte_prefetch0(bucket11); \ rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\ rte_prefetch0((void *)(((uintptr_t) bucket11) + 2 * RTE_CACHE_LINE_SIZE));\ } @@ -776,8 +864,8 @@ rte_table_hash_entry_delete_key32_ext( key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\ key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\ \ - lookup_key32_cmp(key20, bucket20, pos20); \ - lookup_key32_cmp(key21, bucket21, pos21); \ + lookup_key32_cmp(key20, bucket20, pos20, f); \ + lookup_key32_cmp(key21, bucket21, pos21, f); \ \ pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\ pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\ @@ -805,8 +893,8 @@ rte_table_hash_entry_delete_key32_ext( key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\ key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\ \ - lookup_key32_cmp(key20, bucket20, pos20); \ - lookup_key32_cmp(key21, bucket21, pos21); \ + lookup_key32_cmp(key20, bucket20, pos20, f); \ + lookup_key32_cmp(key21, bucket21, pos21, f); \ \ pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\ pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\ diff --git a/lib/librte_table/rte_table_hash_key8.c b/lib/librte_table/rte_table_hash_key8.c index 5f0c6566..94373043 100644 --- a/lib/librte_table/rte_table_hash_key8.c +++ b/lib/librte_table/rte_table_hash_key8.c @@ -1,34 +1,34 @@ /*- - * BSD LICENSE + * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include <string.h> #include <stdio.h> @@ -42,7 +42,9 @@ #include "rte_table_hash.h" #include "rte_lru.h" -#define RTE_TABLE_HASH_KEY_SIZE 8 +#define KEY_SIZE 8 + +#define KEYS_PER_BUCKET 4 #ifdef RTE_TABLE_STATS_COLLECT @@ -76,11 +78,9 @@ struct rte_table_hash { /* Input parameters */ uint32_t n_buckets; - uint32_t n_entries_per_bucket; uint32_t key_size; uint32_t entry_size; uint32_t bucket_size; - uint32_t signature_offset; uint32_t key_offset; uint64_t key_mask; rte_table_hash_op_hash f_hash; @@ -96,10 +96,46 @@ struct rte_table_hash { }; static int -check_params_create_lru(struct rte_table_hash_key8_lru_params *params) { - /* n_entries */ - if (params->n_entries == 0) { - RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__); +keycmp(void *a, void *b, void *b_mask) +{ + uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask; + + return a64[0] != (b64[0] & b_mask64[0]); +} + +static void +keycpy(void *dst, void *src, void *src_mask) +{ + uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask; + + dst64[0] = src64[0] & src_mask64[0]; +} + +static int +check_params_create(struct rte_table_hash_params *params) +{ + /* name */ + if (params->name == NULL) { + RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__); + return -EINVAL; + } + + /* key_size */ + if (params->key_size != KEY_SIZE) { + RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__); + return -EINVAL; + } + + /* n_keys */ + if (params->n_keys == 0) { + RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__); + return -EINVAL; + } + + /* n_buckets */ + if ((params->n_buckets == 0) || + (!rte_is_power_of_2(params->n_buckets))) { + RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__); return -EINVAL; } @@ -116,47 +152,68 @@ check_params_create_lru(struct rte_table_hash_key8_lru_params *params) { static void * rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size) { - struct rte_table_hash_key8_lru_params *p = - (struct rte_table_hash_key8_lru_params *) params; + struct rte_table_hash_params *p = params; struct rte_table_hash *f; - uint32_t n_buckets, n_entries_per_bucket, key_size, bucket_size_cl; - uint32_t total_size, i; + uint64_t bucket_size, total_size; + uint32_t n_buckets, i; /* Check input parameters */ - if ((check_params_create_lru(p) != 0) || + if ((check_params_create(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_8) % 64) != 0)) { + ((sizeof(struct rte_bucket_4_8) % 64) != 0)) return NULL; - } - n_entries_per_bucket = 4; - key_size = 8; + + /* + * Table dimensioning + * + * Objective: Pick the number of buckets (n_buckets) so that there a chance + * to store n_keys keys in the table. + * + * Note: Since the buckets do not get extended, it is not possible to + * guarantee that n_keys keys can be stored in the table at any time. In the + * worst case scenario when all the n_keys fall into the same bucket, only + * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case + * defeats the purpose of the hash table. It indicates unsuitable f_hash or + * n_keys to n_buckets ratio. 
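The 8-byte flavour swaps the old memcmp()/direct assignment for the mask-aware keycmp()/keycpy() helpers added earlier in this hunk, so both insertion and lookup only ever see the masked bits of the key. A rough standalone illustration of that behaviour (function and variable names are ours, not the library's):

#include <stdint.h>
#include <stdio.h>

/* Compare a stored 8-byte key against a candidate under a mask;
 * returns 0 on match, mirroring the patch's keycmp() convention. */
static int key8_cmp(const void *a, const void *b, const void *b_mask)
{
	const uint64_t *a64 = a, *b64 = b, *m64 = b_mask;

	return a64[0] != (b64[0] & m64[0]);
}

/* Store only the masked bits of the key, as keycpy() does. */
static void key8_cpy(void *dst, const void *src, const void *src_mask)
{
	uint64_t *d = dst;
	const uint64_t *s = src, *m = src_mask;

	d[0] = s[0] & m[0];
}

int main(void)
{
	uint64_t mask = 0x00000000FFFFFFFFULL;	/* only the low 4 bytes matter */
	uint64_t stored;
	uint64_t key_a = 0x1111222233334444ULL;
	uint64_t key_b = 0xAAAA222233334444ULL;	/* differs only in masked-out bytes */

	key8_cpy(&stored, &key_a, &mask);
	printf("match: %s\n", key8_cmp(&stored, &key_b, &mask) == 0 ? "yes" : "no");
	return 0;
}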
+ * + * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET + */ + n_buckets = rte_align32pow2( + (p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET); + n_buckets = RTE_MAX(n_buckets, p->n_buckets); /* Memory allocation */ - n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) / - n_entries_per_bucket); - bucket_size_cl = (sizeof(struct rte_bucket_4_8) + n_entries_per_bucket * - entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; - total_size = sizeof(struct rte_table_hash) + n_buckets * - bucket_size_cl * RTE_CACHE_LINE_SIZE; - - f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) + + KEYS_PER_BUCKET * entry_size); + total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size; + + if (total_size > SIZE_MAX) { + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes" + " for hash table %s\n", + __func__, total_size, p->name); + return NULL; + } + + f = rte_zmalloc_socket(p->name, + (size_t)total_size, + RTE_CACHE_LINE_SIZE, + socket_id); if (f == NULL) { - RTE_LOG(ERR, TABLE, - "%s: Cannot allocate %u bytes for hash table\n", - __func__, total_size); + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes" + " for hash table %s\n", + __func__, total_size, p->name); return NULL; } - RTE_LOG(INFO, TABLE, - "%s: Hash table memory footprint is %u bytes\n", - __func__, total_size); + + RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint " + "is %" PRIu64 " bytes\n", + __func__, p->name, total_size); /* Memory initialization */ f->n_buckets = n_buckets; - f->n_entries_per_bucket = n_entries_per_bucket; - f->key_size = key_size; + f->key_size = KEY_SIZE; f->entry_size = entry_size; - f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE; - f->signature_offset = p->signature_offset; + f->bucket_size = bucket_size; f->key_offset = p->key_offset; f->f_hash = p->f_hash; f->seed = p->seed; @@ -205,7 +262,7 @@ rte_table_hash_entry_add_key8_lru( uint64_t signature, mask, pos; uint32_t bucket_index, i; - signature = f->f_hash(key, f->key_size, f->seed); + signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed); bucket_index = signature & (f->n_buckets - 1); bucket = (struct rte_bucket_4_8 *) &f->memory[bucket_index * f->bucket_size]; @@ -213,10 +270,10 @@ rte_table_hash_entry_add_key8_lru( /* Key is present in the bucket */ for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) { uint64_t bucket_signature = bucket->signature; - uint64_t bucket_key = bucket->key[i]; + uint64_t *bucket_key = &bucket->key[i]; if ((bucket_signature & mask) && - (*((uint64_t *) key) == bucket_key)) { + (keycmp(bucket_key, key, &f->key_mask) == 0)) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; memcpy(bucket_data, entry, f->entry_size); @@ -235,7 +292,7 @@ rte_table_hash_entry_add_key8_lru( uint8_t *bucket_data = &bucket->data[i * f->entry_size]; bucket->signature |= mask; - bucket->key[i] = *((uint64_t *) key); + keycpy(&bucket->key[i], key, &f->key_mask); memcpy(bucket_data, entry, f->entry_size); lru_update(bucket, i); *key_found = 0; @@ -247,10 +304,10 @@ rte_table_hash_entry_add_key8_lru( /* Bucket full: replace LRU entry */ pos = lru_pos(bucket); - bucket->key[pos] = *((uint64_t *) key); + keycpy(&bucket->key[pos], key, &f->key_mask); memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size); lru_update(bucket, pos); - *key_found = 0; + *key_found = 0; *entry_ptr = (void *) &bucket->data[pos * f->entry_size]; return 0; @@ -268,7 +325,7 @@ 
rte_table_hash_entry_delete_key8_lru( uint64_t signature, mask; uint32_t bucket_index, i; - signature = f->f_hash(key, f->key_size, f->seed); + signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed); bucket_index = signature & (f->n_buckets - 1); bucket = (struct rte_bucket_4_8 *) &f->memory[bucket_index * f->bucket_size]; @@ -276,10 +333,10 @@ rte_table_hash_entry_delete_key8_lru( /* Key is present in the bucket */ for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) { uint64_t bucket_signature = bucket->signature; - uint64_t bucket_key = bucket->key[i]; + uint64_t *bucket_key = &bucket->key[i]; if ((bucket_signature & mask) && - (*((uint64_t *) key) == bucket_key)) { + (keycmp(bucket_key, key, &f->key_mask) == 0)) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; bucket->signature &= ~mask; @@ -296,79 +353,71 @@ rte_table_hash_entry_delete_key8_lru( return 0; } -static int -check_params_create_ext(struct rte_table_hash_key8_ext_params *params) { - /* n_entries */ - if (params->n_entries == 0) { - RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__); - return -EINVAL; - } - - /* n_entries_ext */ - if (params->n_entries_ext == 0) { - RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__); - return -EINVAL; - } - - /* f_hash */ - if (params->f_hash == NULL) { - RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n", - __func__); - return -EINVAL; - } - - return 0; -} - static void * rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size) { - struct rte_table_hash_key8_ext_params *p = - (struct rte_table_hash_key8_ext_params *) params; + struct rte_table_hash_params *p = params; struct rte_table_hash *f; - uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size; - uint32_t bucket_size_cl, stack_size_cl, total_size, i; + uint64_t bucket_size, stack_size, total_size; + uint32_t n_buckets_ext, i; /* Check input parameters */ - if ((check_params_create_ext(p) != 0) || + if ((check_params_create(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || ((sizeof(struct rte_bucket_4_8) % 64) != 0)) return NULL; - n_entries_per_bucket = 4; - key_size = 8; + /* + * Table dimensioning + * + * Objective: Pick the number of bucket extensions (n_buckets_ext) so that + * it is guaranteed that n_keys keys can be stored in the table at any time. + * + * The worst case scenario takes place when all the n_keys keys fall into + * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst + * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the + * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall + * into a different bucket. This case defeats the purpose of the hash table. + * It indicates unsuitable f_hash or n_keys to n_buckets ratio. 
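When a bucket in the extendible table does fill up, a spare bucket is popped off a LIFO free stack (f->stack / f->stack_pos) and chained to the full one through its next/next_valid fields. A much-simplified sketch of that allocation pattern, with all structure and function names ours:

#include <stdint.h>
#include <stdio.h>

#define N_BUCKETS_EXT 8

struct bkt {
	struct bkt *next;	/* chained extension bucket, if any */
	int next_valid;
};

static struct bkt pool[N_BUCKETS_EXT];
static uint32_t stack[N_BUCKETS_EXT];
static uint32_t stack_pos = N_BUCKETS_EXT;

/* Pop a free extension bucket off the LIFO stack and chain it. */
static struct bkt *bkt_extend(struct bkt *full)
{
	struct bkt *ext;

	if (stack_pos == 0)
		return NULL;	/* no spare buckets left */

	ext = &pool[stack[--stack_pos]];
	full->next = ext;
	full->next_valid = 1;
	return ext;
}

int main(void)
{
	uint32_t i;
	struct bkt head = { 0 };

	for (i = 0; i < N_BUCKETS_EXT; i++)
		stack[i] = i;	/* same init as f->stack[i] = i in the patch */

	printf("extended: %s\n", bkt_extend(&head) ? "yes" : "no");
	return 0;
}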
+ * + * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1 + */ + n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1; /* Memory allocation */ - n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) / - n_entries_per_bucket); - n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) / - n_entries_per_bucket; - bucket_size_cl = (sizeof(struct rte_bucket_4_8) + n_entries_per_bucket * - entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; - stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1) - / RTE_CACHE_LINE_SIZE; - total_size = sizeof(struct rte_table_hash) + ((n_buckets + - n_buckets_ext) * bucket_size_cl + stack_size_cl) * - RTE_CACHE_LINE_SIZE; - - f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) + + KEYS_PER_BUCKET * entry_size); + stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t)); + total_size = sizeof(struct rte_table_hash) + + (p->n_buckets + n_buckets_ext) * bucket_size + stack_size; + + if (total_size > SIZE_MAX) { + RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes " + "for hash table %s\n", + __func__, total_size, p->name); + return NULL; + } + + f = rte_zmalloc_socket(p->name, + (size_t)total_size, + RTE_CACHE_LINE_SIZE, + socket_id); if (f == NULL) { RTE_LOG(ERR, TABLE, - "%s: Cannot allocate %u bytes for hash table\n", - __func__, total_size); + "%s: Cannot allocate %" PRIu64 " bytes " + "for hash table %s\n", + __func__, total_size, p->name); return NULL; } - RTE_LOG(INFO, TABLE, - "%s: Hash table memory footprint is %u bytes\n", - __func__, total_size); + RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint " + "is %" PRIu64 " bytes\n", + __func__, p->name, total_size); /* Memory initialization */ - f->n_buckets = n_buckets; - f->n_entries_per_bucket = n_entries_per_bucket; - f->key_size = key_size; + f->n_buckets = p->n_buckets; + f->key_size = KEY_SIZE; f->entry_size = entry_size; - f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE; - f->signature_offset = p->signature_offset; + f->bucket_size = bucket_size; f->key_offset = p->key_offset; f->f_hash = p->f_hash; f->seed = p->seed; @@ -376,7 +425,7 @@ rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size) f->n_buckets_ext = n_buckets_ext; f->stack_pos = n_buckets_ext; f->stack = (uint32_t *) - &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size]; + &f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size]; if (p->key_mask != NULL) f->key_mask = ((uint64_t *)p->key_mask)[0]; @@ -417,7 +466,7 @@ rte_table_hash_entry_add_key8_ext( uint64_t signature; uint32_t bucket_index, i; - signature = f->f_hash(key, f->key_size, f->seed); + signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed); bucket_index = signature & (f->n_buckets - 1); bucket0 = (struct rte_bucket_4_8 *) &f->memory[bucket_index * f->bucket_size]; @@ -428,10 +477,10 @@ rte_table_hash_entry_add_key8_ext( for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) { uint64_t bucket_signature = bucket->signature; - uint64_t bucket_key = bucket->key[i]; + uint64_t *bucket_key = &bucket->key[i]; if ((bucket_signature & mask) && - (*((uint64_t *) key) == bucket_key)) { + (keycmp(bucket_key, key, &f->key_mask) == 0)) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; @@ -456,7 +505,7 @@ rte_table_hash_entry_add_key8_ext( f->entry_size]; bucket->signature |= mask; - bucket->key[i] = *((uint64_t *) key); + 
keycpy(&bucket->key[i], key, &f->key_mask); memcpy(bucket_data, entry, f->entry_size); *key_found = 0; *entry_ptr = (void *) bucket_data; @@ -476,7 +525,7 @@ rte_table_hash_entry_add_key8_ext( bucket_prev->next_valid = 1; bucket->signature = 1; - bucket->key[0] = *((uint64_t *) key); + keycpy(&bucket->key[0], key, &f->key_mask); memcpy(&bucket->data[0], entry, f->entry_size); *key_found = 0; *entry_ptr = (void *) &bucket->data[0]; @@ -498,7 +547,7 @@ rte_table_hash_entry_delete_key8_ext( uint64_t signature; uint32_t bucket_index, i; - signature = f->f_hash(key, f->key_size, f->seed); + signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed); bucket_index = signature & (f->n_buckets - 1); bucket0 = (struct rte_bucket_4_8 *) &f->memory[bucket_index * f->bucket_size]; @@ -510,10 +559,10 @@ rte_table_hash_entry_delete_key8_ext( for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) { uint64_t bucket_signature = bucket->signature; - uint64_t bucket_key = bucket->key[i]; + uint64_t *bucket_key = &bucket->key[i]; if ((bucket_signature & mask) && - (*((uint64_t *) key) == bucket_key)) { + (keycmp(bucket_key, key, &f->key_mask) == 0)) { uint8_t *bucket_data = &bucket->data[i * f->entry_size]; @@ -546,16 +595,17 @@ rte_table_hash_entry_delete_key8_ext( return 0; } -#define lookup_key8_cmp(key_in, bucket, pos) \ +#define lookup_key8_cmp(key_in, bucket, pos, f) \ { \ - uint64_t xor[4], signature; \ + uint64_t xor[4], signature, k; \ \ signature = ~bucket->signature; \ \ - xor[0] = (key_in[0] ^ bucket->key[0]) | (signature & 1);\ - xor[1] = (key_in[0] ^ bucket->key[1]) | (signature & 2);\ - xor[2] = (key_in[0] ^ bucket->key[2]) | (signature & 4);\ - xor[3] = (key_in[0] ^ bucket->key[3]) | (signature & 8);\ + k = key_in[0] & f->key_mask; \ + xor[0] = (k ^ bucket->key[0]) | (signature & 1); \ + xor[1] = (k ^ bucket->key[1]) | (signature & 2); \ + xor[2] = (k ^ bucket->key[2]) | (signature & 4); \ + xor[3] = (k ^ bucket->key[3]) | (signature & 8); \ \ pos = 4; \ if (xor[0] == 0) \ @@ -583,27 +633,12 @@ rte_table_hash_entry_delete_key8_ext( #define lookup1_stage1(mbuf1, bucket1, f) \ { \ - uint64_t signature; \ - uint32_t bucket_index; \ - \ - signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\ - bucket_index = signature & (f->n_buckets - 1); \ - bucket1 = (struct rte_bucket_4_8 *) \ - &f->memory[bucket_index * f->bucket_size]; \ - rte_prefetch0(bucket1); \ -} - -#define lookup1_stage1_dosig(mbuf1, bucket1, f) \ -{ \ uint64_t *key; \ uint64_t signature; \ uint32_t bucket_index; \ - uint64_t hash_key_buffer; \ \ key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\ - hash_key_buffer = *key & f->key_mask; \ - signature = f->f_hash(&hash_key_buffer, \ - RTE_TABLE_HASH_KEY_SIZE, f->seed); \ + signature = f->f_hash(key, &f->key_mask, KEY_SIZE, f->seed); \ bucket_index = signature & (f->n_buckets - 1); \ bucket1 = (struct rte_bucket_4_8 *) \ &f->memory[bucket_index * f->bucket_size]; \ @@ -617,12 +652,9 @@ rte_table_hash_entry_delete_key8_ext( uint64_t pkt_mask; \ uint64_t *key; \ uint32_t pos; \ - uint64_t hash_key_buffer; \ \ key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\ - hash_key_buffer = key[0] & f->key_mask; \ - \ - lookup_key8_cmp((&hash_key_buffer), bucket2, pos); \ + lookup_key8_cmp(key, bucket2, pos, f); \ \ pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\ pkts_mask_out |= pkt_mask; \ @@ -641,12 +673,9 @@ rte_table_hash_entry_delete_key8_ext( uint64_t pkt_mask, bucket_mask; \ uint64_t *key; \ uint32_t pos; \ - uint64_t hash_key_buffer; \ \ key = 
RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\ - hash_key_buffer = *key & f->key_mask; \ - \ - lookup_key8_cmp((&hash_key_buffer), bucket2, pos); \ + lookup_key8_cmp(key, bucket2, pos, f); \ \ pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\ pkts_mask_out |= pkt_mask; \ @@ -670,13 +699,10 @@ rte_table_hash_entry_delete_key8_ext( uint64_t pkt_mask, bucket_mask; \ uint64_t *key; \ uint32_t pos; \ - uint64_t hash_key_buffer; \ \ bucket = buckets[pkt_index]; \ key = keys[pkt_index]; \ - hash_key_buffer = (*key) & f->key_mask; \ - \ - lookup_key8_cmp((&hash_key_buffer), bucket, pos); \ + lookup_key8_cmp(key, bucket, pos, f); \ \ pkt_mask = ((bucket->signature >> pos) & 1LLU) << pkt_index;\ pkts_mask_out |= pkt_mask; \ @@ -738,29 +764,9 @@ rte_table_hash_entry_delete_key8_ext( rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\ } -#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \ -{ \ - uint64_t signature10, signature11; \ - uint32_t bucket10_index, bucket11_index; \ - \ - signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\ - bucket10_index = signature10 & (f->n_buckets - 1); \ - bucket10 = (struct rte_bucket_4_8 *) \ - &f->memory[bucket10_index * f->bucket_size]; \ - rte_prefetch0(bucket10); \ - \ - signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\ - bucket11_index = signature11 & (f->n_buckets - 1); \ - bucket11 = (struct rte_bucket_4_8 *) \ - &f->memory[bucket11_index * f->bucket_size]; \ - rte_prefetch0(bucket11); \ -} - -#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f)\ +#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)\ { \ uint64_t *key10, *key11; \ - uint64_t hash_offset_buffer10; \ - uint64_t hash_offset_buffer11; \ uint64_t signature10, signature11; \ uint32_t bucket10_index, bucket11_index; \ rte_table_hash_op_hash f_hash = f->f_hash; \ @@ -769,18 +775,14 @@ rte_table_hash_entry_delete_key8_ext( \ key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, key_offset);\ key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, key_offset);\ - hash_offset_buffer10 = *key10 & f->key_mask; \ - hash_offset_buffer11 = *key11 & f->key_mask; \ \ - signature10 = f_hash(&hash_offset_buffer10, \ - RTE_TABLE_HASH_KEY_SIZE, seed); \ + signature10 = f_hash(key10, &f->key_mask, KEY_SIZE, seed); \ bucket10_index = signature10 & (f->n_buckets - 1); \ bucket10 = (struct rte_bucket_4_8 *) \ &f->memory[bucket10_index * f->bucket_size]; \ rte_prefetch0(bucket10); \ \ - signature11 = f_hash(&hash_offset_buffer11, \ - RTE_TABLE_HASH_KEY_SIZE, seed); \ + signature11 = f_hash(key11, &f->key_mask, KEY_SIZE, seed); \ bucket11_index = signature11 & (f->n_buckets - 1); \ bucket11 = (struct rte_bucket_4_8 *) \ &f->memory[bucket11_index * f->bucket_size]; \ @@ -793,17 +795,13 @@ rte_table_hash_entry_delete_key8_ext( void *a20, *a21; \ uint64_t pkt20_mask, pkt21_mask; \ uint64_t *key20, *key21; \ - uint64_t hash_offset_buffer20; \ - uint64_t hash_offset_buffer21; \ uint32_t pos20, pos21; \ \ key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\ key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\ - hash_offset_buffer20 = *key20 & f->key_mask; \ - hash_offset_buffer21 = *key21 & f->key_mask; \ \ - lookup_key8_cmp((&hash_offset_buffer20), bucket20, pos20);\ - lookup_key8_cmp((&hash_offset_buffer21), bucket21, pos21);\ + lookup_key8_cmp(key20, bucket20, pos20, f); \ + lookup_key8_cmp(key21, bucket21, pos21, f); \ \ pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\ pkt21_mask = 
((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\ @@ -826,17 +824,13 @@ rte_table_hash_entry_delete_key8_ext( void *a20, *a21; \ uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\ uint64_t *key20, *key21; \ - uint64_t hash_offset_buffer20; \ - uint64_t hash_offset_buffer21; \ uint32_t pos20, pos21; \ \ key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\ key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\ - hash_offset_buffer20 = *key20 & f->key_mask; \ - hash_offset_buffer21 = *key21 & f->key_mask; \ \ - lookup_key8_cmp((&hash_offset_buffer20), bucket20, pos20);\ - lookup_key8_cmp((&hash_offset_buffer21), bucket21, pos21);\ + lookup_key8_cmp(key20, bucket20, pos20, f); \ + lookup_key8_cmp(key21, bucket21, pos21, f); \ \ pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\ pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\ @@ -871,8 +865,8 @@ rte_table_hash_lookup_key8_lru( struct rte_table_hash *f = (struct rte_table_hash *) table; struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21; struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; - uint32_t pkt00_index, pkt01_index, pkt10_index, - pkt11_index, pkt20_index, pkt21_index; + uint32_t pkt00_index, pkt01_index, pkt10_index; + uint32_t pkt11_index, pkt20_index, pkt21_index; uint64_t pkts_mask_out = 0; __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); @@ -888,7 +882,7 @@ rte_table_hash_lookup_key8_lru( lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); lookup1_stage1(mbuf, bucket, f); lookup1_stage2_lru(pkt_index, mbuf, bucket, - pkts_mask_out, entries, f); + pkts_mask_out, entries, f); } *lookup_hit_mask = pkts_mask_out; @@ -984,133 +978,7 @@ rte_table_hash_lookup_key8_lru( *lookup_hit_mask = pkts_mask_out; RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); return 0; -} /* rte_table_hash_lookup_key8_lru() */ - -static int -rte_table_hash_lookup_key8_lru_dosig( - void *table, - struct rte_mbuf **pkts, - uint64_t pkts_mask, - uint64_t *lookup_hit_mask, - void **entries) -{ - struct rte_table_hash *f = (struct rte_table_hash *) table; - struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21; - struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; - uint32_t pkt00_index, pkt01_index, pkt10_index; - uint32_t pkt11_index, pkt20_index, pkt21_index; - uint64_t pkts_mask_out = 0; - - __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); - RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in); - - /* Cannot run the pipeline with less than 5 packets */ - if (__builtin_popcountll(pkts_mask) < 5) { - for ( ; pkts_mask; ) { - struct rte_bucket_4_8 *bucket; - struct rte_mbuf *mbuf; - uint32_t pkt_index; - - lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); - lookup1_stage1_dosig(mbuf, bucket, f); - lookup1_stage2_lru(pkt_index, mbuf, bucket, - pkts_mask_out, entries, f); - } - - *lookup_hit_mask = pkts_mask_out; - RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); - return 0; - } - - /* - * Pipeline fill - * - */ - /* Pipeline stage 0 */ - lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, - pkts_mask, f); - - /* Pipeline feed */ - mbuf10 = mbuf00; - mbuf11 = mbuf01; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, - pkts_mask, f); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(mbuf10, 
mbuf11, bucket10, bucket11, f); - - /* - * Pipeline run - * - */ - for ( ; pkts_mask; ) { - /* Pipeline feed */ - bucket20 = bucket10; - bucket21 = bucket11; - mbuf20 = mbuf10; - mbuf21 = mbuf11; - mbuf10 = mbuf00; - mbuf11 = mbuf01; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, - mbuf00, mbuf01, pkts, pkts_mask, f); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); - - /* Pipeline stage 2 */ - lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, - bucket20, bucket21, pkts_mask_out, entries, f); - } - - /* - * Pipeline flush - * - */ - /* Pipeline feed */ - bucket20 = bucket10; - bucket21 = bucket11; - mbuf20 = mbuf10; - mbuf21 = mbuf11; - mbuf10 = mbuf00; - mbuf11 = mbuf01; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); - - /* Pipeline stage 2 */ - lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, - bucket20, bucket21, pkts_mask_out, entries, f); - - /* Pipeline feed */ - bucket20 = bucket10; - bucket21 = bucket11; - mbuf20 = mbuf10; - mbuf21 = mbuf11; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - - /* Pipeline stage 2 */ - lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, - bucket20, bucket21, pkts_mask_out, entries, f); - - *lookup_hit_mask = pkts_mask_out; - RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); - return 0; -} /* rte_table_hash_lookup_key8_lru_dosig() */ +} /* lookup LRU */ static int rte_table_hash_lookup_key8_ext( @@ -1142,8 +1010,8 @@ rte_table_hash_lookup_key8_ext( lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); lookup1_stage1(mbuf, bucket, f); lookup1_stage2_ext(pkt_index, mbuf, bucket, - pkts_mask_out, entries, buckets_mask, buckets, - keys, f); + pkts_mask_out, entries, buckets_mask, + buckets, keys, f); } goto grind_next_buckets; @@ -1260,157 +1128,7 @@ grind_next_buckets: *lookup_hit_mask = pkts_mask_out; RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); return 0; -} /* rte_table_hash_lookup_key8_ext() */ - -static int -rte_table_hash_lookup_key8_ext_dosig( - void *table, - struct rte_mbuf **pkts, - uint64_t pkts_mask, - uint64_t *lookup_hit_mask, - void **entries) -{ - struct rte_table_hash *f = (struct rte_table_hash *) table; - struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21; - struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; - uint32_t pkt00_index, pkt01_index, pkt10_index; - uint32_t pkt11_index, pkt20_index, pkt21_index; - uint64_t pkts_mask_out = 0, buckets_mask = 0; - struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX]; - uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX]; - - __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); - RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in); - - /* Cannot run the pipeline with less than 5 packets */ - if (__builtin_popcountll(pkts_mask) < 5) { - for ( ; pkts_mask; ) { - struct rte_bucket_4_8 *bucket; - struct rte_mbuf *mbuf; - uint32_t pkt_index; - - lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); - lookup1_stage1_dosig(mbuf, bucket, f); - lookup1_stage2_ext(pkt_index, mbuf, bucket, - pkts_mask_out, entries, buckets_mask, - buckets, keys, f); - } - - goto 
grind_next_buckets; - } - - /* - * Pipeline fill - * - */ - /* Pipeline stage 0 */ - lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, - pkts_mask, f); - - /* Pipeline feed */ - mbuf10 = mbuf00; - mbuf11 = mbuf01; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, - pkts_mask, f); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); - - /* - * Pipeline run - * - */ - for ( ; pkts_mask; ) { - /* Pipeline feed */ - bucket20 = bucket10; - bucket21 = bucket11; - mbuf20 = mbuf10; - mbuf21 = mbuf11; - mbuf10 = mbuf00; - mbuf11 = mbuf01; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, - mbuf00, mbuf01, pkts, pkts_mask, f); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); - - /* Pipeline stage 2 */ - lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, - bucket20, bucket21, pkts_mask_out, entries, - buckets_mask, buckets, keys, f); - } - - /* - * Pipeline flush - * - */ - /* Pipeline feed */ - bucket20 = bucket10; - bucket21 = bucket11; - mbuf20 = mbuf10; - mbuf21 = mbuf11; - mbuf10 = mbuf00; - mbuf11 = mbuf01; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); - - /* Pipeline stage 2 */ - lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, - bucket20, bucket21, pkts_mask_out, entries, - buckets_mask, buckets, keys, f); - - /* Pipeline feed */ - bucket20 = bucket10; - bucket21 = bucket11; - mbuf20 = mbuf10; - mbuf21 = mbuf11; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - - /* Pipeline stage 2 */ - lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, - bucket20, bucket21, pkts_mask_out, entries, - buckets_mask, buckets, keys, f); - -grind_next_buckets: - /* Grind next buckets */ - for ( ; buckets_mask; ) { - uint64_t buckets_mask_next = 0; - - for ( ; buckets_mask; ) { - uint64_t pkt_mask; - uint32_t pkt_index; - - pkt_index = __builtin_ctzll(buckets_mask); - pkt_mask = 1LLU << pkt_index; - buckets_mask &= ~pkt_mask; - - lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, - entries, buckets_mask_next, f); - } - - buckets_mask = buckets_mask_next; - } - - *lookup_hit_mask = pkts_mask_out; - RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); - return 0; -} /* rte_table_hash_lookup_key8_dosig_ext() */ +} /* lookup EXT */ static int rte_table_hash_key8_stats_read(void *table, struct rte_table_stats *stats, int clear) @@ -1437,17 +1155,6 @@ struct rte_table_ops rte_table_hash_key8_lru_ops = { .f_stats = rte_table_hash_key8_stats_read, }; -struct rte_table_ops rte_table_hash_key8_lru_dosig_ops = { - .f_create = rte_table_hash_create_key8_lru, - .f_free = rte_table_hash_free_key8_lru, - .f_add = rte_table_hash_entry_add_key8_lru, - .f_delete = rte_table_hash_entry_delete_key8_lru, - .f_add_bulk = NULL, - .f_delete_bulk = NULL, - .f_lookup = rte_table_hash_lookup_key8_lru_dosig, - .f_stats = rte_table_hash_key8_stats_read, -}; - struct rte_table_ops rte_table_hash_key8_ext_ops = { .f_create = rte_table_hash_create_key8_ext, .f_free = rte_table_hash_free_key8_ext, @@ -1458,14 +1165,3 @@ struct rte_table_ops 
rte_table_hash_key8_ext_ops = { .f_lookup = rte_table_hash_lookup_key8_ext, .f_stats = rte_table_hash_key8_stats_read, }; - -struct rte_table_ops rte_table_hash_key8_ext_dosig_ops = { - .f_create = rte_table_hash_create_key8_ext, - .f_free = rte_table_hash_free_key8_ext, - .f_add = rte_table_hash_entry_add_key8_ext, - .f_delete = rte_table_hash_entry_delete_key8_ext, - .f_add_bulk = NULL, - .f_delete_bulk = NULL, - .f_lookup = rte_table_hash_lookup_key8_ext_dosig, - .f_stats = rte_table_hash_key8_stats_read, -}; diff --git a/lib/librte_table/rte_table_hash_lru.c b/lib/librte_table/rte_table_hash_lru.c index 5a4864e2..a07392fd 100644 --- a/lib/librte_table/rte_table_hash_lru.c +++ b/lib/librte_table/rte_table_hash_lru.c @@ -1,34 +1,34 @@ /*- - * BSD LICENSE + * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
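With the _dosig lookup variants and their ops tables removed, an application now picks a single ops table (LRU or EXT) and fills the one rte_table_hash_params structure that this patch validates in check_params_create(). A hedged usage sketch; the hash callback body, the table name, the metadata key offset and the sizes below are placeholders rather than values taken from the patch:

#include <rte_table_hash.h>

/* Illustrative mask-aware hash callback matching the new f_hash signature. */
static uint64_t my_hash(void *key, void *key_mask, uint32_t key_size, uint64_t seed)
{
	uint64_t *k = key, *m = key_mask, h = seed;
	uint32_t i;

	for (i = 0; i < key_size / sizeof(uint64_t); i++)
		h ^= (k[i] & m[i]) * 0x9E3779B97F4A7C15ULL;	/* toy mixing only */

	return h;
}

static void *create_key8_table(int socket_id)
{
	static uint8_t key_mask[8] = {
		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF	/* match the full key */
	};
	struct rte_table_hash_params params = {
		.name = "flow_table",		/* illustrative name */
		.key_size = 8,
		.key_offset = 0,		/* offset of key in mbuf metadata; placeholder */
		.key_mask = key_mask,
		.n_keys = 1 << 16,
		.n_buckets = 1 << 14,		/* must be a power of two */
		.f_hash = my_hash,
		.seed = 0,
	};

	/* entry_size = 8 bytes of user data per key; adjust as needed. */
	return rte_table_hash_key8_ext_ops.f_create(&params, socket_id, 8);
}

In pipeline applications the same structure is usually handed over as the table's creation argument through librte_pipeline rather than by calling f_create() directly.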
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <string.h> @@ -86,7 +86,6 @@ struct rte_table_hash { uint32_t n_buckets; rte_table_hash_op_hash f_hash; uint64_t seed; - uint32_t signature_offset; uint32_t key_offset; /* Internal */ @@ -99,6 +98,7 @@ struct rte_table_hash { struct grinder grinders[RTE_PORT_IN_BURST_SIZE_MAX]; /* Tables */ + uint64_t *key_mask; struct bucket *buckets; uint8_t *key_mem; uint8_t *data_mem; @@ -109,29 +109,53 @@ struct rte_table_hash { }; static int -check_params_create(struct rte_table_hash_lru_params *params) +keycmp(void *a, void *b, void *b_mask, uint32_t n_bytes) { - uint32_t n_buckets_min; + uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask; + uint32_t i; + + for (i = 0; i < n_bytes / sizeof(uint64_t); i++) + if (a64[i] != (b64[i] & b_mask64[i])) + return 1; + + return 0; +} + +static void +keycpy(void *dst, void *src, void *src_mask, uint32_t n_bytes) +{ + uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask; + uint32_t i; + + for (i = 0; i < n_bytes / sizeof(uint64_t); i++) + dst64[i] = src64[i] & src_mask64[i]; +} + +static int +check_params_create(struct rte_table_hash_params *params) +{ + /* name */ + if (params->name == NULL) { + RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__); + return -EINVAL; + } /* key_size */ - if ((params->key_size == 0) || + if ((params->key_size < sizeof(uint64_t)) || (!rte_is_power_of_2(params->key_size))) { RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__); return -EINVAL; } /* n_keys */ - if ((params->n_keys == 0) || - (!rte_is_power_of_2(params->n_keys))) { + if (params->n_keys == 0) { RTE_LOG(ERR, TABLE, "%s: n_keys invalid value\n", __func__); return -EINVAL; } /* n_buckets */ - n_buckets_min = (params->n_keys + KEYS_PER_BUCKET - 1) / params->n_keys; if ((params->n_buckets == 0) || - (!rte_is_power_of_2(params->n_keys)) || - (params->n_buckets < n_buckets_min)) { + (!rte_is_power_of_2(params->n_buckets))) { RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__); return -EINVAL; } @@ -148,13 +172,13 @@ check_params_create(struct rte_table_hash_lru_params *params) static void * rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size) { - struct rte_table_hash_lru_params *p = - params; + struct rte_table_hash_params *p = params; struct rte_table_hash *t; - uint32_t total_size, table_meta_sz; - uint32_t bucket_sz, key_sz, key_stack_sz, data_sz; - uint32_t bucket_offset, key_offset, key_stack_offset, data_offset; - uint32_t i; + uint64_t table_meta_sz, key_mask_sz, bucket_sz, key_sz, key_stack_sz; + uint64_t data_sz, total_size; + uint64_t key_mask_offset, bucket_offset, key_offset, key_stack_offset; + uint64_t data_offset; + uint32_t n_buckets, i; /* Check input parameters */ if ((check_params_create(p) != 0) || @@ -164,33 +188,65 @@ rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size) return NULL; } + /* + * Table dimensioning + * + * Objective: Pick the number of buckets (n_buckets) so that there a chance + * to store n_keys keys in the table. 
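Unlike the fixed-width key8/key16/key32 files, rte_table_hash_lru.c accepts any power-of-two key size of at least 8 bytes, so its keycmp()/keycpy() walk the key eight bytes at a time under the mask. A standalone sketch of that loop form (our names), shown with a 16-byte key whose second half is masked out:

#include <stdint.h>
#include <stdio.h>

/* Returns 0 when the stored key equals the candidate under the mask. */
static int masked_cmp(const void *a, const void *b, const void *mask, uint32_t n_bytes)
{
	const uint64_t *a64 = a, *b64 = b, *m64 = mask;
	uint32_t i;

	for (i = 0; i < n_bytes / sizeof(uint64_t); i++)
		if (a64[i] != (b64[i] & m64[i]))
			return 1;

	return 0;
}

static void masked_cpy(void *dst, const void *src, const void *mask, uint32_t n_bytes)
{
	uint64_t *d = dst;
	const uint64_t *s = src, *m = mask;
	uint32_t i;

	for (i = 0; i < n_bytes / sizeof(uint64_t); i++)
		d[i] = s[i] & m[i];
}

int main(void)
{
	uint64_t mask[2] = { ~0ULL, 0 };	/* only the first 8 bytes are significant */
	uint64_t stored[2];
	uint64_t key[2] = { 0x1122334455667788ULL, 0xAAAAAAAAAAAAAAAAULL };
	uint64_t other[2] = { 0x1122334455667788ULL, 0xBBBBBBBBBBBBBBBBULL };

	masked_cpy(stored, key, mask, sizeof(key));
	printf("match: %d\n", masked_cmp(stored, other, mask, sizeof(other)) == 0);
	return 0;
}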
+ * + * Note: Since the buckets do not get extended, it is not possible to + * guarantee that n_keys keys can be stored in the table at any time. In the + * worst case scenario when all the n_keys fall into the same bucket, only + * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case + * defeats the purpose of the hash table. It indicates unsuitable f_hash or + * n_keys to n_buckets ratio. + * + * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET + */ + n_buckets = rte_align32pow2( + (p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET); + n_buckets = RTE_MAX(n_buckets, p->n_buckets); + /* Memory allocation */ table_meta_sz = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_table_hash)); - bucket_sz = RTE_CACHE_LINE_ROUNDUP(p->n_buckets * sizeof(struct bucket)); + key_mask_sz = RTE_CACHE_LINE_ROUNDUP(p->key_size); + bucket_sz = RTE_CACHE_LINE_ROUNDUP(n_buckets * sizeof(struct bucket)); key_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * p->key_size); key_stack_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * sizeof(uint32_t)); data_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * entry_size); - total_size = table_meta_sz + bucket_sz + key_sz + key_stack_sz + - data_sz; + total_size = table_meta_sz + key_mask_sz + bucket_sz + key_sz + + key_stack_sz + data_sz; + + if (total_size > SIZE_MAX) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %" PRIu64 " bytes for hash " + "table %s\n", + __func__, total_size, p->name); + return NULL; + } - t = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + t = rte_zmalloc_socket(p->name, + (size_t)total_size, + RTE_CACHE_LINE_SIZE, + socket_id); if (t == NULL) { RTE_LOG(ERR, TABLE, - "%s: Cannot allocate %u bytes for hash table\n", - __func__, total_size); + "%s: Cannot allocate %" PRIu64 " bytes for hash " + "table %s\n", + __func__, total_size, p->name); return NULL; } - RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table memory footprint is " - "%u bytes\n", __func__, p->key_size, total_size); + RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table %s memory footprint" + " is %" PRIu64 " bytes\n", + __func__, p->key_size, p->name, total_size); /* Memory initialization */ t->key_size = p->key_size; t->entry_size = entry_size; t->n_keys = p->n_keys; - t->n_buckets = p->n_buckets; + t->n_buckets = n_buckets; t->f_hash = p->f_hash; t->seed = p->seed; - t->signature_offset = p->signature_offset; t->key_offset = p->key_offset; /* Internal */ @@ -199,16 +255,24 @@ rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size) t->data_size_shl = __builtin_ctzl(entry_size); /* Tables */ - bucket_offset = 0; + key_mask_offset = 0; + bucket_offset = key_mask_offset + key_mask_sz; key_offset = bucket_offset + bucket_sz; key_stack_offset = key_offset + key_sz; data_offset = key_stack_offset + key_stack_sz; + t->key_mask = (uint64_t *) &t->memory[key_mask_offset]; t->buckets = (struct bucket *) &t->memory[bucket_offset]; t->key_mem = &t->memory[key_offset]; t->key_stack = (uint32_t *) &t->memory[key_stack_offset]; t->data_mem = &t->memory[data_offset]; + /* Key mask */ + if (p->key_mask == NULL) + memset(t->key_mask, 0xFF, p->key_size); + else + memcpy(t->key_mask, p->key_mask, p->key_size); + /* Key stack */ for (i = 0; i < t->n_keys; i++) t->key_stack[i] = t->n_keys - 1 - i; @@ -246,7 +310,7 @@ rte_table_hash_lru_entry_add(void *table, void *key, void *entry, uint64_t sig; uint32_t bkt_index, i; - sig = t->f_hash(key, t->key_size, t->seed); + sig = t->f_hash(key, t->key_mask, t->key_size, t->seed); bkt_index = sig & t->bucket_mask; 
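In this variable-key-size table a single 64-bit hash is used twice: the low bits, masked with bucket_mask, select the bucket, while the hash shifted right by 16 and OR-ed with 1 (so it can never equal 0, the value kept for empty slots) is truncated to the 16-bit per-slot signature that is checked before the full masked key compare. A toy sketch of that split (names ours):

#include <stdint.h>
#include <stdio.h>

struct slot_ref {
	uint32_t bkt_index;	/* which bucket the key hashes to */
	uint16_t tag;		/* per-slot signature; 0 means "empty slot" */
};

/* Split a 64-bit hash the way the LRU/EXT tables in this patch do. */
static struct slot_ref hash_split(uint64_t sig, uint32_t n_buckets /* power of 2 */)
{
	struct slot_ref r;

	r.bkt_index = (uint32_t)(sig & (n_buckets - 1));
	r.tag = (uint16_t)((sig >> 16) | 1LLU);	/* never 0, cannot match an empty slot */
	return r;
}

int main(void)
{
	struct slot_ref r = hash_split(0xDEADBEEFCAFEF00DULL, 1 << 10);

	printf("bucket %u, tag 0x%04x\n", r.bkt_index, r.tag);
	return 0;
}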
bkt = &t->buckets[bkt_index]; sig = (sig >> 16) | 1LLU; @@ -258,8 +322,8 @@ rte_table_hash_lru_entry_add(void *table, void *key, void *entry, uint8_t *bkt_key = &t->key_mem[bkt_key_index << t->key_size_shl]; - if ((sig == bkt_sig) && (memcmp(key, bkt_key, t->key_size) - == 0)) { + if ((sig == bkt_sig) && (keycmp(bkt_key, key, t->key_mask, + t->key_size) == 0)) { uint8_t *data = &t->data_mem[bkt_key_index << t->data_size_shl]; @@ -292,7 +356,7 @@ rte_table_hash_lru_entry_add(void *table, void *key, void *entry, bkt->sig[i] = (uint16_t) sig; bkt->key_pos[i] = bkt_key_index; - memcpy(bkt_key, key, t->key_size); + keycpy(bkt_key, key, t->key_mask, t->key_size); memcpy(data, entry, t->entry_size); lru_update(bkt, i); @@ -311,7 +375,7 @@ rte_table_hash_lru_entry_add(void *table, void *key, void *entry, uint8_t *data = &t->data_mem[bkt_key_index << t->data_size_shl]; bkt->sig[pos] = (uint16_t) sig; - memcpy(bkt_key, key, t->key_size); + keycpy(bkt_key, key, t->key_mask, t->key_size); memcpy(data, entry, t->entry_size); lru_update(bkt, pos); @@ -330,7 +394,7 @@ rte_table_hash_lru_entry_delete(void *table, void *key, int *key_found, uint64_t sig; uint32_t bkt_index, i; - sig = t->f_hash(key, t->key_size, t->seed); + sig = t->f_hash(key, t->key_mask, t->key_size, t->seed); bkt_index = sig & t->bucket_mask; bkt = &t->buckets[bkt_index]; sig = (sig >> 16) | 1LLU; @@ -343,14 +407,15 @@ rte_table_hash_lru_entry_delete(void *table, void *key, int *key_found, t->key_size_shl]; if ((sig == bkt_sig) && - (memcmp(key, bkt_key, t->key_size) == 0)) { + (keycmp(bkt_key, key, t->key_mask, t->key_size) == 0)) { uint8_t *data = &t->data_mem[bkt_key_index << t->data_size_shl]; bkt->sig[i] = 0; t->key_stack[t->key_stack_tos++] = bkt_key_index; *key_found = 1; - memcpy(entry, data, t->entry_size); + if (entry) + memcpy(entry, data, t->entry_size); return 0; } } @@ -365,8 +430,7 @@ static int rte_table_hash_lru_lookup_unoptimized( struct rte_mbuf **pkts, uint64_t pkts_mask, uint64_t *lookup_hit_mask, - void **entries, - int dosig) + void **entries) { struct rte_table_hash *t = (struct rte_table_hash *) table; uint64_t pkts_mask_out = 0; @@ -387,11 +451,7 @@ static int rte_table_hash_lru_lookup_unoptimized( pkt = pkts[pkt_index]; key = RTE_MBUF_METADATA_UINT8_PTR(pkt, t->key_offset); - if (dosig) - sig = (uint64_t) t->f_hash(key, t->key_size, t->seed); - else - sig = RTE_MBUF_METADATA_UINT32(pkt, - t->signature_offset); + sig = (uint64_t) t->f_hash(key, t->key_mask, t->key_size, t->seed); bkt_index = sig & t->bucket_mask; bkt = &t->buckets[bkt_index]; @@ -404,7 +464,7 @@ static int rte_table_hash_lru_lookup_unoptimized( uint8_t *bkt_key = &t->key_mem[bkt_key_index << t->key_size_shl]; - if ((sig == bkt_sig) && (memcmp(key, bkt_key, + if ((sig == bkt_sig) && (keycmp(bkt_key, key, t->key_mask, t->key_size) == 0)) { uint8_t *data = &t->data_mem[bkt_key_index << t->data_size_shl]; @@ -502,74 +562,75 @@ static int rte_table_hash_lru_lookup_unoptimized( match_pos = (LUT_MATCH_POS >> (mask_all << 1)) & 3; \ } -#define lookup_cmp_key(mbuf, key, match_key, f) \ -{ \ +#define lookup_cmp_key(mbuf, key, match_key, f) \ +{ \ uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(mbuf, f->key_offset);\ - uint64_t *bkt_key = (uint64_t *) key; \ - \ - switch (f->key_size) { \ - case 8: \ - { \ - uint64_t xor = pkt_key[0] ^ bkt_key[0]; \ - match_key = 0; \ - if (xor == 0) \ - match_key = 1; \ - } \ - break; \ - \ - case 16: \ - { \ - uint64_t xor[2], or; \ - \ - xor[0] = pkt_key[0] ^ bkt_key[0]; \ - xor[1] = pkt_key[1] ^ bkt_key[1]; \ - or = 
xor[0] | xor[1]; \ - match_key = 0; \ - if (or == 0) \ - match_key = 1; \ - } \ - break; \ - \ - case 32: \ - { \ - uint64_t xor[4], or; \ - \ - xor[0] = pkt_key[0] ^ bkt_key[0]; \ - xor[1] = pkt_key[1] ^ bkt_key[1]; \ - xor[2] = pkt_key[2] ^ bkt_key[2]; \ - xor[3] = pkt_key[3] ^ bkt_key[3]; \ - or = xor[0] | xor[1] | xor[2] | xor[3]; \ - match_key = 0; \ - if (or == 0) \ - match_key = 1; \ - } \ - break; \ - \ - case 64: \ - { \ - uint64_t xor[8], or; \ - \ - xor[0] = pkt_key[0] ^ bkt_key[0]; \ - xor[1] = pkt_key[1] ^ bkt_key[1]; \ - xor[2] = pkt_key[2] ^ bkt_key[2]; \ - xor[3] = pkt_key[3] ^ bkt_key[3]; \ - xor[4] = pkt_key[4] ^ bkt_key[4]; \ - xor[5] = pkt_key[5] ^ bkt_key[5]; \ - xor[6] = pkt_key[6] ^ bkt_key[6]; \ - xor[7] = pkt_key[7] ^ bkt_key[7]; \ - or = xor[0] | xor[1] | xor[2] | xor[3] | \ - xor[4] | xor[5] | xor[6] | xor[7]; \ - match_key = 0; \ - if (or == 0) \ - match_key = 1; \ - } \ - break; \ - \ - default: \ - match_key = 0; \ - if (memcmp(pkt_key, bkt_key, f->key_size) == 0) \ - match_key = 1; \ - } \ + uint64_t *bkt_key = (uint64_t *) key; \ + uint64_t *key_mask = f->key_mask; \ + \ + switch (f->key_size) { \ + case 8: \ + { \ + uint64_t xor = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \ + match_key = 0; \ + if (xor == 0) \ + match_key = 1; \ + } \ + break; \ + \ + case 16: \ + { \ + uint64_t xor[2], or; \ + \ + xor[0] = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \ + xor[1] = (pkt_key[1] & key_mask[1]) ^ bkt_key[1]; \ + or = xor[0] | xor[1]; \ + match_key = 0; \ + if (or == 0) \ + match_key = 1; \ + } \ + break; \ + \ + case 32: \ + { \ + uint64_t xor[4], or; \ + \ + xor[0] = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \ + xor[1] = (pkt_key[1] & key_mask[1]) ^ bkt_key[1]; \ + xor[2] = (pkt_key[2] & key_mask[2]) ^ bkt_key[2]; \ + xor[3] = (pkt_key[3] & key_mask[3]) ^ bkt_key[3]; \ + or = xor[0] | xor[1] | xor[2] | xor[3]; \ + match_key = 0; \ + if (or == 0) \ + match_key = 1; \ + } \ + break; \ + \ + case 64: \ + { \ + uint64_t xor[8], or; \ + \ + xor[0] = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \ + xor[1] = (pkt_key[1] & key_mask[1]) ^ bkt_key[1]; \ + xor[2] = (pkt_key[2] & key_mask[2]) ^ bkt_key[2]; \ + xor[3] = (pkt_key[3] & key_mask[3]) ^ bkt_key[3]; \ + xor[4] = (pkt_key[4] & key_mask[4]) ^ bkt_key[4]; \ + xor[5] = (pkt_key[5] & key_mask[5]) ^ bkt_key[5]; \ + xor[6] = (pkt_key[6] & key_mask[6]) ^ bkt_key[6]; \ + xor[7] = (pkt_key[7] & key_mask[7]) ^ bkt_key[7]; \ + or = xor[0] | xor[1] | xor[2] | xor[3] | \ + xor[4] | xor[5] | xor[6] | xor[7]; \ + match_key = 0; \ + if (or == 0) \ + match_key = 1; \ + } \ + break; \ + \ + default: \ + match_key = 0; \ + if (keycmp(bkt_key, pkt_key, key_mask, f->key_size) == 0) \ + match_key = 1; \ + } \ } #define lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index)\ @@ -616,38 +677,7 @@ static int rte_table_hash_lru_lookup_unoptimized( rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\ } -#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index) \ -{ \ - struct grinder *g10, *g11; \ - uint64_t sig10, sig11, bkt10_index, bkt11_index; \ - struct rte_mbuf *mbuf10, *mbuf11; \ - struct bucket *bkt10, *bkt11, *buckets = t->buckets; \ - uint64_t bucket_mask = t->bucket_mask; \ - uint32_t signature_offset = t->signature_offset; \ - \ - mbuf10 = pkts[pkt10_index]; \ - sig10 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf10, signature_offset);\ - bkt10_index = sig10 & bucket_mask; \ - bkt10 = &buckets[bkt10_index]; \ - \ - mbuf11 = pkts[pkt11_index]; \ - sig11 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf11, signature_offset);\ 
- bkt11_index = sig11 & bucket_mask; \ - bkt11 = &buckets[bkt11_index]; \ - \ - rte_prefetch0(bkt10); \ - rte_prefetch0(bkt11); \ - \ - g10 = &g[pkt10_index]; \ - g10->sig = sig10; \ - g10->bkt = bkt10; \ - \ - g11 = &g[pkt11_index]; \ - g11->sig = sig11; \ - g11->bkt = bkt11; \ -} - -#define lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index)\ +#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index)\ { \ struct grinder *g10, *g11; \ uint64_t sig10, sig11, bkt10_index, bkt11_index; \ @@ -662,13 +692,13 @@ static int rte_table_hash_lru_lookup_unoptimized( \ mbuf10 = pkts[pkt10_index]; \ key10 = RTE_MBUF_METADATA_UINT8_PTR(mbuf10, key_offset);\ - sig10 = (uint64_t) f_hash(key10, key_size, seed); \ + sig10 = (uint64_t) f_hash(key10, t->key_mask, key_size, seed);\ bkt10_index = sig10 & bucket_mask; \ bkt10 = &buckets[bkt10_index]; \ \ mbuf11 = pkts[pkt11_index]; \ key11 = RTE_MBUF_METADATA_UINT8_PTR(mbuf11, key_offset);\ - sig11 = (uint64_t) f_hash(key11, key_size, seed); \ + sig11 = (uint64_t) f_hash(key11, t->key_mask, key_size, seed);\ bkt11_index = sig11 & bucket_mask; \ bkt11 = &buckets[bkt11_index]; \ \ @@ -819,7 +849,7 @@ static int rte_table_hash_lru_lookup( /* Cannot run the pipeline with less than 7 packets */ if (__builtin_popcountll(pkts_mask) < 7) return rte_table_hash_lru_lookup_unoptimized(table, pkts, - pkts_mask, lookup_hit_mask, entries, 0); + pkts_mask, lookup_hit_mask, entries); /* Pipeline stage 0 */ lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); @@ -923,140 +953,7 @@ static int rte_table_hash_lru_lookup( uint64_t pkts_mask_out_slow = 0; status = rte_table_hash_lru_lookup_unoptimized(table, pkts, - pkts_mask_match_many, &pkts_mask_out_slow, entries, 0); - pkts_mask_out |= pkts_mask_out_slow; - } - - *lookup_hit_mask = pkts_mask_out; - RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out)); - return status; -} - -static int rte_table_hash_lru_lookup_dosig( - void *table, - struct rte_mbuf **pkts, - uint64_t pkts_mask, - uint64_t *lookup_hit_mask, - void **entries) -{ - struct rte_table_hash *t = (struct rte_table_hash *) table; - struct grinder *g = t->grinders; - uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index; - uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index; - uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0; - int status = 0; - - __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); - RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(t, n_pkts_in); - - /* Cannot run the pipeline with less than 7 packets */ - if (__builtin_popcountll(pkts_mask) < 7) - return rte_table_hash_lru_lookup_unoptimized(table, pkts, - pkts_mask, lookup_hit_mask, entries, 1); - - /* Pipeline stage 0 */ - lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); - - /* Pipeline feed */ - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); - - /* Pipeline feed */ - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); - - /* Pipeline stage 2 */ - lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); - - /* - * Pipeline run - * - */ - for ( ; pkts_mask; ) { - 
/* Pipeline feed */ - pkt30_index = pkt20_index; - pkt31_index = pkt21_index; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 0 */ - lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, - pkt00_index, pkt01_index); - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); - - /* Pipeline stage 2 */ - lookup2_stage2(t, g, pkt20_index, pkt21_index, - pkts_mask_match_many); - - /* Pipeline stage 3 */ - lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, - pkts_mask_out, entries); - } - - /* Pipeline feed */ - pkt30_index = pkt20_index; - pkt31_index = pkt21_index; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - pkt10_index = pkt00_index; - pkt11_index = pkt01_index; - - /* Pipeline stage 1 */ - lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); - - /* Pipeline stage 2 */ - lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); - - /* Pipeline stage 3 */ - lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, - entries); - - /* Pipeline feed */ - pkt30_index = pkt20_index; - pkt31_index = pkt21_index; - pkt20_index = pkt10_index; - pkt21_index = pkt11_index; - - /* Pipeline stage 2 */ - lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); - - /* Pipeline stage 3 */ - lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, - entries); - - /* Pipeline feed */ - pkt30_index = pkt20_index; - pkt31_index = pkt21_index; - - /* Pipeline stage 3 */ - lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, - entries); - - /* Slow path */ - pkts_mask_match_many &= ~pkts_mask_out; - if (pkts_mask_match_many) { - uint64_t pkts_mask_out_slow = 0; - - status = rte_table_hash_lru_lookup_unoptimized(table, pkts, - pkts_mask_match_many, &pkts_mask_out_slow, entries, 1); + pkts_mask_match_many, &pkts_mask_out_slow, entries); pkts_mask_out |= pkts_mask_out_slow; } @@ -1089,14 +986,3 @@ struct rte_table_ops rte_table_hash_lru_ops = { .f_lookup = rte_table_hash_lru_lookup, .f_stats = rte_table_hash_lru_stats_read, }; - -struct rte_table_ops rte_table_hash_lru_dosig_ops = { - .f_create = rte_table_hash_lru_create, - .f_free = rte_table_hash_lru_free, - .f_add = rte_table_hash_lru_entry_add, - .f_delete = rte_table_hash_lru_entry_delete, - .f_add_bulk = NULL, - .f_delete_bulk = NULL, - .f_lookup = rte_table_hash_lru_lookup_dosig, - .f_stats = rte_table_hash_lru_stats_read, -}; diff --git a/lib/librte_table/rte_table_version.map b/lib/librte_table/rte_table_version.map index e1eaa275..6237252b 100644 --- a/lib/librte_table/rte_table_version.map +++ b/lib/librte_table/rte_table_version.map @@ -1,19 +1,16 @@ -DPDK_2.0 { +DPDK_17.11 { global: rte_table_acl_ops; rte_table_array_ops; - rte_table_hash_ext_dosig_ops; + rte_table_hash_cuckoo_ops; rte_table_hash_ext_ops; - rte_table_hash_key8_ext_dosig_ops; - rte_table_hash_key8_ext_ops; - rte_table_hash_key8_lru_dosig_ops; - rte_table_hash_key8_lru_ops; rte_table_hash_key16_ext_ops; rte_table_hash_key16_lru_ops; rte_table_hash_key32_ext_ops; rte_table_hash_key32_lru_ops; - rte_table_hash_lru_dosig_ops; + rte_table_hash_key8_ext_ops; + rte_table_hash_key8_lru_ops; rte_table_hash_lru_ops; rte_table_lpm_ipv6_ops; rte_table_lpm_ops; @@ -21,18 +18,3 @@ DPDK_2.0 { local: *; }; - -DPDK_2.2 { - global: - - rte_table_hash_key16_ext_dosig_ops; - rte_table_hash_key16_lru_dosig_ops; - -}; - -DPDK_16.07 { - global: - - rte_table_hash_cuckoo_dosig_ops; - -} DPDK_2.0; diff --git 
a/lib/librte_timer/Makefile b/lib/librte_timer/Makefile index 03a15390..eb9c5624 100644 --- a/lib/librte_timer/Makefile +++ b/lib/librte_timer/Makefile @@ -35,6 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_timer.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 +LDLIBS += -lrte_eal EXPORT_MAP := rte_timer_version.map diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c index 5ee08408..28decc39 100644 --- a/lib/librte_timer/rte_timer.c +++ b/lib/librte_timer/rte_timer.c @@ -43,7 +43,6 @@ #include <rte_cycles.h> #include <rte_per_lcore.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_lcore.h> @@ -432,7 +431,8 @@ rte_timer_reset(struct rte_timer *tim, uint64_t ticks, uint64_t period; if (unlikely((tim_lcore != (unsigned)LCORE_ID_ANY) && - !rte_lcore_is_enabled(tim_lcore))) + !(rte_lcore_is_enabled(tim_lcore) || + rte_lcore_has_role(tim_lcore, ROLE_SERVICE)))) return -1; if (type == PERIODICAL) @@ -525,7 +525,7 @@ void rte_timer_manage(void) return; cur_time = rte_get_timer_cycles(); -#ifdef RTE_ARCH_X86_64 +#ifdef RTE_ARCH_64 /* on 64-bit the value cached in the pending_head.expired will be * updated atomically, so we can consult that for a quick check here * outside the lock */ diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile index 4a116fe3..be182798 100644 --- a/lib/librte_vhost/Makefile +++ b/lib/librte_vhost/Makefile @@ -45,10 +45,11 @@ LDLIBS += -lpthread ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y) LDLIBS += -lnuma endif +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev # all source are stored in SRCS-y -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c socket.c vhost.c vhost_user.c \ - virtio_net.c +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \ + vhost_user.c virtio_net.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c index 2ceacc9a..4c6fed41 100644 --- a/lib/librte_vhost/fd_man.c +++ b/lib/librte_vhost/fd_man.c @@ -222,6 +222,7 @@ fdset_event_dispatch(void *arg) int remove1, remove2; int need_shrink; struct fdset *pfdset = arg; + int val; if (pfdset == NULL) return NULL; @@ -239,7 +240,9 @@ fdset_event_dispatch(void *arg) numfds = pfdset->num; pthread_mutex_unlock(&pfdset->fd_mutex); - poll(pfdset->rwfds, numfds, 1000 /* millisecs */); + val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */); + if (val < 0) + continue; need_shrink = 0; for (i = 0; i < numfds; i++) { diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c new file mode 100644 index 00000000..b74cc6a7 --- /dev/null +++ b/lib/librte_vhost/iotlb.c @@ -0,0 +1,350 @@ +/*- + * BSD LICENSE + * + * Copyright (c) 2017 Red Hat, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef RTE_LIBRTE_VHOST_NUMA +#include <numaif.h> +#endif + +#include <rte_tailq.h> + +#include "iotlb.h" +#include "vhost.h" + +struct vhost_iotlb_entry { + TAILQ_ENTRY(vhost_iotlb_entry) next; + + uint64_t iova; + uint64_t uaddr; + uint64_t size; + uint8_t perm; +}; + +#define IOTLB_CACHE_SIZE 2048 + +static void +vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq) +{ + struct vhost_iotlb_entry *node, *temp_node; + + rte_rwlock_write_lock(&vq->iotlb_pending_lock); + + TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) { + TAILQ_REMOVE(&vq->iotlb_pending_list, node, next); + rte_mempool_put(vq->iotlb_pool, node); + } + + rte_rwlock_write_unlock(&vq->iotlb_pending_lock); +} + +bool +vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova, + uint8_t perm) +{ + struct vhost_iotlb_entry *node; + bool found = false; + + rte_rwlock_read_lock(&vq->iotlb_pending_lock); + + TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) { + if ((node->iova == iova) && (node->perm == perm)) { + found = true; + break; + } + } + + rte_rwlock_read_unlock(&vq->iotlb_pending_lock); + + return found; +} + +void +vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, + uint64_t iova, uint8_t perm) +{ + struct vhost_iotlb_entry *node; + int ret; + + ret = rte_mempool_get(vq->iotlb_pool, (void **)&node); + if (ret) { + RTE_LOG(INFO, VHOST_CONFIG, + "IOTLB pool empty, clear pending misses\n"); + vhost_user_iotlb_pending_remove_all(vq); + ret = rte_mempool_get(vq->iotlb_pool, (void **)&node); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n"); + return; + } + } + + node->iova = iova; + node->perm = perm; + + rte_rwlock_write_lock(&vq->iotlb_pending_lock); + + TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next); + + rte_rwlock_write_unlock(&vq->iotlb_pending_lock); +} + +static void +vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, + uint64_t iova, uint64_t size, uint8_t perm) +{ + struct vhost_iotlb_entry *node, *temp_node; + + rte_rwlock_write_lock(&vq->iotlb_pending_lock); + + TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) { + if (node->iova < iova) + continue; + if (node->iova >= iova + size) + continue; + if ((node->perm & perm) != node->perm) + continue; + TAILQ_REMOVE(&vq->iotlb_pending_list, node, next); + rte_mempool_put(vq->iotlb_pool, node); + } + + rte_rwlock_write_unlock(&vq->iotlb_pending_lock); +} + +static void +vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq) +{ + struct vhost_iotlb_entry *node, *temp_node; + + rte_rwlock_write_lock(&vq->iotlb_lock); + + TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) { + TAILQ_REMOVE(&vq->iotlb_list, node, next); + 
rte_mempool_put(vq->iotlb_pool, node); + } + + vq->iotlb_cache_nr = 0; + + rte_rwlock_write_unlock(&vq->iotlb_lock); +} + +static void +vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq) +{ + struct vhost_iotlb_entry *node, *temp_node; + int entry_idx; + + rte_rwlock_write_lock(&vq->iotlb_lock); + + entry_idx = rte_rand() % vq->iotlb_cache_nr; + + TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) { + if (!entry_idx) { + TAILQ_REMOVE(&vq->iotlb_list, node, next); + rte_mempool_put(vq->iotlb_pool, node); + vq->iotlb_cache_nr--; + break; + } + entry_idx--; + } + + rte_rwlock_write_unlock(&vq->iotlb_lock); +} + +void +vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova, + uint64_t uaddr, uint64_t size, uint8_t perm) +{ + struct vhost_iotlb_entry *node, *new_node; + int ret; + + ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node); + if (ret) { + RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB pool empty, evict one entry\n"); + vhost_user_iotlb_cache_random_evict(vq); + ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n"); + return; + } + } + + new_node->iova = iova; + new_node->uaddr = uaddr; + new_node->size = size; + new_node->perm = perm; + + rte_rwlock_write_lock(&vq->iotlb_lock); + + TAILQ_FOREACH(node, &vq->iotlb_list, next) { + /* + * Entries must be invalidated before being updated. + * So if iova already in list, assume identical. + */ + if (node->iova == new_node->iova) { + rte_mempool_put(vq->iotlb_pool, new_node); + goto unlock; + } else if (node->iova > new_node->iova) { + TAILQ_INSERT_BEFORE(node, new_node, next); + vq->iotlb_cache_nr++; + goto unlock; + } + } + + TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next); + vq->iotlb_cache_nr++; + +unlock: + vhost_user_iotlb_pending_remove(vq, iova, size, perm); + + rte_rwlock_write_unlock(&vq->iotlb_lock); + +} + +void +vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq, + uint64_t iova, uint64_t size) +{ + struct vhost_iotlb_entry *node, *temp_node; + + if (unlikely(!size)) + return; + + rte_rwlock_write_lock(&vq->iotlb_lock); + + TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) { + /* Sorted list */ + if (unlikely(iova + size < node->iova)) + break; + + if (iova < node->iova + node->size) { + TAILQ_REMOVE(&vq->iotlb_list, node, next); + rte_mempool_put(vq->iotlb_pool, node); + vq->iotlb_cache_nr--; + } + } + + rte_rwlock_write_unlock(&vq->iotlb_lock); +} + +uint64_t +vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova, + uint64_t *size, uint8_t perm) +{ + struct vhost_iotlb_entry *node; + uint64_t offset, vva = 0, mapped = 0; + + if (unlikely(!*size)) + goto out; + + TAILQ_FOREACH(node, &vq->iotlb_list, next) { + /* List sorted by iova */ + if (unlikely(iova < node->iova)) + break; + + if (iova >= node->iova + node->size) + continue; + + if (unlikely((perm & node->perm) != perm)) { + vva = 0; + break; + } + + offset = iova - node->iova; + if (!vva) + vva = node->uaddr + offset; + + mapped += node->size - offset; + iova = node->iova + node->size; + + if (mapped >= *size) + break; + } + +out: + /* Only part of the requested chunk is mapped */ + if (unlikely(mapped < *size)) + *size = mapped; + + return vva; +} + +int +vhost_user_iotlb_init(struct virtio_net *dev, int vq_index) +{ + char pool_name[RTE_MEMPOOL_NAMESIZE]; + struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; + int socket = 0; + + if (vq->iotlb_pool) { + /* + * The cache has already been initialized, + * just drop all 
cached and pending entries. + */ + vhost_user_iotlb_cache_remove_all(vq); + vhost_user_iotlb_pending_remove_all(vq); + } + +#ifdef RTE_LIBRTE_VHOST_NUMA + if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0) + socket = 0; +#endif + + rte_rwlock_init(&vq->iotlb_lock); + rte_rwlock_init(&vq->iotlb_pending_lock); + + TAILQ_INIT(&vq->iotlb_list); + TAILQ_INIT(&vq->iotlb_pending_list); + + snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d", + dev->vid, vq_index); + + /* If already created, free it and recreate */ + vq->iotlb_pool = rte_mempool_lookup(pool_name); + if (vq->iotlb_pool) + rte_mempool_free(vq->iotlb_pool); + + vq->iotlb_pool = rte_mempool_create(pool_name, + IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0, + 0, 0, NULL, NULL, NULL, socket, + MEMPOOL_F_NO_CACHE_ALIGN | + MEMPOOL_F_SP_PUT | + MEMPOOL_F_SC_GET); + if (!vq->iotlb_pool) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to create IOTLB cache pool (%s)\n", + pool_name); + return -1; + } + + vq->iotlb_cache_nr = 0; + + return 0; +} + diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h new file mode 100644 index 00000000..f1a050e4 --- /dev/null +++ b/lib/librte_vhost/iotlb.h @@ -0,0 +1,76 @@ +/*- + * BSD LICENSE + * + * Copyright (c) 2017 Red Hat, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef _VHOST_IOTLB_H_ +#define _VHOST_IOTLB_H_ + +#include <stdbool.h> + +#include "vhost.h" + +static __rte_always_inline void +vhost_user_iotlb_rd_lock(struct vhost_virtqueue *vq) +{ + rte_rwlock_read_lock(&vq->iotlb_lock); +} + +static __rte_always_inline void +vhost_user_iotlb_rd_unlock(struct vhost_virtqueue *vq) +{ + rte_rwlock_read_unlock(&vq->iotlb_lock); +} + +static __rte_always_inline void +vhost_user_iotlb_wr_lock(struct vhost_virtqueue *vq) +{ + rte_rwlock_write_lock(&vq->iotlb_lock); +} + +static __rte_always_inline void +vhost_user_iotlb_wr_unlock(struct vhost_virtqueue *vq) +{ + rte_rwlock_write_unlock(&vq->iotlb_lock); +} + +void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova, + uint64_t uaddr, uint64_t size, + uint8_t perm); +void vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq, + uint64_t iova, uint64_t size); +uint64_t vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova, + uint64_t *size, uint8_t perm); +bool vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova, + uint8_t perm); +void vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, uint64_t iova, + uint8_t perm); +int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index); + +#endif /* _VHOST_IOTLB_H_ */ diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index 8c974eb1..f6536449 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -56,6 +56,7 @@ extern "C" { #define RTE_VHOST_USER_CLIENT (1ULL << 0) #define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1) #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) +#define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3) /** * Information relating to memory regions including offsets to @@ -107,7 +108,10 @@ struct vhost_device_ops { */ int (*features_changed)(int vid, uint64_t features); - void *reserved[4]; /**< Reserved for future extension */ + int (*new_connection)(int vid); + void (*destroy_connection)(int vid); + + void *reserved[2]; /**< Reserved for future extension */ }; /** diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index 41aa3f9b..422da002 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -68,6 +68,7 @@ struct vhost_user_socket { bool is_server; bool reconnect; bool dequeue_zero_copy; + bool iommu_support; /* * The "supported_features" indicates the feature bits the @@ -217,9 +218,7 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) vid = vhost_new_device(); if (vid == -1) { - close(fd); - free(conn); - return; + goto err; } size = strnlen(vsocket->path, PATH_MAX); @@ -230,24 +229,40 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid); + if (vsocket->notify_ops->new_connection) { + ret = vsocket->notify_ops->new_connection(vid); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to add vhost user connection with fd %d\n", + fd); + goto err; + } + } + conn->connfd = fd; conn->vsocket = vsocket; conn->vid = vid; ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb, NULL, conn); if (ret < 0) { - conn->connfd = -1; - free(conn); - close(fd); RTE_LOG(ERR, VHOST_CONFIG, "failed to add fd %d into vhost server fdset\n", fd); - return; + + if (vsocket->notify_ops->destroy_connection) + vsocket->notify_ops->destroy_connection(conn->vid); + + goto err; } pthread_mutex_lock(&vsocket->conn_mutex); TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); pthread_mutex_unlock(&vsocket->conn_mutex); + 
return; + +err: + free(conn); + close(fd); } /* call back when there is new vhost-user connection from client */ @@ -277,6 +292,9 @@ vhost_user_read_cb(int connfd, void *dat, int *remove) *remove = 1; vhost_destroy_device(conn->vid); + if (vsocket->notify_ops->destroy_connection) + vsocket->notify_ops->destroy_connection(conn->vid); + pthread_mutex_lock(&vsocket->conn_mutex); TAILQ_REMOVE(&vsocket->conn_list, conn, next); pthread_mutex_unlock(&vsocket->conn_mutex); @@ -652,6 +670,11 @@ rte_vhost_driver_register(const char *path, uint64_t flags) vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES; vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES; + if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) { + vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); + vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); + } + if ((flags & RTE_VHOST_USER_CLIENT) != 0) { vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT); if (vsocket->reconnect && reconn_tid == 0) { diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 0b6aa1cc..4f8b73a0 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -47,11 +47,49 @@ #include <rte_memory.h> #include <rte_malloc.h> #include <rte_vhost.h> +#include <rte_rwlock.h> +#include "iotlb.h" #include "vhost.h" +#include "vhost_user.h" struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; +/* Called with iotlb_lock read-locked */ +uint64_t +__vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t iova, uint64_t size, uint8_t perm) +{ + uint64_t vva, tmp_size; + + if (unlikely(!size)) + return 0; + + tmp_size = size; + + vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm); + if (tmp_size == size) + return vva; + + if (!vhost_user_iotlb_pending_miss(vq, iova + tmp_size, perm)) { + /* + * iotlb_lock is read-locked for a full burst, + * but it only protects the iotlb cache. + * In case of IOTLB miss, we might block on the socket, + * which could cause a deadlock with QEMU if an IOTLB update + * is being handled. We can safely unlock here to avoid it. 
+ */ + vhost_user_iotlb_rd_unlock(vq); + + vhost_user_iotlb_pending_insert(vq, iova + tmp_size, perm); + vhost_user_iotlb_miss(dev, iova + tmp_size, perm); + + vhost_user_iotlb_rd_lock(vq); + } + + return 0; +} + struct virtio_net * get_device(int vid) { @@ -102,40 +140,108 @@ free_device(struct virtio_net *dev) vq = dev->virtqueue[i]; rte_free(vq->shadow_used_ring); - + rte_free(vq->batch_copy_elems); + rte_mempool_free(vq->iotlb_pool); rte_free(vq); } rte_free(dev); } +int +vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + uint64_t size; + + if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) + goto out; + + size = sizeof(struct vring_desc) * vq->size; + vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq, + vq->ring_addrs.desc_user_addr, + size, VHOST_ACCESS_RW); + if (!vq->desc) + return -1; + + size = sizeof(struct vring_avail); + size += sizeof(uint16_t) * vq->size; + vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq, + vq->ring_addrs.avail_user_addr, + size, VHOST_ACCESS_RW); + if (!vq->avail) + return -1; + + size = sizeof(struct vring_used); + size += sizeof(struct vring_used_elem) * vq->size; + vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq, + vq->ring_addrs.used_user_addr, + size, VHOST_ACCESS_RW); + if (!vq->used) + return -1; + +out: + vq->access_ok = 1; + + return 0; +} + +void +vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + vhost_user_iotlb_wr_lock(vq); + + vq->access_ok = 0; + vq->desc = NULL; + vq->avail = NULL; + vq->used = NULL; + + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + vhost_user_iotlb_wr_unlock(vq); +} + static void -init_vring_queue(struct vhost_virtqueue *vq) +init_vring_queue(struct virtio_net *dev, uint32_t vring_idx) { + struct vhost_virtqueue *vq; + + if (vring_idx >= VHOST_MAX_VRING) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed not init vring, out of bound (%d)\n", + vring_idx); + return; + } + + vq = dev->virtqueue[vring_idx]; + memset(vq, 0, sizeof(struct vhost_virtqueue)); vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; + vhost_user_iotlb_init(dev, vring_idx); /* Backends are set to -1 indicating an inactive device. */ vq->backend = -1; - /* - * always set the vq to enabled; this is to keep compatibility - * with the old QEMU, whereas there is no SET_VRING_ENABLE message. 
- */ - vq->enabled = 1; - TAILQ_INIT(&vq->zmbuf_list); } static void -reset_vring_queue(struct vhost_virtqueue *vq) +reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx) { + struct vhost_virtqueue *vq; int callfd; + if (vring_idx >= VHOST_MAX_VRING) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed not init vring, out of bound (%d)\n", + vring_idx); + return; + } + + vq = dev->virtqueue[vring_idx]; callfd = vq->callfd; - init_vring_queue(vq); + init_vring_queue(dev, vring_idx); vq->callfd = callfd; } @@ -152,7 +258,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx) } dev->virtqueue[vring_idx] = vq; - init_vring_queue(vq); + init_vring_queue(dev, vring_idx); dev->nr_vring += 1; @@ -174,7 +280,7 @@ reset_device(struct virtio_net *dev) dev->flags = 0; for (i = 0; i < dev->nr_vring; i++) - reset_vring_queue(dev->virtqueue[i]); + reset_vring_queue(dev, i); } /* @@ -207,6 +313,7 @@ vhost_new_device(void) vhost_devices[i] = dev; dev->vid = i; + dev->slave_req_fd = -1; return i; } diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 6fe72aeb..1cc81c17 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -45,6 +45,7 @@ #include <rte_log.h> #include <rte_ether.h> +#include <rte_rwlock.h> #include "rte_vhost.h" @@ -81,6 +82,16 @@ struct zcopy_mbuf { }; TAILQ_HEAD(zcopy_mbuf_list, zcopy_mbuf); +/* + * Structure contains the info for each batched memory copy. + */ +struct batch_copy_elem { + void *dst; + void *src; + uint32_t len; + uint64_t log_addr; +}; + /** * Structure contains variables relevant to RX/TX virtqueues. */ @@ -102,6 +113,7 @@ struct vhost_virtqueue { /* Currently unused as polling mode is enabled */ int kickfd; int enabled; + int access_ok; /* Physical address of used ring, for logging */ uint64_t log_guest_addr; @@ -114,6 +126,17 @@ struct vhost_virtqueue { struct vring_used_elem *shadow_used_ring; uint16_t shadow_used_idx; + struct vhost_vring_addr ring_addrs; + + struct batch_copy_elem *batch_copy_elems; + uint16_t batch_copy_nb_elems; + + rte_rwlock_t iotlb_lock; + rte_rwlock_t iotlb_pending_lock; + struct rte_mempool *iotlb_pool; + TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list; + int iotlb_cache_nr; + TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list; } __rte_cache_aligned; /* Old kernels have no such macros defined */ @@ -132,6 +155,37 @@ struct vhost_virtqueue { #define VIRTIO_NET_F_MTU 3 #endif +/* Declare IOMMU related bits for older kernels */ +#ifndef VIRTIO_F_IOMMU_PLATFORM + +#define VIRTIO_F_IOMMU_PLATFORM 33 + +struct vhost_iotlb_msg { + __u64 iova; + __u64 size; + __u64 uaddr; +#define VHOST_ACCESS_RO 0x1 +#define VHOST_ACCESS_WO 0x2 +#define VHOST_ACCESS_RW 0x3 + __u8 perm; +#define VHOST_IOTLB_MISS 1 +#define VHOST_IOTLB_UPDATE 2 +#define VHOST_IOTLB_INVALIDATE 3 +#define VHOST_IOTLB_ACCESS_FAIL 4 + __u8 type; +}; + +#define VHOST_IOTLB_MSG 0x1 + +struct vhost_msg { + int type; + union { + struct vhost_iotlb_msg iotlb; + __u8 padding[64]; + }; +}; +#endif + /* * Define virtio 1.0 for older kernels */ @@ -157,7 +211,8 @@ struct vhost_virtqueue { (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \ - (1ULL << VIRTIO_NET_F_MTU)) + (1ULL << VIRTIO_NET_F_MTU) | \ + (1ULL << VIRTIO_F_IOMMU_PLATFORM)) struct guest_page { @@ -196,6 +251,8 @@ struct virtio_net { uint32_t nr_guest_pages; uint32_t max_guest_pages; struct guest_page *guest_pages; + + int slave_req_fd; } __rte_cache_aligned; @@ -281,7 +338,7 @@ extern uint64_t VHOST_FEATURES; extern struct 
virtio_net *vhost_devices[MAX_VHOST_DEVICE]; /* Convert guest physical address to host physical address */ -static __rte_always_inline phys_addr_t +static __rte_always_inline rte_iova_t gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) { uint32_t i; @@ -321,4 +378,19 @@ struct vhost_device_ops const *vhost_driver_callback_get(const char *path); */ void vhost_backend_cleanup(struct virtio_net *dev); +uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t iova, uint64_t size, uint8_t perm); +int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq); +void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq); + +static __rte_always_inline uint64_t +vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t iova, uint64_t size, uint8_t perm) +{ + if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) + return rte_vhost_gpa_to_vva(dev->mem, iova); + + return __vhost_iova_to_vva(dev, vq, iova, size, perm); +} + #endif /* _VHOST_NET_CDEV_H_ */ diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index ad2e8d38..f4c7ce46 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -48,6 +48,7 @@ #include <rte_malloc.h> #include <rte_log.h> +#include "iotlb.h" #include "vhost.h" #include "vhost_user.h" @@ -76,6 +77,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", [VHOST_USER_NET_SET_MTU] = "VHOST_USER_NET_SET_MTU", + [VHOST_USER_SET_SLAVE_REQ_FD] = "VHOST_USER_SET_SLAVE_REQ_FD", + [VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG", }; static uint64_t @@ -122,6 +125,11 @@ vhost_backend_cleanup(struct virtio_net *dev) munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); dev->log_addr = 0; } + + if (dev->slave_req_fd >= 0) { + close(dev->slave_req_fd); + dev->slave_req_fd = -1; + } } /* @@ -230,6 +238,15 @@ vhost_user_set_vring_num(struct virtio_net *dev, return -1; } + vq->batch_copy_elems = rte_malloc(NULL, + vq->size * sizeof(struct batch_copy_elem), + RTE_CACHE_LINE_SIZE); + if (!vq->batch_copy_elems) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to allocate memory for batching copy.\n"); + return -1; + } + return 0; } @@ -297,6 +314,9 @@ out: dev->virtqueue[index] = vq; vhost_devices[dev->vid] = dev; + if (old_vq != vq) + vhost_user_iotlb_init(dev, index); + return dev; } #else @@ -307,10 +327,7 @@ numa_realloc(struct virtio_net *dev, int index __rte_unused) } #endif -/* - * Converts QEMU virtual address to Vhost virtual address. This function is - * used to convert the ring addresses to our address space. - */ +/* Converts QEMU virtual address to Vhost virtual address. */ static uint64_t qva_to_vva(struct virtio_net *dev, uint64_t qva) { @@ -331,50 +348,69 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva) return 0; } + /* - * The virtio device sends us the desc, used and avail ring addresses. - * This function then converts these to our address space. + * Converts ring address to Vhost virtual address. + * If IOMMU is enabled, the ring address is a guest IO virtual address, + * else it is a QEMU virtual address. 
*/ -static int -vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg) +static uint64_t +ring_addr_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t ra, uint64_t size) { - struct vhost_virtqueue *vq; + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) { + uint64_t vva; - if (dev->mem == NULL) - return -1; + vva = vhost_user_iotlb_cache_find(vq, ra, + &size, VHOST_ACCESS_RW); + if (!vva) + vhost_user_iotlb_miss(dev, ra, VHOST_ACCESS_RW); - /* addr->index refers to the queue index. The txq 1, rxq is 0. */ - vq = dev->virtqueue[msg->payload.addr.index]; + return vva; + } + + return qva_to_vva(dev, ra); +} + +static struct virtio_net * +translate_ring_addresses(struct virtio_net *dev, int vq_index) +{ + struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; + struct vhost_vring_addr *addr = &vq->ring_addrs; /* The addresses are converted from QEMU virtual to Vhost virtual. */ - vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev, - msg->payload.addr.desc_user_addr); + if (vq->desc && vq->avail && vq->used) + return dev; + + vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev, + vq, addr->desc_user_addr, sizeof(struct vring_desc)); if (vq->desc == 0) { - RTE_LOG(ERR, VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "(%d) failed to find desc ring address.\n", dev->vid); - return -1; + return dev; } - dev = numa_realloc(dev, msg->payload.addr.index); - vq = dev->virtqueue[msg->payload.addr.index]; + dev = numa_realloc(dev, vq_index); + vq = dev->virtqueue[vq_index]; + addr = &vq->ring_addrs; - vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev, - msg->payload.addr.avail_user_addr); + vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev, + vq, addr->avail_user_addr, sizeof(struct vring_avail)); if (vq->avail == 0) { - RTE_LOG(ERR, VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "(%d) failed to find avail ring address.\n", dev->vid); - return -1; + return dev; } - vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev, - msg->payload.addr.used_user_addr); + vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev, + vq, addr->used_user_addr, sizeof(struct vring_used)); if (vq->used == 0) { - RTE_LOG(ERR, VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "(%d) failed to find used ring address.\n", dev->vid); - return -1; + return dev; } if (vq->last_used_idx != vq->used->idx) { @@ -386,7 +422,7 @@ vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg) vq->last_avail_idx = vq->used->idx; } - vq->log_guest_addr = msg->payload.addr.log_guest_addr; + vq->log_guest_addr = addr->log_guest_addr; LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", dev->vid, vq->desc); @@ -397,6 +433,43 @@ vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg) LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", dev->vid, vq->log_guest_addr); + return dev; +} + +/* + * The virtio device sends us the desc, used and avail ring addresses. + * This function then converts these to our address space. + */ +static int +vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg) +{ + struct vhost_virtqueue *vq; + struct vhost_vring_addr *addr = &msg->payload.addr; + struct virtio_net *dev = *pdev; + + if (dev->mem == NULL) + return -1; + + /* addr->index refers to the queue index. The txq 1, rxq is 0. 
*/ + vq = dev->virtqueue[msg->payload.addr.index]; + + /* + * Rings addresses should not be interpreted as long as the ring is not + * started and enabled + */ + memcpy(&vq->ring_addrs, addr, sizeof(*addr)); + + vring_invalidate(dev, vq); + + if (vq->enabled && (dev->features & + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) { + dev = translate_ring_addresses(dev, msg->payload.state.index); + if (!dev) + return -1; + + *pdev = dev; + } + return 0; } @@ -453,7 +526,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, uint64_t host_phys_addr; uint64_t size; - host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)host_user_addr); + host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)host_user_addr); size = page_size - (guest_phys_addr & (page_size - 1)); size = RTE_MIN(size, reg_size); @@ -464,7 +537,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, while (reg_size > 0) { size = RTE_MIN(reg_size, page_size); - host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t) + host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t) host_user_addr); add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); @@ -620,7 +693,7 @@ err_mmap: static int vq_is_ready(struct vhost_virtqueue *vq) { - return vq && vq->desc && + return vq && vq->desc && vq->avail && vq->used && vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD && vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD; } @@ -668,10 +741,11 @@ vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg) } static void -vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg) +vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *pmsg) { struct vhost_vring_file file; struct vhost_virtqueue *vq; + struct virtio_net *dev = *pdev; file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) @@ -681,7 +755,23 @@ vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg) RTE_LOG(INFO, VHOST_CONFIG, "vring kick idx:%d file:%d\n", file.index, file.fd); + /* Interpret ring addresses only when ring is started. */ + dev = translate_ring_addresses(dev, file.index); + if (!dev) + return; + + *pdev = dev; + vq = dev->virtqueue[file.index]; + + /* + * When VHOST_USER_F_PROTOCOL_FEATURES is not negotiated, + * the ring starts already enabled. Otherwise, it is enabled via + * the SET_VRING_ENABLE message. + */ + if (!(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) + vq->enabled = 1; + if (vq->kickfd >= 0) close(vq->kickfd); vq->kickfd = file.fd; @@ -741,6 +831,9 @@ vhost_user_get_vring_base(struct virtio_net *dev, rte_free(vq->shadow_used_ring); vq->shadow_used_ring = NULL; + rte_free(vq->batch_copy_elems); + vq->batch_copy_elems = NULL; + return 0; } @@ -768,6 +861,27 @@ vhost_user_set_vring_enable(struct virtio_net *dev, } static void +vhost_user_get_protocol_features(struct virtio_net *dev, + struct VhostUserMsg *msg) +{ + uint64_t features, protocol_features = VHOST_USER_PROTOCOL_FEATURES; + + rte_vhost_driver_get_features(dev->ifname, &features); + + /* + * REPLY_ACK protocol feature is only mandatory for now + * for IOMMU feature. If IOMMU is explicitly disabled by the + * application, disable also REPLY_ACK feature for older buggy + * Qemu versions (from v2.7.0 to v2.9.0). 
+ */ + if (!(features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) + protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK); + + msg->payload.u64 = protocol_features; + msg->size = sizeof(msg->payload.u64); +} + +static void vhost_user_set_protocol_features(struct virtio_net *dev, uint64_t protocol_features) { @@ -874,6 +988,116 @@ vhost_user_net_set_mtu(struct virtio_net *dev, struct VhostUserMsg *msg) return 0; } +static int +vhost_user_set_req_fd(struct virtio_net *dev, struct VhostUserMsg *msg) +{ + int fd = msg->fds[0]; + + if (fd < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "Invalid file descriptor for slave channel (%d)\n", + fd); + return -1; + } + + dev->slave_req_fd = fd; + + return 0; +} + +static int +is_vring_iotlb_update(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg) +{ + struct vhost_vring_addr *ra; + uint64_t start, end; + + start = imsg->iova; + end = start + imsg->size; + + ra = &vq->ring_addrs; + if (ra->desc_user_addr >= start && ra->desc_user_addr < end) + return 1; + if (ra->avail_user_addr >= start && ra->avail_user_addr < end) + return 1; + if (ra->used_user_addr >= start && ra->used_user_addr < end) + return 1; + + return 0; +} + +static int +is_vring_iotlb_invalidate(struct vhost_virtqueue *vq, + struct vhost_iotlb_msg *imsg) +{ + uint64_t istart, iend, vstart, vend; + + istart = imsg->iova; + iend = istart + imsg->size - 1; + + vstart = (uintptr_t)vq->desc; + vend = vstart + sizeof(struct vring_desc) * vq->size - 1; + if (vstart <= iend && istart <= vend) + return 1; + + vstart = (uintptr_t)vq->avail; + vend = vstart + sizeof(struct vring_avail); + vend += sizeof(uint16_t) * vq->size - 1; + if (vstart <= iend && istart <= vend) + return 1; + + vstart = (uintptr_t)vq->used; + vend = vstart + sizeof(struct vring_used); + vend += sizeof(struct vring_used_elem) * vq->size - 1; + if (vstart <= iend && istart <= vend) + return 1; + + return 0; +} + +static int +vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg) +{ + struct virtio_net *dev = *pdev; + struct vhost_iotlb_msg *imsg = &msg->payload.iotlb; + uint16_t i; + uint64_t vva; + + switch (imsg->type) { + case VHOST_IOTLB_UPDATE: + vva = qva_to_vva(dev, imsg->uaddr); + if (!vva) + return -1; + + for (i = 0; i < dev->nr_vring; i++) { + struct vhost_virtqueue *vq = dev->virtqueue[i]; + + vhost_user_iotlb_cache_insert(vq, imsg->iova, vva, + imsg->size, imsg->perm); + + if (is_vring_iotlb_update(vq, imsg)) + *pdev = dev = translate_ring_addresses(dev, i); + } + break; + case VHOST_IOTLB_INVALIDATE: + for (i = 0; i < dev->nr_vring; i++) { + struct vhost_virtqueue *vq = dev->virtqueue[i]; + + vhost_user_iotlb_cache_remove(vq, imsg->iova, + imsg->size); + + if (is_vring_iotlb_invalidate(vq, imsg)) + vring_invalidate(dev, vq); + } + break; + default: + RTE_LOG(ERR, VHOST_CONFIG, "Invalid IOTLB message type (%d)\n", + imsg->type); + return -1; + } + + return 0; +} + /* return bytes# of read on success or negative val on failure. 
*/ static int read_vhost_message(int sockfd, struct VhostUserMsg *msg) @@ -907,8 +1131,16 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg) static int send_vhost_message(int sockfd, struct VhostUserMsg *msg) { - int ret; + if (!msg) + return 0; + + return send_fd_message(sockfd, (char *)msg, + VHOST_USER_HDR_SIZE + msg->size, NULL, 0); +} +static int +send_vhost_reply(int sockfd, struct VhostUserMsg *msg) +{ if (!msg) return 0; @@ -917,10 +1149,7 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg) msg->flags |= VHOST_USER_VERSION; msg->flags |= VHOST_USER_REPLY_MASK; - ret = send_fd_message(sockfd, (char *)msg, - VHOST_USER_HDR_SIZE + msg->size, NULL, 0); - - return ret; + return send_vhost_message(sockfd, msg); } /* @@ -931,7 +1160,7 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, VhostUserMsg *msg) { uint16_t vring_idx; - switch (msg->request) { + switch (msg->request.master) { case VHOST_USER_SET_VRING_KICK: case VHOST_USER_SET_VRING_CALL: case VHOST_USER_SET_VRING_ERR: @@ -983,7 +1212,7 @@ vhost_user_msg_handler(int vid, int fd) } ret = read_vhost_message(fd, &msg); - if (ret <= 0 || msg.request >= VHOST_USER_MAX) { + if (ret <= 0 || msg.request.master >= VHOST_USER_MAX) { if (ret < 0) RTE_LOG(ERR, VHOST_CONFIG, "vhost read message failed\n"); @@ -998,8 +1227,12 @@ vhost_user_msg_handler(int vid, int fd) } ret = 0; - RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n", - vhost_message_str[msg.request]); + if (msg.request.master != VHOST_USER_IOTLB_MSG) + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n", + vhost_message_str[msg.request.master]); + else + RTE_LOG(DEBUG, VHOST_CONFIG, "read message %s\n", + vhost_message_str[msg.request.master]); ret = vhost_user_check_and_alloc_queue_pair(dev, &msg); if (ret < 0) { @@ -1008,20 +1241,19 @@ vhost_user_msg_handler(int vid, int fd) return -1; } - switch (msg.request) { + switch (msg.request.master) { case VHOST_USER_GET_FEATURES: msg.payload.u64 = vhost_user_get_features(dev); msg.size = sizeof(msg.payload.u64); - send_vhost_message(fd, &msg); + send_vhost_reply(fd, &msg); break; case VHOST_USER_SET_FEATURES: vhost_user_set_features(dev, msg.payload.u64); break; case VHOST_USER_GET_PROTOCOL_FEATURES: - msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES; - msg.size = sizeof(msg.payload.u64); - send_vhost_message(fd, &msg); + vhost_user_get_protocol_features(dev, &msg); + send_vhost_reply(fd, &msg); break; case VHOST_USER_SET_PROTOCOL_FEATURES: vhost_user_set_protocol_features(dev, msg.payload.u64); @@ -1043,7 +1275,7 @@ vhost_user_msg_handler(int vid, int fd) /* it needs a reply */ msg.size = sizeof(msg.payload.u64); - send_vhost_message(fd, &msg); + send_vhost_reply(fd, &msg); break; case VHOST_USER_SET_LOG_FD: close(msg.fds[0]); @@ -1054,7 +1286,7 @@ vhost_user_msg_handler(int vid, int fd) vhost_user_set_vring_num(dev, &msg); break; case VHOST_USER_SET_VRING_ADDR: - vhost_user_set_vring_addr(dev, &msg); + vhost_user_set_vring_addr(&dev, &msg); break; case VHOST_USER_SET_VRING_BASE: vhost_user_set_vring_base(dev, &msg); @@ -1063,11 +1295,11 @@ vhost_user_msg_handler(int vid, int fd) case VHOST_USER_GET_VRING_BASE: vhost_user_get_vring_base(dev, &msg); msg.size = sizeof(msg.payload.state); - send_vhost_message(fd, &msg); + send_vhost_reply(fd, &msg); break; case VHOST_USER_SET_VRING_KICK: - vhost_user_set_vring_kick(dev, &msg); + vhost_user_set_vring_kick(&dev, &msg); break; case VHOST_USER_SET_VRING_CALL: vhost_user_set_vring_call(dev, &msg); @@ -1082,7 +1314,7 @@ vhost_user_msg_handler(int vid, int fd) case 
VHOST_USER_GET_QUEUE_NUM: msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS; msg.size = sizeof(msg.payload.u64); - send_vhost_message(fd, &msg); + send_vhost_reply(fd, &msg); break; case VHOST_USER_SET_VRING_ENABLE: @@ -1096,6 +1328,14 @@ vhost_user_msg_handler(int vid, int fd) ret = vhost_user_net_set_mtu(dev, &msg); break; + case VHOST_USER_SET_SLAVE_REQ_FD: + ret = vhost_user_set_req_fd(dev, &msg); + break; + + case VHOST_USER_IOTLB_MSG: + ret = vhost_user_iotlb_msg(&dev, &msg); + break; + default: ret = -1; break; @@ -1105,7 +1345,7 @@ vhost_user_msg_handler(int vid, int fd) if (msg.flags & VHOST_USER_NEED_REPLY) { msg.payload.u64 = !!ret; msg.size = sizeof(msg.payload.u64); - send_vhost_message(fd, &msg); + send_vhost_reply(fd, &msg); } if (!(dev->flags & VIRTIO_DEV_RUNNING) && virtio_is_ready(dev)) { @@ -1124,3 +1364,29 @@ vhost_user_msg_handler(int vid, int fd) return 0; } + +int +vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) +{ + int ret; + struct VhostUserMsg msg = { + .request.slave = VHOST_USER_SLAVE_IOTLB_MSG, + .flags = VHOST_USER_VERSION, + .size = sizeof(msg.payload.iotlb), + .payload.iotlb = { + .iova = iova, + .perm = perm, + .type = VHOST_IOTLB_MISS, + }, + }; + + ret = send_vhost_message(dev->slave_req_fd, &msg); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to send IOTLB miss message (%d)\n", + ret); + return ret; + } + + return 0; +} diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index 35ebd719..76d9fe2f 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -48,16 +48,14 @@ #define VHOST_USER_PROTOCOL_F_RARP 2 #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 #define VHOST_USER_PROTOCOL_F_NET_MTU 4 +#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 -/* - * disable REPLY_ACK feature to workaround the buggy QEMU implementation. - * Proved buggy QEMU includes v2.7 - v2.9. 
- */ #define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ (1ULL << VHOST_USER_PROTOCOL_F_RARP) | \ - (0ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ - (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU)) + (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ + (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \ + (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ)) typedef enum VhostUserRequest { VHOST_USER_NONE = 0, @@ -81,9 +79,17 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_ENABLE = 18, VHOST_USER_SEND_RARP = 19, VHOST_USER_NET_SET_MTU = 20, + VHOST_USER_SET_SLAVE_REQ_FD = 21, + VHOST_USER_IOTLB_MSG = 22, VHOST_USER_MAX } VhostUserRequest; +typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_NONE = 0, + VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_MAX +} VhostUserSlaveRequest; + typedef struct VhostUserMemoryRegion { uint64_t guest_phys_addr; uint64_t memory_size; @@ -103,7 +109,10 @@ typedef struct VhostUserLog { } VhostUserLog; typedef struct VhostUserMsg { - VhostUserRequest request; + union { + VhostUserRequest master; + VhostUserSlaveRequest slave; + } request; #define VHOST_USER_VERSION_MASK 0x3 #define VHOST_USER_REPLY_MASK (0x1 << 2) @@ -118,6 +127,7 @@ typedef struct VhostUserMsg { struct vhost_vring_addr addr; VhostUserMemory memory; VhostUserLog log; + struct vhost_iotlb_msg iotlb; } payload; int fds[VHOST_MEMORY_MAX_NREGIONS]; } __attribute((packed)) VhostUserMsg; @@ -130,6 +140,7 @@ typedef struct VhostUserMsg { /* vhost_user.c */ int vhost_user_msg_handler(int vid, int fd); +int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm); /* socket.c */ int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index a5f0eeba..6fee16e5 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -45,10 +45,13 @@ #include <rte_sctp.h> #include <rte_arp.h> +#include "iotlb.h" #include "vhost.h" #define MAX_PKT_BURST 32 +#define MAX_BATCH_LEN 256 + static bool is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) { @@ -105,6 +108,31 @@ update_shadow_used_ring(struct vhost_virtqueue *vq, vq->shadow_used_ring[i].len = len; } +static inline void +do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + struct batch_copy_elem *elem = vq->batch_copy_elems; + uint16_t count = vq->batch_copy_nb_elems; + int i; + + for (i = 0; i < count; i++) { + rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); + vhost_log_write(dev, elem[i].log_addr, elem[i].len); + PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); + } +} + +static inline void +do_data_copy_dequeue(struct vhost_virtqueue *vq) +{ + struct batch_copy_elem *elem = vq->batch_copy_elems; + uint16_t count = vq->batch_copy_nb_elems; + int i; + + for (i = 0; i < count; i++) + rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); +} + /* avoid write operation when necessary, to lessen cache issues */ #define ASSIGN_UNLESS_EQUAL(var, val) do { \ if ((var) != (val)) \ @@ -168,8 +196,9 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) } static __rte_always_inline int -copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, - struct rte_mbuf *m, uint16_t desc_idx, uint32_t size) +copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct vring_desc *descs, struct rte_mbuf *m, + uint16_t desc_idx, uint32_t size) { uint32_t desc_avail, desc_offset; uint32_t 
mbuf_avail, mbuf_offset; @@ -178,16 +207,22 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, uint64_t desc_addr; /* A counter to avoid desc dead loop chain */ uint16_t nr_desc = 1; + struct batch_copy_elem *batch_copy = vq->batch_copy_elems; + uint16_t copy_nb = vq->batch_copy_nb_elems; + int error = 0; desc = &descs[desc_idx]; - desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); + desc_addr = vhost_iova_to_vva(dev, vq, desc->addr, + desc->len, VHOST_ACCESS_RW); /* * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid * performance issue with some versions of gcc (4.8.4 and 5.3.0) which * otherwise stores offset on the stack instead of in a register. */ - if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) - return -1; + if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) { + error = -1; + goto out; + } rte_prefetch0((void *)(uintptr_t)desc_addr); @@ -213,27 +248,45 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, if (desc_avail == 0) { if ((desc->flags & VRING_DESC_F_NEXT) == 0) { /* Room in vring buffer is not enough */ - return -1; + error = -1; + goto out; + } + if (unlikely(desc->next >= size || ++nr_desc > size)) { + error = -1; + goto out; } - if (unlikely(desc->next >= size || ++nr_desc > size)) - return -1; desc = &descs[desc->next]; - desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); - if (unlikely(!desc_addr)) - return -1; + desc_addr = vhost_iova_to_vva(dev, vq, desc->addr, + desc->len, + VHOST_ACCESS_RW); + if (unlikely(!desc_addr)) { + error = -1; + goto out; + } desc_offset = 0; desc_avail = desc->len; } cpy_len = RTE_MIN(desc_avail, mbuf_avail); - rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), - rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), - cpy_len); - vhost_log_write(dev, desc->addr + desc_offset, cpy_len); - PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), - cpy_len, 0); + if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) { + rte_memcpy((void *)((uintptr_t)(desc_addr + + desc_offset)), + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), + cpy_len); + vhost_log_write(dev, desc->addr + desc_offset, cpy_len); + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + cpy_len, 0); + } else { + batch_copy[copy_nb].dst = + (void *)((uintptr_t)(desc_addr + desc_offset)); + batch_copy[copy_nb].src = + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); + batch_copy[copy_nb].log_addr = desc->addr + desc_offset; + batch_copy[copy_nb].len = cpy_len; + copy_nb++; + } mbuf_avail -= cpy_len; mbuf_offset += cpy_len; @@ -241,7 +294,10 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, desc_offset += cpy_len; } - return 0; +out: + vq->batch_copy_nb_elems = copy_nb; + + return error; } /** @@ -273,17 +329,29 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, if (unlikely(vq->enabled == 0)) return 0; + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + vhost_user_iotlb_rd_lock(vq); + + if (unlikely(vq->access_ok == 0)) { + if (unlikely(vring_translate(dev, vq) < 0)) { + count = 0; + goto out; + } + } + avail_idx = *((volatile uint16_t *)&vq->avail->idx); start_idx = vq->last_used_idx; free_entries = avail_idx - start_idx; count = RTE_MIN(count, free_entries); count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST); if (count == 0) - return 0; + goto out; LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n", dev->vid, start_idx, start_idx + count); + vq->batch_copy_nb_elems = 0; + /* Retrieve all of the desc indexes first to avoid caching issues. 
*/ rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]); for (i = 0; i < count; i++) { @@ -304,8 +372,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) { descs = (struct vring_desc *)(uintptr_t) - rte_vhost_gpa_to_vva(dev->mem, - vq->desc[desc_idx].addr); + vhost_iova_to_vva(dev, + vq, vq->desc[desc_idx].addr, + vq->desc[desc_idx].len, + VHOST_ACCESS_RO); if (unlikely(!descs)) { count = i; break; @@ -318,19 +388,18 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, sz = vq->size; } - err = copy_mbuf_to_desc(dev, descs, pkts[i], desc_idx, sz); + err = copy_mbuf_to_desc(dev, vq, descs, pkts[i], desc_idx, sz); if (unlikely(err)) { - used_idx = (start_idx + i) & (vq->size - 1); - vq->used->ring[used_idx].len = dev->vhost_hlen; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, ring[used_idx]), - sizeof(vq->used->ring[used_idx])); + count = i; + break; } if (i + 1 < count) rte_prefetch0(&vq->desc[desc_indexes[i+1]]); } + do_data_copy_enqueue(dev, vq); + rte_smp_wmb(); *(volatile uint16_t *)&vq->used->idx += count; @@ -346,6 +415,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) && (vq->callfd >= 0)) eventfd_write(vq->callfd, (eventfd_t)1); +out: + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + vhost_user_iotlb_rd_unlock(vq); + return count; } @@ -364,7 +437,9 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq, if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { descs = (struct vring_desc *)(uintptr_t) - rte_vhost_gpa_to_vva(dev->mem, vq->desc[idx].addr); + vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, + vq->desc[idx].len, + VHOST_ACCESS_RO); if (unlikely(!descs)) return -1; @@ -439,8 +514,9 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, } static __rte_always_inline int -copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, - struct buf_vector *buf_vec, uint16_t num_buffers) +copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mbuf *m, struct buf_vector *buf_vec, + uint16_t num_buffers) { uint32_t vec_idx = 0; uint64_t desc_addr; @@ -449,13 +525,22 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, uint32_t cpy_len; uint64_t hdr_addr, hdr_phys_addr; struct rte_mbuf *hdr_mbuf; + struct batch_copy_elem *batch_copy = vq->batch_copy_elems; + uint16_t copy_nb = vq->batch_copy_nb_elems; + int error = 0; - if (unlikely(m == NULL)) - return -1; + if (unlikely(m == NULL)) { + error = -1; + goto out; + } - desc_addr = rte_vhost_gpa_to_vva(dev->mem, buf_vec[vec_idx].buf_addr); - if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) - return -1; + desc_addr = vhost_iova_to_vva(dev, vq, buf_vec[vec_idx].buf_addr, + buf_vec[vec_idx].buf_len, + VHOST_ACCESS_RW); + if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) { + error = -1; + goto out; + } hdr_mbuf = m; hdr_addr = desc_addr; @@ -474,10 +559,15 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, /* done with current desc buf, get the next one */ if (desc_avail == 0) { vec_idx++; - desc_addr = rte_vhost_gpa_to_vva(dev->mem, - buf_vec[vec_idx].buf_addr); - if (unlikely(!desc_addr)) - return -1; + desc_addr = + vhost_iova_to_vva(dev, vq, + buf_vec[vec_idx].buf_addr, + buf_vec[vec_idx].buf_len, + VHOST_ACCESS_RW); + if (unlikely(!desc_addr)) { + error = -1; + goto out; + } /* Prefetch buffer address. 
*/ rte_prefetch0((void *)(uintptr_t)desc_addr); @@ -509,13 +599,27 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, } cpy_len = RTE_MIN(desc_avail, mbuf_avail); - rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), - rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), - cpy_len); - vhost_log_write(dev, buf_vec[vec_idx].buf_addr + desc_offset, - cpy_len); - PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), - cpy_len, 0); + + if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) { + rte_memcpy((void *)((uintptr_t)(desc_addr + + desc_offset)), + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), + cpy_len); + vhost_log_write(dev, + buf_vec[vec_idx].buf_addr + desc_offset, + cpy_len); + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + cpy_len, 0); + } else { + batch_copy[copy_nb].dst = + (void *)((uintptr_t)(desc_addr + desc_offset)); + batch_copy[copy_nb].src = + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); + batch_copy[copy_nb].log_addr = + buf_vec[vec_idx].buf_addr + desc_offset; + batch_copy[copy_nb].len = cpy_len; + copy_nb++; + } mbuf_avail -= cpy_len; mbuf_offset += cpy_len; @@ -523,7 +627,10 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, desc_offset += cpy_len; } - return 0; +out: + vq->batch_copy_nb_elems = copy_nb; + + return error; } static __rte_always_inline uint32_t @@ -547,9 +654,18 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, if (unlikely(vq->enabled == 0)) return 0; + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + vhost_user_iotlb_rd_lock(vq); + + if (unlikely(vq->access_ok == 0)) + if (unlikely(vring_translate(dev, vq) < 0)) + goto out; + count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); if (count == 0) - return 0; + goto out; + + vq->batch_copy_nb_elems = 0; rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); @@ -572,7 +688,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, dev->vid, vq->last_avail_idx, vq->last_avail_idx + num_buffers); - if (copy_mbuf_to_desc_mergeable(dev, pkts[pkt_idx], + if (copy_mbuf_to_desc_mergeable(dev, vq, pkts[pkt_idx], buf_vec, num_buffers) < 0) { vq->shadow_used_idx -= num_buffers; break; @@ -581,6 +697,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, vq->last_avail_idx += num_buffers; } + do_data_copy_enqueue(dev, vq); + if (likely(vq->shadow_used_idx)) { flush_shadow_used_ring(dev, vq); @@ -593,6 +711,10 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, eventfd_write(vq->callfd, (eventfd_t)1); } +out: + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + vhost_user_iotlb_rd_unlock(vq); + return pkt_idx; } @@ -766,8 +888,9 @@ put_zmbuf(struct zcopy_mbuf *zmbuf) } static __rte_always_inline int -copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, - uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx, +copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct vring_desc *descs, uint16_t max_desc, + struct rte_mbuf *m, uint16_t desc_idx, struct rte_mempool *mbuf_pool) { struct vring_desc *desc; @@ -779,15 +902,25 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, struct virtio_net_hdr *hdr = NULL; /* A counter to avoid desc dead loop chain */ uint32_t nr_desc = 1; + struct batch_copy_elem *batch_copy = vq->batch_copy_elems; + uint16_t copy_nb = vq->batch_copy_nb_elems; + int error = 0; desc = &descs[desc_idx]; if (unlikely((desc->len < dev->vhost_hlen)) || - (desc->flags & VRING_DESC_F_INDIRECT)) - 
return -1; + (desc->flags & VRING_DESC_F_INDIRECT)) { + error = -1; + goto out; + } - desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); - if (unlikely(!desc_addr)) - return -1; + desc_addr = vhost_iova_to_vva(dev, + vq, desc->addr, + desc->len, + VHOST_ACCESS_RO); + if (unlikely(!desc_addr)) { + error = -1; + goto out; + } if (virtio_net_with_host_offload(dev)) { hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr); @@ -802,12 +935,19 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, if (likely((desc->len == dev->vhost_hlen) && (desc->flags & VRING_DESC_F_NEXT) != 0)) { desc = &descs[desc->next]; - if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) - return -1; + if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) { + error = -1; + goto out; + } - desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); - if (unlikely(!desc_addr)) - return -1; + desc_addr = vhost_iova_to_vva(dev, + vq, desc->addr, + desc->len, + VHOST_ACCESS_RO); + if (unlikely(!desc_addr)) { + error = -1; + goto out; + } desc_offset = 0; desc_avail = desc->len; @@ -838,7 +978,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, cur->data_len = cpy_len; cur->data_off = 0; cur->buf_addr = (void *)(uintptr_t)desc_addr; - cur->buf_physaddr = hpa; + cur->buf_iova = hpa; /* * In zero copy mode, one mbuf can only reference data @@ -846,10 +986,24 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, */ mbuf_avail = cpy_len; } else { - rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, - mbuf_offset), - (void *)((uintptr_t)(desc_addr + desc_offset)), - cpy_len); + if (likely(cpy_len > MAX_BATCH_LEN || + copy_nb >= vq->size || + (hdr && cur == m))) { + rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, + mbuf_offset), + (void *)((uintptr_t)(desc_addr + + desc_offset)), + cpy_len); + } else { + batch_copy[copy_nb].dst = + rte_pktmbuf_mtod_offset(cur, void *, + mbuf_offset); + batch_copy[copy_nb].src = + (void *)((uintptr_t)(desc_addr + + desc_offset)); + batch_copy[copy_nb].len = cpy_len; + copy_nb++; + } } mbuf_avail -= cpy_len; @@ -863,15 +1017,24 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, break; if (unlikely(desc->next >= max_desc || - ++nr_desc > max_desc)) - return -1; + ++nr_desc > max_desc)) { + error = -1; + goto out; + } desc = &descs[desc->next]; - if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) - return -1; + if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) { + error = -1; + goto out; + } - desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); - if (unlikely(!desc_addr)) - return -1; + desc_addr = vhost_iova_to_vva(dev, + vq, desc->addr, + desc->len, + VHOST_ACCESS_RO); + if (unlikely(!desc_addr)) { + error = -1; + goto out; + } rte_prefetch0((void *)(uintptr_t)desc_addr); @@ -890,7 +1053,8 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, if (unlikely(cur == NULL)) { RTE_LOG(ERR, VHOST_DATA, "Failed to " "allocate memory for mbuf.\n"); - return -1; + error = -1; + goto out; } if (unlikely(dev->dequeue_zero_copy)) rte_mbuf_refcnt_update(cur, 1); @@ -912,7 +1076,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, if (hdr) vhost_dequeue_offload(hdr, m); - return 0; +out: + vq->batch_copy_nb_elems = copy_nb; + + return error; } static __rte_always_inline void @@ -1016,6 +1183,15 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, if (unlikely(vq->enabled == 0)) return 0; + vq->batch_copy_nb_elems = 0; + + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + 
vhost_user_iotlb_rd_lock(vq); + + if (unlikely(vq->access_ok == 0)) + if (unlikely(vring_translate(dev, vq) < 0)) + goto out; + if (unlikely(dev->dequeue_zero_copy)) { struct zcopy_mbuf *zmbuf, *next; int nr_updated = 0; @@ -1115,8 +1291,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) { desc = (struct vring_desc *)(uintptr_t) - rte_vhost_gpa_to_vva(dev->mem, - vq->desc[desc_indexes[i]].addr); + vhost_iova_to_vva(dev, vq, + vq->desc[desc_indexes[i]].addr, + sizeof(*desc), + VHOST_ACCESS_RO); if (unlikely(!desc)) break; @@ -1136,7 +1314,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, break; } - err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool); + err = copy_desc_to_mbuf(dev, vq, desc, sz, pkts[i], idx, + mbuf_pool); if (unlikely(err)) { rte_pktmbuf_free(pkts[i]); break; @@ -1168,11 +1347,15 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, vq->last_avail_idx += i; if (likely(dev->dequeue_zero_copy == 0)) { + do_data_copy_dequeue(vq); vq->last_used_idx += i; update_used_idx(dev, vq, i); } out: + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + vhost_user_iotlb_rd_unlock(vq); + if (unlikely(rarp_mbuf != NULL)) { /* * Inject it to the head of "pkts" array, so that switch's mac |
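
Note on the batched-copy path introduced in the virtio_net.c hunks above: copy_mbuf_to_desc(), copy_mbuf_to_desc_mergeable() and copy_desc_to_mbuf() now defer copies no larger than MAX_BATCH_LEN into vq->batch_copy_elems (bounded by vq->size, hence the copy_nb >= vq->size check) and replay them once per burst in do_data_copy_enqueue()/do_data_copy_dequeue(), with the flush happening before the used ring index is exposed to the guest. What follows is a minimal, self-contained sketch of that pattern only, assuming plain memcpy in place of rte_memcpy; batch_push, batch_flush and the SKETCH_* constants are hypothetical names for illustration, not DPDK API.

/*
 * Sketch (not the DPDK implementation): defer copies below a size
 * threshold into a fixed array and flush them in one tight loop, the
 * same idea as vq->batch_copy_elems / do_data_copy_enqueue() above.
 */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define SKETCH_MAX_BATCH_LEN 256   /* mirrors MAX_BATCH_LEN in virtio_net.c */
#define SKETCH_BATCH_SIZE    64    /* stand-in for vq->size */

struct batch_elem {
	void       *dst;
	const void *src;
	size_t      len;
};

struct batch {
	struct batch_elem elems[SKETCH_BATCH_SIZE];
	uint16_t          nb_elems;
};

/* Copy immediately when large or when the batch is full, otherwise defer. */
static void
batch_push(struct batch *b, void *dst, const void *src, size_t len)
{
	if (len > SKETCH_MAX_BATCH_LEN || b->nb_elems >= SKETCH_BATCH_SIZE) {
		memcpy(dst, src, len);
		return;
	}
	b->elems[b->nb_elems++] = (struct batch_elem){ dst, src, len };
}

/* One flush per burst replays all deferred small copies back to back. */
static void
batch_flush(struct batch *b)
{
	for (uint16_t i = 0; i < b->nb_elems; i++)
		memcpy(b->elems[i].dst, b->elems[i].src, b->elems[i].len);
	b->nb_elems = 0;
}

int
main(void)
{
	static char ring[4][64];
	const char *pkts[4] = { "p0", "p1", "p2", "p3" };
	struct batch b = { .nb_elems = 0 };

	for (int i = 0; i < 4; i++)
		batch_push(&b, ring[i], pkts[i], strlen(pkts[i]) + 1);
	batch_flush(&b);

	printf("%s %s %s %s\n", ring[0], ring[1], ring[2], ring[3]);
	return 0;
}

Flushing once per burst keeps the many small copies in one tight loop instead of interleaving them with descriptor parsing and logging, which appears to be the motivation for threading the vq pointer through the copy helpers in this patch.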