Diffstat (limited to 'lib')
223 files changed, 19054 insertions, 5708 deletions
diff --git a/lib/Makefile b/lib/Makefile index 990f23a4..07e1fd0c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,6 +1,6 @@ # BSD LICENSE # -# Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +# Copyright(c) 2010-2017 Intel Corporation. All rights reserved. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -34,33 +34,82 @@ include $(RTE_SDK)/mk/rte.vars.mk DIRS-y += librte_compat DIRS-$(CONFIG_RTE_LIBRTE_EAL) += librte_eal DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_ring +DEPDIRS-librte_ring := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += librte_mempool +DEPDIRS-librte_mempool := librte_eal librte_ring DIRS-$(CONFIG_RTE_LIBRTE_MBUF) += librte_mbuf +DEPDIRS-librte_mbuf := librte_eal librte_mempool DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += librte_timer +DEPDIRS-librte_timer := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += librte_cfgfile +DEPDIRS-librte_cfgfile := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline +DEPDIRS-librte_cmdline := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether +DEPDIRS-librte_ether := librte_net librte_eal librte_mempool librte_ring +DEPDIRS-librte_ether += librte_mbuf DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev +DEPDIRS-librte_cryptodev := librte_eal librte_mempool librte_ring librte_mbuf +DEPDIRS-librte_cryptodev += librte_kvargs +DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += librte_eventdev +DEPDIRS-librte_eventdev := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost +DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash +DEPDIRS-librte_hash := librte_eal librte_ring +DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd +DEPDIRS-librte_efd := librte_eal librte_ring librte_hash DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm +DEPDIRS-librte_lpm := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl +DEPDIRS-librte_acl := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net +DEPDIRS-librte_net := librte_mbuf librte_eal DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag +DEPDIRS-librte_ip_frag := librte_eal librte_mempool librte_mbuf librte_ether +DEPDIRS-librte_ip_frag += librte_hash DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += librte_jobstats +DEPDIRS-librte_jobstats := librte_eal +DIRS-$(CONFIG_RTE_LIBRTE_METRICS) += librte_metrics +DEPDIRS-librte_metrics := librte_eal +DIRS-$(CONFIG_RTE_LIBRTE_BITRATE) += librte_bitratestats +DEPDIRS-librte_bitratestats := librte_eal librte_metrics librte_ether +DIRS-$(CONFIG_RTE_LIBRTE_LATENCY_STATS) += librte_latencystats +DEPDIRS-librte_latencystats := librte_eal librte_metrics librte_ether librte_mbuf DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power +DEPDIRS-librte_power := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter +DEPDIRS-librte_meter := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched +DEPDIRS-librte_sched := librte_eal librte_mempool librte_mbuf librte_net +DEPDIRS-librte_sched += librte_timer DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs +DEPDIRS-librte_kvargs := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor +DEPDIRS-librte_distributor := librte_eal librte_mbuf librte_ether DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port +DEPDIRS-librte_port := librte_eal librte_mempool librte_mbuf librte_ether +DEPDIRS-librte_port += librte_ip_frag librte_sched +ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) +DEPDIRS-librte_port += librte_kni +endif DIRS-$(CONFIG_RTE_LIBRTE_TABLE) += librte_table +DEPDIRS-librte_table := librte_eal librte_mempool 
librte_mbuf +DEPDIRS-librte_table += librte_port librte_lpm librte_hash +ifeq ($(CONFIG_RTE_LIBRTE_ACL),y) +DEPDIRS-librte_table += librte_acl +endif DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline +DEPDIRS-librte_pipeline := librte_eal librte_mempool librte_mbuf +DEPDIRS-librte_pipeline += librte_table librte_port DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder +DEPDIRS-librte_reorder := librte_eal librte_mempool librte_mbuf DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump +DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ether ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni endif +DEPDIRS-librte_kni:= librte_eal librte_mempool librte_mbuf librte_ether include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile index d05be665..e2dacd60 100644 --- a/lib/librte_acl/Makefile +++ b/lib/librte_acl/Makefile @@ -92,7 +92,4 @@ endif SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include := rte_acl_osdep.h SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include += rte_acl.h -# this lib needs eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_ACL) += lib/librte_eal - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c index 9e5ad1c6..0768cd3b 100644 --- a/lib/librte_acl/acl_bld.c +++ b/lib/librte_acl/acl_bld.c @@ -830,8 +830,8 @@ acl_gen_range_trie(struct acl_build_context *context, { int32_t n; struct rte_acl_node *root; - const uint8_t *lo = (const uint8_t *)min; - const uint8_t *hi = (const uint8_t *)max; + const uint8_t *lo = min; + const uint8_t *hi = max; *pend = acl_alloc_node(context, level+size); root = acl_alloc_node(context, level++); @@ -886,8 +886,8 @@ acl_gen_mask_trie(struct acl_build_context *context, struct rte_acl_node *root; struct rte_acl_node *node, *prev; struct rte_acl_bitset bits; - const uint8_t *val = (const uint8_t *)value; - const uint8_t *msk = (const uint8_t *)mask; + const uint8_t *val = value; + const uint8_t *msk = mask; root = acl_alloc_node(context, level++); prev = root; diff --git a/lib/librte_acl/acl_run.h b/lib/librte_acl/acl_run.h index 024f3931..a862ff6e 100644 --- a/lib/librte_acl/acl_run.h +++ b/lib/librte_acl/acl_run.h @@ -69,10 +69,10 @@ struct acl_flow_data { uint32_t trie; /* current trie index (0 to N-1) */ uint32_t cmplt_size; + /* maximum number of packets to process */ uint32_t total_packets; - uint32_t categories; /* number of result categories per packet. */ - /* maximum number of packets to process */ + uint32_t categories; const uint64_t *trans; const uint8_t **data; uint32_t *results; diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c index 8b7e92ce..d1f40bef 100644 --- a/lib/librte_acl/rte_acl.c +++ b/lib/librte_acl/rte_acl.c @@ -313,8 +313,7 @@ acl_check_rule(const struct rte_acl_rule_data *rd) if ((RTE_LEN2MASK(RTE_ACL_MAX_CATEGORIES, typeof(rd->category_mask)) & rd->category_mask) == 0 || rd->priority > RTE_ACL_MAX_PRIORITY || - rd->priority < RTE_ACL_MIN_PRIORITY || - rd->userdata == RTE_ACL_INVALID_USERDATA) + rd->priority < RTE_ACL_MIN_PRIORITY) return -EINVAL; return 0; } diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h index caa91f7e..b53179a8 100644 --- a/lib/librte_acl/rte_acl.h +++ b/lib/librte_acl/rte_acl.h @@ -120,8 +120,6 @@ enum { RTE_ACL_MIN_PRIORITY = 0, }; -#define RTE_ACL_INVALID_USERDATA 0 - #define RTE_ACL_MASKLEN_TO_BITMASK(v, s) \ ((v) == 0 ? 
(v) : (typeof(v))((uint64_t)-1 << ((s) * CHAR_BIT - (v)))) diff --git a/lib/librte_bitratestats/Makefile b/lib/librte_bitratestats/Makefile new file mode 100644 index 00000000..58a20ea0 --- /dev/null +++ b/lib/librte_bitratestats/Makefile @@ -0,0 +1,49 @@ +# BSD LICENSE +# +# Copyright(c) 2017 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_bitratestats.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 + +EXPORT_MAP := rte_bitratestats_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_BITRATE) := rte_bitrate.c + +# Install header file +SYMLINK-$(CONFIG_RTE_LIBRTE_BITRATE)-include += rte_bitrate.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_bitratestats/rte_bitrate.c b/lib/librte_bitratestats/rte_bitrate.c new file mode 100644 index 00000000..193aa690 --- /dev/null +++ b/lib/librte_bitratestats/rte_bitrate.c @@ -0,0 +1,153 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_common.h> +#include <rte_ethdev.h> +#include <rte_malloc.h> +#include <rte_metrics.h> +#include <rte_bitrate.h> + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +/* + * Persistent bit-rate data. + * @internal + */ +struct rte_stats_bitrate { + uint64_t last_ibytes; + uint64_t last_obytes; + uint64_t peak_ibits; + uint64_t peak_obits; + uint64_t mean_ibits; + uint64_t mean_obits; + uint64_t ewma_ibits; + uint64_t ewma_obits; +}; + +struct rte_stats_bitrates { + struct rte_stats_bitrate port_stats[RTE_MAX_ETHPORTS]; + uint16_t id_stats_set; +}; + +struct rte_stats_bitrates * +rte_stats_bitrate_create(void) +{ + return rte_zmalloc(NULL, sizeof(struct rte_stats_bitrates), + RTE_CACHE_LINE_SIZE); +} + +int +rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data) +{ + const char * const names[] = { + "ewma_bits_in", "ewma_bits_out", + "mean_bits_in", "mean_bits_out", + "peak_bits_in", "peak_bits_out", + }; + int return_value; + + return_value = rte_metrics_reg_names(&names[0], ARRAY_SIZE(names)); + if (return_value >= 0) + bitrate_data->id_stats_set = return_value; + return return_value; +} + +int +rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data, + uint8_t port_id) +{ + struct rte_stats_bitrate *port_data; + struct rte_eth_stats eth_stats; + int ret_code; + uint64_t cnt_bits; + int64_t delta; + const int64_t alpha_percent = 20; + uint64_t values[6]; + + ret_code = rte_eth_stats_get(port_id, ð_stats); + if (ret_code != 0) + return ret_code; + + port_data = &bitrate_data->port_stats[port_id]; + + /* Incoming bitrate. This is an iteratively calculated EWMA + * (Exponentially Weighted Moving Average) that uses a + * weighting factor of alpha_percent. An unsmoothed mean + * for just the current time delta is also calculated for the + * benefit of people who don't understand signal processing. 
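An aside on the smoothing used here: the code below this comment implements the standard EWMA recurrence ewma += alpha * (sample - ewma) in integer arithmetic, with alpha = alpha_percent / 100. A minimal standalone restatement of that update step (hypothetical helper, not part of the patch):

    static uint64_t
    ewma_update(uint64_t ewma, uint64_t sample)
    {
        const int64_t alpha_percent = 20;   /* same weighting factor as rte_bitrate.c */
        int64_t delta = (int64_t)sample - (int64_t)ewma;

        /* the +/-50 rounds the division by 100 to nearest */
        if (delta > 0)
            delta = (delta * alpha_percent + 50) / 100;
        else
            delta = (delta * alpha_percent - 50) / 100;
        return ewma + delta;
    }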
+ */ + cnt_bits = (eth_stats.ibytes - port_data->last_ibytes) << 3; + port_data->last_ibytes = eth_stats.ibytes; + if (cnt_bits > port_data->peak_ibits) + port_data->peak_ibits = cnt_bits; + delta = cnt_bits; + delta -= port_data->ewma_ibits; + /* The +-50 fixes integer rounding during division */ + if (delta > 0) + delta = (delta * alpha_percent + 50) / 100; + else + delta = (delta * alpha_percent - 50) / 100; + port_data->ewma_ibits += delta; + /* Integer roundoff prevents EWMA between 0 and (100/alpha_percent) + * ever reaching zero in no-traffic conditions + */ + if (cnt_bits == 0 && delta == 0) + port_data->ewma_ibits = 0; + port_data->mean_ibits = cnt_bits; + + /* Outgoing bitrate (also EWMA) */ + cnt_bits = (eth_stats.obytes - port_data->last_obytes) << 3; + port_data->last_obytes = eth_stats.obytes; + if (cnt_bits > port_data->peak_obits) + port_data->peak_obits = cnt_bits; + delta = cnt_bits; + delta -= port_data->ewma_obits; + if (delta > 0) + delta = (delta * alpha_percent + 50) / 100; + else + delta = (delta * alpha_percent - 50) / 100; + port_data->ewma_obits += delta; + if (cnt_bits == 0 && delta == 0) + port_data->ewma_obits = 0; + port_data->mean_obits = cnt_bits; + + values[0] = port_data->ewma_ibits; + values[1] = port_data->ewma_obits; + values[2] = port_data->mean_ibits; + values[3] = port_data->mean_obits; + values[4] = port_data->peak_ibits; + values[5] = port_data->peak_obits; + rte_metrics_update_values(port_id, bitrate_data->id_stats_set, + values, ARRAY_SIZE(values)); + return 0; +} diff --git a/lib/librte_eal/common/include/rte_warnings.h b/lib/librte_bitratestats/rte_bitrate.h index 54b545c9..15fc270a 100644 --- a/lib/librte_eal/common/include/rte_warnings.h +++ b/lib/librte_bitratestats/rte_bitrate.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,54 +31,64 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/** - * @file - * Definitions of warnings for use of various insecure functions - */ +#ifndef _RTE_BITRATE_H_ +#define _RTE_BITRATE_H_ -#ifndef _RTE_WARNINGS_H_ -#define _RTE_WARNINGS_H_ +#include <stdint.h> -#ifdef RTE_INSECURE_FUNCTION_WARNING +#ifdef __cplusplus +extern "C" { +#endif -/* we need to include all used standard header files so that they appear - * _before_ we poison the function names. +/** + * Bitrate statistics data structure. + * This data structure is intentionally opaque. */ +struct rte_stats_bitrates; -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> -#include <errno.h> -#ifdef RTE_EXEC_ENV_LINUXAPP -#include <dirent.h> -#endif -/* the following function are deemed not fully secure for use e.g. they - * do not always null-terminate arguments */ -#pragma GCC poison sprintf strtok snprintf vsnprintf -#pragma GCC poison strlen strcpy strcat -#pragma GCC poison sscanf +/** + * Allocate a bitrate statistics structure + * + * @return + * - Pointer to structure on success + * - NULL on error (zmalloc failure) + */ +struct rte_stats_bitrates *rte_stats_bitrate_create(void); -/* other unsafe functions may be implemented as macros so just undef them */ -#ifdef strsep -#undef strsep -#else -#pragma GCC poison strsep -#endif -#ifdef strncpy -#undef strncpy -#else -#pragma GCC poison strncpy -#endif +/** + * Register bitrate statistics with the metric library.
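A plausible usage sketch of the bitrate API defined in this new file, assuming the metrics library has already been set up with rte_metrics_init() and with error handling elided:

    struct rte_stats_bitrates *bitrates;
    uint8_t port, nb_ports;

    rte_metrics_init(rte_socket_id());      /* metrics library must be initialised first */
    bitrates = rte_stats_bitrate_create();
    rte_stats_bitrate_reg(bitrates);        /* registers the six bit-rate metric names */

    nb_ports = rte_eth_dev_count();
    for (;;) {                              /* one pass per sampling window */
        for (port = 0; port < nb_ports; port++)
            rte_stats_bitrate_calc(bitrates, port);
        rte_delay_ms(1000);                 /* window width chosen by the application */
    }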
+ * + * @param bitrate_data + * Pointer allocated by rte_stats_bitrate_create() + * + * @return + * Zero on success + * Negative on error + */ +int rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data); -#ifdef strncat -#undef strncat -#else -#pragma GCC poison strncat -#endif +/** + * Calculate statistics for current time window. The period with which + * this function is called should be the intended sampling window width. + * + * @param bitrate_data + * Bitrate statistics data pointer + * + * @param port_id + * Port id to calculate statistics for + * + * @return + * - Zero on success + * - Negative value on error + */ +int rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data, + uint8_t port_id); + +#ifdef __cplusplus +} #endif -#endif /* RTE_WARNINGS_H */ +#endif /* _RTE_BITRATE_H_ */ diff --git a/lib/librte_bitratestats/rte_bitratestats_version.map b/lib/librte_bitratestats/rte_bitratestats_version.map new file mode 100644 index 00000000..fe745445 --- /dev/null +++ b/lib/librte_bitratestats/rte_bitratestats_version.map @@ -0,0 +1,9 @@ +DPDK_17.05 { + global: + + rte_stats_bitrate_calc; + rte_stats_bitrate_create; + rte_stats_bitrate_reg; + + local: *; +}; diff --git a/lib/librte_cfgfile/Makefile b/lib/librte_cfgfile/Makefile index 616aef09..755ef11f 100644 --- a/lib/librte_cfgfile/Makefile +++ b/lib/librte_cfgfile/Makefile @@ -51,7 +51,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_CFGFILE) += rte_cfgfile.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_CFGFILE)-include += rte_cfgfile.h -# this lib needs eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += lib/librte_eal - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_cfgfile/rte_cfgfile.c b/lib/librte_cfgfile/rte_cfgfile.c index d72052a0..b54a523d 100644 --- a/lib/librte_cfgfile/rte_cfgfile.c +++ b/lib/librte_cfgfile/rte_cfgfile.c @@ -35,6 +35,7 @@ #include <stdlib.h> #include <string.h> #include <ctype.h> +#include <rte_common.h> #include <rte_string_fns.h> #include "rte_cfgfile.h" @@ -58,6 +59,25 @@ struct rte_cfgfile { * for new entries do we add in */ #define CFG_ALLOC_ENTRY_BATCH 16 +/** + * Default cfgfile load parameters. + */ +static const struct rte_cfgfile_parameters default_cfgfile_params = { + .comment_character = CFG_DEFAULT_COMMENT_CHARACTER, +}; + +/** + * Defines the list of acceptable comment characters supported by this + * library.
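A short sketch of how an application might select one of those comment characters via the new entry point (hypothetical file name; error handling elided):

    struct rte_cfgfile_parameters params = {
        .comment_character = '#',   /* must be one of '!', '#', '%', ';', '@' */
    };
    struct rte_cfgfile *cfg;

    /* parse a file whose comments start with '#' rather than the default ';' */
    cfg = rte_cfgfile_load_with_params("app.cfg", 0, &params);
    if (cfg == NULL)
        rte_exit(EXIT_FAILURE, "cannot load app.cfg\n");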
+ */ +static const char valid_comment_chars[] = { + '!', + '#', + '%', + ';', + '@' +}; + static unsigned _strip(char *str, unsigned len) { @@ -85,17 +105,56 @@ _strip(char *str, unsigned len) return newlen; } +static int +rte_cfgfile_check_params(const struct rte_cfgfile_parameters *params) +{ + unsigned int valid_comment; + unsigned int i; + + if (!params) { + printf("Error - missing cfgfile parameters\n"); + return -EINVAL; + } + + valid_comment = 0; + for (i = 0; i < RTE_DIM(valid_comment_chars); i++) { + if (params->comment_character == valid_comment_chars[i]) { + valid_comment = 1; + break; + } + } + + if (valid_comment == 0) { + printf("Error - invalid comment characters %c\n", + params->comment_character); + return -ENOTSUP; + } + + return 0; +} + struct rte_cfgfile * rte_cfgfile_load(const char *filename, int flags) { + return rte_cfgfile_load_with_params(filename, flags, + &default_cfgfile_params); +} + +struct rte_cfgfile * +rte_cfgfile_load_with_params(const char *filename, int flags, + const struct rte_cfgfile_parameters *params) +{ int allocated_sections = CFG_ALLOC_SECTION_BATCH; int allocated_entries = 0; int curr_section = -1; int curr_entry = -1; - char buffer[256] = {0}; + char buffer[CFG_NAME_LEN + CFG_VALUE_LEN + 4] = {0}; int lineno = 0; struct rte_cfgfile *cfg = NULL; + if (rte_cfgfile_check_params(params)) + return NULL; + FILE *f = fopen(filename, "r"); if (f == NULL) return NULL; @@ -107,6 +166,22 @@ rte_cfgfile_load(const char *filename, int flags) memset(cfg->sections, 0, sizeof(cfg->sections[0]) * allocated_sections); + if (flags & CFG_FLAG_GLOBAL_SECTION) { + curr_section = 0; + allocated_entries = CFG_ALLOC_ENTRY_BATCH; + cfg->sections[curr_section] = malloc( + sizeof(*cfg->sections[0]) + + sizeof(cfg->sections[0]->entries[0]) * + allocated_entries); + if (cfg->sections[curr_section] == NULL) { + printf("Error - no memory for global section\n"); + goto error1; + } + + snprintf(cfg->sections[curr_section]->name, + sizeof(cfg->sections[0]->name), "GLOBAL"); + } + while (fgets(buffer, sizeof(buffer), f) != NULL) { char *pos = NULL; size_t len = strnlen(buffer, sizeof(buffer)); @@ -116,7 +191,7 @@ rte_cfgfile_load(const char *filename, int flags) "Check if line too long\n", lineno); goto error1; } - pos = memchr(buffer, ';', sizeof(buffer)); + pos = memchr(buffer, params->comment_character, len); if (pos != NULL) { *pos = '\0'; len = pos - buffer; @@ -151,6 +226,7 @@ rte_cfgfile_load(const char *filename, int flags) sizeof(*cfg) + sizeof(cfg->sections[0]) * allocated_sections); if (n_cfg == NULL) { + curr_section--; printf("Error - no more memory\n"); goto error1; } @@ -182,12 +258,21 @@ rte_cfgfile_load(const char *filename, int flags) struct rte_cfgfile_section *sect = cfg->sections[curr_section]; - char *split[2]; - if (rte_strsplit(buffer, sizeof(buffer), split, 2, '=') - != 2) { - printf("Error at line %d - cannot split " - "string\n", lineno); - goto error1; + int n; + char *split[2] = {NULL}; + n = rte_strsplit(buffer, sizeof(buffer), split, 2, '='); + if (flags & CFG_FLAG_EMPTY_VALUES) { + if ((n < 1) || (n > 2)) { + printf("Error at line %d - cannot split string, n=%d\n", + lineno, n); + goto error1; + } + } else { + if (n != 2) { + printf("Error at line %d - cannot split string, n=%d\n", + lineno, n); + goto error1; + } } curr_entry++; @@ -198,6 +283,7 @@ rte_cfgfile_load(const char *filename, int flags) sizeof(sect->entries[0]) * allocated_entries); if (n_sect == NULL) { + curr_entry--; printf("Error - no more memory\n"); goto error1; } @@ -216,7 
+302,7 @@ rte_cfgfile_load(const char *filename, int flags) snprintf(entry->name, sizeof(entry->name), "%s", split[0]); snprintf(entry->value, sizeof(entry->value), "%s", - split[1]); + split[1] ? split[1] : ""); _strip(entry->name, strnlen(entry->name, sizeof(entry->name))); _strip(entry->value, strnlen(entry->value, @@ -233,6 +319,8 @@ rte_cfgfile_load(const char *filename, int flags) error1: cfg->num_sections = curr_section + 1; + if (curr_section >= 0) + cfg->sections[curr_section]->num_entries = curr_entry + 1; rte_cfgfile_close(cfg); error2: fclose(f); diff --git a/lib/librte_cfgfile/rte_cfgfile.h b/lib/librte_cfgfile/rte_cfgfile.h index b40e6a13..fa10d408 100644 --- a/lib/librte_cfgfile/rte_cfgfile.h +++ b/lib/librte_cfgfile/rte_cfgfile.h @@ -66,19 +66,61 @@ struct rte_cfgfile_entry { char value[CFG_VALUE_LEN]; /**< Value */ }; +/** Configuration file operation optional arguments */ +struct rte_cfgfile_parameters { + /** Config file comment character; one of '!', '#', '%', ';', '@' */ + char comment_character; +}; + +/**@{ cfgfile load operation flags */ +enum { + /** + * Indicates that the file supports key value entries before the first + * defined section. These entries can be accessed in the "GLOBAL" + * section. + */ + CFG_FLAG_GLOBAL_SECTION = 1, + + /** + * Indicates that file supports key value entries where the value can + * be zero length (e.g., "key="). + */ + CFG_FLAG_EMPTY_VALUES = 2, +}; +/**@} */ + +/** Defines the default comment character used for parsing config files. */ +#define CFG_DEFAULT_COMMENT_CHARACTER ';' + /** * Open config file * * @param filename * Config file name * @param flags -* Config file flags, Reserved for future use. Must be set to 0. +* Config file flags * @return * Handle to configuration file on success, NULL otherwise */ struct rte_cfgfile *rte_cfgfile_load(const char *filename, int flags); /** + * Open config file with specified optional parameters. + * + * @param filename + * Config file name + * @param flags + * Config file flags + * @param params + * Additional configuration attributes. Must be configured with desired + * values prior to invoking this API. 
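The two load flags above can be OR-ed together; a sketch of loading a file that has keys before its first [section] header and values that may be empty (hypothetical names):

    struct rte_cfgfile *cfg;
    const char *value;

    cfg = rte_cfgfile_load("app.cfg",
            CFG_FLAG_GLOBAL_SECTION | CFG_FLAG_EMPTY_VALUES);
    if (cfg != NULL) {
        /* keys seen before the first [section] land in "GLOBAL" */
        value = rte_cfgfile_get_entry(cfg, "GLOBAL", "verbose");
    }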
+ * @return + * Handle to configuration file on success, NULL otherwise + */ +struct rte_cfgfile *rte_cfgfile_load_with_params(const char *filename, + int flags, const struct rte_cfgfile_parameters *params); + +/** * Get number of sections in config file * * @param cfg diff --git a/lib/librte_cfgfile/rte_cfgfile_version.map b/lib/librte_cfgfile/rte_cfgfile_version.map index 3c2f0dbf..5fe60f72 100644 --- a/lib/librte_cfgfile/rte_cfgfile_version.map +++ b/lib/librte_cfgfile/rte_cfgfile_version.map @@ -20,3 +20,10 @@ DPDK_16.04 { rte_cfgfile_section_entries_by_index; } DPDK_2.0; + +DPDK_17.05 { + global: + + rte_cfgfile_load_with_params; + +} DPDK_16.04; diff --git a/lib/librte_cmdline/Makefile b/lib/librte_cmdline/Makefile index 7d2d148c..644f68e4 100644 --- a/lib/librte_cmdline/Makefile +++ b/lib/librte_cmdline/Makefile @@ -61,7 +61,4 @@ INCS += cmdline_parse_etheraddr.h cmdline_parse_string.h cmdline_rdline.h INCS += cmdline_vt100.h cmdline_socket.h cmdline_cirbuf.h cmdline_parse_portlist.h SYMLINK-$(CONFIG_RTE_LIBRTE_CMDLINE)-include := $(INCS) -# this lib needs eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += lib/librte_eal - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c index b496067a..b8148808 100644 --- a/lib/librte_cmdline/cmdline_parse.c +++ b/lib/librte_cmdline/cmdline_parse.c @@ -146,7 +146,9 @@ nb_common_chars(const char * s1, const char * s2) */ static int match_inst(cmdline_parse_inst_t *inst, const char *buf, - unsigned int nb_match_token, void *resbuf, unsigned resbuf_size) + unsigned int nb_match_token, void *resbuf, unsigned resbuf_size, + cmdline_parse_token_hdr_t + *(*dyn_tokens)[CMDLINE_PARSE_DYNAMIC_TOKENS]) { unsigned int token_num=0; cmdline_parse_token_hdr_t * token_p; @@ -155,6 +157,11 @@ match_inst(cmdline_parse_inst_t *inst, const char *buf, struct cmdline_token_hdr token_hdr; token_p = inst->tokens[token_num]; + if (!token_p && dyn_tokens && inst->f) { + if (!(*dyn_tokens)[0]) + inst->f(&(*dyn_tokens)[0], NULL, dyn_tokens); + token_p = (*dyn_tokens)[0]; + } if (token_p) memcpy(&token_hdr, token_p, sizeof(token_hdr)); @@ -196,7 +203,17 @@ match_inst(cmdline_parse_inst_t *inst, const char *buf, buf += n; token_num ++; - token_p = inst->tokens[token_num]; + if (!inst->tokens[0]) { + if (token_num < (CMDLINE_PARSE_DYNAMIC_TOKENS - 1)) { + if (!(*dyn_tokens)[token_num]) + inst->f(&(*dyn_tokens)[token_num], + NULL, + dyn_tokens); + token_p = (*dyn_tokens)[token_num]; + } else + token_p = NULL; + } else + token_p = inst->tokens[token_num]; if (token_p) memcpy(&token_hdr, token_p, sizeof(token_hdr)); } @@ -238,7 +255,11 @@ cmdline_parse(struct cmdline *cl, const char * buf) unsigned int inst_num=0; cmdline_parse_inst_t *inst; const char *curbuf; - char result_buf[CMDLINE_PARSE_RESULT_BUFSIZE]; + union { + char buf[CMDLINE_PARSE_RESULT_BUFSIZE]; + long double align; /* strong alignment constraint for buf */ + } result, tmp_result; + cmdline_parse_token_hdr_t *dyn_tokens[CMDLINE_PARSE_DYNAMIC_TOKENS]; void (*f)(void *, struct cmdline *, void *) = NULL; void *data = NULL; int comment = 0; @@ -255,6 +276,7 @@ cmdline_parse(struct cmdline *cl, const char * buf) return CMDLINE_PARSE_BAD_ARGS; ctx = cl->ctx; + memset(&dyn_tokens, 0, sizeof(dyn_tokens)); /* * - look if the buffer contains at least one line @@ -299,13 +321,16 @@ cmdline_parse(struct cmdline *cl, const char * buf) debug_printf("INST %d\n", inst_num); /* fully parsed */ - tok = match_inst(inst, buf, 0, result_buf, sizeof(result_buf)); + tok = 
match_inst(inst, buf, 0, tmp_result.buf, + sizeof(tmp_result.buf), &dyn_tokens); if (tok > 0) /* we matched at least one token */ err = CMDLINE_PARSE_BAD_ARGS; else if (!tok) { debug_printf("INST fully parsed\n"); + memcpy(&result, &tmp_result, + sizeof(result)); /* skip spaces */ while (isblank2(*curbuf)) { curbuf++; @@ -333,7 +358,7 @@ cmdline_parse(struct cmdline *cl, const char * buf) /* call func */ if (f) { - f(result_buf, cl, data); + f(result.buf, cl, data); } /* no match */ @@ -355,6 +380,7 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state, cmdline_parse_token_hdr_t *token_p; struct cmdline_token_hdr token_hdr; char tmpbuf[CMDLINE_BUFFER_SIZE], comp_buf[CMDLINE_BUFFER_SIZE]; + cmdline_parse_token_hdr_t *dyn_tokens[CMDLINE_PARSE_DYNAMIC_TOKENS]; unsigned int partial_tok_len; int comp_len = -1; int tmp_len = -1; @@ -374,6 +400,7 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state, debug_printf("%s called\n", __func__); memset(&token_hdr, 0, sizeof(token_hdr)); + memset(&dyn_tokens, 0, sizeof(dyn_tokens)); /* count the number of complete token to parse */ for (i=0 ; buf[i] ; i++) { @@ -396,11 +423,24 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state, inst = ctx[inst_num]; while (inst) { /* parse the first tokens of the inst */ - if (nb_token && match_inst(inst, buf, nb_token, NULL, 0)) + if (nb_token && + match_inst(inst, buf, nb_token, NULL, 0, + &dyn_tokens)) goto next; debug_printf("instruction match\n"); - token_p = inst->tokens[nb_token]; + if (!inst->tokens[0]) { + if (nb_token < + (CMDLINE_PARSE_DYNAMIC_TOKENS - 1)) { + if (!dyn_tokens[nb_token]) + inst->f(&dyn_tokens[nb_token], + NULL, + &dyn_tokens); + token_p = dyn_tokens[nb_token]; + } else + token_p = NULL; + } else + token_p = inst->tokens[nb_token]; if (token_p) memcpy(&token_hdr, token_p, sizeof(token_hdr)); @@ -490,10 +530,21 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state, /* we need to redo it */ inst = ctx[inst_num]; - if (nb_token && match_inst(inst, buf, nb_token, NULL, 0)) + if (nb_token && + match_inst(inst, buf, nb_token, NULL, 0, &dyn_tokens)) goto next2; - token_p = inst->tokens[nb_token]; + if (!inst->tokens[0]) { + if (nb_token < (CMDLINE_PARSE_DYNAMIC_TOKENS - 1)) { + if (!dyn_tokens[nb_token]) + inst->f(&dyn_tokens[nb_token], + NULL, + &dyn_tokens); + token_p = dyn_tokens[nb_token]; + } else + token_p = NULL; + } else + token_p = inst->tokens[nb_token]; if (token_p) memcpy(&token_hdr, token_p, sizeof(token_hdr)); diff --git a/lib/librte_cmdline/cmdline_parse.h b/lib/librte_cmdline/cmdline_parse.h index 4ac05d6b..65b18d4f 100644 --- a/lib/librte_cmdline/cmdline_parse.h +++ b/lib/librte_cmdline/cmdline_parse.h @@ -83,6 +83,9 @@ extern "C" { /* maximum buffer size for parsed result */ #define CMDLINE_PARSE_RESULT_BUFSIZE 8192 +/* maximum number of dynamic tokens */ +#define CMDLINE_PARSE_DYNAMIC_TOKENS 128 + /** * Stores a pointer to the ops struct, and the offset: the place to * write the parsed result in the destination structure. @@ -130,6 +133,24 @@ struct cmdline; * Store a instruction, which is a pointer to a callback function and * its parameter that is called when the instruction is parsed, a help * string, and a list of token composing this instruction. 
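The hunk that follows documents the convention behind these dyn_tokens changes: when inst->tokens[0] is NULL, the instruction's own callback f() doubles as a token generator. A rough sketch of a conforming generator, assuming cmd_dyn_tok0 is some statically defined token:

    static void
    cmd_dyn_f(void *parsed, struct cmdline *cl, void *data)
    {
        cmdline_parse_token_hdr_t **token_hdr = parsed;
        cmdline_parse_token_hdr_t *(*tokens)[CMDLINE_PARSE_DYNAMIC_TOKENS] = data;

        if (cl == NULL) {
            /* generation call: which token is wanted follows from the slot address */
            int index = token_hdr - &(*tokens)[0];

            *token_hdr = (index == 0) ?
                &cmd_dyn_tok0.hdr : /* hypothetical first token */
                NULL;               /* NULL ends the token list */
            return;
        }
        /* cl != NULL: the normal command-execution path runs here */
    }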
+ * + * When no tokens are defined (tokens[0] == NULL), they are retrieved + * dynamically by calling f() as follows: + * + * f((struct cmdline_token_hdr **)&token_hdr, + * NULL, + * (struct cmdline_token_hdr *[])tokens)); + * + * The address of the resulting token is expected at the location pointed by + * the first argument. Can be set to NULL to end the list. + * + * The cmdline argument (struct cmdline *) is always NULL. + * + * The last argument points to the NULL-terminated list of dynamic tokens + * defined so far. Since token_hdr points to an index of that list, the + * current index can be derived as follows: + * + * int index = token_hdr - &(*tokens)[0]; */ struct cmdline_inst { /* f(parsed_struct, data) */ diff --git a/lib/librte_cmdline/cmdline_parse_num.c b/lib/librte_cmdline/cmdline_parse_num.c index b0f9a35d..e507ec4f 100644 --- a/lib/librte_cmdline/cmdline_parse_num.c +++ b/lib/librte_cmdline/cmdline_parse_num.c @@ -250,7 +250,7 @@ cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res, case HEX: st = HEX_OK; - /* no break */ + /* fall-through no break */ case HEX_OK: if (c >= '0' && c <= '9') { if (add_to_res(c - '0', &res1, 16) < 0) @@ -282,7 +282,7 @@ cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res, case BIN: st = BIN_OK; - /* no break */ + /* fall-through */ case BIN_OK: if (c >= '0' && c <= '1') { if (add_to_res(c - '0', &res1, 2) < 0) diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile index aebf5d9f..18f5e8c5 100644 --- a/lib/librte_cryptodev/Makefile +++ b/lib/librte_cryptodev/Makefile @@ -52,11 +52,4 @@ SYMLINK-y-include += rte_cryptodev_pmd.h # versioning export map EXPORT_MAP := rte_cryptodev_version.map -# library dependencies -DEPDIRS-y += lib/librte_eal -DEPDIRS-y += lib/librte_mempool -DEPDIRS-y += lib/librte_ring -DEPDIRS-y += lib/librte_mbuf -DEPDIRS-y += lib/librte_kvargs - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h index d3d38e4f..3a408448 100644 --- a/lib/librte_cryptodev/rte_crypto_sym.h +++ b/lib/librte_cryptodev/rte_crypto_sym.h @@ -105,9 +105,31 @@ enum rte_crypto_cipher_algorithm { RTE_CRYPTO_CIPHER_ZUC_EEA3, /**< ZUC algorithm in EEA3 mode */ + RTE_CRYPTO_CIPHER_DES_CBC, + /**< DES algorithm in CBC mode */ + + RTE_CRYPTO_CIPHER_AES_DOCSISBPI, + /**< AES algorithm using modes required by + * DOCSIS Baseline Privacy Plus Spec. + * Chained mbufs are not supported in this mode, i.e. rte_mbuf.next + * for m_src and m_dst in the rte_crypto_sym_op must be NULL. + */ + + RTE_CRYPTO_CIPHER_DES_DOCSISBPI, + /**< DES algorithm using modes required by + * DOCSIS Baseline Privacy Plus Spec. + * Chained mbufs are not supported in this mode, i.e. rte_mbuf.next + * for m_src and m_dst in the rte_crypto_sym_op must be NULL. + */ + RTE_CRYPTO_CIPHER_LIST_END + }; +/** Cipher algorithm name strings */ +extern const char * +rte_crypto_cipher_algorithm_strings[]; + /** Symmetric Cipher Direction */ enum rte_crypto_cipher_operation { RTE_CRYPTO_CIPHER_OP_ENCRYPT, @@ -116,6 +138,10 @@ enum rte_crypto_cipher_operation { /**< Decrypt cipher operation */ }; +/** Cipher operation name strings */ +extern const char * +rte_crypto_cipher_operation_strings[]; + /** * Symmetric Cipher Setup Data. 
* @@ -241,12 +267,20 @@ enum rte_crypto_auth_algorithm { RTE_CRYPTO_AUTH_LIST_END }; +/** Authentication algorithm name strings */ +extern const char * +rte_crypto_auth_algorithm_strings[]; + /** Symmetric Authentication / Hash Operations */ enum rte_crypto_auth_operation { RTE_CRYPTO_AUTH_OP_VERIFY, /**< Verify authentication digest */ RTE_CRYPTO_AUTH_OP_GENERATE /**< Generate authentication digest */ }; +/** Authentication operation name strings */ +extern const char * +rte_crypto_auth_operation_strings[]; + /** * Authentication / Hash transform data. * @@ -276,17 +310,16 @@ struct rte_crypto_auth_xform { * this specifies the length of the digest to be compared for the * session. * + * It is the caller's responsibility to ensure that the + * digest length is compliant with the hash algorithm being used. * If the value is less than the maximum length allowed by the hash, - * the result shall be truncated. If the value is greater than the - * maximum length allowed by the hash then an error will be generated - * by *rte_cryptodev_sym_session_create* or by the - * *rte_cryptodev_sym_enqueue_burst* if using session-less APIs. + * the result shall be truncated. */ uint32_t add_auth_data_length; /**< The length of the additional authenticated data (AAD) in bytes. - * The maximum permitted value is 240 bytes, unless otherwise specified - * below. + * The maximum permitted value is 65535 (2^16 - 1) bytes, unless + * otherwise specified below. * * This field must be specified when the hash algorithm is one of the * following: @@ -541,8 +574,7 @@ struct rte_crypto_sym_op { struct { uint8_t *data; - /**< If this member of this structure is set this is a - * pointer to the location where the digest result + /**< This points to the location where the digest result * should be inserted (in the case of digest generation) * or where the purported digest exists (in the case of * digest verification). @@ -560,18 +592,13 @@ struct rte_crypto_sym_op { * @note * For GCM (@ref RTE_CRYPTO_AUTH_AES_GCM), for * "digest result" read "authentication tag T". - * - * If this member is not set the digest result is - * understood to be in the destination buffer for - * digest generation, and in the source buffer for - * digest verification. The location of the digest - * result in this case is immediately following the - * region over which the digest is computed. */ phys_addr_t phys_addr; /**< Physical address of digest */ uint16_t length; - /**< Length of digest */ + /**< Length of digest. This must be the same value as + * @ref rte_crypto_auth_xform.digest_length. + */ } digest; /**< Digest parameters */ struct { @@ -586,7 +613,7 @@ struct rte_crypto_sym_op { * set up for the session in the @ref * rte_crypto_auth_xform structure as part of the @ref * rte_cryptodev_sym_session_create function call. - * This length must not exceed 240 bytes. + * This length must not exceed 65535 (2^16-1) bytes. * * Specifically for CCM (@ref RTE_CRYPTO_AUTH_AES_CCM), * the caller should setup this field as follows: @@ -619,7 +646,10 @@ struct rte_crypto_sym_op { * operation, this field is used to pass plaintext. 
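Given the digest-length responsibility shift documented above, a caller would pin the digest length explicitly when building the transform; a sketch (hmac_key is a hypothetical key buffer):

    struct rte_crypto_sym_xform auth_xform = {
        .type = RTE_CRYPTO_SYM_XFORM_AUTH,
        .auth = {
            .op = RTE_CRYPTO_AUTH_OP_GENERATE,
            .algo = RTE_CRYPTO_AUTH_SHA1_HMAC,
            .key = { .data = hmac_key, .length = 64 },
            /* caller must keep this within the algorithm's limit
             * (20 bytes for SHA-1); smaller values truncate */
            .digest_length = 12,
        },
    };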
*/ phys_addr_t phys_addr; /**< physical address */ - uint16_t length; /**< Length of digest */ + uint16_t length; + /**< Length of additional authenticated data (AAD) + * in bytes + */ } aad; /**< Additional authentication parameters */ } auth; diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c index 54e95d5c..b65cd9ce 100644 --- a/lib/librte_cryptodev/rte_cryptodev.c +++ b/lib/librte_cryptodev/rte_cryptodev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2015-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2015-2017 Intel Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -101,16 +101,138 @@ struct rte_cryptodev_callback { uint32_t active; /**< Callback is executing */ }; +#define RTE_CRYPTODEV_VDEV_NAME ("name") #define RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG ("max_nb_queue_pairs") #define RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG ("max_nb_sessions") #define RTE_CRYPTODEV_VDEV_SOCKET_ID ("socket_id") static const char *cryptodev_vdev_valid_params[] = { + RTE_CRYPTODEV_VDEV_NAME, RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG, RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG, RTE_CRYPTODEV_VDEV_SOCKET_ID }; +/** + * The crypto cipher algorithm strings identifiers. + * It could be used in application command line. + */ +const char * +rte_crypto_cipher_algorithm_strings[] = { + [RTE_CRYPTO_CIPHER_3DES_CBC] = "3des-cbc", + [RTE_CRYPTO_CIPHER_3DES_ECB] = "3des-ecb", + [RTE_CRYPTO_CIPHER_3DES_CTR] = "3des-ctr", + + [RTE_CRYPTO_CIPHER_AES_CBC] = "aes-cbc", + [RTE_CRYPTO_CIPHER_AES_CCM] = "aes-ccm", + [RTE_CRYPTO_CIPHER_AES_CTR] = "aes-ctr", + [RTE_CRYPTO_CIPHER_AES_DOCSISBPI] = "aes-docsisbpi", + [RTE_CRYPTO_CIPHER_AES_ECB] = "aes-ecb", + [RTE_CRYPTO_CIPHER_AES_GCM] = "aes-gcm", + [RTE_CRYPTO_CIPHER_AES_F8] = "aes-f8", + [RTE_CRYPTO_CIPHER_AES_XTS] = "aes-xts", + + [RTE_CRYPTO_CIPHER_ARC4] = "arc4", + + [RTE_CRYPTO_CIPHER_DES_CBC] = "des-cbc", + [RTE_CRYPTO_CIPHER_DES_DOCSISBPI] = "des-docsisbpi", + + [RTE_CRYPTO_CIPHER_NULL] = "null", + + [RTE_CRYPTO_CIPHER_KASUMI_F8] = "kasumi-f8", + [RTE_CRYPTO_CIPHER_SNOW3G_UEA2] = "snow3g-uea2", + [RTE_CRYPTO_CIPHER_ZUC_EEA3] = "zuc-eea3" +}; + +/** + * The crypto cipher operation strings identifiers. + * It could be used in application command line. + */ +const char * +rte_crypto_cipher_operation_strings[] = { + [RTE_CRYPTO_CIPHER_OP_ENCRYPT] = "encrypt", + [RTE_CRYPTO_CIPHER_OP_DECRYPT] = "decrypt" +}; + +/** + * The crypto auth algorithm strings identifiers. + * It could be used in application command line. 
+ */ +const char * +rte_crypto_auth_algorithm_strings[] = { + [RTE_CRYPTO_AUTH_AES_CBC_MAC] = "aes-cbc-mac", + [RTE_CRYPTO_AUTH_AES_CCM] = "aes-ccm", + [RTE_CRYPTO_AUTH_AES_CMAC] = "aes-cmac", + [RTE_CRYPTO_AUTH_AES_GCM] = "aes-gcm", + [RTE_CRYPTO_AUTH_AES_GMAC] = "aes-gmac", + [RTE_CRYPTO_AUTH_AES_XCBC_MAC] = "aes-xcbc-mac", + + [RTE_CRYPTO_AUTH_MD5] = "md5", + [RTE_CRYPTO_AUTH_MD5_HMAC] = "md5-hmac", + + [RTE_CRYPTO_AUTH_NULL] = "null", + + [RTE_CRYPTO_AUTH_SHA1] = "sha1", + [RTE_CRYPTO_AUTH_SHA1_HMAC] = "sha1-hmac", + + [RTE_CRYPTO_AUTH_SHA224] = "sha2-224", + [RTE_CRYPTO_AUTH_SHA224_HMAC] = "sha2-224-hmac", + [RTE_CRYPTO_AUTH_SHA256] = "sha2-256", + [RTE_CRYPTO_AUTH_SHA256_HMAC] = "sha2-256-hmac", + [RTE_CRYPTO_AUTH_SHA384] = "sha2-384", + [RTE_CRYPTO_AUTH_SHA384_HMAC] = "sha2-384-hmac", + [RTE_CRYPTO_AUTH_SHA512] = "sha2-512", + [RTE_CRYPTO_AUTH_SHA512_HMAC] = "sha2-512-hmac", + + [RTE_CRYPTO_AUTH_KASUMI_F9] = "kasumi-f9", + [RTE_CRYPTO_AUTH_SNOW3G_UIA2] = "snow3g-uia2", + [RTE_CRYPTO_AUTH_ZUC_EIA3] = "zuc-eia3" +}; + +int +rte_cryptodev_get_cipher_algo_enum(enum rte_crypto_cipher_algorithm *algo_enum, + const char *algo_string) +{ + unsigned int i; + + for (i = 1; i < RTE_DIM(rte_crypto_cipher_algorithm_strings); i++) { + if (strcmp(algo_string, rte_crypto_cipher_algorithm_strings[i]) == 0) { + *algo_enum = (enum rte_crypto_cipher_algorithm) i; + return 0; + } + } + + /* Invalid string */ + return -1; +} + +int +rte_cryptodev_get_auth_algo_enum(enum rte_crypto_auth_algorithm *algo_enum, + const char *algo_string) +{ + unsigned int i; + + for (i = 1; i < RTE_DIM(rte_crypto_auth_algorithm_strings); i++) { + if (strcmp(algo_string, rte_crypto_auth_algorithm_strings[i]) == 0) { + *algo_enum = (enum rte_crypto_auth_algorithm) i; + return 0; + } + } + + /* Invalid string */ + return -1; +} + +/** + * The crypto auth operation strings identifiers. + * It could be used in application command line. 
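These string tables and the lookup helpers above let an application translate command-line arguments directly; a sketch (optarg as produced by getopt()):

    enum rte_crypto_cipher_algorithm cipher_algo;

    /* map a user-supplied string such as "aes-cbc" onto the enum */
    if (rte_cryptodev_get_cipher_algo_enum(&cipher_algo, optarg) < 0)
        rte_exit(EXIT_FAILURE, "unknown cipher algorithm: %s\n", optarg);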
+ */ +const char * +rte_crypto_auth_operation_strings[] = { + [RTE_CRYPTO_AUTH_OP_VERIFY] = "verify", + [RTE_CRYPTO_AUTH_OP_GENERATE] = "generate" +}; + static uint8_t number_of_sockets(void) { @@ -132,7 +254,7 @@ static int parse_integer_arg(const char *key __rte_unused, const char *value, void *extra_args) { - int *i = (int *) extra_args; + int *i = extra_args; *i = atoi(value); if (*i < 0) { @@ -143,6 +265,25 @@ parse_integer_arg(const char *key __rte_unused, return 0; } +/** Parse name */ +static int +parse_name_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + struct rte_crypto_vdev_init_params *params = extra_args; + + if (strlen(value) >= RTE_CRYPTODEV_NAME_MAX_LEN - 1) { + CDEV_LOG_ERR("Invalid name %s, should be less than " + "%u bytes", value, + RTE_CRYPTODEV_NAME_MAX_LEN - 1); + return -1; + } + + strncpy(params->name, value, RTE_CRYPTODEV_NAME_MAX_LEN); + + return 0; +} + int rte_cryptodev_parse_vdev_init_params(struct rte_crypto_vdev_init_params *params, const char *input_args) @@ -179,6 +320,12 @@ rte_cryptodev_parse_vdev_init_params(struct rte_crypto_vdev_init_params *params, if (ret < 0) goto free_kvlist; + ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_NAME, + &parse_name_arg, + params); + if (ret < 0) + goto free_kvlist; + if (params->socket_id >= number_of_sockets()) { CDEV_LOG_ERR("Invalid socket id specified to create " "the virtual crypto device on"); @@ -191,6 +338,73 @@ free_kvlist: return ret; } +const struct rte_cryptodev_symmetric_capability * +rte_cryptodev_sym_capability_get(uint8_t dev_id, + const struct rte_cryptodev_sym_capability_idx *idx) +{ + const struct rte_cryptodev_capabilities *capability; + struct rte_cryptodev_info dev_info; + int i = 0; + + rte_cryptodev_info_get(dev_id, &dev_info); + + while ((capability = &dev_info.capabilities[i++])->op != + RTE_CRYPTO_OP_TYPE_UNDEFINED) { + if (capability->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC) + continue; + + if (capability->sym.xform_type != idx->type) + continue; + + if (idx->type == RTE_CRYPTO_SYM_XFORM_AUTH && + capability->sym.auth.algo == idx->algo.auth) + return &capability->sym; + + if (idx->type == RTE_CRYPTO_SYM_XFORM_CIPHER && + capability->sym.cipher.algo == idx->algo.cipher) + return &capability->sym; + } + + return NULL; + +} + +#define param_range_check(x, y) \ + (((x < y.min) || (x > y.max)) || \ + (y.increment != 0 && (x % y.increment) != 0)) + +int +rte_cryptodev_sym_capability_check_cipher( + const struct rte_cryptodev_symmetric_capability *capability, + uint16_t key_size, uint16_t iv_size) +{ + if (param_range_check(key_size, capability->cipher.key_size)) + return -1; + + if (param_range_check(iv_size, capability->cipher.iv_size)) + return -1; + + return 0; +} + +int +rte_cryptodev_sym_capability_check_auth( + const struct rte_cryptodev_symmetric_capability *capability, + uint16_t key_size, uint16_t digest_size, uint16_t aad_size) +{ + if (param_range_check(key_size, capability->auth.key_size)) + return -1; + + if (param_range_check(digest_size, capability->auth.digest_size)) + return -1; + + if (param_range_check(aad_size, capability->auth.aad_size)) + return -1; + + return 0; +} + + const char * rte_cryptodev_get_feature_name(uint64_t flag) { @@ -211,19 +425,65 @@ rte_cryptodev_get_feature_name(uint64_t flag) return "CPU_AESNI"; case RTE_CRYPTODEV_FF_HW_ACCELERATED: return "HW_ACCELERATED"; - + case RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER: + return "MBUF_SCATTER_GATHER"; + case RTE_CRYPTODEV_FF_CPU_NEON: + return "CPU_NEON"; + case RTE_CRYPTODEV_FF_CPU_ARM_CE: 
+ return "CPU_ARM_CE"; default: return NULL; } } - int rte_cryptodev_create_vdev(const char *name, const char *args) { - return rte_eal_vdev_init(name, args); + return rte_vdev_init(name, args); +} + +struct rte_cryptodev * +rte_cryptodev_pmd_get_dev(uint8_t dev_id) +{ + return &rte_cryptodev_globals->devs[dev_id]; +} + +struct rte_cryptodev * +rte_cryptodev_pmd_get_named_dev(const char *name) +{ + struct rte_cryptodev *dev; + unsigned int i; + + if (name == NULL) + return NULL; + + for (i = 0; i < rte_cryptodev_globals->max_devs; i++) { + dev = &rte_cryptodev_globals->devs[i]; + + if ((dev->attached == RTE_CRYPTODEV_ATTACHED) && + (strcmp(dev->data->name, name) == 0)) + return dev; + } + + return NULL; +} + +unsigned int +rte_cryptodev_pmd_is_valid_dev(uint8_t dev_id) +{ + struct rte_cryptodev *dev = NULL; + + if (dev_id >= rte_cryptodev_globals->nb_devs) + return 0; + + dev = rte_cryptodev_pmd_get_dev(dev_id); + if (dev->attached != RTE_CRYPTODEV_ATTACHED) + return 0; + else + return 1; } + int rte_cryptodev_get_dev_id(const char *name) { @@ -262,6 +522,35 @@ rte_cryptodev_count_devtype(enum rte_cryptodev_type type) return dev_count; } +uint8_t +rte_cryptodev_devices_get(const char *dev_name, uint8_t *devices, + uint8_t nb_devices) +{ + uint8_t i, count = 0; + struct rte_cryptodev *devs = rte_cryptodev_globals->devs; + uint8_t max_devs = rte_cryptodev_globals->max_devs; + + for (i = 0; i < max_devs && count < nb_devices; i++) { + + if (devs[i].attached == RTE_CRYPTODEV_ATTACHED) { + const struct rte_cryptodev_driver *drv = devs[i].driver; + int cmp; + + if (drv) + cmp = strncmp(drv->pci_drv.driver.name, + dev_name, strlen(dev_name)); + else + cmp = strncmp(devs[i].data->name, + dev_name, strlen(dev_name)); + + if (cmp == 0) + devices[count++] = devs[i].data->dev_id; + } + } + + return count; +} + int rte_cryptodev_socket_id(uint8_t dev_id) { @@ -427,7 +716,7 @@ rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv, if (cryptodrv == NULL) return -ENODEV; - rte_eal_pci_device_name(&pci_dev->addr, cryptodev_name, + rte_pci_device_name(&pci_dev->addr, cryptodev_name, sizeof(cryptodev_name)); cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id()); @@ -447,7 +736,7 @@ rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv, "device data"); } - cryptodev->pci_dev = pci_dev; + cryptodev->device = &pci_dev->device; cryptodev->driver = cryptodrv; /* init user callbacks */ @@ -483,7 +772,7 @@ rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev) if (pci_dev == NULL) return -EINVAL; - rte_eal_pci_device_name(&pci_dev->addr, cryptodev_name, + rte_pci_device_name(&pci_dev->addr, cryptodev_name, sizeof(cryptodev_name)); cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name); @@ -507,7 +796,7 @@ rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev) if (rte_eal_process_type() == RTE_PROC_PRIMARY) rte_free(cryptodev->data->dev_private); - cryptodev->pci_dev = NULL; + cryptodev->device = NULL; cryptodev->driver = NULL; cryptodev->data = NULL; @@ -668,6 +957,8 @@ rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config) return -EBUSY; } + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP); + /* Setup new number of queue pairs and reconfigure device. 
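The new rte_cryptodev_devices_get() above gives applications a name-filtered view of attached devices; a usage sketch (driver name is illustrative):

    uint8_t ids[RTE_CRYPTO_MAX_DEVS];
    uint8_t i, n;

    /* collect the ids of every device created by a given driver */
    n = rte_cryptodev_devices_get("crypto_aesni_mb", ids, RTE_DIM(ids));
    for (i = 0; i < n; i++)
        printf("matched cryptodev %u\n", ids[i]);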
*/ diag = rte_cryptodev_queue_pairs_config(dev, config->nb_queue_pairs, config->socket_id); if (diag != 0) { @@ -678,10 +969,14 @@ rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config) } /* Setup Session mempool for device */ - return rte_cryptodev_sym_session_pool_create(dev, + diag = rte_cryptodev_sym_session_pool_create(dev, config->session_mp.nb_objs, config->session_mp.cache_size, config->socket_id); + if (diag != 0) + return diag; + + return (*dev->dev_ops->dev_configure)(dev, config); } @@ -868,7 +1163,7 @@ rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info) RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); (*dev->dev_ops->dev_infos_get)(dev, dev_info); - dev_info->pci_dev = dev->pci_dev; + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); if (dev->driver) dev_info->driver_name = dev->driver->pci_drv.driver.name; } @@ -1088,7 +1383,7 @@ rte_cryptodev_sym_session_create(uint8_t dev_id, return NULL; } - sess = (struct rte_cryptodev_sym_session *)_sess; + sess = _sess; RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_configure, NULL); if (dev->dev_ops->session_configure(dev, xform, sess->_private) == @@ -1104,6 +1399,53 @@ rte_cryptodev_sym_session_create(uint8_t dev_id, return sess; } +int +rte_cryptodev_queue_pair_attach_sym_session(uint16_t qp_id, + struct rte_cryptodev_sym_session *sess) +{ + struct rte_cryptodev *dev; + + if (!rte_cryptodev_pmd_is_valid_dev(sess->dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%d", sess->dev_id); + return -EINVAL; + } + + dev = &rte_crypto_devices[sess->dev_id]; + + /* The API is optional; no error is returned if the driver does not support it */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_attach_session, 0); + if (dev->dev_ops->qp_attach_session(dev, qp_id, sess->_private)) { + CDEV_LOG_ERR("dev_id %d failed to attach qp: %d with session", + sess->dev_id, qp_id); + return -EPERM; + } + + return 0; +} + +int +rte_cryptodev_queue_pair_detach_sym_session(uint16_t qp_id, + struct rte_cryptodev_sym_session *sess) +{ + struct rte_cryptodev *dev; + + if (!rte_cryptodev_pmd_is_valid_dev(sess->dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%d", sess->dev_id); + return -EINVAL; + } + + dev = &rte_crypto_devices[sess->dev_id]; + + /* The API is optional; no error is returned if the driver does not support it */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_detach_session, 0); + if (dev->dev_ops->qp_detach_session(dev, qp_id, sess->_private)) { + CDEV_LOG_ERR("dev_id %d failed to detach qp: %d from session", + sess->dev_id, qp_id); + return -EPERM; + } + + return 0; +} struct rte_cryptodev_sym_session * rte_cryptodev_sym_session_free(uint8_t dev_id, struct rte_cryptodev_sym_session *sess) @@ -1206,3 +1548,27 @@ rte_crypto_op_pool_create(const char *name, enum rte_crypto_op_type type, return mp; } + +int +rte_cryptodev_pmd_create_dev_name(char *name, const char *dev_name_prefix) +{ + struct rte_cryptodev *dev = NULL; + uint32_t i = 0; + + if (name == NULL) + return -EINVAL; + + for (i = 0; i < RTE_CRYPTO_MAX_DEVS; i++) { + int ret = snprintf(name, RTE_CRYPTODEV_NAME_MAX_LEN, + "%s_%u", dev_name_prefix, i); + + if (ret < 0) + return ret; + + dev = rte_cryptodev_pmd_get_named_dev(name); + if (!dev) + return 0; + } + + return -1; +} diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h index 8f63e8f6..88aeb873 100644 --- a/lib/librte_cryptodev/rte_cryptodev.h +++ b/lib/librte_cryptodev/rte_cryptodev.h @@ -1,6 +1,6 @@ /*- * - * Copyright(c) 2015-2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2015-2017 Intel Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -66,6 +66,12 @@ extern "C" { /**< KASUMI PMD device name */ #define CRYPTODEV_NAME_ZUC_PMD crypto_zuc /**< ZUC PMD device name */ +#define CRYPTODEV_NAME_ARMV8_PMD crypto_armv8 +/**< ARMv8 Crypto PMD device name */ +#define CRYPTODEV_NAME_SCHEDULER_PMD crypto_scheduler +/**< Scheduler Crypto PMD device name */ +#define CRYPTODEV_NAME_DPAA2_SEC_PMD cryptodev_dpaa2_sec_pmd +/**< NXP DPAA2 - SEC PMD device name */ /** Crypto device type */ enum rte_cryptodev_type { @@ -77,6 +83,9 @@ enum rte_cryptodev_type { RTE_CRYPTODEV_KASUMI_PMD, /**< KASUMI PMD */ RTE_CRYPTODEV_ZUC_PMD, /**< ZUC PMD */ RTE_CRYPTODEV_OPENSSL_PMD, /**< OpenSSL PMD */ + RTE_CRYPTODEV_ARMV8_PMD, /**< ARMv8 crypto PMD */ + RTE_CRYPTODEV_SCHEDULER_PMD, /**< Crypto Scheduler PMD */ + RTE_CRYPTODEV_DPAA2_SEC_PMD, /**< NXP DPAA2 - SEC PMD */ }; extern const char **rte_cyptodev_names; @@ -110,6 +119,20 @@ extern const char **rte_cyptodev_names; #endif /** + * Crypto parameters range description + */ +struct rte_crypto_param_range { + uint16_t min; /**< minimum size */ + uint16_t max; /**< maximum size */ + uint16_t increment; + /**< if a range of sizes are supported, + * this parameter is used to indicate + * increments in byte size that are supported + * between the minimum and maximum + */ +}; + +/** * Symmetric Crypto Capability */ struct rte_cryptodev_symmetric_capability { @@ -122,35 +145,11 @@ struct rte_cryptodev_symmetric_capability { /**< authentication algorithm */ uint16_t block_size; /**< algorithm block size */ - struct { - uint16_t min; /**< minimum key size */ - uint16_t max; /**< maximum key size */ - uint16_t increment; - /**< if a range of sizes are supported, - * this parameter is used to indicate - * increments in byte size that are supported - * between the minimum and maximum */ - } key_size; + struct rte_crypto_param_range key_size; /**< auth key size range */ - struct { - uint16_t min; /**< minimum digest size */ - uint16_t max; /**< maximum digest size */ - uint16_t increment; - /**< if a range of sizes are supported, - * this parameter is used to indicate - * increments in byte size that are supported - * between the minimum and maximum */ - } digest_size; + struct rte_crypto_param_range digest_size; /**< digest size range */ - struct { - uint16_t min; /**< minimum aad size */ - uint16_t max; /**< maximum aad size */ - uint16_t increment; - /**< if a range of sizes are supported, - * this parameter is used to indicate - * increments in byte size that are supported - * between the minimum and maximum */ - } aad_size; + struct rte_crypto_param_range aad_size; /**< Additional authentication data size range */ } auth; /**< Symmetric Authentication transform capabilities */ @@ -159,25 +158,9 @@ struct rte_cryptodev_symmetric_capability { /**< cipher algorithm */ uint16_t block_size; /**< algorithm block size */ - struct { - uint16_t min; /**< minimum key size */ - uint16_t max; /**< maximum key size */ - uint16_t increment; - /**< if a range of sizes are supported, - * this parameter is used to indicate - * increments in byte size that are supported - * between the minimum and maximum */ - } key_size; + struct rte_crypto_param_range key_size; /**< cipher key size range */ - struct { - uint16_t min; /**< minimum iv size */ - uint16_t max; /**< maximum iv size */ - uint16_t increment; - /**< if a range
of sizes are supported, - * this parameter is used to indicate - * increments in byte size that are supported - * between the minimum and maximum */ - } iv_size; + struct rte_crypto_param_range iv_size; + /**< Initialisation vector data size range */ } cipher; /**< Symmetric Cipher transform capabilities */ @@ -196,6 +179,94 @@ struct rte_cryptodev_capabilities { }; }; +/** Structure used to describe crypto algorithms */ +struct rte_cryptodev_sym_capability_idx { + enum rte_crypto_sym_xform_type type; + union { + enum rte_crypto_cipher_algorithm cipher; + enum rte_crypto_auth_algorithm auth; + } algo; +}; + +/** + * Provide capabilities available for defined device and algorithm + * + * @param dev_id The identifier of the device. + * @param idx Description of crypto algorithms. + * + * @return + * - Return description of the symmetric crypto capability if it exists. + * - Return NULL if the capability does not exist. + */ +const struct rte_cryptodev_symmetric_capability * +rte_cryptodev_sym_capability_get(uint8_t dev_id, + const struct rte_cryptodev_sym_capability_idx *idx); + +/** + * Check if key size and initial vector are supported + * in crypto cipher capability + * + * @param capability Description of the symmetric crypto capability. + * @param key_size Cipher key size. + * @param iv_size Cipher initial vector size. + * + * @return + * - Return 0 if the parameters are in range of the capability. + * - Return -1 if the parameters are out of range of the capability. + */ +int +rte_cryptodev_sym_capability_check_cipher( + const struct rte_cryptodev_symmetric_capability *capability, + uint16_t key_size, uint16_t iv_size); + +/** + * Check if key size, digest size and AAD size are supported + * in crypto auth capability + * + * @param capability Description of the symmetric crypto capability. + * @param key_size Auth key size. + * @param digest_size Auth digest size. + * @param aad_size Auth aad size. + * + * @return + * - Return 0 if the parameters are in range of the capability. + * - Return -1 if the parameters are out of range of the capability.
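Combining the helpers declared above, an application can vet a transform before creating a session; a sketch (dev_id assumed valid and configured):

    struct rte_cryptodev_sym_capability_idx idx = {
        .type = RTE_CRYPTO_SYM_XFORM_CIPHER,
        .algo.cipher = RTE_CRYPTO_CIPHER_AES_CBC,
    };
    const struct rte_cryptodev_symmetric_capability *cap;

    cap = rte_cryptodev_sym_capability_get(dev_id, &idx);
    if (cap == NULL ||
        rte_cryptodev_sym_capability_check_cipher(cap, 16, 16) < 0)
        printf("AES-CBC with 128-bit key and 16-byte IV unsupported\n");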
+/** + * Provide the cipher algorithm enum, given an algorithm string + * + * @param algo_enum A pointer to the cipher algorithm + * enum to be filled + * @param algo_string Cipher algo string + * + * @return + * - Return -1 if string is not valid + * - Return 0 if the string is valid + */ +int +rte_cryptodev_get_cipher_algo_enum(enum rte_crypto_cipher_algorithm *algo_enum, + const char *algo_string); + +/** + * Provide the authentication algorithm enum, given an algorithm string + * + * @param algo_enum A pointer to the authentication algorithm + * enum to be filled + * @param algo_string Authentication algo string + * + * @return + * - Return -1 if string is not valid + * - Return 0 if the string is valid + */ +int +rte_cryptodev_get_auth_algo_enum(enum rte_crypto_auth_algorithm *algo_enum, + const char *algo_string); +
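These parsers are the inverse of the rte_crypto_*_algorithm_strings tables exported in the 17.02 section of the version map further down. A small sketch of the intended use (the literal "aes-cbc" is assumed to be one of the exported strings):

#include <stdio.h>
#include <rte_cryptodev.h>

static void
parse_cipher_name(const char *name)
{
	enum rte_crypto_cipher_algorithm algo;

	/* fills in algo on success; -1 means the string is unknown */
	if (rte_cryptodev_get_cipher_algo_enum(&algo, name) < 0)
		printf("unknown cipher algorithm: %s\n", name);
	else
		printf("%s maps to enum value %d\n", name, (int)algo);
}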
/** Macro used at end of crypto PMD list */ #define RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST() \ { RTE_CRYPTO_OP_TYPE_UNDEFINED } @@ -225,6 +296,14 @@ struct rte_cryptodev_capabilities { /**< Utilises CPU AES-NI instructions */ #define RTE_CRYPTODEV_FF_HW_ACCELERATED (1ULL << 7) /**< Operations are off-loaded to an external hardware accelerator */ +#define RTE_CRYPTODEV_FF_CPU_AVX512 (1ULL << 8) +/**< Utilises CPU SIMD AVX512 instructions */ +#define RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER (1ULL << 9) +/**< Scatter-gather mbufs are supported */ +#define RTE_CRYPTODEV_FF_CPU_NEON (1ULL << 10) +/**< Utilises CPU NEON instructions */ +#define RTE_CRYPTODEV_FF_CPU_ARM_CE (1ULL << 11) +/**< Utilises ARM CPU Cryptographic Extensions */ /** @@ -256,6 +335,10 @@ struct rte_cryptodev_info { struct { unsigned max_nb_sessions; /**< Maximum number of sessions supported by device. */ + unsigned int max_nb_sessions_per_qp; + /**< Maximum number of sessions per queue pair. + * Default 0 for infinite sessions + */ } sym; }; @@ -300,6 +383,8 @@ struct rte_cryptodev_stats { /**< Total error count on operations dequeued */ }; +#define RTE_CRYPTODEV_NAME_MAX_LEN (64) +/**< Max length of name of crypto PMD */ #define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_QUEUE_PAIRS 8 #define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_SESSIONS 2048 @@ -311,6 +396,7 @@ struct rte_crypto_vdev_init_params { unsigned max_nb_queue_pairs; unsigned max_nb_sessions; uint8_t socket_id; + char name[RTE_CRYPTODEV_NAME_MAX_LEN]; }; /** @@ -365,8 +451,30 @@ rte_cryptodev_get_dev_id(const char *name); extern uint8_t rte_cryptodev_count(void); +/** + * Get the number of crypto devices of a given type. + * + * @param type type of device. + * + * @return + * Returns the number of crypto devices of that type. + */ extern uint8_t rte_cryptodev_count_devtype(enum rte_cryptodev_type type); + +/** + * Get the number and identifiers of attached crypto devices. + * + * @param dev_name device name. + * @param devices output devices identifiers. + * @param nb_devices maximal number of devices. + * + * @return + * Returns the number of attached crypto devices. + */ +uint8_t +rte_cryptodev_devices_get(const char *dev_name, uint8_t *devices, + uint8_t nb_devices); /* * Return the NUMA socket to which a device is connected * @@ -621,8 +729,8 @@ struct rte_cryptodev { /**< Functions exported by PMD */ uint64_t feature_flags; /**< Supported features */ - struct rte_pci_device *pci_dev; - /**< PCI info. supplied by probing */ + struct rte_device *device; + /**< Backing device */ enum rte_cryptodev_type dev_type; /**< Crypto device type */ @@ -635,10 +743,6 @@ struct rte_cryptodev { /**< Flag indicating the device is attached */ } __rte_cache_aligned; - -#define RTE_CRYPTODEV_NAME_MAX_LEN (64) -/**< Max length of name of crypto PMD */ - /** * * The data part, with no function pointers, associated with each device. @@ -818,6 +922,36 @@ extern struct rte_cryptodev_sym_session * rte_cryptodev_sym_session_free(uint8_t dev_id, struct rte_cryptodev_sym_session *session); +/** + * Attach a symmetric session to a queue pair. + * + * @param qp_id Queue pair to which session will be attached. + * @param session Session pointer previously allocated by + * *rte_cryptodev_sym_session_create*. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_cryptodev_queue_pair_attach_sym_session(uint16_t qp_id, + struct rte_cryptodev_sym_session *session); + +/** + * Detach a symmetric session from a queue pair. + * + * @param qp_id Queue pair to which session is attached. + * @param session Session pointer previously allocated by + * *rte_cryptodev_sym_session_create*. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_cryptodev_queue_pair_detach_sym_session(uint16_t qp_id, + struct rte_cryptodev_sym_session *session); +
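The attach/detach pair above exists for PMDs that keep per-queue-pair session state; the corresponding driver hooks added to rte_cryptodev_ops below are documented as optional. A hedged sketch of the intended call ordering, assuming dev_id and xform were configured elsewhere and queue pair 0 is already set up:

#include <errno.h>
#include <rte_cryptodev.h>

static int
run_session_on_qp0(uint8_t dev_id, struct rte_crypto_sym_xform *xform)
{
	struct rte_cryptodev_sym_session *sess;

	sess = rte_cryptodev_sym_session_create(dev_id, xform);
	if (sess == NULL)
		return -ENOMEM;

	/* pin the session to queue pair 0 before enqueuing ops on it */
	if (rte_cryptodev_queue_pair_attach_sym_session(0, sess) != 0) {
		rte_cryptodev_sym_session_free(dev_id, sess);
		return -EINVAL;
	}

	/* ... enqueue/dequeue rte_crypto_op bursts on queue pair 0 ... */

	rte_cryptodev_queue_pair_detach_sym_session(0, sess);
	rte_cryptodev_sym_session_free(dev_id, sess);
	return 0;
}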
#ifdef __cplusplus } diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h index c6a57945..17ef37c7 100644 --- a/lib/librte_cryptodev/rte_cryptodev_pmd.h +++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h @@ -57,14 +57,6 @@ extern "C" { #include "rte_crypto.h" #include "rte_cryptodev.h" - -#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG -#define RTE_PMD_DEBUG_TRACE(...) \ - rte_pmd_debug_trace(__func__, __VA_ARGS__) -#else -#define RTE_PMD_DEBUG_TRACE(...) -#endif - struct rte_cryptodev_session { RTE_STD_C11 struct { @@ -160,11 +152,8 @@ extern struct rte_cryptodev_global *rte_cryptodev_globals; * @return * - The rte_cryptodev structure pointer for the given device ID. */ -static inline struct rte_cryptodev * -rte_cryptodev_pmd_get_dev(uint8_t dev_id) -{ - return &rte_cryptodev_globals->devs[dev_id]; -} +struct rte_cryptodev * +rte_cryptodev_pmd_get_dev(uint8_t dev_id); /** * Get the rte_cryptodev structure device pointer for the named device. * @@ -174,25 +163,8 @@ rte_cryptodev_pmd_get_dev(uint8_t dev_id) * @return * - The rte_cryptodev structure pointer for the given device ID. */ -static inline struct rte_cryptodev * -rte_cryptodev_pmd_get_named_dev(const char *name) -{ - struct rte_cryptodev *dev; - unsigned i; - - if (name == NULL) - return NULL; - - for (i = 0; i < rte_cryptodev_globals->max_devs; i++) { - dev = &rte_cryptodev_globals->devs[i]; - - if ((dev->attached == RTE_CRYPTODEV_ATTACHED) && - (strcmp(dev->data->name, name) == 0)) - return dev; - } - - return NULL; -} +struct rte_cryptodev * +rte_cryptodev_pmd_get_named_dev(const char *name); /** * Validate if the crypto device index is valid attached crypto device. * @@ -202,20 +174,8 @@ rte_cryptodev_pmd_get_named_dev(const char *name) * @return * - If the device index is valid (1) or not (0). */ -static inline unsigned -rte_cryptodev_pmd_is_valid_dev(uint8_t dev_id) -{ - struct rte_cryptodev *dev = NULL; - - if (dev_id >= rte_cryptodev_globals->nb_devs) - return 0; - - dev = rte_cryptodev_pmd_get_dev(dev_id); - if (dev->attached != RTE_CRYPTODEV_ATTACHED) - return 0; - else - return 1; -} +unsigned int +rte_cryptodev_pmd_is_valid_dev(uint8_t dev_id); /** * The pool of rte_cryptodev structures. @@ -233,10 +193,12 @@ extern struct rte_cryptodev *rte_cryptodevs; * Function used to configure device. * * @param dev Crypto device pointer + * @param config Crypto device configurations * * @return Returns 0 on success */ -typedef int (*cryptodev_configure_t)(struct rte_cryptodev *dev); +typedef int (*cryptodev_configure_t)(struct rte_cryptodev *dev, + struct rte_cryptodev_config *config); /** * Function used to start a configured device. @@ -413,6 +375,31 @@ typedef void * (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev, typedef void (*cryptodev_sym_free_session_t)(struct rte_cryptodev *dev, void *session_private); +/** + * Optional API for drivers to attach a session to a queue pair. + * @param dev Crypto device pointer + * @param qp_id queue pair id for attaching session + * @param priv_sess Pointer to cryptodev's private session structure + * @return + * - Return 0 on success + */ +typedef int (*cryptodev_sym_queue_pair_attach_session_t)( + struct rte_cryptodev *dev, + uint16_t qp_id, + void *session_private); + +/** + * Optional API for drivers to detach a session from a queue pair. + * @param dev Crypto device pointer + * @param qp_id queue pair id for detaching session + * @param priv_sess Pointer to cryptodev's private session structure + * @return + * - Return 0 on success + */ +typedef int (*cryptodev_sym_queue_pair_detach_session_t)( + struct rte_cryptodev *dev, + uint16_t qp_id, + void *session_private); /** Crypto device operations function pointer table */ struct rte_cryptodev_ops { @@ -447,6 +434,10 @@ struct rte_cryptodev_ops { /**< Configure a Crypto session. */ cryptodev_sym_free_session_t session_clear; /**< Clear a Crypto sessions private data. */ + cryptodev_sym_queue_pair_attach_session_t qp_attach_session; + /**< Attach session to queue pair. */ + cryptodev_sym_queue_pair_detach_session_t qp_detach_session; + /**< Detach session from queue pair.
*/ }; @@ -520,6 +511,13 @@ int rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv, */ int rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev); +/** + * @internal + * Create unique device name + */ +int +rte_cryptodev_pmd_create_dev_name(char *name, const char *dev_name_prefix); + #ifdef __cplusplus } #endif diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map index 9dde0e72..9ac510ec 100644 --- a/lib/librte_cryptodev/rte_cryptodev_version.map +++ b/lib/librte_cryptodev/rte_cryptodev_version.map @@ -46,3 +46,31 @@ DPDK_16.11 { rte_cryptodev_pci_remove; } DPDK_16.07; + +DPDK_17.02 { + global: + + rte_cryptodev_devices_get; + rte_cryptodev_pmd_create_dev_name; + rte_cryptodev_pmd_get_dev; + rte_cryptodev_pmd_get_named_dev; + rte_cryptodev_pmd_is_valid_dev; + rte_cryptodev_sym_capability_check_auth; + rte_cryptodev_sym_capability_check_cipher; + rte_cryptodev_sym_capability_get; + rte_crypto_auth_algorithm_strings; + rte_crypto_auth_operation_strings; + rte_crypto_cipher_algorithm_strings; + rte_crypto_cipher_operation_strings; + +} DPDK_16.11; + +DPDK_17.05 { + global: + + rte_cryptodev_get_auth_algo_enum; + rte_cryptodev_get_cipher_algo_enum; + rte_cryptodev_queue_pair_attach_sym_session; + rte_cryptodev_queue_pair_detach_sym_session; + +} DPDK_17.02; diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile index 4c9af172..3ffb911c 100644 --- a/lib/librte_distributor/Makefile +++ b/lib/librte_distributor/Makefile @@ -42,13 +42,20 @@ EXPORT_MAP := rte_distributor_version.map LIBABIVER := 1 # all source are stored in SRCS-y -SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) := rte_distributor.c +SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) := rte_distributor_v20.c +SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor.c +ifeq ($(CONFIG_RTE_ARCH_X86),y) +SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_sse.c +# distributor SIMD algo needs SSE4.2 support +ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSE4_2,$(CFLAGS)),) +CFLAGS_rte_distributor_match_sse.o += -msse4.2 +endif +else +SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_generic.c +endif + # install this header file SYMLINK-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR)-include := rte_distributor.h -# this lib needs eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += lib/librte_eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += lib/librte_mbuf - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c index f3f778c9..e4dfa7f0 100644 --- a/lib/librte_distributor/rte_distributor.c +++ b/lib/librte_distributor/rte_distributor.c @@ -1,8 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. + * Copyright(c) 2017 Intel Corporation. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -36,367 +35,492 @@ #include <string.h> #include <rte_mbuf.h> #include <rte_memory.h> +#include <rte_cycles.h> +#include <rte_compat.h> #include <rte_memzone.h> #include <rte_errno.h> #include <rte_string_fns.h> #include <rte_eal_memconfig.h> +#include <rte_compat.h> +#include "rte_distributor_private.h" #include "rte_distributor.h" +#include "rte_distributor_v20.h" +#include "rte_distributor_v1705.h" -#define NO_FLAGS 0 -#define RTE_DISTRIB_PREFIX "DT_" - -/* we will use the bottom four bits of pointer for flags, shifting out - * the top four bits to make room (since a 64-bit pointer actually only uses - * 48 bits). An arithmetic-right-shift will then appropriately restore the - * original pointer value with proper sign extension into the top bits. */ -#define RTE_DISTRIB_FLAG_BITS 4 -#define RTE_DISTRIB_FLAGS_MASK (0x0F) -#define RTE_DISTRIB_NO_BUF 0 /**< empty flags: no buffer requested */ -#define RTE_DISTRIB_GET_BUF (1) /**< worker requests a buffer, returns old */ -#define RTE_DISTRIB_RETURN_BUF (2) /**< worker returns a buffer, no request */ - -#define RTE_DISTRIB_BACKLOG_SIZE 8 -#define RTE_DISTRIB_BACKLOG_MASK (RTE_DISTRIB_BACKLOG_SIZE - 1) - -#define RTE_DISTRIB_MAX_RETURNS 128 -#define RTE_DISTRIB_RETURNS_MASK (RTE_DISTRIB_MAX_RETURNS - 1) - -/** - * Maximum number of workers allowed. - * Be aware of increasing the limit, becaus it is limited by how we track - * in-flight tags. See @in_flight_bitmask and @rte_distributor_process - */ -#define RTE_DISTRIB_MAX_WORKERS 64 - -/** - * Buffer structure used to pass the pointer data between cores. This is cache - * line aligned, but to improve performance and prevent adjacent cache-line - * prefetches of buffers for other workers, e.g. when worker 1's buffer is on - * the next cache line to worker 0, we pad this out to three cache lines. - * Only 64-bits of the memory is actually used though. - */ -union rte_distributor_buffer { - volatile int64_t bufptr64; - char pad[RTE_CACHE_LINE_SIZE*3]; -} __rte_cache_aligned; - -struct rte_distributor_backlog { - unsigned start; - unsigned count; - int64_t pkts[RTE_DISTRIB_BACKLOG_SIZE]; -}; +TAILQ_HEAD(rte_dist_burst_list, rte_distributor); -struct rte_distributor_returned_pkts { - unsigned start; - unsigned count; - struct rte_mbuf *mbufs[RTE_DISTRIB_MAX_RETURNS]; +static struct rte_tailq_elem rte_dist_burst_tailq = { + .name = "RTE_DIST_BURST", }; +EAL_REGISTER_TAILQ(rte_dist_burst_tailq) -struct rte_distributor { - TAILQ_ENTRY(rte_distributor) next; /**< Next in list. */ - - char name[RTE_DISTRIBUTOR_NAMESIZE]; /**< Name of the ring. */ - unsigned num_workers; /**< Number of workers polling */ - - uint32_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS]; - /**< Tracks the tag being processed per core */ - uint64_t in_flight_bitmask; - /**< on/off bits for in-flight tags. - * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 then - * the bitmask has to expand. 
- */ +/**** APIs called by workers ****/ - struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS]; +/**** Burst Packet APIs called by workers ****/ - union rte_distributor_buffer bufs[RTE_DISTRIB_MAX_WORKERS]; +void +rte_distributor_request_pkt_v1705(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **oldpkt, + unsigned int count) +{ + struct rte_distributor_buffer *buf = &(d->bufs[worker_id]); + unsigned int i; - struct rte_distributor_returned_pkts returns; -}; + volatile int64_t *retptr64; -TAILQ_HEAD(rte_distributor_list, rte_distributor); + if (unlikely(d->alg_type == RTE_DIST_ALG_SINGLE)) { + rte_distributor_request_pkt_v20(d->d_v20, + worker_id, oldpkt[0]); + return; + } -static struct rte_tailq_elem rte_distributor_tailq = { - .name = "RTE_DISTRIBUTOR", -}; -EAL_REGISTER_TAILQ(rte_distributor_tailq) + retptr64 = &(buf->retptr64[0]); + /* Spin while handshake bits are set (scheduler clears it) */ + while (unlikely(*retptr64 & RTE_DISTRIB_GET_BUF)) { + rte_pause(); + uint64_t t = rte_rdtsc()+100; -/**** APIs called by workers ****/ + while (rte_rdtsc() < t) + rte_pause(); + } -void -rte_distributor_request_pkt(struct rte_distributor *d, - unsigned worker_id, struct rte_mbuf *oldpkt) -{ - union rte_distributor_buffer *buf = &d->bufs[worker_id]; - int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS) - | RTE_DISTRIB_GET_BUF; - while (unlikely(buf->bufptr64 & RTE_DISTRIB_FLAGS_MASK)) - rte_pause(); - buf->bufptr64 = req; + /* + * OK, if we've got here, then the scheduler has just cleared the + * handshake bits. Populate the retptrs with returning packets. + */ + + for (i = count; i < RTE_DIST_BURST_SIZE; i++) + buf->retptr64[i] = 0; + + /* Set Return bit for each packet returned */ + for (i = count; i-- > 0; ) + buf->retptr64[i] = + (((int64_t)(uintptr_t)(oldpkt[i])) << + RTE_DISTRIB_FLAG_BITS) | RTE_DISTRIB_RETURN_BUF; + + /* + * Finally, set the GET_BUF to signal to distributor that cache + * line is ready for processing + */ + *retptr64 |= RTE_DISTRIB_GET_BUF; } +BIND_DEFAULT_SYMBOL(rte_distributor_request_pkt, _v1705, 17.05); +MAP_STATIC_SYMBOL(void rte_distributor_request_pkt(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **oldpkt, + unsigned int count), + rte_distributor_request_pkt_v1705); -struct rte_mbuf * -rte_distributor_poll_pkt(struct rte_distributor *d, - unsigned worker_id) +int +rte_distributor_poll_pkt_v1705(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **pkts) { - union rte_distributor_buffer *buf = &d->bufs[worker_id]; - if (buf->bufptr64 & RTE_DISTRIB_GET_BUF) - return NULL; + struct rte_distributor_buffer *buf = &d->bufs[worker_id]; + uint64_t ret; + int count = 0; + unsigned int i; + + if (unlikely(d->alg_type == RTE_DIST_ALG_SINGLE)) { + pkts[0] = rte_distributor_poll_pkt_v20(d->d_v20, worker_id); + return (pkts[0]) ? 
1 : 0; + } + + /* If bit is set, return */ + if (buf->bufptr64[0] & RTE_DISTRIB_GET_BUF) + return -1; /* since bufptr64 is signed, this should be an arithmetic shift */ - int64_t ret = buf->bufptr64 >> RTE_DISTRIB_FLAG_BITS; - return (struct rte_mbuf *)((uintptr_t)ret); + for (i = 0; i < RTE_DIST_BURST_SIZE; i++) { + if (likely(buf->bufptr64[i] & RTE_DISTRIB_VALID_BUF)) { + ret = buf->bufptr64[i] >> RTE_DISTRIB_FLAG_BITS; + pkts[count++] = (struct rte_mbuf *)((uintptr_t)(ret)); + } + } + + /* + * so now we've got the contents of the cacheline into an array of + * mbuf pointers, so toggle the bit so scheduler can start working + * on the next cacheline while we're working. + */ + buf->bufptr64[0] |= RTE_DISTRIB_GET_BUF; + + return count; } +BIND_DEFAULT_SYMBOL(rte_distributor_poll_pkt, _v1705, 17.05); +MAP_STATIC_SYMBOL(int rte_distributor_poll_pkt(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **pkts), + rte_distributor_poll_pkt_v1705); -struct rte_mbuf * -rte_distributor_get_pkt(struct rte_distributor *d, - unsigned worker_id, struct rte_mbuf *oldpkt) +int +rte_distributor_get_pkt_v1705(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **pkts, + struct rte_mbuf **oldpkt, unsigned int return_count) { - struct rte_mbuf *ret; - rte_distributor_request_pkt(d, worker_id, oldpkt); - while ((ret = rte_distributor_poll_pkt(d, worker_id)) == NULL) - rte_pause(); - return ret; + int count; + + if (unlikely(d->alg_type == RTE_DIST_ALG_SINGLE)) { + if (return_count <= 1) { + pkts[0] = rte_distributor_get_pkt_v20(d->d_v20, + worker_id, oldpkt[0]); + return (pkts[0]) ? 1 : 0; + } else + return -EINVAL; + } + + rte_distributor_request_pkt(d, worker_id, oldpkt, return_count); + + count = rte_distributor_poll_pkt(d, worker_id, pkts); + while (count == -1) { + uint64_t t = rte_rdtsc() + 100; + + while (rte_rdtsc() < t) + rte_pause(); + + count = rte_distributor_poll_pkt(d, worker_id, pkts); + } + return count; } +BIND_DEFAULT_SYMBOL(rte_distributor_get_pkt, _v1705, 17.05); +MAP_STATIC_SYMBOL(int rte_distributor_get_pkt(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **pkts, + struct rte_mbuf **oldpkt, unsigned int return_count), + rte_distributor_get_pkt_v1705); int -rte_distributor_return_pkt(struct rte_distributor *d, - unsigned worker_id, struct rte_mbuf *oldpkt) +rte_distributor_return_pkt_v1705(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **oldpkt, int num) { - union rte_distributor_buffer *buf = &d->bufs[worker_id]; - uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS) - | RTE_DISTRIB_RETURN_BUF; - buf->bufptr64 = req; - return 0; -} + struct rte_distributor_buffer *buf = &d->bufs[worker_id]; + unsigned int i; + + if (unlikely(d->alg_type == RTE_DIST_ALG_SINGLE)) { + if (num == 1) + return rte_distributor_return_pkt_v20(d->d_v20, + worker_id, oldpkt[0]); + else + return -EINVAL; + } -/**** APIs called on distributor core ***/ + for (i = 0; i < RTE_DIST_BURST_SIZE; i++) + /* Switch off the return bit first */ + buf->retptr64[i] &= ~RTE_DISTRIB_RETURN_BUF; -/* as name suggests, adds a packet to the backlog for a particular worker */ -static int -add_to_backlog(struct rte_distributor_backlog *bl, int64_t item) -{ - if (bl->count == RTE_DISTRIB_BACKLOG_SIZE) - return -1; + for (i = num; i-- > 0; ) + buf->retptr64[i] = (((int64_t)(uintptr_t)oldpkt[i]) << + RTE_DISTRIB_FLAG_BITS) | RTE_DISTRIB_RETURN_BUF; + + /* set the GET_BUF bit even if we got no returns */ + buf->retptr64[0] |= RTE_DISTRIB_GET_BUF; - bl->pkts[(bl->start + bl->count++) & (RTE_DISTRIB_BACKLOG_MASK)] - = item; return 0; } +BIND_DEFAULT_SYMBOL(rte_distributor_return_pkt, _v1705, 17.05); +MAP_STATIC_SYMBOL(int rte_distributor_return_pkt(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **oldpkt, int num), + rte_distributor_return_pkt_v1705);
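All of the worker-side calls above rely on the pointer/flag packing inherited from the original design: each 64-bit slot carries an mbuf pointer shifted up by RTE_DISTRIB_FLAG_BITS (4), with the handshake flags (GET_BUF, RETURN_BUF and the new VALID_BUF, defined in rte_distributor_private.h below) in the low bits. A worked sketch, where mb is any mbuf pointer:

/* pack: pointer in bits 63..4, flags in bits 3..0 */
int64_t slot = (((int64_t)(uintptr_t)mb) << RTE_DISTRIB_FLAG_BITS)
		| RTE_DISTRIB_RETURN_BUF;

/* unpack: the arithmetic right shift on the signed value sign-extends
 * the top bits, restoring the canonical 48-bit pointer */
struct rte_mbuf *mb2 =
	(struct rte_mbuf *)(uintptr_t)(slot >> RTE_DISTRIB_FLAG_BITS);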
-/* takes the next packet for a worker off the backlog */ -static int64_t -backlog_pop(struct rte_distributor_backlog *bl) -{ - bl->count--; - return bl->pkts[bl->start++ & RTE_DISTRIB_BACKLOG_MASK]; -} +/**** APIs called on distributor core ***/ /* stores a packet returned from a worker inside the returns array */ static inline void store_return(uintptr_t oldbuf, struct rte_distributor *d, - unsigned *ret_start, unsigned *ret_count) + unsigned int *ret_start, unsigned int *ret_count) { - /* store returns in a circular buffer - code is branch-free */ + if (!oldbuf) + return; + /* store returns in a circular buffer */ d->returns.mbufs[(*ret_start + *ret_count) & RTE_DISTRIB_RETURNS_MASK] = (void *)oldbuf; - *ret_start += (*ret_count == RTE_DISTRIB_RETURNS_MASK) & !!(oldbuf); - *ret_count += (*ret_count != RTE_DISTRIB_RETURNS_MASK) & !!(oldbuf); + *ret_start += (*ret_count == RTE_DISTRIB_RETURNS_MASK); + *ret_count += (*ret_count != RTE_DISTRIB_RETURNS_MASK); } -static inline void -handle_worker_shutdown(struct rte_distributor *d, unsigned wkr) +/* + * Match the flow_ids (tags) of the incoming packets to the flow_ids + * of the inflight packets (both inflight on the workers and in each worker + * backlog). This will then allow us to pin those packets to the relevant + * workers to give us our atomic flow pinning. + */ +void +find_match_scalar(struct rte_distributor *d, + uint16_t *data_ptr, + uint16_t *output_ptr) { - d->in_flight_tags[wkr] = 0; - d->in_flight_bitmask &= ~(1UL << wkr); - d->bufs[wkr].bufptr64 = 0; - if (unlikely(d->backlog[wkr].count != 0)) { - /* On return of a packet, we need to move the - * queued packets for this core elsewhere. - * Easiest solution is to set things up for - * a recursive call. That will cause those - * packets to be queued up for the next free - * core, i.e. it will return as soon as a - * core becomes free to accept the first - * packet, as subsequent ones will be added to - * the backlog for that core. - */ - struct rte_mbuf *pkts[RTE_DISTRIB_BACKLOG_SIZE]; - unsigned i; - struct rte_distributor_backlog *bl = &d->backlog[wkr]; - - for (i = 0; i < bl->count; i++) { - unsigned idx = (bl->start + i) & - RTE_DISTRIB_BACKLOG_MASK; - pkts[i] = (void *)((uintptr_t)(bl->pkts[idx] >> + struct rte_distributor_backlog *bl; + uint16_t i, j, w; + + /* + * Function overview: + * 1. Loop through all worker ID's + * 2. Compare the current inflights to the incoming tags + * 3. Compare the current backlog to the incoming tags + * 4. Add any matches to the output + */ + + for (j = 0 ; j < RTE_DIST_BURST_SIZE; j++) + output_ptr[j] = 0; + + for (i = 0; i < d->num_workers; i++) { + bl = &d->backlog[i]; + + for (j = 0; j < RTE_DIST_BURST_SIZE ; j++) + for (w = 0; w < RTE_DIST_BURST_SIZE; w++) + if (d->in_flight_tags[i][j] == data_ptr[w]) { + output_ptr[j] = i+1; + break; + } + for (j = 0; j < RTE_DIST_BURST_SIZE; j++) + for (w = 0; w < RTE_DIST_BURST_SIZE; w++) + if (bl->tags[j] == data_ptr[w]) { + output_ptr[j] = i+1; + break; + } + } + + /* + * At this stage, the output contains 8 16-bit values, with + * each non-zero value containing the worker ID on which the + * corresponding flow is pinned to.
+ */ +} + + +/* + * When the handshake bits indicate that there are packets coming + * back from the worker, this function is called to copy and store + * the valid returned pointers (store_return). + */ +static unsigned int +handle_returns(struct rte_distributor *d, unsigned int wkr) +{ + struct rte_distributor_buffer *buf = &(d->bufs[wkr]); + uintptr_t oldbuf; + unsigned int ret_start = d->returns.start, + ret_count = d->returns.count; + unsigned int count = 0; + unsigned int i; + + if (buf->retptr64[0] & RTE_DISTRIB_GET_BUF) { + for (i = 0; i < RTE_DIST_BURST_SIZE; i++) { + if (buf->retptr64[i] & RTE_DISTRIB_RETURN_BUF) { + oldbuf = ((uintptr_t)(buf->retptr64[i] >> RTE_DISTRIB_FLAG_BITS)); + /* store returns in a circular buffer */ + store_return(oldbuf, d, &ret_start, &ret_count); + count++; + buf->retptr64[i] &= ~RTE_DISTRIB_RETURN_BUF; + } } - /* recursive call. - * Note that the tags were set before first level call - * to rte_distributor_process. - */ - rte_distributor_process(d, pkts, i); - bl->count = bl->start = 0; + d->returns.start = ret_start; + d->returns.count = ret_count; + /* Clear for the worker to populate with more returns */ + buf->retptr64[0] = 0; } + return count; } -/* this function is called when process() fn is called without any new - * packets. It goes through all the workers and clears any returned packets - * to do a partial flush. +/* + * This function releases a burst (cache line) to a worker. + * It is called from the process function when a cacheline is + * full to make room for more packets for that worker, or when + * all packets have been assigned to bursts and need to be flushed + * to the workers. + * It also needs to wait for any outstanding packets from the worker + * before sending out new packets. */ -static int -process_returns(struct rte_distributor *d) +static unsigned int +release(struct rte_distributor *d, unsigned int wkr) { - unsigned wkr; - unsigned flushed = 0; - unsigned ret_start = d->returns.start, - ret_count = d->returns.count; + struct rte_distributor_buffer *buf = &(d->bufs[wkr]); + unsigned int i; - for (wkr = 0; wkr < d->num_workers; wkr++) { + while (!(d->bufs[wkr].bufptr64[0] & RTE_DISTRIB_GET_BUF)) + rte_pause(); - const int64_t data = d->bufs[wkr].bufptr64; - uintptr_t oldbuf = 0; + handle_returns(d, wkr); - if (data & RTE_DISTRIB_GET_BUF) { - flushed++; - if (d->backlog[wkr].count) - d->bufs[wkr].bufptr64 = - backlog_pop(&d->backlog[wkr]); - else { - d->bufs[wkr].bufptr64 = RTE_DISTRIB_GET_BUF; - d->in_flight_tags[wkr] = 0; - d->in_flight_bitmask &= ~(1UL << wkr); - } - oldbuf = data >> RTE_DISTRIB_FLAG_BITS; - } else if (data & RTE_DISTRIB_RETURN_BUF) { - handle_worker_shutdown(d, wkr); - oldbuf = data >> RTE_DISTRIB_FLAG_BITS; - } + buf->count = 0; - store_return(oldbuf, d, &ret_start, &ret_count); + for (i = 0; i < d->backlog[wkr].count; i++) { + d->bufs[wkr].bufptr64[i] = d->backlog[wkr].pkts[i] | + RTE_DISTRIB_GET_BUF | RTE_DISTRIB_VALID_BUF; + d->in_flight_tags[wkr][i] = d->backlog[wkr].tags[i]; + } + buf->count = i; + for ( ; i < RTE_DIST_BURST_SIZE ; i++) { + buf->bufptr64[i] = RTE_DISTRIB_GET_BUF; + d->in_flight_tags[wkr][i] = 0; } - d->returns.start = ret_start; - d->returns.count = ret_count; + d->backlog[wkr].count = 0; + + /* Clear the GET bit */ + buf->bufptr64[0] &= ~RTE_DISTRIB_GET_BUF; + return buf->count; - return flushed; } + /* process a set of packets to distribute them to workers */ int -rte_distributor_process(struct rte_distributor *d, - struct rte_mbuf **mbufs, unsigned num_mbufs) 
+rte_distributor_process_v1705(struct rte_distributor *d, + struct rte_mbuf **mbufs, unsigned int num_mbufs) { - unsigned next_idx = 0; - unsigned wkr = 0; + unsigned int next_idx = 0; + static unsigned int wkr; struct rte_mbuf *next_mb = NULL; int64_t next_value = 0; - uint32_t new_tag = 0; - unsigned ret_start = d->returns.start, - ret_count = d->returns.count; + uint16_t new_tag = 0; + uint16_t flows[RTE_DIST_BURST_SIZE] __rte_cache_aligned; + unsigned int i, j, w, wid; + + if (d->alg_type == RTE_DIST_ALG_SINGLE) { + /* Call the old API */ + return rte_distributor_process_v20(d->d_v20, mbufs, num_mbufs); + } + + if (unlikely(num_mbufs == 0)) { + /* Flush out all non-full cache-lines to workers. */ + for (wid = 0 ; wid < d->num_workers; wid++) { + if ((d->bufs[wid].bufptr64[0] & RTE_DISTRIB_GET_BUF)) { + release(d, wid); + handle_returns(d, wid); + } + } + return 0; + } - if (unlikely(num_mbufs == 0)) - return process_returns(d); + while (next_idx < num_mbufs) { + uint16_t matches[RTE_DIST_BURST_SIZE]; + unsigned int pkts; - while (next_idx < num_mbufs || next_mb != NULL) { + if (d->bufs[wkr].bufptr64[0] & RTE_DISTRIB_GET_BUF) + d->bufs[wkr].count = 0; - int64_t data = d->bufs[wkr].bufptr64; - uintptr_t oldbuf = 0; + if ((num_mbufs - next_idx) < RTE_DIST_BURST_SIZE) + pkts = num_mbufs - next_idx; + else + pkts = RTE_DIST_BURST_SIZE; + + for (i = 0; i < pkts; i++) { + if (mbufs[next_idx + i]) { + /* flows have to be non-zero */ + flows[i] = mbufs[next_idx + i]->hash.usr | 1; + } else + flows[i] = 0; + } + for (; i < RTE_DIST_BURST_SIZE; i++) + flows[i] = 0; + + switch (d->dist_match_fn) { + case RTE_DIST_MATCH_VECTOR: + find_match_vec(d, &flows[0], &matches[0]); + break; + default: + find_match_scalar(d, &flows[0], &matches[0]); + } + + /* + * Matches array now contain the intended worker ID (+1) of + * the incoming packets. Any zeroes need to be assigned + * workers. + */ + + for (j = 0; j < pkts; j++) { - if (!next_mb) { next_mb = mbufs[next_idx++]; - next_value = (((int64_t)(uintptr_t)next_mb) - << RTE_DISTRIB_FLAG_BITS); + next_value = (((int64_t)(uintptr_t)next_mb) << + RTE_DISTRIB_FLAG_BITS); /* * User is advocated to set tag vaue for each * mbuf before calling rte_distributor_process. * User defined tags are used to identify flows, * or sessions. */ - new_tag = next_mb->hash.usr; + /* flows MUST be non-zero */ + new_tag = (uint16_t)(next_mb->hash.usr) | 1; /* - * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 - * then the size of match has to be expanded. 
- */ - uint64_t match = 0; - unsigned i; - /* - * to scan for a match use "xor" and "not" to get a 0/1 - * value, then use shifting to merge to single "match" - * variable, where a one-bit indicates a match for the - * worker given by the bit-position + * Uncommenting the next line will cause the find_match + * function to be optimised out, making this function + * do parallel (non-atomic) distribution */ - for (i = 0; i < d->num_workers; i++) - match |= (!(d->in_flight_tags[i] ^ new_tag) - << i); - - /* Only turned-on bits are considered as match */ - match &= d->in_flight_bitmask; - - if (match) { - next_mb = NULL; - unsigned worker = __builtin_ctzl(match); - if (add_to_backlog(&d->backlog[worker], - next_value) < 0) - next_idx--; + /* matches[j] = 0; */ + + if (matches[j]) { + struct rte_distributor_backlog *bl = + &d->backlog[matches[j]-1]; + if (unlikely(bl->count == + RTE_DIST_BURST_SIZE)) { + release(d, matches[j]-1); + } + + /* Add to worker that already has flow */ + unsigned int idx = bl->count++; + + bl->tags[idx] = new_tag; + bl->pkts[idx] = next_value; + + } else { + struct rte_distributor_backlog *bl = + &d->backlog[wkr]; + if (unlikely(bl->count == + RTE_DIST_BURST_SIZE)) { + release(d, wkr); + } + + /* Add to the current worker */ + unsigned int idx = bl->count++; + + bl->tags[idx] = new_tag; + bl->pkts[idx] = next_value; + /* + * Now that we've just added an unpinned flow + * to a worker, we need to ensure that all + * other packets with that same flow will go + * to the same worker in this burst. + */ + for (w = j; w < pkts; w++) + if (flows[w] == new_tag) + matches[w] = wkr+1; } } - - if ((data & RTE_DISTRIB_GET_BUF) && - (d->backlog[wkr].count || next_mb)) { - - if (d->backlog[wkr].count) - d->bufs[wkr].bufptr64 = - backlog_pop(&d->backlog[wkr]); - - else { - d->bufs[wkr].bufptr64 = next_value; - d->in_flight_tags[wkr] = new_tag; - d->in_flight_bitmask |= (1UL << wkr); - next_mb = NULL; - } - oldbuf = data >> RTE_DISTRIB_FLAG_BITS; - } else if (data & RTE_DISTRIB_RETURN_BUF) { - handle_worker_shutdown(d, wkr); - oldbuf = data >> RTE_DISTRIB_FLAG_BITS; - } - - /* store returns in a circular buffer */ - store_return(oldbuf, d, &ret_start, &ret_count); - - if (++wkr == d->num_workers) + wkr++; + if (wkr >= d->num_workers) wkr = 0; } - /* to finish, check all workers for backlog and schedule work for them - * if they are ready */ - for (wkr = 0; wkr < d->num_workers; wkr++) - if (d->backlog[wkr].count && - (d->bufs[wkr].bufptr64 & RTE_DISTRIB_GET_BUF)) { - - int64_t oldbuf = d->bufs[wkr].bufptr64 >> - RTE_DISTRIB_FLAG_BITS; - store_return(oldbuf, d, &ret_start, &ret_count); - d->bufs[wkr].bufptr64 = backlog_pop(&d->backlog[wkr]); - }
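One subtlety in the loop above is the repeated "| 1": a tag of zero marks an empty slot in the in-flight and backlog tag arrays, so user-supplied tags are forced non-zero before matching. The trade-off is that the tag is also truncated to 16 bits with its low bit forced set, so two distinct hash.usr values can pin to the same flow, e.g. (mb_a and mb_b being any mbufs):

/* effective tag used for flow pinning */
uint16_t tag_a = (uint16_t)(mb_a->hash.usr) | 1;	/* 0x1234 -> 0x1235 */
uint16_t tag_b = (uint16_t)(mb_b->hash.usr) | 1;	/* 0x1235 -> 0x1235, same flow */

This is the 15-bit flow ID mentioned in the rte_distributor.h documentation further down.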
+ /* Flush out all non-full cache-lines to workers. */ + for (wid = 0 ; wid < d->num_workers; wid++) + if ((d->bufs[wid].bufptr64[0] & RTE_DISTRIB_GET_BUF)) + release(d, wid); - d->returns.start = ret_start; - d->returns.count = ret_count; return num_mbufs; } +BIND_DEFAULT_SYMBOL(rte_distributor_process, _v1705, 17.05); +MAP_STATIC_SYMBOL(int rte_distributor_process(struct rte_distributor *d, + struct rte_mbuf **mbufs, unsigned int num_mbufs), + rte_distributor_process_v1705); /* return to the caller, packets returned from workers */ int -rte_distributor_returned_pkts(struct rte_distributor *d, - struct rte_mbuf **mbufs, unsigned max_mbufs) +rte_distributor_returned_pkts_v1705(struct rte_distributor *d, + struct rte_mbuf **mbufs, unsigned int max_mbufs) { struct rte_distributor_returned_pkts *returns = &d->returns; - unsigned retval = (max_mbufs < returns->count) ? + unsigned int retval = (max_mbufs < returns->count) ? max_mbufs : returns->count; - unsigned i; + unsigned int i; + + if (d->alg_type == RTE_DIST_ALG_SINGLE) { + /* Call the old API */ + return rte_distributor_returned_pkts_v20(d->d_v20, + mbufs, max_mbufs); + } for (i = 0; i < retval; i++) { - unsigned idx = (returns->start + i) & RTE_DISTRIB_RETURNS_MASK; + unsigned int idx = (returns->start + i) & + RTE_DISTRIB_RETURNS_MASK; + mbufs[i] = returns->mbufs[idx]; } returns->start += i; @@ -404,15 +528,19 @@ rte_distributor_returned_pkts(struct rte_distributor *d, return retval; } - -/* return the number of packets in-flight in a distributor, i.e. packets - * being workered on or queued up in a backlog. */ -static inline unsigned +BIND_DEFAULT_SYMBOL(rte_distributor_returned_pkts, _v1705, 17.05); +MAP_STATIC_SYMBOL(int rte_distributor_returned_pkts(struct rte_distributor *d, + struct rte_mbuf **mbufs, unsigned int max_mbufs), + rte_distributor_returned_pkts_v1705); + +/* + * Return the number of packets in-flight in a distributor, i.e. packets + * being worked on or queued up in a backlog. + */ +static inline unsigned int total_outstanding(const struct rte_distributor *d) { - unsigned wkr, total_outstanding; - - total_outstanding = __builtin_popcountl(d->in_flight_bitmask); + unsigned int wkr, total_outstanding = 0; for (wkr = 0; wkr < d->num_workers; wkr++) total_outstanding += d->backlog[wkr].count; @@ -420,45 +548,96 @@ total_outstanding(const struct rte_distributor *d) return total_outstanding; } -/* flush the distributor, so that there are no outstanding packets in flight or - * queued up. */ +/* + * Flush the distributor, so that there are no outstanding packets in flight or + * queued up. + */ int -rte_distributor_flush(struct rte_distributor *d) +rte_distributor_flush_v1705(struct rte_distributor *d) { - const unsigned flushed = total_outstanding(d); + unsigned int flushed; + unsigned int wkr; + + if (d->alg_type == RTE_DIST_ALG_SINGLE) { + /* Call the old API */ + return rte_distributor_flush_v20(d->d_v20); + } + + flushed = total_outstanding(d); while (total_outstanding(d) > 0) rte_distributor_process(d, NULL, 0); + /* + * Send empty burst to all workers to allow them to exit + * gracefully, should they need to. + */ + rte_distributor_process(d, NULL, 0); + + for (wkr = 0; wkr < d->num_workers; wkr++) + handle_returns(d, wkr); + + return flushed; } +BIND_DEFAULT_SYMBOL(rte_distributor_flush, _v1705, 17.05); +MAP_STATIC_SYMBOL(int rte_distributor_flush(struct rte_distributor *d), + rte_distributor_flush_v1705);
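Together, process(), returned_pkts() and flush() give the distribution lcore a simple loop shape. A hedged sketch of one iteration (the port constant, burst sizes and the get_flow_tag() classifier are illustrative only; 128 mirrors RTE_DISTRIB_MAX_RETURNS from the private header):

#include <rte_ethdev.h>
#include <rte_distributor.h>

extern uint32_t get_flow_tag(struct rte_mbuf *m);	/* hypothetical classifier */

static void
distribute_once(struct rte_distributor *d, uint8_t port)
{
	struct rte_mbuf *bufs[64], *done[128];
	uint16_t nb_rx, nb_done, i;

	nb_rx = rte_eth_rx_burst(port, 0, bufs, 64);
	for (i = 0; i < nb_rx; i++)
		bufs[i]->hash.usr = get_flow_tag(bufs[i]);

	rte_distributor_process(d, bufs, nb_rx);

	/* packets that workers have handed back, e.g. ready for TX */
	nb_done = rte_distributor_returned_pkts(d, done, 128);
	(void)nb_done;
}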
/* clears the internal returns array in the distributor */ void -rte_distributor_clear_returns(struct rte_distributor *d) +rte_distributor_clear_returns_v1705(struct rte_distributor *d) { - d->returns.start = d->returns.count = 0; -#ifndef __OPTIMIZE__ - memset(d->returns.mbufs, 0, sizeof(d->returns.mbufs)); -#endif + unsigned int wkr; + + if (d->alg_type == RTE_DIST_ALG_SINGLE) { + /* Call the old API */ + rte_distributor_clear_returns_v20(d->d_v20); + return; + } + + /* throw away returns, so workers can exit */ + for (wkr = 0; wkr < d->num_workers; wkr++) + d->bufs[wkr].retptr64[0] = 0; } +BIND_DEFAULT_SYMBOL(rte_distributor_clear_returns, _v1705, 17.05); +MAP_STATIC_SYMBOL(void rte_distributor_clear_returns(struct rte_distributor *d), + rte_distributor_clear_returns_v1705); /* creates a distributor instance */ struct rte_distributor * -rte_distributor_create(const char *name, - unsigned socket_id, - unsigned num_workers) +rte_distributor_create_v1705(const char *name, + unsigned int socket_id, + unsigned int num_workers, + unsigned int alg_type) { struct rte_distributor *d; - struct rte_distributor_list *distributor_list; + struct rte_dist_burst_list *dist_burst_list; char mz_name[RTE_MEMZONE_NAMESIZE]; const struct rte_memzone *mz; + unsigned int i; + + /* TODO Reorganise function properly around RTE_DIST_ALG_SINGLE/BURST */ /* compilation-time checks */ RTE_BUILD_BUG_ON((sizeof(*d) & RTE_CACHE_LINE_MASK) != 0); RTE_BUILD_BUG_ON((RTE_DISTRIB_MAX_WORKERS & 7) != 0); - RTE_BUILD_BUG_ON(RTE_DISTRIB_MAX_WORKERS > - sizeof(d->in_flight_bitmask) * CHAR_BIT); + + if (alg_type == RTE_DIST_ALG_SINGLE) { + d = malloc(sizeof(struct rte_distributor)); + if (d == NULL) { + rte_errno = ENOMEM; + return NULL; + } + d->d_v20 = rte_distributor_create_v20(name, + socket_id, num_workers); + if (d->d_v20 == NULL) { + free(d); + /* rte_errno will have been set */ + return NULL; + } + d->alg_type = alg_type; + return d; + } if (name == NULL || num_workers >= RTE_DISTRIB_MAX_WORKERS) { rte_errno = EINVAL; @@ -475,13 +654,34 @@ rte_distributor_create, d = mz->addr; snprintf(d->name, sizeof(d->name), "%s", name); d->num_workers = num_workers; + d->alg_type = alg_type; + +#if defined(RTE_ARCH_X86) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2)) + d->dist_match_fn = RTE_DIST_MATCH_VECTOR; + else +#endif + d->dist_match_fn = RTE_DIST_MATCH_SCALAR; + + /* + * Set up the backlog tags so they're pointing at the second cache + * line for performance during flow matching + */ + for (i = 0 ; i < num_workers ; i++) + d->backlog[i].tags = &d->in_flight_tags[i][RTE_DIST_BURST_SIZE]; + + dist_burst_list = RTE_TAILQ_CAST(rte_dist_burst_tailq.head, + rte_dist_burst_list); - distributor_list = RTE_TAILQ_CAST(rte_distributor_tailq.head, - rte_distributor_list); rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); - TAILQ_INSERT_TAIL(distributor_list, d, next); + TAILQ_INSERT_TAIL(dist_burst_list, d, next); rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); return d; } +BIND_DEFAULT_SYMBOL(rte_distributor_create, _v1705, 17.05); +MAP_STATIC_SYMBOL(struct rte_distributor *rte_distributor_create( + const char *name, unsigned int socket_id, + unsigned int num_workers, unsigned int alg_type), + rte_distributor_create_v1705);
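On the worker side, the burst API is normally driven with a single array passed as both pkts and oldpkt, so each call hands back the previous burst while collecting the next one. A minimal sketch, with launch plumbing and worker_id assignment assumed to happen elsewhere:

#include <rte_distributor.h>

static void
worker_loop(struct rte_distributor *d, unsigned int worker_id)
{
	struct rte_mbuf *pkts[8];	/* up to RTE_DIST_BURST_SIZE */
	int nb = 0;

	for (;;) {
		/* return the previous nb packets, block for new ones */
		nb = rte_distributor_get_pkt(d, worker_id, pkts, pkts, nb);

		/* ... work on pkts[0..nb-1]; packets sharing a tag are
		 * never in flight on two workers at once ... */
	}
}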
diff --git a/lib/librte_distributor/rte_distributor.h b/lib/librte_distributor/rte_distributor.h index 7d36bc8a..9b9efdbe 100644 --- a/lib/librte_distributor/rte_distributor.h +++ b/lib/librte_distributor/rte_distributor.h @@ -1,8 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. + * Copyright(c) 2017 Intel Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,8 +30,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _RTE_DISTRIBUTE_H_ -#define _RTE_DISTRIBUTE_H_ +#ifndef _RTE_DISTRIBUTOR_H_ +#define _RTE_DISTRIBUTOR_H_ /** * @file @@ -46,7 +45,12 @@ extern "C" { #endif -#define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Length of name for instance */ +/* Type of distribution (burst/single) */ +enum rte_distributor_alg_type { + RTE_DIST_ALG_BURST = 0, + RTE_DIST_ALG_SINGLE, + RTE_DIST_NUM_ALG_TYPES +}; struct rte_distributor; struct rte_mbuf; @@ -64,12 +68,17 @@ struct rte_mbuf; * @param num_workers * The maximum number of workers that will request packets from this * distributor + * @param alg_type + * Call the legacy API, or use the new burst API. The legacy API uses a + * 32-bit flow ID and works on a single packet at a time. The burst API + * uses a 15-bit flow ID and works on up to 8 packets at a time to workers. * @return * The newly created distributor instance */ struct rte_distributor * -rte_distributor_create(const char *name, unsigned socket_id, - unsigned num_workers); +rte_distributor_create(const char *name, unsigned int socket_id, + unsigned int num_workers, + unsigned int alg_type); /* *** APIS to be called on the distributor lcore *** */ /* @@ -85,7 +94,8 @@ rte_distributor_create(const char *name, unsigned socket_id, /** * Process a set of packets by distributing them among workers that request * packets. The distributor will ensure that no two packets that have the - * same flow id, or tag, in the mbuf will be procesed at the same time. + * same flow id, or tag, in the mbuf will be processed on different cores at + * the same time. * * The user is advocated to set tag for each mbuf before calling this function. * If user doesn't set the tag, the tag value can be various values depending on @@ -104,7 +114,7 @@ rte_distributor_create(const char *name, unsigned socket_id, */ int rte_distributor_process(struct rte_distributor *d, - struct rte_mbuf **mbufs, unsigned num_mbufs); + struct rte_mbuf **mbufs, unsigned int num_mbufs); /** * Get a set of mbufs that have been returned to the distributor by workers * @@ -122,7 +132,7 @@ rte_distributor_process(struct rte_distributor *d, */ int rte_distributor_returned_pkts(struct rte_distributor *d, - struct rte_mbuf **mbufs, unsigned max_mbufs); + struct rte_mbuf **mbufs, unsigned int max_mbufs); /** * Flush the distributor component, so that there are no in-flight or @@ -161,7 +171,7 @@ rte_distributor_clear_returns(struct rte_distributor *d); */ /** - * API called by a worker to get a new packet to process. Any previous packet + * API called by a worker to get new packets to process. Any previous packets * given to the worker is assumed to have completed processing, and may be * optionally returned to the distributor via the oldpkt parameter.
* @@ -170,15 +180,20 @@ rte_distributor_clear_returns(struct rte_distributor *d); * @param worker_id * The worker instance number to use - must be less that num_workers passed * at distributor creation time. + * @param pkts + * The mbufs pointer array to be filled in (up to 8 packets) * @param oldpkt * The previous packet, if any, being processed by the worker + * @param retcount + * The number of packets being returned * * @return - * A new packet to be processed by the worker thread. + * The number of packets in the pkts array */ -struct rte_mbuf * +int rte_distributor_get_pkt(struct rte_distributor *d, - unsigned worker_id, struct rte_mbuf *oldpkt); + unsigned int worker_id, struct rte_mbuf **pkts, + struct rte_mbuf **oldpkt, unsigned int retcount); /** * API called by a worker to return a completed packet without requesting a @@ -189,23 +204,25 @@ rte_distributor_get_pkt(struct rte_distributor *d, * @param worker_id * The worker instance number to use - must be less that num_workers passed * at distributor creation time. - * @param mbuf - * The previous packet being processed by the worker + * @param oldpkt + * The previous packets being processed by the worker + * @param num + * The number of packets in the oldpkt array */ int -rte_distributor_return_pkt(struct rte_distributor *d, unsigned worker_id, - struct rte_mbuf *mbuf); +rte_distributor_return_pkt(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **oldpkt, int num); /** * API called by a worker to request a new packet to process. * Any previous packet given to the worker is assumed to have completed * processing, and may be optionally returned to the distributor via * the oldpkt parameter. - * Unlike rte_distributor_get_pkt(), this function does not wait for a new - * packet to be provided by the distributor. + * Unlike rte_distributor_get_pkt(), this function does not wait for a + * new packet to be provided by the distributor. + * - * NOTE: after calling this function, rte_distributor_poll_pkt() should - * be used to poll for the packet requested. The rte_distributor_get_pkt() + * NOTE: after calling this function, rte_distributor_poll_pkt() should + * be used to poll for the packet requested. The rte_distributor_get_pkt() * API should *not* be used to try and retrieve the new packet. * * @param d @@ -214,11 +231,14 @@ rte_distributor_return_pkt(struct rte_distributor *d, unsigned worker_id, * The worker instance number to use - must be less that num_workers passed * at distributor creation time. * @param oldpkt - * The previous packet, if any, being processed by the worker + * The returning packets, if any, processed by the worker + * @param count + * The number of returning packets */ void rte_distributor_request_pkt(struct rte_distributor *d, - unsigned worker_id, struct rte_mbuf *oldpkt); + unsigned int worker_id, struct rte_mbuf **oldpkt, + unsigned int count); /** * API called by a worker to check for a new packet that was previously @@ -231,14 +251,16 @@ rte_distributor_request_pkt(struct rte_distributor *d, * @param worker_id * The worker instance number to use - must be less that num_workers passed * at distributor creation time. + * @param mbufs + * The array of mbufs being given to the worker * * @return - * A new packet to be processed by the worker thread, or NULL if no + * The number of packets being given to the worker thread, or -1 if no + * packet is yet available.
*/ -struct rte_mbuf * +int rte_distributor_poll_pkt(struct rte_distributor *d, - unsigned worker_id); + unsigned int worker_id, struct rte_mbuf **mbufs); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/arch/tile/rte_rwlock.h b/lib/librte_distributor/rte_distributor_match_generic.c index 8f67a190..4925a788 100644 --- a/lib/librte_eal/common/include/arch/tile/rte_rwlock.h +++ b/lib/librte_distributor/rte_distributor_match_generic.c @@ -1,7 +1,7 @@ -/* +/*- * BSD LICENSE * - * Copyright (C) EZchip Semiconductor Ltd. 2015. + * Copyright(c) 2017 Intel Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its + * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -28,43 +28,16 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + */ -#ifndef _RTE_RWLOCK_TILE_H_ -#define _RTE_RWLOCK_TILE_H_ +#include <rte_mbuf.h> +#include "rte_distributor_private.h" +#include "rte_distributor.h" -#ifdef __cplusplus -extern "C" { -#endif - -#include "generic/rte_rwlock.h" - -static inline void -rte_rwlock_read_lock_tm(rte_rwlock_t *rwl) -{ - rte_rwlock_read_lock(rwl); -} - -static inline void -rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl) -{ - rte_rwlock_read_unlock(rwl); -} - -static inline void -rte_rwlock_write_lock_tm(rte_rwlock_t *rwl) -{ - rte_rwlock_write_lock(rwl); -} - -static inline void -rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl) +void +find_match_vec(struct rte_distributor *d, + uint16_t *data_ptr, + uint16_t *output_ptr) { - rte_rwlock_write_unlock(rwl); + find_match_scalar(d, data_ptr, output_ptr); } - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_RWLOCK_TILE_H_ */ diff --git a/lib/librte_distributor/rte_distributor_match_sse.c b/lib/librte_distributor/rte_distributor_match_sse.c new file mode 100644 index 00000000..44935a69 --- /dev/null +++ b/lib/librte_distributor/rte_distributor_match_sse.c @@ -0,0 +1,114 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_mbuf.h> +#include "rte_distributor_private.h" +#include "rte_distributor.h" +#include "smmintrin.h" +#include "nmmintrin.h" + + +void +find_match_vec(struct rte_distributor *d, + uint16_t *data_ptr, + uint16_t *output_ptr) +{ + /* Setup */ + __m128i incoming_fids; + __m128i inflight_fids; + __m128i preflight_fids; + __m128i wkr; + __m128i mask1; + __m128i mask2; + __m128i output; + struct rte_distributor_backlog *bl; + uint16_t i; + + /* + * Function overview: + * 2. Loop through all worker ID's + * 2a. Load the current inflights for that worker into an xmm reg + * 2b. Load the current backlog for that worker into an xmm reg + * 2c. use cmpestrm to intersect flow_ids with backlog and inflights + * 2d. Add any matches to the output + * 3. Write the output xmm (matching worker ids). + */ + + + output = _mm_set1_epi16(0); + incoming_fids = _mm_load_si128((__m128i *)data_ptr); + + for (i = 0; i < d->num_workers; i++) { + bl = &d->backlog[i]; + + inflight_fids = + _mm_load_si128((__m128i *)&(d->in_flight_tags[i])); + preflight_fids = + _mm_load_si128((__m128i *)(bl->tags)); + + /* + * Any incoming_fid that exists anywhere in inflight_fids will + * have 0xffff in same position of the mask as the incoming fid + * Example (shortened to bytes for brevity): + * incoming_fids 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 + * inflight_fids 0x03 0x05 0x07 0x00 0x00 0x00 0x00 0x00 + * mask 0x00 0x00 0xff 0x00 0xff 0x00 0xff 0x00 + */ + + mask1 = _mm_cmpestrm(inflight_fids, 8, incoming_fids, 8, + _SIDD_UWORD_OPS | + _SIDD_CMP_EQUAL_ANY | + _SIDD_UNIT_MASK); + mask2 = _mm_cmpestrm(preflight_fids, 8, incoming_fids, 8, + _SIDD_UWORD_OPS | + _SIDD_CMP_EQUAL_ANY | + _SIDD_UNIT_MASK); + + mask1 = _mm_or_si128(mask1, mask2); + /* + * Now mask contains 0xffff where there's a match. + * Next we need to store the worker_id in the relevant position + * in the output. + */ + + wkr = _mm_set1_epi16(i+1); + mask1 = _mm_and_si128(mask1, wkr); + output = _mm_or_si128(mask1, output); + } + + /* + * At this stage, the output 128-bit contains 8 16-bit values, with + * each non-zero value containing the worker ID on which the + * corresponding flow is pinned to. + */ + _mm_store_si128((__m128i *)output_ptr, output); +} diff --git a/lib/librte_distributor/rte_distributor_private.h b/lib/librte_distributor/rte_distributor_private.h new file mode 100644 index 00000000..250b23e1 --- /dev/null +++ b/lib/librte_distributor/rte_distributor_private.h @@ -0,0 +1,202 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DIST_PRIV_H_ +#define _RTE_DIST_PRIV_H_ + +/** + * @file + * RTE distributor + * + * The distributor is a component which is designed to pass packets + * one-at-a-time to workers, with dynamic load balancing. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define NO_FLAGS 0 +#define RTE_DISTRIB_PREFIX "DT_" + +/* + * We will use the bottom four bits of pointer for flags, shifting out + * the top four bits to make room (since a 64-bit pointer actually only uses + * 48 bits). An arithmetic-right-shift will then appropriately restore the + * original pointer value with proper sign extension into the top bits. + */ +#define RTE_DISTRIB_FLAG_BITS 4 +#define RTE_DISTRIB_FLAGS_MASK (0x0F) +#define RTE_DISTRIB_NO_BUF 0 /**< empty flags: no buffer requested */ +#define RTE_DISTRIB_GET_BUF (1) /**< worker requests a buffer, returns old */ +#define RTE_DISTRIB_RETURN_BUF (2) /**< worker returns a buffer, no request */ +#define RTE_DISTRIB_VALID_BUF (4) /**< set if bufptr contains ptr */ + +#define RTE_DISTRIB_BACKLOG_SIZE 8 +#define RTE_DISTRIB_BACKLOG_MASK (RTE_DISTRIB_BACKLOG_SIZE - 1) + +#define RTE_DISTRIB_MAX_RETURNS 128 +#define RTE_DISTRIB_RETURNS_MASK (RTE_DISTRIB_MAX_RETURNS - 1) + +/** + * Maximum number of workers allowed. + * Be aware of increasing the limit, because it is limited by how we track + * in-flight tags. See in_flight_bitmask and rte_distributor_process + */ +#define RTE_DISTRIB_MAX_WORKERS 64 + +#define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Length of name for instance */ + +/** + * Buffer structure used to pass the pointer data between cores. This is cache + * line aligned, but to improve performance and prevent adjacent cache-line + * prefetches of buffers for other workers, e.g. when worker 1's buffer is on + * the next cache line to worker 0, we pad this out to three cache lines. + * Only 64 bits of the memory are actually used though.
+ */
+union rte_distributor_buffer_v20 {
+    volatile int64_t bufptr64;
+    char pad[RTE_CACHE_LINE_SIZE*3];
+} __rte_cache_aligned;
+
+/*
+ * Transfer up to 8 mbufs at a time to/from workers, and
+ * flow matching algorithm optimised for 8 flow IDs at a time
+ */
+#define RTE_DIST_BURST_SIZE 8
+
+struct rte_distributor_backlog {
+    unsigned int start;
+    unsigned int count;
+    int64_t pkts[RTE_DIST_BURST_SIZE] __rte_cache_aligned;
+    uint16_t *tags; /* will point to second cacheline of inflights */
+} __rte_cache_aligned;
+
+
+struct rte_distributor_returned_pkts {
+    unsigned int start;
+    unsigned int count;
+    struct rte_mbuf *mbufs[RTE_DISTRIB_MAX_RETURNS];
+};
+
+struct rte_distributor_v20 {
+    TAILQ_ENTRY(rte_distributor_v20) next;    /**< Next in list. */
+
+    char name[RTE_DISTRIBUTOR_NAMESIZE];  /**< Name of the distributor. */
+    unsigned int num_workers;             /**< Number of workers polling */
+
+    uint32_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS];
+        /**< Tracks the tag being processed per core */
+    uint64_t in_flight_bitmask;
+        /**< on/off bits for in-flight tags.
+         * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 then
+         * the bitmask has to expand.
+         */
+
+    struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS];
+
+    union rte_distributor_buffer_v20 bufs[RTE_DISTRIB_MAX_WORKERS];
+
+    struct rte_distributor_returned_pkts returns;
+};
+
+/* All different signature compare functions */
+enum rte_distributor_match_function {
+    RTE_DIST_MATCH_SCALAR = 0,
+    RTE_DIST_MATCH_VECTOR,
+    RTE_DIST_NUM_MATCH_FNS
+};
+
+/**
+ * Buffer structure used to pass the pointer data between cores. This is cache
+ * line aligned, but to improve performance and prevent adjacent cache-line
+ * prefetches of buffers for other workers, e.g. when worker 1's buffer is on
+ * the next cache line to worker 0, we pad this out to two cache lines.
+ * We can pass up to 8 mbufs at a time in one cacheline.
+ * There is a separate cacheline for returns in the burst API.
+ */
+struct rte_distributor_buffer {
+    volatile int64_t bufptr64[RTE_DIST_BURST_SIZE]
+        __rte_cache_aligned; /* <= outgoing to worker */
+
+    int64_t pad1 __rte_cache_aligned;    /* <= one cache line */
+
+    volatile int64_t retptr64[RTE_DIST_BURST_SIZE]
+        __rte_cache_aligned; /* <= incoming from worker */
+
+    int64_t pad2 __rte_cache_aligned;    /* <= one cache line */
+
+    int count __rte_cache_aligned;       /* <= number of current mbufs */
+};
+
+struct rte_distributor {
+    TAILQ_ENTRY(rte_distributor) next;    /**< Next in list. */
+
+    char name[RTE_DISTRIBUTOR_NAMESIZE];  /**< Name of the distributor. */
+    unsigned int num_workers;             /**< Number of workers polling */
+    unsigned int alg_type;                /**< Algorithm type in use */
+
+    /**
+     * First cache line in each row of this array holds the tags in flight
+     * on the worker core. Second cache line holds the backlog of tags
+     * that are going to go to the worker core.
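The tags pointer of rte_distributor_backlog is, per the layout comment above, expected to point into the second cache line of the matching in_flight_tags row. A sketch of that wiring as it would presumably be done at creation time (assumed; the create function is not shown in this diff):

    /* Point each worker's backlog tag array at the second cache line
     * (second RTE_DIST_BURST_SIZE entries) of its in_flight_tags row. */
    unsigned int w;

    for (w = 0; w < RTE_DISTRIB_MAX_WORKERS; w++)
        d->backlog[w].tags = &d->in_flight_tags[w][RTE_DIST_BURST_SIZE];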
+ */ + uint16_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS][RTE_DIST_BURST_SIZE*2] + __rte_cache_aligned; + + struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS] + __rte_cache_aligned; + + struct rte_distributor_buffer bufs[RTE_DISTRIB_MAX_WORKERS]; + + struct rte_distributor_returned_pkts returns; + + enum rte_distributor_match_function dist_match_fn; + + struct rte_distributor_v20 *d_v20; +}; + +void +find_match_scalar(struct rte_distributor *d, + uint16_t *data_ptr, + uint16_t *output_ptr); + +void +find_match_vec(struct rte_distributor *d, + uint16_t *data_ptr, + uint16_t *output_ptr); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_eal/common/include/arch/tile/rte_spinlock.h b/lib/librte_distributor/rte_distributor_v1705.h index e91f99ee..81b26915 100644 --- a/lib/librte_eal/common/include/arch/tile/rte_spinlock.h +++ b/lib/librte_distributor/rte_distributor_v1705.h @@ -1,7 +1,7 @@ -/* +/*- * BSD LICENSE * - * Copyright (C) EZchip Semiconductor Ltd. 2015. + * Copyright(c) 2017 Intel Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its + * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -28,65 +28,62 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + */ -#ifndef _RTE_SPINLOCK_TILE_H_ -#define _RTE_SPINLOCK_TILE_H_ +#ifndef _RTE_DISTRIB_V1705_H_ +#define _RTE_DISTRIB_V1705_H_ -#ifndef RTE_FORCE_INTRINSICS -# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS -#endif +/** + * @file + * RTE distributor + * + * The distributor is a component which is designed to pass packets + * one-at-a-time to workers, with dynamic load balancing. 
+ */ #ifdef __cplusplus extern "C" { #endif -#include <rte_common.h> -#include "generic/rte_spinlock.h" +struct rte_distributor * +rte_distributor_create_v1705(const char *name, unsigned int socket_id, + unsigned int num_workers, + unsigned int alg_type); -static inline int rte_tm_supported(void) -{ - return 0; -} +int +rte_distributor_process_v1705(struct rte_distributor *d, + struct rte_mbuf **mbufs, unsigned int num_mbufs); -static inline void -rte_spinlock_lock_tm(rte_spinlock_t *sl) -{ - rte_spinlock_lock(sl); /* fall-back */ -} +int +rte_distributor_returned_pkts_v1705(struct rte_distributor *d, + struct rte_mbuf **mbufs, unsigned int max_mbufs); -static inline int -rte_spinlock_trylock_tm(rte_spinlock_t *sl) -{ - return rte_spinlock_trylock(sl); -} +int +rte_distributor_flush_v1705(struct rte_distributor *d); -static inline void -rte_spinlock_unlock_tm(rte_spinlock_t *sl) -{ - rte_spinlock_unlock(sl); -} +void +rte_distributor_clear_returns_v1705(struct rte_distributor *d); -static inline void -rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) -{ - rte_spinlock_recursive_lock(slr); /* fall-back */ -} +int +rte_distributor_get_pkt_v1705(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **pkts, + struct rte_mbuf **oldpkt, unsigned int retcount); -static inline void -rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) -{ - rte_spinlock_recursive_unlock(slr); -} +int +rte_distributor_return_pkt_v1705(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **oldpkt, int num); -static inline int -rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) -{ - return rte_spinlock_recursive_trylock(slr); -} +void +rte_distributor_request_pkt_v1705(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **oldpkt, + unsigned int count); + +int +rte_distributor_poll_pkt_v1705(struct rte_distributor *d, + unsigned int worker_id, struct rte_mbuf **mbufs); #ifdef __cplusplus } #endif -#endif /* _RTE_SPINLOCK_TILE_H_ */ +#endif diff --git a/lib/librte_distributor/rte_distributor_v20.c b/lib/librte_distributor/rte_distributor_v20.c new file mode 100644 index 00000000..bb6c5d70 --- /dev/null +++ b/lib/librte_distributor/rte_distributor_v20.c @@ -0,0 +1,427 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
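A hedged sketch of how a worker lcore might drive the burst API declared above (worker_id and quit_signal are assumed application state; argument order follows the prototypes):

    struct rte_mbuf *pkts[8];   /* RTE_DIST_BURST_SIZE in the private header */
    int n = 0;

    /* The first call passes retcount == 0, since nothing has been
     * processed yet; later calls hand back the previous burst. */
    while (!quit_signal) {
        n = rte_distributor_get_pkt_v1705(d, worker_id, pkts, pkts, n);
        /* ... process the n mbufs in pkts ... */
    }
    rte_distributor_return_pkt_v1705(d, worker_id, pkts, n);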
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <sys/queue.h> +#include <string.h> +#include <rte_mbuf.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_errno.h> +#include <rte_compat.h> +#include <rte_string_fns.h> +#include <rte_eal_memconfig.h> +#include "rte_distributor_v20.h" +#include "rte_distributor_private.h" + +TAILQ_HEAD(rte_distributor_list, rte_distributor_v20); + +static struct rte_tailq_elem rte_distributor_tailq = { + .name = "RTE_DISTRIBUTOR", +}; +EAL_REGISTER_TAILQ(rte_distributor_tailq) + +/**** APIs called by workers ****/ + +void +rte_distributor_request_pkt_v20(struct rte_distributor_v20 *d, + unsigned worker_id, struct rte_mbuf *oldpkt) +{ + union rte_distributor_buffer_v20 *buf = &d->bufs[worker_id]; + int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS) + | RTE_DISTRIB_GET_BUF; + while (unlikely(buf->bufptr64 & RTE_DISTRIB_FLAGS_MASK)) + rte_pause(); + buf->bufptr64 = req; +} +VERSION_SYMBOL(rte_distributor_request_pkt, _v20, 2.0); + +struct rte_mbuf * +rte_distributor_poll_pkt_v20(struct rte_distributor_v20 *d, + unsigned worker_id) +{ + union rte_distributor_buffer_v20 *buf = &d->bufs[worker_id]; + if (buf->bufptr64 & RTE_DISTRIB_GET_BUF) + return NULL; + + /* since bufptr64 is signed, this should be an arithmetic shift */ + int64_t ret = buf->bufptr64 >> RTE_DISTRIB_FLAG_BITS; + return (struct rte_mbuf *)((uintptr_t)ret); +} +VERSION_SYMBOL(rte_distributor_poll_pkt, _v20, 2.0); + +struct rte_mbuf * +rte_distributor_get_pkt_v20(struct rte_distributor_v20 *d, + unsigned worker_id, struct rte_mbuf *oldpkt) +{ + struct rte_mbuf *ret; + rte_distributor_request_pkt_v20(d, worker_id, oldpkt); + while ((ret = rte_distributor_poll_pkt_v20(d, worker_id)) == NULL) + rte_pause(); + return ret; +} +VERSION_SYMBOL(rte_distributor_get_pkt, _v20, 2.0); + +int +rte_distributor_return_pkt_v20(struct rte_distributor_v20 *d, + unsigned worker_id, struct rte_mbuf *oldpkt) +{ + union rte_distributor_buffer_v20 *buf = &d->bufs[worker_id]; + uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS) + | RTE_DISTRIB_RETURN_BUF; + buf->bufptr64 = req; + return 0; +} +VERSION_SYMBOL(rte_distributor_return_pkt, _v20, 2.0); + +/**** APIs called on distributor core ***/ + +/* as name suggests, adds a packet to the backlog for a particular worker */ +static int +add_to_backlog(struct rte_distributor_backlog *bl, int64_t item) +{ + if (bl->count == RTE_DISTRIB_BACKLOG_SIZE) + return -1; + + bl->pkts[(bl->start + bl->count++) & (RTE_DISTRIB_BACKLOG_MASK)] + = item; + return 0; +} + +/* takes the next packet for a worker off the backlog */ +static int64_t +backlog_pop(struct rte_distributor_backlog *bl) +{ + bl->count--; + return bl->pkts[bl->start++ & RTE_DISTRIB_BACKLOG_MASK]; +} + +/* stores a packet returned from a worker inside the returns array */ +static inline void +store_return(uintptr_t oldbuf, struct rte_distributor_v20 *d, + unsigned *ret_start, unsigned *ret_count) +{ + /* store returns in a circular buffer - code 
is branch-free */
+    d->returns.mbufs[(*ret_start + *ret_count) & RTE_DISTRIB_RETURNS_MASK]
+            = (void *)oldbuf;
+    *ret_start += (*ret_count == RTE_DISTRIB_RETURNS_MASK) & !!(oldbuf);
+    *ret_count += (*ret_count != RTE_DISTRIB_RETURNS_MASK) & !!(oldbuf);
+}
+
+static inline void
+handle_worker_shutdown(struct rte_distributor_v20 *d, unsigned int wkr)
+{
+    d->in_flight_tags[wkr] = 0;
+    d->in_flight_bitmask &= ~(1UL << wkr);
+    d->bufs[wkr].bufptr64 = 0;
+    if (unlikely(d->backlog[wkr].count != 0)) {
+        /* On return of a packet, we need to move the
+         * queued packets for this core elsewhere.
+         * Easiest solution is to set things up for
+         * a recursive call. That will cause those
+         * packets to be queued up for the next free
+         * core, i.e. it will return as soon as a
+         * core becomes free to accept the first
+         * packet, as subsequent ones will be added to
+         * the backlog for that core.
+         */
+        struct rte_mbuf *pkts[RTE_DISTRIB_BACKLOG_SIZE];
+        unsigned i;
+        struct rte_distributor_backlog *bl = &d->backlog[wkr];
+
+        for (i = 0; i < bl->count; i++) {
+            unsigned idx = (bl->start + i) &
+                    RTE_DISTRIB_BACKLOG_MASK;
+            pkts[i] = (void *)((uintptr_t)(bl->pkts[idx] >>
+                    RTE_DISTRIB_FLAG_BITS));
+        }
+        /* recursive call.
+         * Note that the tags were set before first level call
+         * to rte_distributor_process.
+         */
+        rte_distributor_process_v20(d, pkts, i);
+        bl->count = bl->start = 0;
+    }
+}
+
+/* this function is called when process() fn is called without any new
+ * packets. It goes through all the workers and clears any returned packets
+ * to do a partial flush.
+ */
+static int
+process_returns(struct rte_distributor_v20 *d)
+{
+    unsigned wkr;
+    unsigned flushed = 0;
+    unsigned ret_start = d->returns.start,
+            ret_count = d->returns.count;
+
+    for (wkr = 0; wkr < d->num_workers; wkr++) {
+
+        const int64_t data = d->bufs[wkr].bufptr64;
+        uintptr_t oldbuf = 0;
+
+        if (data & RTE_DISTRIB_GET_BUF) {
+            flushed++;
+            if (d->backlog[wkr].count)
+                d->bufs[wkr].bufptr64 =
+                        backlog_pop(&d->backlog[wkr]);
+            else {
+                d->bufs[wkr].bufptr64 = RTE_DISTRIB_GET_BUF;
+                d->in_flight_tags[wkr] = 0;
+                d->in_flight_bitmask &= ~(1UL << wkr);
+            }
+            oldbuf = data >> RTE_DISTRIB_FLAG_BITS;
+        } else if (data & RTE_DISTRIB_RETURN_BUF) {
+            handle_worker_shutdown(d, wkr);
+            oldbuf = data >> RTE_DISTRIB_FLAG_BITS;
+        }
+
+        store_return(oldbuf, d, &ret_start, &ret_count);
+    }
+
+    d->returns.start = ret_start;
+    d->returns.count = ret_count;
+
+    return flushed;
+}
+
+/* process a set of packets to distribute them to workers */
+int
+rte_distributor_process_v20(struct rte_distributor_v20 *d,
+        struct rte_mbuf **mbufs, unsigned num_mbufs)
+{
+    unsigned next_idx = 0;
+    unsigned wkr = 0;
+    struct rte_mbuf *next_mb = NULL;
+    int64_t next_value = 0;
+    uint32_t new_tag = 0;
+    unsigned ret_start = d->returns.start,
+            ret_count = d->returns.count;
+
+    if (unlikely(num_mbufs == 0))
+        return process_returns(d);
+
+    while (next_idx < num_mbufs || next_mb != NULL) {
+
+        int64_t data = d->bufs[wkr].bufptr64;
+        uintptr_t oldbuf = 0;
+
+        if (!next_mb) {
+            next_mb = mbufs[next_idx++];
+            next_value = (((int64_t)(uintptr_t)next_mb)
+                    << RTE_DISTRIB_FLAG_BITS);
+            /*
+             * The user is advised to set the tag value for each
+             * mbuf before calling rte_distributor_process.
+             * User defined tags are used to identify flows,
+             * or sessions.
+             */
+            new_tag = next_mb->hash.usr;
+
+            /*
+             * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64
+             * then the size of match has to be expanded.
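The branch-free index update in store_return() above is equivalent to the following branched form (an illustrative rewrite, not part of the library):

    static inline void
    store_return_branched(uintptr_t oldbuf, struct rte_distributor_v20 *d,
            unsigned *ret_start, unsigned *ret_count)
    {
        if (oldbuf == 0)
            return;         /* nothing returned: neither index moves */
        d->returns.mbufs[(*ret_start + *ret_count) &
                RTE_DISTRIB_RETURNS_MASK] = (void *)oldbuf;
        if (*ret_count == RTE_DISTRIB_RETURNS_MASK)
            (*ret_start)++; /* buffer full: overwrite the oldest entry */
        else
            (*ret_count)++;
    }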
+             */
+            uint64_t match = 0;
+            unsigned i;
+            /*
+             * to scan for a match use "xor" and "not" to get a 0/1
+             * value, then use shifting to merge to single "match"
+             * variable, where a one-bit indicates a match for the
+             * worker given by the bit-position
+             */
+            for (i = 0; i < d->num_workers; i++)
+                match |= (!(d->in_flight_tags[i] ^ new_tag)
+                    << i);
+
+            /* Only turned-on bits are considered as matches */
+            match &= d->in_flight_bitmask;
+
+            if (match) {
+                next_mb = NULL;
+                unsigned worker = __builtin_ctzl(match);
+                if (add_to_backlog(&d->backlog[worker],
+                        next_value) < 0)
+                    next_idx--;
+            }
+        }
+
+        if ((data & RTE_DISTRIB_GET_BUF) &&
+                (d->backlog[wkr].count || next_mb)) {
+
+            if (d->backlog[wkr].count)
+                d->bufs[wkr].bufptr64 =
+                        backlog_pop(&d->backlog[wkr]);
+
+            else {
+                d->bufs[wkr].bufptr64 = next_value;
+                d->in_flight_tags[wkr] = new_tag;
+                d->in_flight_bitmask |= (1UL << wkr);
+                next_mb = NULL;
+            }
+            oldbuf = data >> RTE_DISTRIB_FLAG_BITS;
+        } else if (data & RTE_DISTRIB_RETURN_BUF) {
+            handle_worker_shutdown(d, wkr);
+            oldbuf = data >> RTE_DISTRIB_FLAG_BITS;
+        }
+
+        /* store returns in a circular buffer */
+        store_return(oldbuf, d, &ret_start, &ret_count);
+
+        if (++wkr == d->num_workers)
+            wkr = 0;
+    }
+    /* to finish, check all workers for backlog and schedule work for them
+     * if they are ready */
+    for (wkr = 0; wkr < d->num_workers; wkr++)
+        if (d->backlog[wkr].count &&
+                (d->bufs[wkr].bufptr64 & RTE_DISTRIB_GET_BUF)) {
+
+            int64_t oldbuf = d->bufs[wkr].bufptr64 >>
+                    RTE_DISTRIB_FLAG_BITS;
+            store_return(oldbuf, d, &ret_start, &ret_count);
+
+            d->bufs[wkr].bufptr64 = backlog_pop(&d->backlog[wkr]);
+        }
+
+    d->returns.start = ret_start;
+    d->returns.count = ret_count;
+    return num_mbufs;
+}
+VERSION_SYMBOL(rte_distributor_process, _v20, 2.0);
+
+/* return to the caller, packets returned from workers */
+int
+rte_distributor_returned_pkts_v20(struct rte_distributor_v20 *d,
+        struct rte_mbuf **mbufs, unsigned max_mbufs)
+{
+    struct rte_distributor_returned_pkts *returns = &d->returns;
+    unsigned retval = (max_mbufs < returns->count) ?
+            max_mbufs : returns->count;
+    unsigned i;
+
+    for (i = 0; i < retval; i++) {
+        unsigned idx = (returns->start + i) & RTE_DISTRIB_RETURNS_MASK;
+        mbufs[i] = returns->mbufs[idx];
+    }
+    returns->start += i;
+    returns->count -= i;
+
+    return retval;
+}
+VERSION_SYMBOL(rte_distributor_returned_pkts, _v20, 2.0);
+
+/* return the number of packets in-flight in a distributor, i.e. packets
+ * being worked on or queued up in a backlog. */
+static inline unsigned
+total_outstanding(const struct rte_distributor_v20 *d)
+{
+    unsigned wkr, total_outstanding;
+
+    total_outstanding = __builtin_popcountl(d->in_flight_bitmask);
+
+    for (wkr = 0; wkr < d->num_workers; wkr++)
+        total_outstanding += d->backlog[wkr].count;
+
+    return total_outstanding;
+}
+
+/* flush the distributor, so that there are no outstanding packets in flight or
+ * queued up.
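Taken together, these functions suggest the following distributor-lcore loop for the v20 API (a hedged sketch; port_id, quit_signal, and the tag assignment are assumed application details):

    struct rte_mbuf *bufs[64], *ret[64];
    uint16_t nb;
    unsigned int nr;

    while (!quit_signal) {
        nb = rte_eth_rx_burst(port_id, 0, bufs, 64);
        /* bufs[i]->hash.usr must carry the flow tag at this point */
        rte_distributor_process_v20(d, bufs, nb);
        nr = rte_distributor_returned_pkts_v20(d, ret, 64);
        /* ... transmit or free the nr returned mbufs ... */
    }
    rte_distributor_flush_v20(d);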
*/ +int +rte_distributor_flush_v20(struct rte_distributor_v20 *d) +{ + const unsigned flushed = total_outstanding(d); + + while (total_outstanding(d) > 0) + rte_distributor_process_v20(d, NULL, 0); + + return flushed; +} +VERSION_SYMBOL(rte_distributor_flush, _v20, 2.0); + +/* clears the internal returns array in the distributor */ +void +rte_distributor_clear_returns_v20(struct rte_distributor_v20 *d) +{ + d->returns.start = d->returns.count = 0; +#ifndef __OPTIMIZE__ + memset(d->returns.mbufs, 0, sizeof(d->returns.mbufs)); +#endif +} +VERSION_SYMBOL(rte_distributor_clear_returns, _v20, 2.0); + +/* creates a distributor instance */ +struct rte_distributor_v20 * +rte_distributor_create_v20(const char *name, + unsigned socket_id, + unsigned num_workers) +{ + struct rte_distributor_v20 *d; + struct rte_distributor_list *distributor_list; + char mz_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *mz; + + /* compilation-time checks */ + RTE_BUILD_BUG_ON((sizeof(*d) & RTE_CACHE_LINE_MASK) != 0); + RTE_BUILD_BUG_ON((RTE_DISTRIB_MAX_WORKERS & 7) != 0); + RTE_BUILD_BUG_ON(RTE_DISTRIB_MAX_WORKERS > + sizeof(d->in_flight_bitmask) * CHAR_BIT); + + if (name == NULL || num_workers >= RTE_DISTRIB_MAX_WORKERS) { + rte_errno = EINVAL; + return NULL; + } + + snprintf(mz_name, sizeof(mz_name), RTE_DISTRIB_PREFIX"%s", name); + mz = rte_memzone_reserve(mz_name, sizeof(*d), socket_id, NO_FLAGS); + if (mz == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + d = mz->addr; + snprintf(d->name, sizeof(d->name), "%s", name); + d->num_workers = num_workers; + + distributor_list = RTE_TAILQ_CAST(rte_distributor_tailq.head, + rte_distributor_list); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_INSERT_TAIL(distributor_list, d, next); + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return d; +} +VERSION_SYMBOL(rte_distributor_create, _v20, 2.0); diff --git a/lib/librte_distributor/rte_distributor_v20.h b/lib/librte_distributor/rte_distributor_v20.h new file mode 100644 index 00000000..f02e6aac --- /dev/null +++ b/lib/librte_distributor/rte_distributor_v20.h @@ -0,0 +1,247 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_DISTRIB_V20_H_
+#define _RTE_DISTRIB_V20_H_
+
+/**
+ * @file
+ * RTE distributor
+ *
+ * The distributor is a component which is designed to pass packets
+ * one-at-a-time to workers, with dynamic load balancing.
+ */
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+#define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Length of name for instance */
+
+struct rte_distributor_v20;
+struct rte_mbuf;
+
+/**
+ * Function to create a new distributor instance
+ *
+ * Reserves the memory needed for the distributor operation and
+ * initializes the distributor to work with the configured number of workers.
+ *
+ * @param name
+ *   The name to be given to the distributor instance.
+ * @param socket_id
+ *   The NUMA node on which the memory is to be allocated
+ * @param num_workers
+ *   The maximum number of workers that will request packets from this
+ *   distributor
+ * @return
+ *   The newly created distributor instance
+ */
+struct rte_distributor_v20 *
+rte_distributor_create_v20(const char *name, unsigned int socket_id,
+        unsigned int num_workers);
+
+/*  *** APIs to be called on the distributor lcore ***  */
+/*
+ * The following APIs are the public APIs which are designed for use on a
+ * single lcore which acts as the distributor lcore for a given distributor
+ * instance. These functions cannot be called on multiple cores simultaneously
+ * without using locking to protect access to the internals of the distributor.
+ *
+ * NOTE: a given lcore cannot act as both a distributor lcore and a worker lcore
+ * for the same distributor instance, otherwise deadlock will result.
+ */
+
+/**
+ * Process a set of packets by distributing them among workers that request
+ * packets. The distributor will ensure that no two packets that have the
+ * same flow id, or tag, in the mbuf will be processed at the same time.
+ *
+ * The user is advised to set the tag for each mbuf before calling this
+ * function. If the user does not set it, the tag value may vary depending
+ * on driver implementation and configuration.
+ *
+ * This is not multi-thread safe and should only be called on a single lcore.
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @param mbufs
+ *   The mbufs to be distributed
+ * @param num_mbufs
+ *   The number of mbufs in the mbufs array
+ * @return
+ *   The number of mbufs processed.
+ */
+int
+rte_distributor_process_v20(struct rte_distributor_v20 *d,
+        struct rte_mbuf **mbufs, unsigned int num_mbufs);
+
+/**
+ * Get a set of mbufs that have been returned to the distributor by workers
+ *
+ * This should only be called on the same lcore as rte_distributor_process()
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @param mbufs
+ *   The mbufs pointer array to be filled in
+ * @param max_mbufs
+ *   The size of the mbufs array
+ * @return
+ *   The number of mbufs returned in the mbufs array.
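A small illustration of the tag requirement stated above (hash.usr is the mbuf field read by the process call; compute_flow_id() and the loop variables are hypothetical application code):

    /* Assign a flow tag to every received mbuf before process() sees it. */
    for (i = 0; i < nb_rx; i++)
        bufs[i]->hash.usr = compute_flow_id(bufs[i]);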
+ */
+int
+rte_distributor_returned_pkts_v20(struct rte_distributor_v20 *d,
+        struct rte_mbuf **mbufs, unsigned int max_mbufs);
+
+/**
+ * Flush the distributor component, so that there are no in-flight or
+ * backlogged packets awaiting processing
+ *
+ * This should only be called on the same lcore as rte_distributor_process()
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @return
+ *   The number of queued/in-flight packets that were completed by this call.
+ */
+int
+rte_distributor_flush_v20(struct rte_distributor_v20 *d);
+
+/**
+ * Clears the array of returned packets used as the source for the
+ * rte_distributor_returned_pkts() API call.
+ *
+ * This should only be called on the same lcore as rte_distributor_process()
+ *
+ * @param d
+ *   The distributor instance to be used
+ */
+void
+rte_distributor_clear_returns_v20(struct rte_distributor_v20 *d);
+
+/*  *** APIs to be called on the worker lcores ***  */
+/*
+ * The following APIs are the public APIs which are designed for use on
+ * multiple lcores which act as workers for a distributor. Each lcore should use
+ * a unique worker id when requesting packets.
+ *
+ * NOTE: a given lcore cannot act as both a distributor lcore and a worker lcore
+ * for the same distributor instance, otherwise deadlock will result.
+ */
+
+/**
+ * API called by a worker to get a new packet to process. Any previous packet
+ * given to the worker is assumed to have completed processing, and may be
+ * optionally returned to the distributor via the oldpkt parameter.
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @param worker_id
+ *   The worker instance number to use - must be less than num_workers passed
+ *   at distributor creation time.
+ * @param oldpkt
+ *   The previous packet, if any, being processed by the worker
+ *
+ * @return
+ *   A new packet to be processed by the worker thread.
+ */
+struct rte_mbuf *
+rte_distributor_get_pkt_v20(struct rte_distributor_v20 *d,
+        unsigned int worker_id, struct rte_mbuf *oldpkt);
+
+/**
+ * API called by a worker to return a completed packet without requesting a
+ * new packet, for example, because a worker thread is shutting down
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @param worker_id
+ *   The worker instance number to use - must be less than num_workers passed
+ *   at distributor creation time.
+ * @param mbuf
+ *   The previous packet being processed by the worker
+ */
+int
+rte_distributor_return_pkt_v20(struct rte_distributor_v20 *d,
+        unsigned int worker_id, struct rte_mbuf *mbuf);
+
+/**
+ * API called by a worker to request a new packet to process.
+ * Any previous packet given to the worker is assumed to have completed
+ * processing, and may be optionally returned to the distributor via
+ * the oldpkt parameter.
+ * Unlike rte_distributor_get_pkt(), this function does not wait for a new
+ * packet to be provided by the distributor.
+ *
+ * NOTE: after calling this function, rte_distributor_poll_pkt() should
+ * be used to poll for the packet requested. The rte_distributor_get_pkt()
+ * API should *not* be used to try and retrieve the new packet.
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @param worker_id
+ *   The worker instance number to use - must be less than num_workers passed
+ *   at distributor creation time.
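A hedged sketch of the worker side of this v20 API (one such loop per worker lcore, each with a unique worker_id; quit_signal is assumed application state):

    struct rte_mbuf *pkt;

    pkt = rte_distributor_get_pkt_v20(d, worker_id, NULL);
    while (!quit_signal) {
        /* ... process pkt ... */
        pkt = rte_distributor_get_pkt_v20(d, worker_id, pkt);
    }
    rte_distributor_return_pkt_v20(d, worker_id, pkt);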
+ * @param oldpkt
+ *   The previous packet, if any, being processed by the worker
+ */
+void
+rte_distributor_request_pkt_v20(struct rte_distributor_v20 *d,
+        unsigned int worker_id, struct rte_mbuf *oldpkt);
+
+/**
+ * API called by a worker to check for a new packet that was previously
+ * requested by a call to rte_distributor_request_pkt(). It does not wait
+ * for the new packet to be available, but returns NULL if the request has
+ * not yet been fulfilled by the distributor.
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @param worker_id
+ *   The worker instance number to use - must be less than num_workers passed
+ *   at distributor creation time.
+ *
+ * @return
+ *   A new packet to be processed by the worker thread, or NULL if no
+ *   packet is yet available.
+ */
+struct rte_mbuf *
+rte_distributor_poll_pkt_v20(struct rte_distributor_v20 *d,
+        unsigned int worker_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_distributor/rte_distributor_version.map b/lib/librte_distributor/rte_distributor_version.map
index 73fdc437..3a285b39 100644
--- a/lib/librte_distributor/rte_distributor_version.map
+++ b/lib/librte_distributor/rte_distributor_version.map
@@ -13,3 +13,17 @@ DPDK_2.0 {
 	local: *;
 };
+
+DPDK_17.05 {
+	global:
+
+	rte_distributor_clear_returns;
+	rte_distributor_create;
+	rte_distributor_flush;
+	rte_distributor_get_pkt;
+	rte_distributor_poll_pkt;
+	rte_distributor_process;
+	rte_distributor_request_pkt;
+	rte_distributor_return_pkt;
+	rte_distributor_returned_pkts;
+} DPDK_2.0;
diff --git a/lib/librte_eal/Makefile b/lib/librte_eal/Makefile
index cf11a099..5690bb49 100644
--- a/lib/librte_eal/Makefile
+++ b/lib/librte_eal/Makefile
@@ -33,6 +33,8 @@ include $(RTE_SDK)/mk/rte.vars.mk
 DIRS-y += common
 DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += linuxapp
+DEPDIRS-linuxapp := common
 DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += bsdapp
+DEPDIRS-bsdapp := common
 include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index a15b762b..a0f99502 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -48,7 +48,7 @@ LDLIBS += -lgcc_s
 EXPORT_MAP := rte_eal_version.map
-LIBABIVER := 3
+LIBABIVER := 4
 # specific to bsdapp exec-env
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
@@ -78,6 +78,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_cpuflags.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_string_fns.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hexdump.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_bus.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c
@@ -110,7 +111,4 @@ INC := rte_interrupts.h
 SYMLINK-$(CONFIG_RTE_EXEC_ENV_BSDAPP)-include/exec-env := \
 	$(addprefix include/exec-env/,$(INC))
-DEPDIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += lib/librte_eal/common
-DEPDIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += lib/librte_eal/common/arch/$(ARCH_DIR)
-
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 35e3117a..05f0c1f9 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -56,6 +56,7 @@
 #include <rte_launch.h>
 #include <rte_eal.h>
 #include <rte_eal_memconfig.h>
+#include <rte_errno.h>
 #include <rte_per_lcore.h>
 #include <rte_lcore.h>
 #include <rte_log.h>
@@ -64,6 +65,7 @@
 #include
<rte_string_fns.h> #include <rte_cpuflags.h> #include <rte_interrupts.h> +#include <rte_bus.h> #include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> @@ -193,7 +195,7 @@ rte_eal_config_create(void) rte_panic("Cannot mmap memory for rte_config\n"); } memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); - rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; + rte_config.mem_config = rte_mem_cfg_addr; } /* attach to an existing shared memory config */ @@ -218,7 +220,7 @@ rte_eal_config_attach(void) if (rte_mem_cfg_addr == MAP_FAILED) rte_panic("Cannot mmap memory for rte_config\n"); - rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; + rte_config.mem_config = rte_mem_cfg_addr; } /* Detect if we are a primary or a secondary process */ @@ -321,8 +323,6 @@ eal_log_level_parse(int argc, char **argv) optind = 1; optreset = 1; - eal_reset_internal_config(&internal_config); - while ((opt = getopt_long(argc, argvopt, eal_short_options, eal_long_options, &option_index)) != EOF) { @@ -486,6 +486,12 @@ rte_eal_iopl_init(void) return 0; } +static void rte_eal_init_alert(const char *msg) +{ + fprintf(stderr, "EAL: FATAL: %s\n", msg); + RTE_LOG(ERR, EAL, "%s\n", msg); +} + /* Launch threads, called at application init(). */ int rte_eal_init(int argc, char **argv) @@ -497,29 +503,47 @@ rte_eal_init(int argc, char **argv) char thread_name[RTE_MAX_THREAD_NAME_LEN]; /* checks if the machine is adequate */ - rte_cpu_check_supported(); + if (!rte_cpu_is_supported()) { + rte_eal_init_alert("unsupported cpu type."); + rte_errno = ENOTSUP; + return -1; + } - if (!rte_atomic32_test_and_set(&run_once)) + if (!rte_atomic32_test_and_set(&run_once)) { + rte_eal_init_alert("already called initialization."); + rte_errno = EALREADY; return -1; + } thread_id = pthread_self(); - eal_log_level_parse(argc, argv); + eal_reset_internal_config(&internal_config); /* set log level as early as possible */ - rte_set_log_level(internal_config.log_level); + eal_log_level_parse(argc, argv); - if (rte_eal_cpu_init() < 0) - rte_panic("Cannot detect lcores\n"); + if (rte_eal_cpu_init() < 0) { + rte_eal_init_alert("Cannot detect lcores."); + rte_errno = ENOTSUP; + return -1; + } fctret = eal_parse_args(argc, argv); - if (fctret < 0) - exit(1); + if (fctret < 0) { + rte_eal_init_alert("Invalid 'command line' arguments."); + rte_errno = EINVAL; + rte_atomic32_clear(&run_once); + return -1; + } if (internal_config.no_hugetlbfs == 0 && internal_config.process_type != RTE_PROC_SECONDARY && - eal_hugepage_info_init() < 0) - rte_panic("Cannot get hugepage information\n"); + eal_hugepage_info_init() < 0) { + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); + return -1; + } if (internal_config.memory == 0 && internal_config.force_sockets == 0) { if (internal_config.no_hugetlbfs) @@ -543,31 +567,45 @@ rte_eal_init(int argc, char **argv) rte_config_init(); - if (rte_eal_memory_init() < 0) - rte_panic("Cannot init memory\n"); - - if (rte_eal_memzone_init() < 0) - rte_panic("Cannot init memzone\n"); + if (rte_eal_memory_init() < 0) { + rte_eal_init_alert("Cannot init memory\n"); + rte_errno = ENOMEM; + return -1; + } - if (rte_eal_tailqs_init() < 0) - rte_panic("Cannot init tail queues for objects\n"); + if (rte_eal_memzone_init() < 0) { + rte_eal_init_alert("Cannot init memzone\n"); + rte_errno = ENODEV; + return -1; + } - if (rte_eal_alarm_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); + if 
(rte_eal_tailqs_init() < 0) { + rte_eal_init_alert("Cannot init tail queues for objects\n"); + rte_errno = EFAULT; + return -1; + } - if (rte_eal_intr_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); + if (rte_eal_alarm_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + /* rte_eal_alarm_init sets rte_errno on failure. */ + return -1; + } - if (rte_eal_timer_init() < 0) - rte_panic("Cannot init HPET or TSC timers\n"); + if (rte_eal_intr_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + return -1; + } - if (rte_eal_pci_init() < 0) - rte_panic("Cannot init PCI\n"); + if (rte_eal_timer_init() < 0) { + rte_eal_init_alert("Cannot init HPET or TSC timers\n"); + rte_errno = ENOTSUP; + return -1; + } eal_check_mem_on_local_socket(); if (eal_plugins_init() < 0) - rte_panic("Cannot init plugins\n"); + rte_eal_init_alert("Cannot init plugins\n"); eal_thread_init_master(rte_config.master_lcore); @@ -577,8 +615,11 @@ rte_eal_init(int argc, char **argv) rte_config.master_lcore, thread_id, cpuset, ret == 0 ? "" : "..."); - if (rte_eal_dev_init() < 0) - rte_panic("Cannot init pmd devices\n"); + if (rte_bus_scan()) { + rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_errno = ENODEV; + return -1; + } RTE_LCORE_FOREACH_SLAVE(i) { @@ -612,9 +653,12 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); - /* Probe & Initialize PCI devices */ - if (rte_eal_pci_probe()) - rte_panic("Cannot probe PCI\n"); + /* Probe all the buses and devices/drivers on them */ + if (rte_bus_probe()) { + rte_eal_init_alert("Cannot probe devices\n"); + rte_errno = ENOTSUP; + return -1; + } rte_eal_mcfg_complete(); diff --git a/lib/librte_eal/bsdapp/eal/eal_debug.c b/lib/librte_eal/bsdapp/eal/eal_debug.c index 5fbc17c5..e1c75548 100644 --- a/lib/librte_eal/bsdapp/eal/eal_debug.c +++ b/lib/librte_eal/bsdapp/eal/eal_debug.c @@ -31,7 +31,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
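With the conversion from rte_panic() to rte_eal_init_alert() plus rte_errno shown in this hunk, applications can observe and report EAL init failure themselves rather than aborting. A caller-side sketch (assuming the usual stdio/stdlib includes):

    /* rte_eal_init() now returns -1 with rte_errno set on failure. */
    int ret = rte_eal_init(argc, argv);
    if (ret < 0) {
        fprintf(stderr, "EAL init failed: %s\n", rte_strerror(rte_errno));
        return EXIT_FAILURE;
    }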
*/ +#ifdef RTE_BACKTRACE #include <execinfo.h> +#endif #include <stdarg.h> #include <signal.h> #include <stdlib.h> @@ -47,6 +49,7 @@ /* dump the stack of the calling core */ void rte_dump_stack(void) { +#ifdef RTE_BACKTRACE void *func[BACKTRACE_SIZE]; char **symb = NULL; int size; @@ -64,6 +67,7 @@ void rte_dump_stack(void) } free(symb); +#endif /* RTE_BACKTRACE */ } /* not implemented in this environment */ diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c index 836e4836..ea2afff4 100644 --- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c +++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c @@ -36,29 +36,37 @@ #include "eal_private.h" int -rte_intr_callback_register(struct rte_intr_handle *intr_handle __rte_unused, - rte_intr_callback_fn cb __rte_unused, - void *cb_arg __rte_unused) +rte_intr_callback_register(const struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, + void *cb_arg) { + RTE_SET_USED(intr_handle); + RTE_SET_USED(cb); + RTE_SET_USED(cb_arg); + return -ENOTSUP; } int -rte_intr_callback_unregister(struct rte_intr_handle *intr_handle __rte_unused, - rte_intr_callback_fn cb_fn __rte_unused, - void *cb_arg __rte_unused) +rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, + void *cb_arg) { + RTE_SET_USED(intr_handle); + RTE_SET_USED(cb); + RTE_SET_USED(cb_arg); + return -ENOTSUP; } int -rte_intr_enable(struct rte_intr_handle *intr_handle __rte_unused) +rte_intr_enable(const struct rte_intr_handle *intr_handle __rte_unused) { return -ENOTSUP; } int -rte_intr_disable(struct rte_intr_handle *intr_handle __rte_unused) +rte_intr_disable(const struct rte_intr_handle *intr_handle __rte_unused) { return -ENOTSUP; } diff --git a/lib/librte_eal/bsdapp/eal/eal_lcore.c b/lib/librte_eal/bsdapp/eal/eal_lcore.c index b8bfafde..bc584dd5 100644 --- a/lib/librte_eal/bsdapp/eal/eal_lcore.c +++ b/lib/librte_eal/bsdapp/eal/eal_lcore.c @@ -53,12 +53,14 @@ eal_cpu_core_id(__rte_unused unsigned lcore_id) static int eal_get_ncpus(void) { + static int ncpu = -1; int mib[2] = {CTL_HW, HW_NCPU}; - int ncpu; size_t len = sizeof(ncpu); - sysctl(mib, 2, &ncpu, &len, NULL, 0); - RTE_LOG(INFO, EAL, "Sysctl reports %d cpus\n", ncpu); + if (ncpu < 0) { + sysctl(mib, 2, &ncpu, &len, NULL, 0); + RTE_LOG(INFO, EAL, "Sysctl reports %d cpus\n", ncpu); + } return ncpu; } diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c index 8b3ed881..e321461d 100644 --- a/lib/librte_eal/bsdapp/eal/eal_pci.c +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c @@ -87,18 +87,11 @@ * enabling bus master. 
*/ -/* unbind kernel driver for this device */ -int -pci_unbind_kernel_driver(struct rte_pci_device *dev __rte_unused) -{ - RTE_LOG(ERR, EAL, "RTE_PCI_DRV_FORCE_UNBIND flag is not implemented " - "for BSD\n"); - return -ENOTSUP; -} +extern struct rte_pci_bus rte_pci_bus; /* Map pci device */ int -rte_eal_pci_map_device(struct rte_pci_device *dev) +rte_pci_map_device(struct rte_pci_device *dev) { int ret = -1; @@ -120,7 +113,7 @@ rte_eal_pci_map_device(struct rte_pci_device *dev) /* Unmap pci device */ void -rte_eal_pci_unmap_device(struct rte_pci_device *dev) +rte_pci_unmap_device(struct rte_pci_device *dev) { /* try unmapping the NIC resources */ switch (dev->kdrv) { @@ -289,6 +282,9 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) /* FreeBSD has no NUMA support (yet) */ dev->device.numa_node = 0; + rte_pci_device_name(&dev->addr, dev->name, sizeof(dev->name)); + dev->device.name = dev->name; + /* FreeBSD has only one pass through driver */ dev->kdrv = RTE_KDRV_NIC_UIO; @@ -322,20 +318,19 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) } /* device is valid, add in list (sorted) */ - if (TAILQ_EMPTY(&pci_device_list)) { - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { + rte_pci_add_device(dev); } else { struct rte_pci_device *dev2 = NULL; int ret; - TAILQ_FOREACH(dev2, &pci_device_list, next) { + TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); if (ret > 0) continue; else if (ret < 0) { - TAILQ_INSERT_BEFORE(dev2, dev, next); - return 0; + rte_pci_insert_device(dev2, dev); } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; @@ -343,10 +338,10 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) dev->mem_resource, sizeof(dev->mem_resource)); free(dev); - return 0; } + return 0; } - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + rte_pci_add_device(dev); } return 0; @@ -361,7 +356,7 @@ skipdev: * list. Call pci_scan_one() for each pci entry found. */ int -rte_eal_pci_scan(void) +rte_pci_scan(void) { int fd; unsigned dev_count = 0; @@ -374,6 +369,10 @@ rte_eal_pci_scan(void) .matches = &matches[0], }; + /* for debug purposes, PCI can be disabled */ + if (internal_config.no_pci) + return 0; + fd = open("/dev/pci", O_RDONLY); if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); @@ -456,10 +455,11 @@ error: } /* Read PCI config space. */ -int rte_eal_pci_read_config(const struct rte_pci_device *dev, - void *buf, size_t len, off_t offset) +int rte_pci_read_config(const struct rte_pci_device *dev, + void *buf, size_t len, off_t offset) { int fd = -1; + int size; struct pci_io pi = { .pi_sel = { .pc_domain = dev->addr.domain, @@ -468,25 +468,28 @@ int rte_eal_pci_read_config(const struct rte_pci_device *dev, .pc_func = dev->addr.function, }, .pi_reg = offset, - .pi_width = len, }; - if (len == 3 || len > sizeof(pi.pi_data)) { - RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__); - goto error; - } - fd = open("/dev/pci", O_RDWR); if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); goto error; } - if (ioctl(fd, PCIOCREAD, &pi) < 0) - goto error; + while (len > 0) { + size = (len >= 4) ? 4 : ((len >= 2) ? 
2 : 1); + pi.pi_width = size; + + if (ioctl(fd, PCIOCREAD, &pi) < 0) + goto error; + memcpy(buf, &pi.pi_data, size); + + buf = (char *)buf + size; + pi.pi_reg += size; + len -= size; + } close(fd); - memcpy(buf, &pi.pi_data, len); return 0; error: @@ -496,8 +499,8 @@ int rte_eal_pci_read_config(const struct rte_pci_device *dev, } /* Write PCI config space. */ -int rte_eal_pci_write_config(const struct rte_pci_device *dev, - const void *buf, size_t len, off_t offset) +int rte_pci_write_config(const struct rte_pci_device *dev, + const void *buf, size_t len, off_t offset) { int fd = -1; @@ -539,8 +542,8 @@ int rte_eal_pci_write_config(const struct rte_pci_device *dev, } int -rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) +rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) { int ret; @@ -567,7 +570,7 @@ rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, static void pci_uio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) + void *data, size_t len, off_t offset) { #if defined(RTE_ARCH_X86) uint8_t *d; @@ -595,8 +598,8 @@ pci_uio_ioport_read(struct rte_pci_ioport *p, } void -rte_eal_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) +rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { case RTE_KDRV_NIC_UIO: @@ -609,7 +612,7 @@ rte_eal_pci_ioport_read(struct rte_pci_ioport *p, static void pci_uio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) + const void *data, size_t len, off_t offset) { #if defined(RTE_ARCH_X86) const uint8_t *s; @@ -619,13 +622,13 @@ pci_uio_ioport_write(struct rte_pci_ioport *p, for (s = data; len > 0; s += size, reg += size, len -= size) { if (len >= 4) { size = 4; - outl(*(const uint32_t *)s, reg); + outl(reg, *(const uint32_t *)s); } else if (len >= 2) { size = 2; - outw(*(const uint16_t *)s, reg); + outw(reg, *(const uint16_t *)s); } else { size = 1; - outb(*s, reg); + outb(reg, *s); } } #else @@ -637,8 +640,8 @@ pci_uio_ioport_write(struct rte_pci_ioport *p, } void -rte_eal_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) +rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { case RTE_KDRV_NIC_UIO: @@ -650,7 +653,7 @@ rte_eal_pci_ioport_write(struct rte_pci_ioport *p, } int -rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) +rte_pci_ioport_unmap(struct rte_pci_ioport *p) { int ret; @@ -667,18 +670,3 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) return ret; } - -/* Init the PCI EAL subsystem */ -int -rte_eal_pci_init(void) -{ - /* for debug purposes, PCI can be disabled */ - if (internal_config.no_pci) - return 0; - - if (rte_eal_pci_scan() < 0) { - RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); - return -1; - } - return 0; -} diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map index 2f81f7c0..2e48a736 100644 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -6,8 +6,6 @@ DPDK_2.0 { eal_parse_sysfs_value; eal_timer_source; lcore_config; - pci_device_list; - pci_driver_list; per_lcore__lcore_id; per_lcore__rte_errno; rte_calloc; @@ -22,12 +20,9 @@ DPDK_2.0 { rte_dump_tailq; rte_eal_alarm_cancel; rte_eal_alarm_set; - rte_eal_dev_init; rte_eal_devargs_add; rte_eal_devargs_dump; rte_eal_devargs_type_count; 
- rte_eal_driver_register; - rte_eal_driver_unregister; rte_eal_get_configuration; rte_eal_get_lcore_state; rte_eal_get_physmem_layout; @@ -40,18 +35,10 @@ DPDK_2.0 { rte_eal_mp_remote_launch; rte_eal_mp_wait_lcore; rte_eal_parse_devargs_str; - rte_eal_pci_dump; - rte_eal_pci_probe; - rte_eal_pci_probe_one; - rte_eal_pci_register; - rte_eal_pci_scan; - rte_eal_pci_unregister; rte_eal_process_type; rte_eal_remote_launch; rte_eal_tailq_lookup; rte_eal_tailq_register; - rte_eal_vdev_init; - rte_eal_vdev_uninit; rte_eal_wait_lcore; rte_exit; rte_free; @@ -66,11 +53,8 @@ DPDK_2.0 { rte_intr_disable; rte_intr_enable; rte_log; - rte_log_add_in_history; rte_log_cur_msg_loglevel; rte_log_cur_msg_logtype; - rte_log_dump_history; - rte_log_set_history; rte_logs; rte_malloc; rte_malloc_dump_stats; @@ -114,9 +98,6 @@ DPDK_2.0 { DPDK_2.1 { global: - rte_eal_pci_detach; - rte_eal_pci_read_config; - rte_eal_pci_write_config; rte_intr_allow_others; rte_intr_dp_is_en; rte_intr_efd_disable; @@ -142,12 +123,6 @@ DPDK_16.04 { global: rte_cpu_get_flag_name; - rte_eal_pci_ioport_map; - rte_eal_pci_ioport_read; - rte_eal_pci_ioport_unmap; - rte_eal_pci_ioport_write; - rte_eal_pci_map_device; - rte_eal_pci_unmap_device; rte_eal_primary_proc_alive; } DPDK_2.2; @@ -170,7 +145,51 @@ DPDK_16.11 { rte_delay_us_callback_register; rte_eal_dev_attach; rte_eal_dev_detach; - rte_eal_vdrv_register; - rte_eal_vdrv_unregister; } DPDK_16.07; + +DPDK_17.02 { + global: + + rte_bus_dump; + rte_bus_probe; + rte_bus_register; + rte_bus_scan; + rte_bus_unregister; + +} DPDK_16.11; + +DPDK_17.05 { + global: + + rte_cpu_is_supported; + rte_log_dump; + rte_log_register; + rte_log_get_global_level; + rte_log_set_global_level; + rte_log_set_level; + rte_log_set_level_regexp; + rte_pci_detach; + rte_pci_dump; + rte_pci_ioport_map; + rte_pci_ioport_read; + rte_pci_ioport_unmap; + rte_pci_ioport_write; + rte_pci_map_device; + rte_pci_probe; + rte_pci_probe_one; + rte_pci_read_config; + rte_pci_register; + rte_pci_scan; + rte_pci_unmap_device; + rte_pci_unregister; + rte_pci_write_config; + rte_vdev_init; + rte_vdev_register; + rte_vdev_uninit; + rte_vdev_unregister; + vfio_get_container_fd; + vfio_get_group_fd; + vfio_get_group_no; + +} DPDK_17.02; diff --git a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c index 99a4975c..4bd7545a 100644 --- a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c +++ b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c @@ -180,6 +180,10 @@ nic_uio_probe (device_t dev) unsigned int device = pci_get_slot(dev); unsigned int function = pci_get_function(dev); + char bdf_str[256]; + char *token, *remaining; + + /* First check if we found this on load */ for (i = 0; i < num_detached; i++) if (bus == pci_get_bus(detached_devices[i]) && device == pci_get_slot(detached_devices[i]) && @@ -188,6 +192,45 @@ nic_uio_probe (device_t dev) return BUS_PROBE_SPECIFIC; } + /* otherwise check if it's a new device and if it matches the BDF */ + memset(bdf_str, 0, sizeof(bdf_str)); + TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str)); + remaining = bdf_str; + while (1) { + if (remaining == NULL || remaining[0] == '\0') + break; + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + bus = strtol(token, NULL, 10); + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + device = strtol(token, NULL, 10); + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + function = strtol(token, NULL, 10); + + if (bus == pci_get_bus(dev) && + device == pci_get_slot(dev) && + function == 
pci_get_function(dev)) {
+
+			if (num_detached < MAX_DETACHED_DEVICES) {
+				printf("%s: probed dev=%p\n",
+					       __func__, dev);
+				detached_devices[num_detached++] = dev;
+				device_set_desc(dev, "DPDK PCI Device");
+				return BUS_PROBE_SPECIFIC;
+			} else {
+				printf("%s: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n",
+						__func__, MAX_DETACHED_DEVICES,
+						dev);
+				break;
+			}
+		}
+	}
+
 	return ENXIO;
 }
@@ -248,6 +291,7 @@ nic_uio_load(void)
 	memset(bdf_str, 0, sizeof(bdf_str));
 	TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str));
 	remaining = bdf_str;
+	printf("nic_uio: hw.nic_uio.bdfs = '%s'\n", bdf_str);
 	/*
 	 * Users should specify PCI BDFs in the format "b:d:f,b:d:f,b:d:f".
 	 * But the code below does not try to differentiate between : and ,
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index dfd64aa5..a5bd1089 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -38,16 +38,14 @@ INC += rte_per_lcore.h rte_random.h
 INC += rte_tailq.h rte_interrupts.h rte_alarm.h
 INC += rte_string_fns.h rte_version.h
 INC += rte_eal_memconfig.h rte_malloc_heap.h
-INC += rte_hexdump.h rte_devargs.h rte_dev.h rte_vdev.h
+INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h
 INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
 INC += rte_malloc.h rte_keepalive.h rte_time.h
-ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
-INC += rte_warnings.h
-endif
-
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
+GENERIC_INC += rte_vect.h rte_io.h
+
 # defined in mk/arch/$(RTE_ARCH)/rte.vars.mk
 ARCH_DIR ?= $(RTE_ARCH)
 ARCH_INC := $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h))
diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c
new file mode 100644
index 00000000..8f9baf8b
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -0,0 +1,147 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 NXP
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of NXP nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_bus.h>
+
+#include "eal_private.h"
+
+struct rte_bus_list rte_bus_list =
+	TAILQ_HEAD_INITIALIZER(rte_bus_list);
+
+void
+rte_bus_register(struct rte_bus *bus)
+{
+	RTE_VERIFY(bus);
+	RTE_VERIFY(bus->name && strlen(bus->name));
+	/* A bus must provide a scan implementation */
+	RTE_VERIFY(bus->scan);
+	RTE_VERIFY(bus->probe);
+
+	TAILQ_INSERT_TAIL(&rte_bus_list, bus, next);
+	RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name);
+}
+
+void
+rte_bus_unregister(struct rte_bus *bus)
+{
+	TAILQ_REMOVE(&rte_bus_list, bus, next);
+	RTE_LOG(DEBUG, EAL, "Unregistered [%s] bus.\n", bus->name);
+}
+
+/* Scan all the buses for registered devices */
+int
+rte_bus_scan(void)
+{
+	int ret;
+	struct rte_bus *bus = NULL;
+
+	TAILQ_FOREACH(bus, &rte_bus_list, next) {
+		ret = bus->scan();
+		if (ret) {
+			RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n",
+				bus->name);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/* Probe all devices of all buses */
+int
+rte_bus_probe(void)
+{
+	int ret;
+	struct rte_bus *bus, *vbus = NULL;
+
+	TAILQ_FOREACH(bus, &rte_bus_list, next) {
+		if (!strcmp(bus->name, "virtual")) {
+			vbus = bus;
+			continue;
+		}
+
+		ret = bus->probe();
+		if (ret) {
+			RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
+				bus->name);
+			return ret;
+		}
+	}
+
+	if (vbus) {
+		ret = vbus->probe();
+		if (ret) {
+			RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
+				vbus->name);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/* Dump information of a single bus */
+static int
+bus_dump_one(FILE *f, struct rte_bus *bus)
+{
+	int ret;
+
+	/* For now, dump only the bus name */
+	ret = fprintf(f, " %s\n", bus->name);
+
+	/* Report an error if writing to the stream failed */
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+void
+rte_bus_dump(FILE *f)
+{
+	int ret;
+	struct rte_bus *bus;
+
+	TAILQ_FOREACH(bus, &rte_bus_list, next) {
+		ret = bus_dump_one(f, bus);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "Unable to write to stream (%d)\n",
+				ret);
+			break;
+		}
+	}
+}
diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index b5f76f7f..9a2d080a 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -43,6 +43,13 @@
 void
 rte_cpu_check_supported(void)
 {
+	if (!rte_cpu_is_supported())
+		exit(1);
+}
+
+int
+rte_cpu_is_supported(void)
+{
 	/* This is generated at compile-time by the build system */
 	static const enum rte_cpu_flag_t compile_time_flags[] = {
 		RTE_COMPILE_TIME_CPUFLAGS
@@ -57,14 +64,16 @@ rte_cpu_check_supported(void)
 			fprintf(stderr,
 				"ERROR: CPU feature flag lookup failed with error %d\n",
 				ret);
-			exit(1);
+			return 0;
 		}
 		if (!ret) {
 			fprintf(stderr,
 			        "ERROR: This system does not support \"%s\".\n"
 				"Please check that RTE_MACHINE is set correctly.\n",
 				rte_cpu_get_flag_name(compile_time_flags[i]));
-			exit(1);
+			return 0;
 		}
 	}
+
+	return 1;
 }
diff --git a/lib/librte_eal/common/eal_common_dev.c
b/lib/librte_eal/common/eal_common_dev.c index 4f3b4934..a400ddd0 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -45,65 +45,6 @@ #include "eal_private.h" -/** Global list of device drivers. */ -static struct rte_driver_list dev_driver_list = - TAILQ_HEAD_INITIALIZER(dev_driver_list); -/** Global list of device drivers. */ -static struct rte_device_list dev_device_list = - TAILQ_HEAD_INITIALIZER(dev_device_list); - -/* register a driver */ -void -rte_eal_driver_register(struct rte_driver *driver) -{ - TAILQ_INSERT_TAIL(&dev_driver_list, driver, next); -} - -/* unregister a driver */ -void -rte_eal_driver_unregister(struct rte_driver *driver) -{ - TAILQ_REMOVE(&dev_driver_list, driver, next); -} - -void rte_eal_device_insert(struct rte_device *dev) -{ - TAILQ_INSERT_TAIL(&dev_device_list, dev, next); -} - -void rte_eal_device_remove(struct rte_device *dev) -{ - TAILQ_REMOVE(&dev_device_list, dev, next); -} - -int -rte_eal_dev_init(void) -{ - struct rte_devargs *devargs; - - /* - * Note that the dev_driver_list is populated here - * from calls made to rte_eal_driver_register from constructor functions - * embedded into PMD modules via the RTE_PMD_REGISTER_VDEV macro - */ - - /* call the init function for each virtual device */ - TAILQ_FOREACH(devargs, &devargs_list, next) { - - if (devargs->type != RTE_DEVTYPE_VIRTUAL) - continue; - - if (rte_eal_vdev_init(devargs->virt.drv_name, - devargs->args)) { - RTE_LOG(ERR, EAL, "failed to initialize %s device\n", - devargs->virt.drv_name); - return -1; - } - } - - return 0; -} - int rte_eal_dev_attach(const char *name, const char *devargs) { struct rte_pci_addr addr; @@ -114,11 +55,11 @@ int rte_eal_dev_attach(const char *name, const char *devargs) } if (eal_parse_pci_DomBDF(name, &addr) == 0) { - if (rte_eal_pci_probe_one(&addr) < 0) + if (rte_pci_probe_one(&addr) < 0) goto err; } else { - if (rte_eal_vdev_init(name, devargs)) + if (rte_vdev_init(name, devargs)) goto err; } @@ -139,10 +80,10 @@ int rte_eal_dev_detach(const char *name) } if (eal_parse_pci_DomBDF(name, &addr) == 0) { - if (rte_eal_pci_detach(&addr) < 0) + if (rte_pci_detach(&addr) < 0) goto err; } else { - if (rte_eal_vdev_uninit(name)) + if (rte_vdev_uninit(name)) goto err; } return 0; diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c index 2cd41320..84fa0cb5 100644 --- a/lib/librte_eal/common/eal_common_lcore.c +++ b/lib/librte_eal/common/eal_common_lcore.c @@ -83,16 +83,17 @@ rte_eal_cpu_init(void) config->lcore_role[lcore_id] = ROLE_RTE; lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id); lcore_config[lcore_id].socket_id = eal_cpu_socket_id(lcore_id); - if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES) + if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES) { #ifdef RTE_EAL_ALLOW_INV_SOCKET_ID lcore_config[lcore_id].socket_id = 0; #else - rte_panic("Socket ID (%u) is greater than " + RTE_LOG(ERR, EAL, "Socket ID (%u) is greater than " "RTE_MAX_NUMA_NODES (%d)\n", lcore_config[lcore_id].socket_id, RTE_MAX_NUMA_NODES); + return -1; #endif - + } RTE_LOG(DEBUG, EAL, "Detected lcore %u as " "core %u on socket %u\n", lcore_id, lcore_config[lcore_id].core_id, diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c index e45d3269..ddf65b7f 100644 --- a/lib/librte_eal/common/eal_common_log.c +++ b/lib/librte_eal/common/eal_common_log.c @@ -35,7 +35,11 @@ #include <stdint.h> #include <stdarg.h> #include <stdlib.h> +#include 
<string.h> +#include <errno.h> +#include <regex.h> +#include <rte_eal.h> #include <rte_log.h> #include <rte_per_lcore.h> @@ -60,6 +64,11 @@ struct log_cur_msg { uint32_t logtype; /**< log type - see rte_log.h */ }; +struct rte_log_dynamic_type { + const char *name; + uint32_t loglevel; +}; + /* per core log */ static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg); @@ -75,35 +84,95 @@ rte_openlog_stream(FILE *f) /* Set global log level */ void -rte_set_log_level(uint32_t level) +rte_log_set_global_level(uint32_t level) { rte_logs.level = (uint32_t)level; } +/* Set global log level */ +/* replaced by rte_log_set_global_level */ +__rte_deprecated void +rte_set_log_level(uint32_t level) +{ + rte_log_set_global_level(level); +} + /* Get global log level */ uint32_t -rte_get_log_level(void) +rte_log_get_global_level(void) { return rte_logs.level; } +/* Get global log level */ +/* replaced by rte_log_get_global_level */ +uint32_t +rte_get_log_level(void) +{ + return rte_log_get_global_level(); +} + /* Set global log type */ -void +__rte_deprecated void rte_set_log_type(uint32_t type, int enable) { + if (type < RTE_LOGTYPE_FIRST_EXT_ID) { + if (enable) + rte_logs.type |= 1 << type; + else + rte_logs.type &= ~(1 << type); + } + if (enable) - rte_logs.type |= type; + rte_log_set_level(type, 0); else - rte_logs.type &= (~type); + rte_log_set_level(type, RTE_LOG_DEBUG); } /* Get global log type */ -uint32_t +__rte_deprecated uint32_t rte_get_log_type(void) { return rte_logs.type; } +int +rte_log_set_level(uint32_t type, uint32_t level) +{ + if (type >= rte_logs.dynamic_types_len) + return -1; + if (level > RTE_LOG_DEBUG) + return -1; + + rte_logs.dynamic_types[type].loglevel = level; + + return 0; +} + +/* set level */ +int +rte_log_set_level_regexp(const char *pattern, uint32_t level) +{ + regex_t r; + size_t i; + + if (level > RTE_LOG_DEBUG) + return -1; + + if (regcomp(&r, pattern, 0) != 0) + return -1; + + for (i = 0; i < rte_logs.dynamic_types_len; i++) { + if (rte_logs.dynamic_types[i].name == NULL) + continue; + if (regexec(&r, rte_logs.dynamic_types[i].name, 0, + NULL, 0) == 0) + rte_logs.dynamic_types[i].loglevel = level; + } + + return 0; +} + /* get the current loglevel for the message beeing processed */ int rte_log_cur_msg_loglevel(void) { @@ -116,6 +185,161 @@ int rte_log_cur_msg_logtype(void) return RTE_PER_LCORE(log_cur_msg).logtype; } +static int +rte_log_lookup(const char *name) +{ + size_t i; + + for (i = 0; i < rte_logs.dynamic_types_len; i++) { + if (rte_logs.dynamic_types[i].name == NULL) + continue; + if (strcmp(name, rte_logs.dynamic_types[i].name) == 0) + return i; + } + + return -1; +} + +/* register an extended log type, assuming table is large enough, and id + * is not yet registered. 
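The regexp matcher above is what backs per-component log control: it walks every registered type name and applies the level to each match. A minimal sketch of driving it directly (type names are the legacy ones registered further below):

    /* Hedged sketch: errors only by default, full debug for the user types. */
    rte_log_set_global_level(RTE_LOG_DEBUG);         /* keep the global gate open */
    rte_log_set_level_regexp(".*", RTE_LOG_ERR);     /* every registered type */
    rte_log_set_level_regexp("user", RTE_LOG_DEBUG); /* matches user1..user8 */

Note that regcomp() is called without REG_EXTENDED, so the pattern is a POSIX basic regular expression, and regexec() matches anywhere in the name (the pattern is not anchored).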
+ */ +static int +__rte_log_register(const char *name, int id) +{ + char *dup_name = strdup(name); + + if (dup_name == NULL) + return -ENOMEM; + + rte_logs.dynamic_types[id].name = dup_name; + rte_logs.dynamic_types[id].loglevel = RTE_LOG_DEBUG; + + return id; +} + +/* register an extended log type */ +int +rte_log_register(const char *name) +{ + struct rte_log_dynamic_type *new_dynamic_types; + int id, ret; + + id = rte_log_lookup(name); + if (id >= 0) + return id; + + new_dynamic_types = realloc(rte_logs.dynamic_types, + sizeof(struct rte_log_dynamic_type) * + (rte_logs.dynamic_types_len + 1)); + if (new_dynamic_types == NULL) + return -ENOMEM; + rte_logs.dynamic_types = new_dynamic_types; + + ret = __rte_log_register(name, rte_logs.dynamic_types_len); + if (ret < 0) + return ret; + + rte_logs.dynamic_types_len++; + + return ret; +} + +struct logtype { + uint32_t log_id; + const char *logtype; +}; + +static const struct logtype logtype_strings[] = { + {RTE_LOGTYPE_EAL, "eal"}, + {RTE_LOGTYPE_MALLOC, "malloc"}, + {RTE_LOGTYPE_RING, "ring"}, + {RTE_LOGTYPE_MEMPOOL, "mempool"}, + {RTE_LOGTYPE_TIMER, "timer"}, + {RTE_LOGTYPE_PMD, "pmd"}, + {RTE_LOGTYPE_HASH, "hash"}, + {RTE_LOGTYPE_LPM, "lpm"}, + {RTE_LOGTYPE_KNI, "kni"}, + {RTE_LOGTYPE_ACL, "acl"}, + {RTE_LOGTYPE_POWER, "power"}, + {RTE_LOGTYPE_METER, "meter"}, + {RTE_LOGTYPE_SCHED, "sched"}, + {RTE_LOGTYPE_PORT, "port"}, + {RTE_LOGTYPE_TABLE, "table"}, + {RTE_LOGTYPE_PIPELINE, "pipeline"}, + {RTE_LOGTYPE_MBUF, "mbuf"}, + {RTE_LOGTYPE_CRYPTODEV, "cryptodev"}, + {RTE_LOGTYPE_EFD, "efd"}, + {RTE_LOGTYPE_EVENTDEV, "eventdev"}, + {RTE_LOGTYPE_USER1, "user1"}, + {RTE_LOGTYPE_USER2, "user2"}, + {RTE_LOGTYPE_USER3, "user3"}, + {RTE_LOGTYPE_USER4, "user4"}, + {RTE_LOGTYPE_USER5, "user5"}, + {RTE_LOGTYPE_USER6, "user6"}, + {RTE_LOGTYPE_USER7, "user7"}, + {RTE_LOGTYPE_USER8, "user8"} +}; + +RTE_INIT(rte_log_init); +static void +rte_log_init(void) +{ + uint32_t i; + +#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG + rte_log_set_global_level(RTE_LOG_INFO); +#else + rte_log_set_global_level(RTE_LOG_LEVEL); +#endif + + rte_logs.dynamic_types = calloc(RTE_LOGTYPE_FIRST_EXT_ID, + sizeof(struct rte_log_dynamic_type)); + if (rte_logs.dynamic_types == NULL) + return; + + /* register legacy log types */ + for (i = 0; i < RTE_DIM(logtype_strings); i++) + __rte_log_register(logtype_strings[i].logtype, + logtype_strings[i].log_id); + + rte_logs.dynamic_types_len = RTE_LOGTYPE_FIRST_EXT_ID; +} + +static const char * +loglevel_to_string(uint32_t level) +{ + switch (level) { + case 0: return "disabled"; + case RTE_LOG_EMERG: return "emerg"; + case RTE_LOG_ALERT: return "alert"; + case RTE_LOG_CRIT: return "critical"; + case RTE_LOG_ERR: return "error"; + case RTE_LOG_WARNING: return "warning"; + case RTE_LOG_NOTICE: return "notice"; + case RTE_LOG_INFO: return "info"; + case RTE_LOG_DEBUG: return "debug"; + default: return "unknown"; + } +} + +/* dump global level and registered log types */ +void +rte_log_dump(FILE *f) +{ + size_t i; + + fprintf(f, "global log level is %s\n", + loglevel_to_string(rte_log_get_global_level())); + + for (i = 0; i < rte_logs.dynamic_types_len; i++) { + if (rte_logs.dynamic_types[i].name == NULL) + continue; + fprintf(f, "id %zu: %s, level is %s\n", + i, rte_logs.dynamic_types[i].name, + loglevel_to_string(rte_logs.dynamic_types[i].loglevel)); + } +} + /* * Generates a log message The message will be sent in the stream * defined by the previous call to rte_openlog_stream(). 
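Since rte_log_register() hands back a dynamic id at or past RTE_LOGTYPE_FIRST_EXT_ID, applications and PMDs can carve out their own log streams. A short sketch, assuming the rte_log() vararg wrapper around rte_vlog() from rte_log.h:

    /* Hedged sketch of the dynamic log-type flow added above. */
    int my_logtype = rte_log_register("myapp.init"); /* "myapp.init" is hypothetical */
    if (my_logtype >= 0) {
        rte_log_set_level(my_logtype, RTE_LOG_NOTICE);
        rte_log(RTE_LOG_NOTICE, my_logtype, "myapp: starting up\n");
    }

Registration strdup()s the name, defaults the new type to RTE_LOG_DEBUG, and returns the existing id when the name is already known, so repeated registration is harmless.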
@@ -139,7 +363,11 @@ rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) } } - if ((level > rte_logs.level) || !(logtype & rte_logs.type)) + if (level > rte_logs.level) + return 0; + if (logtype >= rte_logs.dynamic_types_len) + return -1; + if (level > rte_logs.dynamic_types[logtype].loglevel) return 0; /* save loglevel and logtype in a global per-lcore variable */ @@ -176,7 +404,8 @@ eal_log_set_default(FILE *default_log) { default_log_stream = default_log; -#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG - RTE_LOG(NOTICE, EAL, "Debug logs available - lower performance\n"); +#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG + RTE_LOG(NOTICE, EAL, + "Debug dataplane logs available - lower performance\n"); #endif } diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 6ca8af17..f470195f 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -118,7 +118,7 @@ static const char *default_solib_dir = RTE_EAL_PMD_PATH; /* * Stringified version of solib path used by dpdk-pmdinfo.py * Note: PLEASE DO NOT ALTER THIS without making a corresponding - * change to tools/dpdk-pmdinfo.py + * change to usertools/dpdk-pmdinfo.py */ static const char dpdk_solib_path[] __attribute__((used)) = "DPDK_PLUGIN_PATH=" RTE_EAL_PMD_PATH; @@ -126,6 +126,7 @@ static const char dpdk_solib_path[] __attribute__((used)) = static int master_lcore_parsed; static int mem_parsed; +static int core_parsed; void eal_reset_internal_config(struct internal_config *internal_cfg) @@ -147,12 +148,6 @@ eal_reset_internal_config(struct internal_config *internal_cfg) internal_cfg->base_virtaddr = 0; internal_cfg->syslog_facility = LOG_DAEMON; - /* default value from build option */ -#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG - internal_cfg->log_level = RTE_LOG_INFO; -#else - internal_cfg->log_level = RTE_LOG_LEVEL; -#endif internal_cfg->xen_dom0_support = 0; @@ -738,25 +733,49 @@ eal_parse_syslog(const char *facility, struct internal_config *conf) } static int -eal_parse_log_level(const char *level, uint32_t *log_level) +eal_parse_log_level(const char *arg) { - char *end; + char *end, *str, *type, *level; unsigned long tmp; + str = strdup(arg); + if (str == NULL) + return -1; + + if (strchr(str, ',') == NULL) { + type = NULL; + level = str; + } else { + type = strsep(&str, ","); + level = strsep(&str, ","); + } + errno = 0; tmp = strtoul(level, &end, 0); /* check for errors */ if ((errno != 0) || (level[0] == '\0') || - end == NULL || (*end != '\0')) - return -1; + end == NULL || (*end != '\0')) + goto fail; /* log_level is a uint32_t */ if (tmp >= UINT32_MAX) - return -1; + goto fail; + + if (type == NULL) { + rte_log_set_global_level(tmp); + } else if (rte_log_set_level_regexp(type, tmp) < 0) { + printf("cannot set log level %s,%lu\n", + type, tmp); + goto fail; + } - *log_level = tmp; + free(str); return 0; + +fail: + free(str); + return -1; } static enum rte_proc_type_t @@ -797,6 +816,7 @@ eal_parse_common_option(int opt, const char *optarg, RTE_LOG(ERR, EAL, "invalid coremask\n"); return -1; } + core_parsed = 1; break; /* corelist */ case 'l': @@ -804,6 +824,7 @@ eal_parse_common_option(int opt, const char *optarg, RTE_LOG(ERR, EAL, "invalid core list\n"); return -1; } + core_parsed = 1; break; /* size of memory */ case 'm': @@ -895,15 +916,12 @@ eal_parse_common_option(int opt, const char *optarg, break; case OPT_LOG_LEVEL_NUM: { - uint32_t log; - - if (eal_parse_log_level(optarg, &log) < 0) { + if (eal_parse_log_level(optarg) < 0) { 
RTE_LOG(ERR, EAL, "invalid parameters for --" OPT_LOG_LEVEL "\n"); return -1; } - conf->log_level = log; break; } case OPT_LCORES_NUM: @@ -912,6 +930,7 @@ eal_parse_common_option(int opt, const char *optarg, OPT_LCORES "\n"); return -1; } + core_parsed = 1; break; /* don't know what to do, leave this to caller */ @@ -923,12 +942,38 @@ eal_parse_common_option(int opt, const char *optarg, return 0; } +static void +eal_auto_detect_cores(struct rte_config *cfg) +{ + unsigned int lcore_id; + unsigned int removed = 0; + rte_cpuset_t affinity_set; + pthread_t tid = pthread_self(); + + if (pthread_getaffinity_np(tid, sizeof(rte_cpuset_t), + &affinity_set) < 0) + CPU_ZERO(&affinity_set); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (cfg->lcore_role[lcore_id] == ROLE_RTE && + !CPU_ISSET(lcore_id, &affinity_set)) { + cfg->lcore_role[lcore_id] = ROLE_OFF; + removed++; + } + } + + cfg->lcore_count -= removed; +} + int eal_adjust_config(struct internal_config *internal_cfg) { int i; struct rte_config *cfg = rte_eal_get_configuration(); + if (!core_parsed) + eal_auto_detect_cores(cfg); + if (internal_config.process_type == RTE_PROC_AUTO) internal_config.process_type = eal_proc_type_detect(); @@ -1027,7 +1072,9 @@ eal_common_usage(void) " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n" " --"OPT_PROC_TYPE" Type of this process (primary|secondary|auto)\n" " --"OPT_SYSLOG" Set syslog facility\n" - " --"OPT_LOG_LEVEL" Set default log level\n" + " --"OPT_LOG_LEVEL"=<int> Set global log level\n" + " --"OPT_LOG_LEVEL"=<type-regexp>,<int>\n" + " Set specific log level\n" " -v Display version information on startup\n" " -h, --help This help\n" "\nEAL options for DEBUG use only:\n" diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c index 6bff6752..b7499913 100644 --- a/lib/librte_eal/common/eal_common_pci.c +++ b/lib/librte_eal/common/eal_common_pci.c @@ -69,8 +69,10 @@ #include <sys/queue.h> #include <sys/mman.h> +#include <rte_errno.h> #include <rte_interrupts.h> #include <rte_log.h> +#include <rte_bus.h> #include <rte_pci.h> #include <rte_per_lcore.h> #include <rte_memory.h> @@ -82,10 +84,7 @@ #include "eal_private.h" -struct pci_driver_list pci_driver_list = - TAILQ_HEAD_INITIALIZER(pci_driver_list); -struct pci_device_list pci_device_list = - TAILQ_HEAD_INITIALIZER(pci_device_list); +extern struct rte_pci_bus rte_pci_bus; #define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" @@ -153,170 +152,154 @@ pci_unmap_resource(void *requested_addr, size_t size) } /* - * If vendor/device ID match, call the probe() function of the - * driver. 
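Two details in the options rework above are easy to miss. First, --log-level now accepts either a bare number or a type-regexp,level pair: "--log-level=7" sets the global level to info, while "--log-level=pmd,8" puts every registered type matching "pmd" at debug. Second, the new core_parsed flag means that when none of -c, -l or --lcores is given, eal_adjust_config() falls back to eal_auto_detect_cores(), which trims the lcore set to the CPUs in the process affinity mask, so a restriction applied with e.g. taskset is inherited instead of EAL grabbing every detected core.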
+ * Match the PCI Driver and Device using the ID Table + * + * @param pci_drv + * PCI driver from which ID table would be extracted + * @param pci_dev + * PCI device to match against the driver + * @return + * 1 for successful match + * 0 for unsuccessful match */ static int -rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev) +rte_pci_match(const struct rte_pci_driver *pci_drv, + const struct rte_pci_device *pci_dev) { - int ret; const struct rte_pci_id *id_table; - for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) { - + for (id_table = pci_drv->id_table; id_table->vendor_id != 0; + id_table++) { /* check if device's identifiers match the driver's ones */ - if (id_table->vendor_id != dev->id.vendor_id && + if (id_table->vendor_id != pci_dev->id.vendor_id && id_table->vendor_id != PCI_ANY_ID) continue; - if (id_table->device_id != dev->id.device_id && + if (id_table->device_id != pci_dev->id.device_id && id_table->device_id != PCI_ANY_ID) continue; - if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id && - id_table->subsystem_vendor_id != PCI_ANY_ID) + if (id_table->subsystem_vendor_id != + pci_dev->id.subsystem_vendor_id && + id_table->subsystem_vendor_id != PCI_ANY_ID) continue; - if (id_table->subsystem_device_id != dev->id.subsystem_device_id && - id_table->subsystem_device_id != PCI_ANY_ID) + if (id_table->subsystem_device_id != + pci_dev->id.subsystem_device_id && + id_table->subsystem_device_id != PCI_ANY_ID) continue; - if (id_table->class_id != dev->id.class_id && + if (id_table->class_id != pci_dev->id.class_id && id_table->class_id != RTE_CLASS_ANY_ID) continue; - struct rte_pci_addr *loc = &dev->addr; - - RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", - loc->domain, loc->bus, loc->devid, loc->function, - dev->device.numa_node); - - /* no initialization when blacklisted, return without error */ - if (dev->device.devargs != NULL && - dev->device.devargs->type == - RTE_DEVTYPE_BLACKLISTED_PCI) { - RTE_LOG(INFO, EAL, " Device is blacklisted, not initializing\n"); - return 1; - } - - RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->driver.name); - - if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { - /* map resources for devices that use igb_uio */ - ret = rte_eal_pci_map_device(dev); - if (ret != 0) - return ret; - } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND && - rte_eal_process_type() == RTE_PROC_PRIMARY) { - /* unbind current driver */ - if (pci_unbind_kernel_driver(dev) < 0) - return -1; - } - - /* reference driver structure */ - dev->driver = dr; - - /* call the driver probe() function */ - ret = dr->probe(dr, dev); - if (ret) - dev->driver = NULL; - - return ret; + return 1; } - /* return positive value if driver doesn't support this device */ - return 1; + + return 0; } /* - * If vendor/device ID match, call the remove() function of the + * If vendor/device ID match, call the probe() function of the * driver. 
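rte_pci_match() above is a pure table walk: the first entry whose five ids are equal-or-wildcard wins, and the entry with vendor_id == 0 terminates the scan. A sketch of the kind of table a PMD would feed it (ids are hypothetical; RTE_PCI_DEVICE is the usual rte_pci.h convenience initializer, which conventionally fills the subsystem ids with PCI_ANY_ID):

    /* Hedged sketch: a two-device id table for rte_pci_match(). */
    static const struct rte_pci_id my_pmd_ids[] = {
        { RTE_PCI_DEVICE(0x1234, 0xabcd) }, /* hypothetical vendor/device */
        { RTE_PCI_DEVICE(0x1234, 0xabce) },
        { .vendor_id = 0 },                 /* sentinel ends the walk */
    };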
*/ static int -rte_eal_pci_detach_dev(struct rte_pci_driver *dr, - struct rte_pci_device *dev) +rte_pci_probe_one_driver(struct rte_pci_driver *dr, + struct rte_pci_device *dev) { - const struct rte_pci_id *id_table; + int ret; + struct rte_pci_addr *loc; if ((dr == NULL) || (dev == NULL)) return -EINVAL; - for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) { + loc = &dev->addr; - /* check if device's identifiers match the driver's ones */ - if (id_table->vendor_id != dev->id.vendor_id && - id_table->vendor_id != PCI_ANY_ID) - continue; - if (id_table->device_id != dev->id.device_id && - id_table->device_id != PCI_ANY_ID) - continue; - if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id && - id_table->subsystem_vendor_id != PCI_ANY_ID) - continue; - if (id_table->subsystem_device_id != dev->id.subsystem_device_id && - id_table->subsystem_device_id != PCI_ANY_ID) - continue; + /* The device is not blacklisted; Check if driver supports it */ + if (!rte_pci_match(dr, dev)) { + /* Match of device and driver failed */ + RTE_LOG(DEBUG, EAL, "Driver (%s) doesn't match the device\n", + dr->driver.name); + return 1; + } - struct rte_pci_addr *loc = &dev->addr; + RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, loc->function, + dev->device.numa_node); + + /* no initialization when blacklisted, return without error */ + if (dev->device.devargs != NULL && + dev->device.devargs->type == + RTE_DEVTYPE_BLACKLISTED_PCI) { + RTE_LOG(INFO, EAL, " Device is blacklisted, not" + " initializing\n"); + return 1; + } - RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", - loc->domain, loc->bus, loc->devid, - loc->function, dev->device.numa_node); + RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->driver.name); - RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->driver.name); + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { + /* map resources for devices that use igb_uio */ + ret = rte_pci_map_device(dev); + if (ret != 0) + return ret; + } - if (dr->remove && (dr->remove(dev) < 0)) - return -1; /* negative value is an error */ + /* reference driver structure */ + dev->driver = dr; + dev->device.driver = &dr->driver; - /* clear driver structure */ + /* call the driver probe() function */ + ret = dr->probe(dr, dev); + if (ret) { dev->driver = NULL; - if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) - /* unmap resources for devices that use igb_uio */ - rte_eal_pci_unmap_device(dev); - - return 0; + rte_pci_unmap_device(dev); } - /* return positive value if driver doesn't support this device */ - return 1; + return ret; } /* - * If vendor/device ID match, call the probe() function of all - * registered driver for the given device. Return -1 if initialization - * failed, return 1 if no driver is found for this device. + * If vendor/device ID match, call the remove() function of the + * driver. 
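The probe path above now does matching, blacklist filtering, BAR mapping and bookkeeping in one place, so a driver only supplies the pieces rte_pci_probe_one_driver() consumes. A hedged sketch of the wiring (my_pmd_ids as sketched earlier; the callbacks are hypothetical):

    /* Hedged sketch: what rte_pci_probe_one_driver() expects from a PMD. */
    static int my_probe(struct rte_pci_driver *dr, struct rte_pci_device *dev);
    static int my_remove(struct rte_pci_device *dev);

    static struct rte_pci_driver my_pmd = {
        .id_table  = my_pmd_ids,
        .drv_flags = RTE_PCI_DRV_NEED_MAPPING, /* have EAL map BARs before probe */
        .probe     = my_probe,
        .remove    = my_remove,
    };
    RTE_PMD_REGISTER_PCI(net_my_pmd, my_pmd);  /* constructor calls rte_pci_register() */

Note the cleanup added on failure: if probe() returns non-zero, dev->driver is reset and the device is unmapped again.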
*/ static int -pci_probe_all_drivers(struct rte_pci_device *dev) +rte_pci_detach_dev(struct rte_pci_device *dev) { - struct rte_pci_driver *dr = NULL; - int rc = 0; + struct rte_pci_addr *loc; + struct rte_pci_driver *dr; if (dev == NULL) - return -1; + return -EINVAL; - /* Check if a driver is already loaded */ - if (dev->driver != NULL) - return 0; + dr = dev->driver; + loc = &dev->addr; - TAILQ_FOREACH(dr, &pci_driver_list, next) { - rc = rte_eal_pci_probe_one_driver(dr, dev); - if (rc < 0) - /* negative value is an error */ - return -1; - if (rc > 0) - /* positive value means driver doesn't support it */ - continue; - return 0; - } - return 1; + RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, + loc->function, dev->device.numa_node); + + RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->driver.name); + + if (dr->remove && (dr->remove(dev) < 0)) + return -1; /* negative value is an error */ + + /* clear driver structure */ + dev->driver = NULL; + + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) + /* unmap resources for devices that use igb_uio */ + rte_pci_unmap_device(dev); + + return 0; } /* - * If vendor/device ID match, call the remove() function of all + * If vendor/device ID match, call the probe() function of all * registered driver for the given device. Return -1 if initialization * failed, return 1 if no driver is found for this device. */ static int -pci_detach_all_drivers(struct rte_pci_device *dev) +pci_probe_all_drivers(struct rte_pci_device *dev) { struct rte_pci_driver *dr = NULL; int rc = 0; @@ -324,8 +307,12 @@ pci_detach_all_drivers(struct rte_pci_device *dev) if (dev == NULL) return -1; - TAILQ_FOREACH(dr, &pci_driver_list, next) { - rc = rte_eal_pci_detach_dev(dr, dev); + /* Check if a driver is already loaded */ + if (dev->driver != NULL) + return 0; + + FOREACH_DRIVER_ON_PCIBUS(dr) { + rc = rte_pci_probe_one_driver(dr, dev); if (rc < 0) /* negative value is an error */ return -1; @@ -342,9 +329,10 @@ pci_detach_all_drivers(struct rte_pci_device *dev) * the driver of the devive. */ int -rte_eal_pci_probe_one(const struct rte_pci_addr *addr) +rte_pci_probe_one(const struct rte_pci_addr *addr) { struct rte_pci_device *dev = NULL; + int ret = 0; if (addr == NULL) @@ -356,7 +344,7 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr) if (pci_update_device(addr) < 0) goto err_return; - TAILQ_FOREACH(dev, &pci_device_list, next) { + FOREACH_DEVICE_ON_PCIBUS(dev) { if (rte_eal_compare_pci_addr(&dev->addr, addr)) continue; @@ -378,7 +366,7 @@ err_return: * Detach device specified by its pci address. */ int -rte_eal_pci_detach(const struct rte_pci_addr *addr) +rte_pci_detach(const struct rte_pci_addr *addr) { struct rte_pci_device *dev = NULL; int ret = 0; @@ -386,15 +374,19 @@ rte_eal_pci_detach(const struct rte_pci_addr *addr) if (addr == NULL) return -1; - TAILQ_FOREACH(dev, &pci_device_list, next) { + FOREACH_DEVICE_ON_PCIBUS(dev) { if (rte_eal_compare_pci_addr(&dev->addr, addr)) continue; - ret = pci_detach_all_drivers(dev); + ret = rte_pci_detach_dev(dev); if (ret < 0) + /* negative value is an error */ goto err_return; + if (ret > 0) + /* positive value means driver doesn't support it */ + continue; - TAILQ_REMOVE(&pci_device_list, dev, next); + rte_pci_remove_device(dev); free(dev); return 0; } @@ -413,9 +405,10 @@ err_return: * for discovered devices. 
*/ int -rte_eal_pci_probe(void) +rte_pci_probe(void) { struct rte_pci_device *dev = NULL; + size_t probed = 0, failed = 0; struct rte_devargs *devargs; int probe_all = 0; int ret = 0; @@ -423,7 +416,8 @@ rte_eal_pci_probe(void) if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) == 0) probe_all = 1; - TAILQ_FOREACH(dev, &pci_device_list, next) { + FOREACH_DEVICE_ON_PCIBUS(dev) { + probed++; /* set devargs in PCI structure */ devargs = pci_devargs_lookup(dev); @@ -436,13 +430,17 @@ rte_eal_pci_probe(void) else if (devargs != NULL && devargs->type == RTE_DEVTYPE_WHITELISTED_PCI) ret = pci_probe_all_drivers(dev); - if (ret < 0) - rte_exit(EXIT_FAILURE, "Requested device " PCI_PRI_FMT + if (ret < 0) { + RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT " cannot be used\n", dev->addr.domain, dev->addr.bus, dev->addr.devid, dev->addr.function); + rte_errno = errno; + failed++; + ret = 0; + } } - return 0; + return (probed && probed == failed) ? -1 : 0; } /* dump one device */ @@ -467,27 +465,60 @@ pci_dump_one_device(FILE *f, struct rte_pci_device *dev) /* dump devices on the bus */ void -rte_eal_pci_dump(FILE *f) +rte_pci_dump(FILE *f) { struct rte_pci_device *dev = NULL; - TAILQ_FOREACH(dev, &pci_device_list, next) { + FOREACH_DEVICE_ON_PCIBUS(dev) { pci_dump_one_device(f, dev); } } /* register a driver */ void -rte_eal_pci_register(struct rte_pci_driver *driver) +rte_pci_register(struct rte_pci_driver *driver) { - TAILQ_INSERT_TAIL(&pci_driver_list, driver, next); - rte_eal_driver_register(&driver->driver); + TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next); + driver->bus = &rte_pci_bus; } /* unregister a driver */ void -rte_eal_pci_unregister(struct rte_pci_driver *driver) +rte_pci_unregister(struct rte_pci_driver *driver) +{ + TAILQ_REMOVE(&rte_pci_bus.driver_list, driver, next); + driver->bus = NULL; +} + +/* Add a device to PCI bus */ +void +rte_pci_add_device(struct rte_pci_device *pci_dev) +{ + TAILQ_INSERT_TAIL(&rte_pci_bus.device_list, pci_dev, next); +} + +/* Insert a device into a predefined position in PCI bus */ +void +rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, + struct rte_pci_device *new_pci_dev) { - rte_eal_driver_unregister(&driver->driver); - TAILQ_REMOVE(&pci_driver_list, driver, next); + TAILQ_INSERT_BEFORE(exist_pci_dev, new_pci_dev, next); } + +/* Remove a device from PCI bus */ +void +rte_pci_remove_device(struct rte_pci_device *pci_dev) +{ + TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next); +} + +struct rte_pci_bus rte_pci_bus = { + .bus = { + .scan = rte_pci_scan, + .probe = rte_pci_probe, + }, + .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), + .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), +}; + +RTE_REGISTER_BUS(PCI_BUS_NAME, rte_pci_bus.bus); diff --git a/lib/librte_eal/common/eal_common_tailqs.c b/lib/librte_eal/common/eal_common_tailqs.c index bb08ec8b..4f698288 100644 --- a/lib/librte_eal/common/eal_common_tailqs.c +++ b/lib/librte_eal/common/eal_common_tailqs.c @@ -188,8 +188,7 @@ rte_eal_tailqs_init(void) if (t->head == NULL) { RTE_LOG(ERR, EAL, "Cannot initialize tailq: %s\n", t->name); - /* no need to TAILQ_REMOVE, we are going to panic in - * rte_eal_init() */ + /* TAILQ_REMOVE not needed, error is already fatal */ goto fail; } } diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c index 7d6e54f4..0037a641 100644 --- a/lib/librte_eal/common/eal_common_vdev.c +++ b/lib/librte_eal/common/eal_common_vdev.c @@ -37,35 +37,84 @@ #include <stdint.h> 
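Stepping back before the vdev hunk that starts here: the PCI changes above end by instantiating rte_pci_bus with its scan/probe callbacks and handing it to RTE_REGISTER_BUS, the same pattern the vdev code below follows by hand. Combined with the RTE_VERIFY checks in rte_bus_register() earlier in this patch (non-empty name, mandatory scan and probe), the minimal contract for a new bus looks roughly like this sketch:

    /* Hedged sketch: smallest bus that rte_bus_register() will accept. */
    static int my_bus_scan(void)  { return 0; } /* enumerate devices on the bus */
    static int my_bus_probe(void) { return 0; } /* match scanned devices to drivers */

    static struct rte_bus my_bus = {
        .scan  = my_bus_scan,
        .probe = my_bus_probe,
    };
    RTE_REGISTER_BUS(my, my_bus); /* constructor fills .name and registers */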
#include <sys/queue.h> +#include <rte_eal.h> +#include <rte_bus.h> #include <rte_vdev.h> #include <rte_common.h> +#include <rte_devargs.h> +#include <rte_memory.h> +/** Double linked list of virtual device drivers. */ +TAILQ_HEAD(vdev_device_list, rte_vdev_device); + +static struct vdev_device_list vdev_device_list = + TAILQ_HEAD_INITIALIZER(vdev_device_list); struct vdev_driver_list vdev_driver_list = TAILQ_HEAD_INITIALIZER(vdev_driver_list); +static void rte_vdev_bus_register(void); + /* register a driver */ void -rte_eal_vdrv_register(struct rte_vdev_driver *driver) +rte_vdev_register(struct rte_vdev_driver *driver) { + rte_vdev_bus_register(); + TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next); - rte_eal_driver_register(&driver->driver); } /* unregister a driver */ void -rte_eal_vdrv_unregister(struct rte_vdev_driver *driver) +rte_vdev_unregister(struct rte_vdev_driver *driver) { - rte_eal_driver_unregister(&driver->driver); TAILQ_REMOVE(&vdev_driver_list, driver, next); } -int -rte_eal_vdev_init(const char *name, const char *args) +/* + * Parse "driver" devargs without adding a dependency on rte_kvargs.h + */ +static char *parse_driver_arg(const char *args) +{ + const char *c; + char *str; + + if (!args || args[0] == '\0') + return NULL; + + c = args; + + do { + if (strncmp(c, "driver=", 7) == 0) { + c += 7; + break; + } + + c = strchr(c, ','); + if (c) + c++; + } while (c); + + if (c) + str = strdup(c); + else + str = NULL; + + return str; +} + +static int +vdev_probe_all_drivers(struct rte_vdev_device *dev) { + const char *name; + char *drv_name; struct rte_vdev_driver *driver; + int ret = 1; - if (name == NULL) - return -EINVAL; + drv_name = parse_driver_arg(rte_vdev_device_args(dev)); + name = drv_name ? drv_name : rte_vdev_device_name(dev); + + RTE_LOG(DEBUG, EAL, "Search driver %s to probe device %s\n", name, + rte_vdev_device_name(dev)); TAILQ_FOREACH(driver, &vdev_driver_list, next) { /* @@ -75,50 +124,235 @@ rte_eal_vdev_init(const char *name, const char *args) * So use strncmp to compare. */ if (!strncmp(driver->driver.name, name, - strlen(driver->driver.name))) - return driver->probe(name, args); + strlen(driver->driver.name))) { + dev->device.driver = &driver->driver; + ret = driver->probe(dev); + if (ret) + dev->device.driver = NULL; + goto out; + } } /* Give new names precedence over aliases. 
*/ TAILQ_FOREACH(driver, &vdev_driver_list, next) { if (driver->driver.alias && !strncmp(driver->driver.alias, name, - strlen(driver->driver.alias))) - return driver->probe(name, args); + strlen(driver->driver.alias))) { + dev->device.driver = &driver->driver; + ret = driver->probe(dev); + if (ret) + dev->device.driver = NULL; + break; + } + } + +out: + free(drv_name); + return ret; +} + +static struct rte_vdev_device * +find_vdev(const char *name) +{ + struct rte_vdev_device *dev; + + if (!name) + return NULL; + + TAILQ_FOREACH(dev, &vdev_device_list, next) { + const char *devname = rte_vdev_device_name(dev); + if (!strncmp(devname, name, strlen(name))) + return dev; } - RTE_LOG(ERR, EAL, "no driver found for %s\n", name); - return -EINVAL; + return NULL; +} + +static struct rte_devargs * +alloc_devargs(const char *name, const char *args) +{ + struct rte_devargs *devargs; + int ret; + + devargs = calloc(1, sizeof(*devargs)); + if (!devargs) + return NULL; + + devargs->type = RTE_DEVTYPE_VIRTUAL; + if (args) + devargs->args = strdup(args); + + ret = snprintf(devargs->virt.drv_name, + sizeof(devargs->virt.drv_name), "%s", name); + if (ret < 0 || ret >= (int)sizeof(devargs->virt.drv_name)) { + free(devargs->args); + free(devargs); + return NULL; + } + + return devargs; } int -rte_eal_vdev_uninit(const char *name) +rte_vdev_init(const char *name, const char *args) { - struct rte_vdev_driver *driver; + struct rte_vdev_device *dev; + struct rte_devargs *devargs; + int ret; if (name == NULL) return -EINVAL; - TAILQ_FOREACH(driver, &vdev_driver_list, next) { - /* - * search a driver prefix in virtual device name. - * For example, if the driver is pcap PMD, driver->name - * will be "net_pcap", but "name" will be "net_pcapN". - * So use strncmp to compare. - */ - if (!strncmp(driver->driver.name, name, - strlen(driver->driver.name))) - return driver->remove(name); + dev = find_vdev(name); + if (dev) + return -EEXIST; + + devargs = alloc_devargs(name, args); + if (!devargs) + return -ENOMEM; + + dev = calloc(1, sizeof(*dev)); + if (!dev) { + ret = -ENOMEM; + goto fail; } - /* Give new names precedence over aliases. 
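With devargs allocation and duplicate detection in place (find_vdev() matches by name prefix, and an explicit "driver=" devarg can override the prefix), virtual devices can now be created and destroyed at runtime rather than only from the command line. A hedged sketch using the null PMD ("net_null0" resolves to the net_null driver by prefix):

    /* Hedged sketch of the new vdev hot-plug entry points. */
    if (rte_vdev_init("net_null0", NULL) != 0)
        rte_exit(EXIT_FAILURE, "cannot create net_null0\n");
    /* ... use the port ... */
    if (rte_vdev_uninit("net_null0") != 0)
        RTE_LOG(WARNING, EAL, "net_null0 removal failed\n");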
*/ - TAILQ_FOREACH(driver, &vdev_driver_list, next) { - if (driver->driver.alias && - !strncmp(driver->driver.alias, name, - strlen(driver->driver.alias))) - return driver->remove(name); + dev->device.devargs = devargs; + dev->device.numa_node = SOCKET_ID_ANY; + dev->device.name = devargs->virt.drv_name; + + ret = vdev_probe_all_drivers(dev); + if (ret) { + if (ret > 0) + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + goto fail; } - RTE_LOG(ERR, EAL, "no driver found for %s\n", name); - return -EINVAL; + TAILQ_INSERT_TAIL(&devargs_list, devargs, next); + + TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); + return 0; + +fail: + free(devargs->args); + free(devargs); + free(dev); + return ret; +} + +static int +vdev_remove_driver(struct rte_vdev_device *dev) +{ + const char *name = rte_vdev_device_name(dev); + const struct rte_vdev_driver *driver; + + if (!dev->device.driver) { + RTE_LOG(DEBUG, EAL, "no driver attach to device %s\n", name); + return 1; + } + + driver = container_of(dev->device.driver, const struct rte_vdev_driver, + driver); + return driver->remove(dev); +} + +int +rte_vdev_uninit(const char *name) +{ + struct rte_vdev_device *dev; + struct rte_devargs *devargs; + int ret; + + if (name == NULL) + return -EINVAL; + + dev = find_vdev(name); + if (!dev) + return -ENOENT; + + devargs = dev->device.devargs; + + ret = vdev_remove_driver(dev); + if (ret) + return ret; + + TAILQ_REMOVE(&vdev_device_list, dev, next); + + TAILQ_REMOVE(&devargs_list, devargs, next); + + free(devargs->args); + free(devargs); + free(dev); + return 0; +} + +static int +vdev_scan(void) +{ + struct rte_vdev_device *dev; + struct rte_devargs *devargs; + + /* for virtual devices we scan the devargs_list populated via cmdline */ + + TAILQ_FOREACH(devargs, &devargs_list, next) { + + if (devargs->type != RTE_DEVTYPE_VIRTUAL) + continue; + + dev = find_vdev(devargs->virt.drv_name); + if (dev) + continue; + + dev = calloc(1, sizeof(*dev)); + if (!dev) + return -1; + + dev->device.devargs = devargs; + dev->device.numa_node = SOCKET_ID_ANY; + dev->device.name = devargs->virt.drv_name; + + TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); + } + + return 0; +} + +static int +vdev_probe(void) +{ + struct rte_vdev_device *dev; + + /* call the init function for each virtual device */ + TAILQ_FOREACH(dev, &vdev_device_list, next) { + + if (dev->device.driver) + continue; + + if (vdev_probe_all_drivers(dev)) { + RTE_LOG(ERR, EAL, "failed to initialize %s device\n", + rte_vdev_device_name(dev)); + return -1; + } + } + + return 0; +} + +static struct rte_bus rte_vdev_bus = { + .scan = vdev_scan, + .probe = vdev_probe, +}; + +RTE_INIT(rte_vdev_bus_register); + +static void rte_vdev_bus_register(void) +{ + static int registered; + + if (registered) + return; + + registered = 1; + rte_vdev_bus.name = RTE_STR(virtual); + rte_bus_register(&rte_vdev_bus); } diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 5f1367eb..7b7e8c88 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -78,7 +78,6 @@ struct internal_config { volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */ uintptr_t base_virtaddr; /**< base address to try and reserve memory from */ volatile int syslog_facility; /**< facility passed to openlog() */ - volatile uint32_t log_level; /**< default log level */ /** default interrupt mode for VFIO */ volatile enum rte_intr_mode vfio_intr_mode; const char *hugefile_prefix; /**< the base 
filename of hugetlbfs files */ diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 9e7d8f6b..6cacce07 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -34,6 +34,7 @@ #ifndef _EAL_PRIVATE_H_ #define _EAL_PRIVATE_H_ +#include <stdbool.h> #include <stdio.h> #include <rte_pci.h> @@ -108,18 +109,43 @@ int rte_eal_timer_init(void); */ int rte_eal_log_init(const char *id, int facility); +struct rte_pci_driver; +struct rte_pci_device; + /** - * Init the PCI infrastructure - * - * This function is private to EAL. + * Add a PCI device to the PCI Bus (append to PCI Device list). This function + * also updates the bus references of the PCI Device (and the generic device + * object embedded within. * - * @return - * 0 on success, negative on error + * @param pci_dev + * PCI device to add + * @return void */ -int rte_eal_pci_init(void); +void rte_pci_add_device(struct rte_pci_device *pci_dev); -struct rte_pci_driver; -struct rte_pci_device; +/** + * Insert a PCI device in the PCI Bus at a particular location in the device + * list. It also updates the PCI Bus reference of the new devices to be + * inserted. + * + * @param exist_pci_dev + * Existing PCI device in PCI Bus + * @param new_pci_dev + * PCI device to be added before exist_pci_dev + * @return void + */ +void rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, + struct rte_pci_device *new_pci_dev); + +/** + * Remove a PCI device from the PCI Bus. This sets to NULL the bus references + * in the PCI device object as well as the generic device object. + * + * @param pci_device + * PCI device to be removed from PCI Bus + * @return void + */ +void rte_pci_remove_device(struct rte_pci_device *pci_device); /** * Update a pci device object by asking the kernel for the latest information. @@ -301,4 +327,15 @@ int rte_eal_hugepage_init(void); */ int rte_eal_hugepage_attach(void); +/** + * Returns true if the system is able to obtain + * physical addresses. Return false if using DMA + * addresses through an IOMMU. + * + * Drivers based on uio will not load unless physical + * addresses are obtainable. It is only possible to get + * physical addresses when running as a privileged user. 
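The helper declared just below gives bus and PMD code a single predicate for the uio-versus-IOMMU decision this comment describes. A hedged sketch of the intended call-site shape (the surrounding function is hypothetical):

    /* Hedged sketch: gate a mapping path that requires physical addresses. */
    if (!rte_eal_using_phys_addrs()) {
        RTE_LOG(ERR, EAL,
            "no physical addresses available (unprivileged or IOMMU), skipping uio path\n");
        return -1;
    }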
+ */ +bool rte_eal_using_phys_addrs(void); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic.h b/lib/librte_eal/common/include/arch/arm/rte_atomic.h index 454a12b0..f3f3b6e3 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic.h @@ -39,10 +39,4 @@ #include <rte_atomic_32.h> #endif -#define rte_smp_mb() rte_mb() - -#define rte_smp_wmb() rte_wmb() - -#define rte_smp_rmb() rte_rmb() - #endif /* _RTE_ATOMIC_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h index 9ae1e78b..14c04864 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h @@ -67,6 +67,18 @@ extern "C" { */ #define rte_rmb() __sync_synchronize() +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_wmb() + +#define rte_smp_rmb() rte_rmb() + +#define rte_io_mb() rte_mb() + +#define rte_io_wmb() rte_wmb() + +#define rte_io_rmb() rte_rmb() + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h index 671caa76..dc3a0f3b 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h @@ -43,43 +43,26 @@ extern "C" { #include "generic/rte_atomic.h" -#define dmb(opt) do { asm volatile("dmb " #opt : : : "memory"); } while (0) +#define dsb(opt) { asm volatile("dsb " #opt : : : "memory"); } +#define dmb(opt) { asm volatile("dmb " #opt : : : "memory"); } -/** - * General memory barrier. - * - * Guarantees that the LOAD and STORE operations generated before the - * barrier occur before the LOAD and STORE operations generated after. - * This function is architecture dependent. - */ -static inline void rte_mb(void) -{ - dmb(ish); -} +#define rte_mb() dsb(sy) -/** - * Write memory barrier. - * - * Guarantees that the STORE operations generated before the barrier - * occur before the STORE operations generated after. - * This function is architecture dependent. - */ -static inline void rte_wmb(void) -{ - dmb(ishst); -} +#define rte_wmb() dsb(st) -/** - * Read memory barrier. - * - * Guarantees that the LOAD operations generated before the barrier - * occur before the LOAD operations generated after. - * This function is architecture dependent. - */ -static inline void rte_rmb(void) -{ - dmb(ishld); -} +#define rte_rmb() dsb(ld) + +#define rte_smp_mb() dmb(ish) + +#define rte_smp_wmb() dmb(ishst) + +#define rte_smp_rmb() dmb(ishld) + +#define rte_io_mb() rte_mb() + +#define rte_io_wmb() rte_wmb() + +#define rte_io_rmb() rte_rmb() #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h b/lib/librte_eal/common/include/arch/arm/rte_io.h index 1849b520..9593b424 100644 --- a/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h +++ b/lib/librte_eal/common/include/arch/arm/rte_io.h @@ -1,7 +1,8 @@ /* * BSD LICENSE * - * Copyright (C) EZchip Semiconductor Ltd. 2015. + * Copyright(c) 2016 Cavium networks. All rights reserved. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. 
- * * Neither the name of EZchip Semiconductor nor the names of its + * * Neither the name of Cavium networks nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -28,26 +29,23 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + */ -#ifndef _RTE_CPUFLAGS_TILE_H_ -#define _RTE_CPUFLAGS_TILE_H_ +#ifndef _RTE_IO_ARM_H_ +#define _RTE_IO_ARM_H_ #ifdef __cplusplus extern "C" { #endif -/** - * Enumeration of all CPU features supported - */ -enum rte_cpu_flag_t { - RTE_CPUFLAG_NUMFLAGS /**< This should always be the last! */ -}; - -#include "generic/rte_cpuflags.h" +#ifdef RTE_ARCH_64 +#include "rte_io_64.h" +#else +#include "generic/rte_io.h" +#endif #ifdef __cplusplus } #endif -#endif /* _RTE_CPUFLAGS_TILE_H_ */ +#endif /* _RTE_IO_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_io_64.h b/lib/librte_eal/common/include/arch/arm/rte_io_64.h new file mode 100644 index 00000000..0402125b --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_io_64.h @@ -0,0 +1,199 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
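The aarch64 rework above makes the barrier tiers explicit: rte_mb()/rte_wmb()/rte_rmb() now use a full-system dsb, the rte_smp_* variants use the cheaper inner-shareable dmb, and the new rte_io_* macros alias the full barriers for device memory. The rte_io_64.h accessors that follow pair a relaxed load/store with exactly those I/O barriers; a hedged usage sketch (dev is a hypothetical struct rte_pci_device pointer with BAR 0 already mapped, offsets invented):

    /* Hedged sketch: MMIO through the new rte_io accessors. */
    volatile uint8_t *bar0 = (volatile uint8_t *)dev->mem_resource[0].addr;
    uint32_t stat = rte_read32(bar0 + 0x10); /* relaxed load, then rte_io_rmb() */
    rte_write32(stat | 0x1, bar0 + 0x14);    /* rte_io_wmb(), then relaxed store */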
+ */ + +#ifndef _RTE_IO_ARM64_H_ +#define _RTE_IO_ARM64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +#define RTE_OVERRIDE_IO_H + +#include "generic/rte_io.h" +#include "rte_atomic_64.h" + +static inline uint8_t __attribute__((always_inline)) +rte_read8_relaxed(const volatile void *addr) +{ + uint8_t val; + + asm volatile( + "ldrb %w[val], [%x[addr]]" + : [val] "=r" (val) + : [addr] "r" (addr)); + return val; +} + +static inline uint16_t __attribute__((always_inline)) +rte_read16_relaxed(const volatile void *addr) +{ + uint16_t val; + + asm volatile( + "ldrh %w[val], [%x[addr]]" + : [val] "=r" (val) + : [addr] "r" (addr)); + return val; +} + +static inline uint32_t __attribute__((always_inline)) +rte_read32_relaxed(const volatile void *addr) +{ + uint32_t val; + + asm volatile( + "ldr %w[val], [%x[addr]]" + : [val] "=r" (val) + : [addr] "r" (addr)); + return val; +} + +static inline uint64_t __attribute__((always_inline)) +rte_read64_relaxed(const volatile void *addr) +{ + uint64_t val; + + asm volatile( + "ldr %x[val], [%x[addr]]" + : [val] "=r" (val) + : [addr] "r" (addr)); + return val; +} + +static inline void __attribute__((always_inline)) +rte_write8_relaxed(uint8_t val, volatile void *addr) +{ + asm volatile( + "strb %w[val], [%x[addr]]" + : + : [val] "r" (val), [addr] "r" (addr)); +} + +static inline void __attribute__((always_inline)) +rte_write16_relaxed(uint16_t val, volatile void *addr) +{ + asm volatile( + "strh %w[val], [%x[addr]]" + : + : [val] "r" (val), [addr] "r" (addr)); +} + +static inline void __attribute__((always_inline)) +rte_write32_relaxed(uint32_t val, volatile void *addr) +{ + asm volatile( + "str %w[val], [%x[addr]]" + : + : [val] "r" (val), [addr] "r" (addr)); +} + +static inline void __attribute__((always_inline)) +rte_write64_relaxed(uint64_t val, volatile void *addr) +{ + asm volatile( + "str %x[val], [%x[addr]]" + : + : [val] "r" (val), [addr] "r" (addr)); +} + +static inline uint8_t __attribute__((always_inline)) +rte_read8(const volatile void *addr) +{ + uint8_t val; + val = rte_read8_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint16_t __attribute__((always_inline)) +rte_read16(const volatile void *addr) +{ + uint16_t val; + val = rte_read16_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint32_t __attribute__((always_inline)) +rte_read32(const volatile void *addr) +{ + uint32_t val; + val = rte_read32_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint64_t __attribute__((always_inline)) +rte_read64(const volatile void *addr) +{ + uint64_t val; + val = rte_read64_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline void __attribute__((always_inline)) +rte_write8(uint8_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write8_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write16(uint16_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write16_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write32(uint32_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write32_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write64(uint64_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write64_relaxed(value, addr); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_IO_ARM64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h index b86c2cf5..4107c998 100644 --- 
a/lib/librte_eal/common/include/arch/arm/rte_vect.h +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h @@ -34,6 +34,7 @@ #define _RTE_VECT_ARM_H_ #include <stdint.h> +#include "generic/rte_vect.h" #include "arm_neon.h" #ifdef __cplusplus diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h index fb4fccb4..150810cd 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h @@ -87,6 +87,12 @@ extern "C" { #define rte_smp_rmb() rte_rmb() +#define rte_io_mb() rte_mb() + +#define rte_io_wmb() rte_wmb() + +#define rte_io_rmb() rte_rmb() + /*------------------------- 16 bit atomic operations -------------------------*/ /* To be compatible with Power7, use GCC built-in functions for 16 bit * operations */ diff --git a/lib/librte_eal/common/arch/tile/rte_cpuflags.c b/lib/librte_eal/common/include/arch/ppc_64/rte_io.h index a2b6c51a..be192da7 100644 --- a/lib/librte_eal/common/arch/tile/rte_cpuflags.c +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_io.h @@ -1,7 +1,8 @@ /* * BSD LICENSE * - * Copyright (C) EZchip Semiconductor Ltd. 2015. + * Copyright(c) 2016 Cavium networks. All rights reserved. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its + * * Neither the name of Cavium networks nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -28,20 +29,19 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + */ -#include "rte_cpuflags.h" +#ifndef _RTE_IO_PPC_64_H_ +#define _RTE_IO_PPC_64_H_ -#include <errno.h> +#ifdef __cplusplus +extern "C" { +#endif -const struct feature_entry rte_cpu_feature_table[] = { -}; +#include "generic/rte_io.h" -/* - * Checks if a particular flag is available on current machine. - */ -int -rte_cpu_get_flag_enabled(__attribute__((unused)) enum rte_cpu_flag_t feature) -{ - return -ENOENT; +#ifdef __cplusplus } +#endif + +#endif /* _RTE_IO_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h index 05209e52..99586e58 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h @@ -34,6 +34,7 @@ #define _RTE_VECT_PPC_64_H_ #include <altivec.h> +#include "generic/rte_vect.h" #ifdef __cplusplus extern "C" { diff --git a/lib/librte_eal/common/include/arch/tile/rte_byteorder.h b/lib/librte_eal/common/include/arch/tile/rte_byteorder.h deleted file mode 100644 index 7239e437..00000000 --- a/lib/librte_eal/common/include/arch/tile/rte_byteorder.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * BSD LICENSE - * - * Copyright (C) EZchip Semiconductor Ltd. 2015. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef _RTE_BYTEORDER_TILE_H_ -#define _RTE_BYTEORDER_TILE_H_ - -#ifndef RTE_FORCE_INTRINSICS -# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#include "generic/rte_byteorder.h" - -#if !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) -#define rte_bswap16(x) rte_constant_bswap16(x) -#endif - -#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN - -#define rte_cpu_to_le_16(x) (x) -#define rte_cpu_to_le_32(x) (x) -#define rte_cpu_to_le_64(x) (x) - -#define rte_cpu_to_be_16(x) rte_bswap16(x) -#define rte_cpu_to_be_32(x) rte_bswap32(x) -#define rte_cpu_to_be_64(x) rte_bswap64(x) - -#define rte_le_to_cpu_16(x) (x) -#define rte_le_to_cpu_32(x) (x) -#define rte_le_to_cpu_64(x) (x) - -#define rte_be_to_cpu_16(x) rte_bswap16(x) -#define rte_be_to_cpu_32(x) rte_bswap32(x) -#define rte_be_to_cpu_64(x) rte_bswap64(x) - -#else /* RTE_BIG_ENDIAN */ - -#define rte_cpu_to_le_16(x) rte_bswap16(x) -#define rte_cpu_to_le_32(x) rte_bswap32(x) -#define rte_cpu_to_le_64(x) rte_bswap64(x) - -#define rte_cpu_to_be_16(x) (x) -#define rte_cpu_to_be_32(x) (x) -#define rte_cpu_to_be_64(x) (x) - -#define rte_le_to_cpu_16(x) rte_bswap16(x) -#define rte_le_to_cpu_32(x) rte_bswap32(x) -#define rte_le_to_cpu_64(x) rte_bswap64(x) - -#define rte_be_to_cpu_16(x) (x) -#define rte_be_to_cpu_32(x) (x) -#define rte_be_to_cpu_64(x) (x) -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_BYTEORDER_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h deleted file mode 100644 index e606957c..00000000 --- a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * BSD LICENSE - * - * Copyright (C) EZchip Semiconductor Ltd. 2015. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef _RTE_MEMCPY_TILE_H_ -#define _RTE_MEMCPY_TILE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include <stdint.h> -#include <string.h> - -#include "generic/rte_memcpy.h" - -static inline void -rte_mov16(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 16); -} - -static inline void -rte_mov32(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 32); -} - -static inline void -rte_mov48(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 48); -} - -static inline void -rte_mov64(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 64); -} - -static inline void -rte_mov128(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 128); -} - -static inline void -rte_mov256(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 256); -} - -#define rte_memcpy(d, s, n) memcpy((d), (s), (n)) - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_MEMCPY_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h deleted file mode 100644 index 7a1bb93e..00000000 --- a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * BSD LICENSE - * - * Copyright (C) EZchip Semiconductor Ltd. 2015. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef _RTE_PREFETCH_TILE_H_ -#define _RTE_PREFETCH_TILE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "generic/rte_prefetch.h" - -static inline void rte_prefetch0(const volatile void *p) -{ - __builtin_prefetch((const void *)(uintptr_t)p, 0, 3); -} - -static inline void rte_prefetch1(const volatile void *p) -{ - __builtin_prefetch((const void *)(uintptr_t)p, 0, 2); -} - -static inline void rte_prefetch2(const volatile void *p) -{ - __builtin_prefetch((const void *)(uintptr_t)p, 0, 1); -} - -static inline void rte_prefetch_non_temporal(const volatile void *p) -{ - /* non-temporal version not available, fallback to rte_prefetch0 */ - rte_prefetch0(p); -} - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_PREFETCH_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h index 00b1cdf5..4eac6663 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h @@ -61,6 +61,12 @@ extern "C" { #define rte_smp_rmb() rte_compiler_barrier() +#define rte_io_mb() rte_mb() + +#define rte_io_wmb() rte_compiler_barrier() + +#define rte_io_rmb() rte_compiler_barrier() + /*------------------------- 16 bit atomic operations -------------------------*/ #ifndef RTE_FORCE_INTRINSICS diff --git a/lib/librte_eal/common/include/arch/tile/rte_cycles.h b/lib/librte_eal/common/include/arch/x86/rte_io.h index 0b2200a3..c8d14043 100644 --- a/lib/librte_eal/common/include/arch/tile/rte_cycles.h +++ b/lib/librte_eal/common/include/arch/x86/rte_io.h @@ -1,7 +1,8 @@ /* * BSD LICENSE * - * Copyright (C) EZchip Semiconductor Ltd. 2015. + * Copyright(c) 2016 Cavium networks. All rights reserved. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its + * * Neither the name of Cavium networks nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -28,43 +29,19 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
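On x86, the rte_io_mb(), rte_io_wmb() and rte_io_rmb() barriers added to rte_atomic.h above map to rte_mb() and plain compiler barriers, since x86 already keeps stores ordered against stores and loads against loads. A minimal sketch of the intended use, assuming a hypothetical memory-mapped doorbell register:

#include <rte_atomic.h>

/* Make descriptor stores visible to the device before ringing the
 * doorbell; on x86 rte_io_wmb() compiles to a compiler barrier only. */
static void
ring_doorbell(volatile uint32_t *doorbell, uint32_t tail)
{
	rte_io_wmb();
	*doorbell = tail;
}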
-*/ + */ -#ifndef _RTE_CYCLES_TILE_H_ -#define _RTE_CYCLES_TILE_H_ +#ifndef _RTE_IO_X86_H_ +#define _RTE_IO_X86_H_ #ifdef __cplusplus extern "C" { #endif -#include <arch/cycle.h> - -#include "generic/rte_cycles.h" - -/** - * Read the time base register. - * - * @return - * The time base for this lcore. - */ -static inline uint64_t -rte_rdtsc(void) -{ - return get_cycle_count(); -} - -static inline uint64_t -rte_rdtsc_precise(void) -{ - rte_mb(); - return rte_rdtsc(); -} - -static inline uint64_t -rte_get_tsc_cycles(void) { return rte_rdtsc(); } +#include "generic/rte_io.h" #ifdef __cplusplus } #endif -#endif /* _RTE_CYCLES_TILE_H_ */ +#endif /* _RTE_IO_X86_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h index b3bfc235..b9785e85 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h @@ -69,6 +69,8 @@ rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline)); #ifdef RTE_MACHINE_CPUFLAG_AVX512F +#define ALIGNMENT_MASK 0x3F + /** * AVX512 implementation below */ @@ -189,7 +191,7 @@ rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n) } static inline void * -rte_memcpy(void *dst, const void *src, size_t n) +rte_memcpy_generic(void *dst, const void *src, size_t n) { uintptr_t dstu = (uintptr_t)dst; uintptr_t srcu = (uintptr_t)src; @@ -308,6 +310,8 @@ COPY_BLOCK_128_BACK63: #elif defined RTE_MACHINE_CPUFLAG_AVX2 +#define ALIGNMENT_MASK 0x1F + /** * AVX2 implementation below */ @@ -387,7 +391,7 @@ rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n) } static inline void * -rte_memcpy(void *dst, const void *src, size_t n) +rte_memcpy_generic(void *dst, const void *src, size_t n) { uintptr_t dstu = (uintptr_t)dst; uintptr_t srcu = (uintptr_t)src; @@ -499,6 +503,8 @@ COPY_BLOCK_128_BACK31: #else /* RTE_MACHINE_CPUFLAG */ +#define ALIGNMENT_MASK 0x0F + /** * SSE & AVX implementation below */ @@ -677,7 +683,7 @@ __extension__ ({ \ }) static inline void * -rte_memcpy(void *dst, const void *src, size_t n) +rte_memcpy_generic(void *dst, const void *src, size_t n) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8; uintptr_t dstu = (uintptr_t)dst; @@ -821,6 +827,75 @@ COPY_BLOCK_64_BACK15: #endif /* RTE_MACHINE_CPUFLAG */ +static inline void * +rte_memcpy_aligned(void *dst, const void *src, size_t n) +{ + void *ret = dst; + + /* Copy size <= 16 bytes */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dst = *(const uint8_t *)src; + src = (const uint8_t *)src + 1; + dst = (uint8_t *)dst + 1; + } + if (n & 0x02) { + *(uint16_t *)dst = *(const uint16_t *)src; + src = (const uint16_t *)src + 1; + dst = (uint16_t *)dst + 1; + } + if (n & 0x04) { + *(uint32_t *)dst = *(const uint32_t *)src; + src = (const uint32_t *)src + 1; + dst = (uint32_t *)dst + 1; + } + if (n & 0x08) + *(uint64_t *)dst = *(const uint64_t *)src; + + return ret; + } + + /* Copy 16 <= size <= 32 bytes */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + + return ret; + } + + /* Copy 32 < size <= 64 bytes */ + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); + + return ret; + } + + /* Copy 64 bytes blocks */ + for (; n >= 64; n -= 64) { + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; + } + + /* Copy whatever 
left */ + rte_mov64((uint8_t *)dst - 64 + n, + (const uint8_t *)src - 64 + n); + + return ret; +} + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + if (!(((uintptr_t)dst | (uintptr_t)src) & ALIGNMENT_MASK)) + return rte_memcpy_aligned(dst, src, n); + else + return rte_memcpy_generic(dst, src, n); +} + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h index 77f2e253..1b4b85dd 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_vect.h +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h @@ -31,8 +31,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _RTE_VECT_H_ -#define _RTE_VECT_H_ +#ifndef _RTE_VECT_X86_H_ +#define _RTE_VECT_X86_H_ /** * @file @@ -41,6 +41,7 @@ */ #include <stdint.h> +#include "generic/rte_vect.h" #if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) @@ -133,4 +134,4 @@ __extension__ ({ \ } #endif -#endif /* _RTE_VECT_H_ */ +#endif /* _RTE_VECT_X86_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h index 43a704ec..7b81705b 100644 --- a/lib/librte_eal/common/include/generic/rte_atomic.h +++ b/lib/librte_eal/common/include/generic/rte_atomic.h @@ -100,6 +100,33 @@ static inline void rte_smp_wmb(void); */ static inline void rte_smp_rmb(void); +/** + * General memory barrier for I/O device + * + * Guarantees that the LOAD and STORE operations that precede the + * rte_io_mb() call are visible to I/O device or CPU before the + * LOAD and STORE operations that follow it. + */ +static inline void rte_io_mb(void); + +/** + * Write memory barrier for I/O device + * + * Guarantees that the STORE operations that precede the + * rte_io_wmb() call are visible to I/O device before the STORE + * operations that follow it. + */ +static inline void rte_io_wmb(void); + +/** + * Read memory barrier for IO device + * + * Guarantees that the LOAD operations on I/O device that precede the + * rte_io_rmb() call are visible to CPU before the LOAD + * operations that follow it. + */ +static inline void rte_io_rmb(void); + #endif /* __DOXYGEN__ */ /** diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h index 71321f32..c1c5551f 100644 --- a/lib/librte_eal/common/include/generic/rte_cpuflags.h +++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h @@ -39,6 +39,7 @@ * Architecture specific API to determine available CPU features at runtime. */ +#include "rte_common.h" #include <errno.h> /** @@ -79,7 +80,17 @@ rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature); * that were specified at compile time. It is called automatically within the * EAL, so does not need to be used by applications. */ +__rte_deprecated void rte_cpu_check_supported(void); +/** + * This function checks that the currently used CPU supports the CPU features + * that were specified at compile time. It is called automatically within the + * EAL, so does not need to be used by applications. This version returns a + * result so that decisions may be made (for instance, graceful shutdowns). 
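The reworked x86 rte_memcpy() above dispatches on pointer alignment: ALIGNMENT_MASK is 0x3F, 0x1F or 0x0F depending on whether AVX512, AVX2 or SSE code is compiled in, and the aligned fast path is taken only when source and destination both pass the mask test. A sketch of that check, assuming the AVX2 mask:

#include <stdint.h>

/* OR-ing both addresses lets one mask test confirm that src and dst
 * are each 32-byte aligned (AVX2's ALIGNMENT_MASK of 0x1F). */
static int
both_aligned(const void *dst, const void *src)
{
	return !(((uintptr_t)dst | (uintptr_t)src) & 0x1F);
}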
+ */ +int +rte_cpu_is_supported(void); + #endif /* _RTE_CPUFLAGS_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h index 00103ca9..0e645c2c 100644 --- a/lib/librte_eal/common/include/generic/rte_cycles.h +++ b/lib/librte_eal/common/include/generic/rte_cycles.h @@ -150,15 +150,17 @@ int rte_eal_hpet_init(int make_default); static inline uint64_t rte_get_timer_cycles(void) { +#ifdef RTE_LIBEAL_USE_HPET switch(eal_timer_source) { case EAL_TIMER_TSC: +#endif return rte_get_tsc_cycles(); - case EAL_TIMER_HPET: #ifdef RTE_LIBEAL_USE_HPET + case EAL_TIMER_HPET: return rte_get_hpet_cycles(); -#endif default: rte_panic("Invalid timer source specified\n"); } +#endif } /** @@ -170,15 +172,17 @@ rte_get_timer_cycles(void) static inline uint64_t rte_get_timer_hz(void) { +#ifdef RTE_LIBEAL_USE_HPET switch(eal_timer_source) { case EAL_TIMER_TSC: +#endif return rte_get_tsc_hz(); - case EAL_TIMER_HPET: #ifdef RTE_LIBEAL_USE_HPET + case EAL_TIMER_HPET: return rte_get_hpet_hz(); -#endif default: rte_panic("Invalid timer source specified\n"); } +#endif } /** * Wait at least us microseconds. diff --git a/lib/librte_eal/common/include/generic/rte_io.h b/lib/librte_eal/common/include/generic/rte_io.h new file mode 100644 index 00000000..d82ee695 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_io.h @@ -0,0 +1,381 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2016 Cavium networks. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_IO_H_ +#define _RTE_IO_H_ + +#include <rte_atomic.h> + +/** + * @file + * I/O device memory operations + * + * This file defines the generic API for I/O device memory read/write operations. + */ + +#include <stdint.h> +#include <rte_common.h> +#include <rte_atomic.h> + +#ifdef __DOXYGEN__ + +/** + * Read an 8-bit value from I/O device memory address *addr*.
+ * + * The relaxed version does not have an additional I/O memory barrier; it is + * useful when accessing the device registers of integrated controllers, which + * are implicitly strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint8_t +rte_read8_relaxed(const volatile void *addr); + +/** + * Read a 16-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have an additional I/O memory barrier; it is + * useful when accessing the device registers of integrated controllers, which + * are implicitly strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint16_t +rte_read16_relaxed(const volatile void *addr); + +/** + * Read a 32-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have an additional I/O memory barrier; it is + * useful when accessing the device registers of integrated controllers, which + * are implicitly strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint32_t +rte_read32_relaxed(const volatile void *addr); + +/** + * Read a 64-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have an additional I/O memory barrier; it is + * useful when accessing the device registers of integrated controllers, which + * are implicitly strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint64_t +rte_read64_relaxed(const volatile void *addr); + +/** + * Write an 8-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have an additional I/O memory barrier; it is + * useful when accessing the device registers of integrated controllers, which + * are implicitly strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ + +static inline void +rte_write8_relaxed(uint8_t value, volatile void *addr); + +/** + * Write a 16-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have an additional I/O memory barrier; it is + * useful when accessing the device registers of integrated controllers, which + * are implicitly strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write16_relaxed(uint16_t value, volatile void *addr); + +/** + * Write a 32-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have an additional I/O memory barrier; it is + * useful when accessing the device registers of integrated controllers, which + * are implicitly strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write32_relaxed(uint32_t value, volatile void *addr); + +/** + * Write a 64-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have an additional I/O memory barrier; it is + * useful when accessing the device registers of integrated controllers, which + * are implicitly strongly ordered with respect to memory access.
+ * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write64_relaxed(uint64_t value, volatile void *addr); + +/** + * Read an 8-bit value from I/O device memory address *addr*. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint8_t +rte_read8(const volatile void *addr); + +/** + * Read a 16-bit value from I/O device memory address *addr*. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint16_t +rte_read16(const volatile void *addr); + +/** + * Read a 32-bit value from I/O device memory address *addr*. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint32_t +rte_read32(const volatile void *addr); + +/** + * Read a 64-bit value from I/O device memory address *addr*. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint64_t +rte_read64(const volatile void *addr); + +/** + * Write an 8-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ + +static inline void +rte_write8(uint8_t value, volatile void *addr); + +/** + * Write a 16-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write16(uint16_t value, volatile void *addr); + +/** + * Write a 32-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write32(uint32_t value, volatile void *addr); + +/** + * Write a 64-bit value to I/O device memory address *addr*.
+ * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write64(uint64_t value, volatile void *addr); + +#endif /* __DOXYGEN__ */ + +#ifndef RTE_OVERRIDE_IO_H + +static inline uint8_t __attribute__((always_inline)) +rte_read8_relaxed(const volatile void *addr) +{ + return *(const volatile uint8_t *)addr; +} + +static inline uint16_t __attribute__((always_inline)) +rte_read16_relaxed(const volatile void *addr) +{ + return *(const volatile uint16_t *)addr; +} + +static inline uint32_t __attribute__((always_inline)) +rte_read32_relaxed(const volatile void *addr) +{ + return *(const volatile uint32_t *)addr; +} + +static inline uint64_t __attribute__((always_inline)) +rte_read64_relaxed(const volatile void *addr) +{ + return *(const volatile uint64_t *)addr; +} + +static inline void __attribute__((always_inline)) +rte_write8_relaxed(uint8_t value, volatile void *addr) +{ + *(volatile uint8_t *)addr = value; +} + +static inline void __attribute__((always_inline)) +rte_write16_relaxed(uint16_t value, volatile void *addr) +{ + *(volatile uint16_t *)addr = value; +} + +static inline void __attribute__((always_inline)) +rte_write32_relaxed(uint32_t value, volatile void *addr) +{ + *(volatile uint32_t *)addr = value; +} + +static inline void __attribute__((always_inline)) +rte_write64_relaxed(uint64_t value, volatile void *addr) +{ + *(volatile uint64_t *)addr = value; +} + +static inline uint8_t __attribute__((always_inline)) +rte_read8(const volatile void *addr) +{ + uint8_t val; + val = rte_read8_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint16_t __attribute__((always_inline)) +rte_read16(const volatile void *addr) +{ + uint16_t val; + val = rte_read16_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint32_t __attribute__((always_inline)) +rte_read32(const volatile void *addr) +{ + uint32_t val; + val = rte_read32_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint64_t __attribute__((always_inline)) +rte_read64(const volatile void *addr) +{ + uint64_t val; + val = rte_read64_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline void __attribute__((always_inline)) +rte_write8(uint8_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write8_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write16(uint16_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write16_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write32(uint32_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write32_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write64(uint64_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write64_relaxed(value, addr); +} + +#endif /* RTE_OVERRIDE_IO_H */ + +#endif /* _RTE_IO_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_vect.h b/lib/librte_eal/common/include/generic/rte_vect.h new file mode 100644 index 00000000..600ee9f3 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_vect.h @@ -0,0 +1,214 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
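The generic accessors above pair every access with the matching barrier: rte_read*() issues the load and then rte_io_rmb(), while rte_write*() issues rte_io_wmb() and then the store. A short usage sketch; the register offsets below are illustrative only:

#include <stdint.h>
#include <rte_io.h>

#define DEV_STATUS  0x00 /* hypothetical status register offset */
#define DEV_CONTROL 0x04 /* hypothetical control register offset */

static int
start_device(volatile uint8_t *bar0)
{
	/* Load, then I/O read barrier. */
	uint32_t status = rte_read32(bar0 + DEV_STATUS);

	if (status & 0x1)
		return -1; /* already running */

	/* I/O write barrier, then store. */
	rte_write32(0x1, bar0 + DEV_CONTROL);
	return 0;
}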
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_VECT_H_ +#define _RTE_VECT_H_ + +/** + * @file + * SIMD vector types + * + * This file defines types to use vector instructions with generic C code. + */ + +#include <stdint.h> + +/* Unsigned vector types */ + +/** + * 64 bits vector size to use with unsigned 8 bits elements. + * + * a = (rte_v64u8_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef uint8_t rte_v64u8_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with unsigned 16 bits elements. + * + * a = (rte_v64u16_t){ a0, a1, a2, a3 } + */ +typedef uint16_t rte_v64u16_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with unsigned 32 bits elements. + * + * a = (rte_v64u32_t){ a0, a1 } + */ +typedef uint32_t rte_v64u32_t __attribute__((vector_size(8), aligned(8))); + +/** + * 128 bits vector size to use with unsigned 8 bits elements. + * + * a = (rte_v128u8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef uint8_t rte_v128u8_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with unsigned 16 bits elements. + * + * a = (rte_v128u16_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef uint16_t rte_v128u16_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with unsigned 32 bits elements. + * + * a = (rte_v128u32_t){ a0, a1, a2, a3 } + */ +typedef uint32_t rte_v128u32_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with unsigned 64 bits elements. + * + * a = (rte_v128u64_t){ a0, a1 } + */ +typedef uint64_t rte_v128u64_t __attribute__((vector_size(16), aligned(16))); + +/** + * 256 bits vector size to use with unsigned 8 bits elements. + * + * a = (rte_v256u8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15, + * a16, a17, a18, a19, a20, a21, a22, a23, + * a24, a25, a26, a27, a28, a29, a30, a31 } + */ +typedef uint8_t rte_v256u8_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with unsigned 16 bits elements.
+ * + * a = (rte_v256u16_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef uint16_t rte_v256u16_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with unsigned 32 bits elements. + * + * a = (rte_v256u32_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef uint32_t rte_v256u32_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with unsigned 64 bits elements. + * + * a = (rte_v256u64_t){ a0, a1, a2, a3 } + */ +typedef uint64_t rte_v256u64_t __attribute__((vector_size(32), aligned(32))); + + +/* Signed vector types */ + +/** + * 64 bits vector size to use with 8 bits elements. + * + * a = (rte_v64s8_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef int8_t rte_v64s8_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with 16 bits elements. + * + * a = (rte_v64s16_t){ a0, a1, a2, a3 } + */ +typedef int16_t rte_v64s16_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with 32 bits elements. + * + * a = (rte_v64s32_t){ a0, a1 } + */ +typedef int32_t rte_v64s32_t __attribute__((vector_size(8), aligned(8))); + +/** + * 128 bits vector size to use with 8 bits elements. + * + * a = (rte_v128s8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef int8_t rte_v128s8_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with 16 bits elements. + * + * a = (rte_v128s16_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef int16_t rte_v128s16_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with 32 bits elements. + * + * a = (rte_v128s32_t){ a0, a1, a2, a3 } + */ +typedef int32_t rte_v128s32_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with 64 bits elements. + * + * a = (rte_v128s64_t){ a0, a1 } + */ +typedef int64_t rte_v128s64_t __attribute__((vector_size(16), aligned(16))); + +/** + * 256 bits vector size to use with 8 bits elements. + * + * a = (rte_v256s8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15, + * a16, a17, a18, a19, a20, a21, a22, a23, + * a24, a25, a26, a27, a28, a29, a30, a31 } + */ +typedef int8_t rte_v256s8_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with 16 bits elements. + * + * a = (rte_v256s16_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef int16_t rte_v256s16_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with 32 bits elements. + * + * a = (rte_v256s32_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef int32_t rte_v256s32_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with 64 bits elements. + * + * a = (rte_v256s64_t){ a0, a1, a2, a3 } + */ +typedef int64_t rte_v256s64_t __attribute__((vector_size(32), aligned(32))); + +#endif /* _RTE_VECT_H_ */ diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h new file mode 100644 index 00000000..7c369692 --- /dev/null +++ b/lib/librte_eal/common/include/rte_bus.h @@ -0,0 +1,158 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 NXP + * All rights reserved.
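These typedefs build on the GCC vector extension, so generic C code can apply ordinary operators to them without ISA-specific intrinsics; for example:

#include <rte_vect.h>

/* Lane-wise add of two vectors of four uint32_t elements; the
 * compiler lowers this to a single SIMD add where available. */
static rte_v128u32_t
add_lanes(rte_v128u32_t a, rte_v128u32_t b)
{
	return a + b;
}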
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of NXP nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BUS_H_ +#define _RTE_BUS_H_ + +/** + * @file + * + * DPDK device bus interface + * + * This file exposes API and interfaces for bus abstraction + * over the devices and drivers in EAL. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <sys/queue.h> + +#include <rte_log.h> +#include <rte_dev.h> + +/** Double linked list of buses */ +TAILQ_HEAD(rte_bus_list, rte_bus); + +/** + * Bus-specific scan for devices attached on the bus. + * For each bus object, the scan is responsible for finding devices and + * adding them to its private device list. + * + * A bus must implement this method. + * + * @return + * 0 for successful scan + * <0 for unsuccessful scan with error value + */ +typedef int (*rte_bus_scan_t)(void); + +/** + * Implementation-specific probe function which is responsible for linking + * devices on that bus with applicable drivers. + * + * This is called while iterating over each registered bus. + * + * @return + * 0 for successful probe + * !0 for any error while probing + */ +typedef int (*rte_bus_probe_t)(void); + +/** + * A structure describing a generic bus. + */ +struct rte_bus { + TAILQ_ENTRY(rte_bus) next; /**< Next bus object in linked list */ + const char *name; /**< Name of the bus */ + rte_bus_scan_t scan; /**< Scan for devices attached to bus */ + rte_bus_probe_t probe; /**< Probe devices on bus */ +}; + +/** + * Register a Bus handler. + * + * @param bus + * A pointer to a rte_bus structure describing the bus + * to be registered. + */ +void rte_bus_register(struct rte_bus *bus); + +/** + * Unregister a Bus handler. + * + * @param bus + * A pointer to a rte_bus structure describing the bus + * to be unregistered. + */ +void rte_bus_unregister(struct rte_bus *bus); + +/** + * Scan all the buses.
+ * + * @return + * 0 in case of success in scanning all buses + * !0 in case of failure to scan + */ +int rte_bus_scan(void); + +/** + * For each device on the buses, perform a driver 'match' and call the + * driver-specific probe for device initialization. + * + * @return + * 0 for successful match/probe + * !0 otherwise + */ +int rte_bus_probe(void); + +/** + * Dump information of all the buses registered with EAL. + * + * @param f + * A valid and open output stream handle + */ +void rte_bus_dump(FILE *f); + +/** + * Helper for Bus registration. + * The constructor has higher priority than PMD constructors. + */ +#define RTE_REGISTER_BUS(nm, bus) \ +static void __attribute__((constructor(101), used)) businitfn_ ##nm(void) \ +{\ + (bus).name = RTE_STR(nm);\ + rte_bus_register(&bus); \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BUS_H */ diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index db5ac91c..e057f6e2 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -331,6 +331,29 @@ rte_bsf32(uint32_t v) #define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) #endif +/** + * Return pointer to the wrapping struct instance. + * + * Example: + * + * struct wrapper { + * ... + * struct child c; + * ... + * }; + * + * struct child *x = obtain(...); + * struct wrapper *w = container_of(x, struct wrapper, c); + */ +#ifndef container_of +#define container_of(ptr, type, member) __extension__ ({ \ + const typeof(((type *)0)->member) *_ptr = (ptr); \ + __attribute__((unused)) type *_target_ptr = \ + (type *)(ptr); \ + (type *)(((uintptr_t)_ptr) - offsetof(type, member)); \ + }) +#endif + #define _RTE_STR(x) #x /** Take a macro value and get a string version of it */ #define RTE_STR(x) _RTE_STR(x) diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index 8840380d..de20c063 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -49,6 +49,7 @@ extern "C" { #include <stdio.h> #include <sys/queue.h> +#include <rte_config.h> #include <rte_log.h> __attribute__((format(printf, 2, 0))) @@ -70,6 +71,19 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer); } +/* + * Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the + * RTE_*_RET() macros defined below is compiled in debug mode. + */ +#if defined(RTE_LIBRTE_ETHDEV_DEBUG) || \ + defined(RTE_LIBRTE_CRYPTODEV_DEBUG) || \ + defined(RTE_LIBRTE_EVENTDEV_DEBUG) +#define RTE_PMD_DEBUG_TRACE(...) \ + rte_pmd_debug_trace(__func__, __VA_ARGS__) +#else +#define RTE_PMD_DEBUG_TRACE(...) (void)0 +#endif + /* Macros for checking for restricting functions to primary instance only */ #define RTE_PROC_PRIMARY_OR_ERR_RET(retval) do { \ if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \ @@ -109,40 +123,6 @@ struct rte_mem_resource { void *addr; /**< Virtual address, NULL when not mapped. */ }; -/** Double linked list of device drivers. */ -TAILQ_HEAD(rte_driver_list, rte_driver); -/** Double linked list of devices. */ -TAILQ_HEAD(rte_device_list, rte_device); - -/* Forward declaration */ -struct rte_driver; - -/** - * A structure describing a generic device.
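A bus implementation fills in struct rte_bus and hands it to RTE_REGISTER_BUS(), whose constructor runs at priority 101, ahead of PMD registration. A skeleton under hypothetical names:

#include <rte_bus.h>

/* A real scan() would populate a private device list; a real probe()
 * would match those devices against registered drivers. */
static int my_bus_scan(void) { return 0; }
static int my_bus_probe(void) { return 0; }

static struct rte_bus my_bus = {
	.scan = my_bus_scan,
	.probe = my_bus_probe,
};

RTE_REGISTER_BUS(my, my_bus);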
- */ -struct rte_device { - TAILQ_ENTRY(rte_device) next; /**< Next device */ - struct rte_driver *driver; /**< Associated driver */ - int numa_node; /**< NUMA node connection */ - struct rte_devargs *devargs; /**< Device user arguments */ -}; - -/** - * Insert a device detected by a bus scanning. - * - * @param dev - * A pointer to a rte_device structure describing the detected device. - */ -void rte_eal_device_insert(struct rte_device *dev); - -/** - * Remove a device (e.g. when being unplugged). - * - * @param dev - * A pointer to a rte_device structure describing the device to be removed. - */ -void rte_eal_device_remove(struct rte_device *dev); - /** * A structure describing a device driver. */ @@ -153,27 +133,15 @@ struct rte_driver { }; /** - * Register a device driver. - * - * @param driver - * A pointer to a rte_dev structure describing the driver - * to be registered. - */ -void rte_eal_driver_register(struct rte_driver *driver); - -/** - * Unregister a device driver. - * - * @param driver - * A pointer to a rte_dev structure describing the driver - * to be unregistered. - */ -void rte_eal_driver_unregister(struct rte_driver *driver); - -/** - * Initialize all the registered drivers in this process + * A structure describing a generic device. */ -int rte_eal_dev_init(void); +struct rte_device { + TAILQ_ENTRY(rte_device) next; /**< Next device */ + const char *name; /**< Device name */ + const struct rte_driver *driver;/**< Associated driver */ + int numa_node; /**< NUMA node connection */ + struct rte_devargs *devargs; /**< Device user arguments */ +}; /** * Initialize a driver specified by name. @@ -185,7 +153,7 @@ int rte_eal_dev_init(void); * @return * 0 on success, negative on error */ -int rte_eal_vdev_init(const char *name, const char *args); +int rte_vdev_init(const char *name, const char *args); /** * Uninitialize a driver specified by name. @@ -195,7 +163,7 @@ int rte_eal_vdev_init(const char *name, const char *args); * @return * 0 on success, negative on error */ -int rte_eal_vdev_uninit(const char *name); +int rte_vdev_uninit(const char *name); /** * Attach a device to a registered driver. @@ -239,6 +207,31 @@ RTE_STR(table) static const char DRV_EXP_TAG(name, param_string_export)[] \ __attribute__((used)) = str +/** + * Advertise the list of kernel modules required to run this driver + * + * This string lists the kernel modules required for the devices + * associated to a PMD. The format of each line of the string is: + * "<device-pattern> <kmod-expression>". + * + * The possible formats for the device pattern are: + * "*" all devices supported by this driver + * "pci:*" all PCI devices supported by this driver + * "pci:v8086:d*:sv*:sd*" all PCI devices supported by this driver + * whose vendor id is 0x8086. + * + * The format of the kernel modules list is a parenthesized expression + * containing logical-and (&) and logical-or (|). + * + * The device pattern and the kmod expression are separated by a space. + * + * Example: + * - "* igb_uio | uio_pci_generic | vfio" + */ +#define RTE_PMD_REGISTER_KMOD_DEP(name, str) \ +static const char DRV_EXP_TAG(name, kmod_dep_export)[] \ +__attribute__((used)) = str + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h index d150b9dd..abf020bf 100644 --- a/lib/librte_eal/common/include/rte_eal.h +++ b/lib/librte_eal/common/include/rte_eal.h @@ -146,15 +146,45 @@ int rte_eal_iopl_init(void); * This behavior may change in the future.
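A PMD declares its kernel module requirements with the documented pattern syntax; for instance, a hypothetical driver whose PCI devices can be bound through any of the usual userspace I/O modules:

#include <rte_dev.h>

/* Any one of the listed kernel modules satisfies the dependency for
 * all devices of this (illustrative) driver. */
RTE_PMD_REGISTER_KMOD_DEP(net_example, "* igb_uio | uio_pci_generic | vfio");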
* * @param argc - * The argc argument that was given to the main() function. + * A non-negative value. If it is greater than 0, the array members + * for argv[0] through argv[argc] (non-inclusive) shall contain pointers + * to strings. * @param argv - * The argv argument that was given to the main() function. + * An array of strings. The contents of the array, as well as the strings + * which are pointed to by the array, may be modified by this function. * @return * - On success, the number of parsed arguments, which is greater or * equal to zero. After the call to rte_eal_init(), - * all arguments argv[x] with x < ret may be modified and should - * not be accessed by the application. - * - On failure, a negative error value. + * all arguments argv[x] with x < ret may have been modified by this + * function call and should not be further interpreted by the + * application. The EAL does not take any ownership of the memory used + * for either the argv array, or its members. + * - On failure, -1 and rte_errno is set to a value indicating the cause + * for failure. In some instances, the application will need to be + * restarted as part of clearing the issue. + * + * Error codes returned via rte_errno: + * EACCES indicates a permissions issue. + * + * EAGAIN indicates either a bus or system resource was not available, + * setup may be attempted again. + * + * EALREADY indicates that the rte_eal_init function has already been + * called, and cannot be called again. + * + * EFAULT indicates the tailq configuration name was not found in + * memory configuration. + * + * EINVAL indicates invalid parameters were passed as argv/argc. + * + * ENOMEM indicates failure likely caused by an out-of-memory condition. + * + * ENODEV indicates memory setup issues. + * + * ENOTSUP indicates that the EAL cannot initialize on this system. + * + * EPROTO indicates that the PCI bus is either not present, or is not + * readable by the eal. */ int rte_eal_init(int argc, char **argv); diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h index fd3c6eff..5d06ed79 100644 --- a/lib/librte_eal/common/include/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -51,8 +51,7 @@ extern "C" { struct rte_intr_handle; /** Function to be registered for the specific interrupt */ -typedef void (*rte_intr_callback_fn)(struct rte_intr_handle *intr_handle, - void *cb_arg); +typedef void (*rte_intr_callback_fn)(void *cb_arg); #include <exec-env/rte_interrupts.h> @@ -70,7 +69,7 @@ typedef void (*rte_intr_callback_fn)(struct rte_intr_handle *intr_handle, * - On success, zero. * - On failure, a negative value. */ -int rte_intr_callback_register(struct rte_intr_handle *intr_handle, +int rte_intr_callback_register(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg); /** @@ -88,7 +87,7 @@ int rte_intr_callback_register(struct rte_intr_handle *intr_handle, * - On success, return the number of callback entities removed. * - On failure, a negative value. */ -int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, +int rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg); /** @@ -101,7 +100,7 @@ int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, * - On success, zero. * - On failure, a negative value. 
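Given the error contract above, an application can report the exact cause of an initialization failure rather than guessing. A minimal sketch:

#include <stdio.h>
#include <stdlib.h>
#include <rte_eal.h>
#include <rte_errno.h>

int
main(int argc, char **argv)
{
	int ret = rte_eal_init(argc, argv);

	if (ret < 0) {
		/* rte_errno carries one of the documented error codes. */
		fprintf(stderr, "EAL init failed: %s\n",
			rte_strerror(rte_errno));
		return EXIT_FAILURE;
	}
	/* argv[0] through argv[ret - 1] may have been modified. */
	argc -= ret;
	argv += ret;
	return 0;
}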
*/ -int rte_intr_enable(struct rte_intr_handle *intr_handle); +int rte_intr_enable(const struct rte_intr_handle *intr_handle); /** * It disables the interrupt for the specified handle. @@ -113,7 +112,7 @@ int rte_intr_enable(struct rte_intr_handle *intr_handle); * - On success, zero. * - On failure, a negative value. */ -int rte_intr_disable(struct rte_intr_handle *intr_handle); +int rte_intr_disable(const struct rte_intr_handle *intr_handle); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h index 29f7d192..34191385 100644 --- a/lib/librte_eal/common/include/rte_log.h +++ b/lib/librte_eal/common/include/rte_log.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,45 +50,56 @@ extern "C" { #include <stdio.h> #include <stdarg.h> +#include <rte_common.h> + +struct rte_log_dynamic_type; + /** The rte_log structure. */ struct rte_logs { uint32_t type; /**< Bitfield with enabled logs. */ uint32_t level; /**< Log level. */ FILE *file; /**< Output file set by rte_openlog_stream, or NULL. */ + size_t dynamic_types_len; + struct rte_log_dynamic_type *dynamic_types; }; /** Global log informations */ extern struct rte_logs rte_logs; /* SDK log type */ -#define RTE_LOGTYPE_EAL 0x00000001 /**< Log related to eal. */ -#define RTE_LOGTYPE_MALLOC 0x00000002 /**< Log related to malloc. */ -#define RTE_LOGTYPE_RING 0x00000004 /**< Log related to ring. */ -#define RTE_LOGTYPE_MEMPOOL 0x00000008 /**< Log related to mempool. */ -#define RTE_LOGTYPE_TIMER 0x00000010 /**< Log related to timers. */ -#define RTE_LOGTYPE_PMD 0x00000020 /**< Log related to poll mode driver. */ -#define RTE_LOGTYPE_HASH 0x00000040 /**< Log related to hash table. */ -#define RTE_LOGTYPE_LPM 0x00000080 /**< Log related to LPM. */ -#define RTE_LOGTYPE_KNI 0x00000100 /**< Log related to KNI. */ -#define RTE_LOGTYPE_ACL 0x00000200 /**< Log related to ACL. */ -#define RTE_LOGTYPE_POWER 0x00000400 /**< Log related to power. */ -#define RTE_LOGTYPE_METER 0x00000800 /**< Log related to QoS meter. */ -#define RTE_LOGTYPE_SCHED 0x00001000 /**< Log related to QoS port scheduler. */ -#define RTE_LOGTYPE_PORT 0x00002000 /**< Log related to port. */ -#define RTE_LOGTYPE_TABLE 0x00004000 /**< Log related to table. */ -#define RTE_LOGTYPE_PIPELINE 0x00008000 /**< Log related to pipeline. */ -#define RTE_LOGTYPE_MBUF 0x00010000 /**< Log related to mbuf. */ -#define RTE_LOGTYPE_CRYPTODEV 0x00020000 /**< Log related to cryptodev. */ +#define RTE_LOGTYPE_EAL 0 /**< Log related to eal. */ +#define RTE_LOGTYPE_MALLOC 1 /**< Log related to malloc. */ +#define RTE_LOGTYPE_RING 2 /**< Log related to ring. */ +#define RTE_LOGTYPE_MEMPOOL 3 /**< Log related to mempool. */ +#define RTE_LOGTYPE_TIMER 4 /**< Log related to timers. */ +#define RTE_LOGTYPE_PMD 5 /**< Log related to poll mode driver. */ +#define RTE_LOGTYPE_HASH 6 /**< Log related to hash table. */ +#define RTE_LOGTYPE_LPM 7 /**< Log related to LPM. */ +#define RTE_LOGTYPE_KNI 8 /**< Log related to KNI. */ +#define RTE_LOGTYPE_ACL 9 /**< Log related to ACL. */ +#define RTE_LOGTYPE_POWER 10 /**< Log related to power. */ +#define RTE_LOGTYPE_METER 11 /**< Log related to QoS meter. */ +#define RTE_LOGTYPE_SCHED 12 /**< Log related to QoS port scheduler. */ +#define RTE_LOGTYPE_PORT 13 /**< Log related to port. 
*/ +#define RTE_LOGTYPE_TABLE 14 /**< Log related to table. */ +#define RTE_LOGTYPE_PIPELINE 15 /**< Log related to pipeline. */ +#define RTE_LOGTYPE_MBUF 16 /**< Log related to mbuf. */ +#define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */ +#define RTE_LOGTYPE_EFD 18 /**< Log related to EFD. */ +#define RTE_LOGTYPE_EVENTDEV 19 /**< Log related to eventdev. */ /* these log types can be used in an application */ -#define RTE_LOGTYPE_USER1 0x01000000 /**< User-defined log type 1. */ -#define RTE_LOGTYPE_USER2 0x02000000 /**< User-defined log type 2. */ -#define RTE_LOGTYPE_USER3 0x04000000 /**< User-defined log type 3. */ -#define RTE_LOGTYPE_USER4 0x08000000 /**< User-defined log type 4. */ -#define RTE_LOGTYPE_USER5 0x10000000 /**< User-defined log type 5. */ -#define RTE_LOGTYPE_USER6 0x20000000 /**< User-defined log type 6. */ -#define RTE_LOGTYPE_USER7 0x40000000 /**< User-defined log type 7. */ -#define RTE_LOGTYPE_USER8 0x80000000 /**< User-defined log type 8. */ +#define RTE_LOGTYPE_USER1 24 /**< User-defined log type 1. */ +#define RTE_LOGTYPE_USER2 25 /**< User-defined log type 2. */ +#define RTE_LOGTYPE_USER3 26 /**< User-defined log type 3. */ +#define RTE_LOGTYPE_USER4 27 /**< User-defined log type 4. */ +#define RTE_LOGTYPE_USER5 28 /**< User-defined log type 5. */ +#define RTE_LOGTYPE_USER6 29 /**< User-defined log type 6. */ +#define RTE_LOGTYPE_USER7 30 /**< User-defined log type 7. */ +#define RTE_LOGTYPE_USER8 31 /**< User-defined log type 8. */ + +/** First identifier for extended logs */ +#define RTE_LOGTYPE_FIRST_EXT_ID 32 /* Can't use 0, as it gives compiler warnings */ #define RTE_LOG_EMERG 1U /**< System is unusable. */ @@ -118,18 +129,32 @@ int rte_openlog_stream(FILE *f); /** * Set the global log level. * - * After this call, all logs that are lower or equal than level and - * lower or equal than the RTE_LOG_LEVEL configuration option will be - * displayed. + * After this call, logs with a level lower or equal than the level + * passed as argument will be displayed. * * @param level * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). */ +void rte_log_set_global_level(uint32_t level); + +/** + * Deprecated, replaced by rte_log_set_global_level(). + */ +__rte_deprecated void rte_set_log_level(uint32_t level); /** * Get the global log level. + * + * @return + * The current global log level. + */ +uint32_t rte_log_get_global_level(void); + +/** + * Deprecated, replaced by rte_log_get_global_level(). */ +__rte_deprecated uint32_t rte_get_log_level(void); /** @@ -140,14 +165,40 @@ uint32_t rte_get_log_level(void); * @param enable * True for enable; false for disable. */ +__rte_deprecated void rte_set_log_type(uint32_t type, int enable); /** * Get the global log type. */ +__rte_deprecated uint32_t rte_get_log_type(void); /** + * Set the log level for a given type. + * + * @param pattern + * The regexp identifying the log type. + * @param level + * The level to be set. + * @return + * 0 on success, a negative value if level is invalid. + */ +int rte_log_set_level_regexp(const char *pattern, uint32_t level); + +/** + * Set the log level for a given type. + * + * @param logtype + * The log type identifier. + * @param level + * The level to be set. + * @return + * 0 on success, a negative value if logtype or level is invalid. + */ +int rte_log_set_level(uint32_t logtype, uint32_t level); + +/** * Get the current loglevel for the message being processed. 
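Since log types are now numeric identifiers rather than bits in a 32-bit mask, a component can allocate its own type at run time with rte_log_register() (documented further below) and tune it with rte_log_set_level(); messages are then emitted through rte_log() with the returned identifier. A sketch with a hypothetical component name:

#include <rte_log.h>

static int my_logtype;

static void
my_component_init_logs(void)
{
	/* Returns a non-negative log type id, or a negative value on error. */
	my_logtype = rte_log_register("pmd.net.mydrv");
	if (my_logtype >= 0)
		rte_log_set_level(my_logtype, RTE_LOG_NOTICE);
}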
* * Before calling the user-defined stream for logging, the log @@ -176,6 +227,30 @@ int rte_log_cur_msg_loglevel(void); int rte_log_cur_msg_logtype(void); /** + * Register a dynamic log type + * + * If a log type is already registered with the same name, the returned value + * is the same as the previous one. + * + * @param name + * The string identifying the log type. + * @return + * - >0: success, the returned value is the log type identifier. + * - (-ENOMEM): cannot allocate memory. + */ +int rte_log_register(const char *name); + +/** + * Dump log information. + * + * Dump the global level and the registered log types. + * + * @param f + * The output stream where the dump should be sent. + */ +void rte_log_dump(FILE *f); + +/** * Generates a log message. * * The message will be sent in the stream defined by the previous call @@ -184,9 +259,8 @@ int rte_log_cur_msg_logtype(void); * The level argument determines if the log should be displayed or * not, depending on the global rte_logs variable. * - * The preferred alternative is the RTE_LOG() function because debug logs may - * be removed at compilation time if optimization is enabled. Moreover, - * logs are automatically prefixed by type when using the macro. + * The preferred alternative is RTE_LOG(), because it prefixes the logged + * string with the level and type. * * @param level * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). @@ -217,8 +291,8 @@ int rte_log(uint32_t level, uint32_t logtype, const char *format, ...) * not, depending on the global rte_logs variable. A trailing * newline may be added if needed. * - * The preferred alternative is the RTE_LOG() because debug logs may be - * removed at compilation time. + * The preferred alternative is RTE_LOG(), because it prefixes the logged + * string with the level and type. * * @param level * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). @@ -239,15 +313,8 @@ int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) /** * Generates a log message. * - * The RTE_LOG() is equivalent to rte_log() with two differences: - - * - RTE_LOG() can be used to remove debug logs at compilation time, - * depending on RTE_LOG_LEVEL configuration option, and compilation - * optimization level. If optimization is enabled, the tests - * involving constants only are pre-computed. If compilation is done - * with -O0, these tests will be done at run time. - * - The log level and log type names are smaller, for example: - * RTE_LOG(INFO, EAL, "this is a %s", "log"); + * RTE_LOG() is a helper that prefixes the string with the log level + * and type, and calls rte_log(). * * @param l * Log level. A value between EMERG (1) and DEBUG (8). The short name is * expanded by the macro, so it cannot be an integer value. * @param t * The log type, for example, EAL. The short name is expanded by the * macro, so it cannot be an integer value. * @param ... * The fmt string, as in printf(3), followed by the variable arguments * required by the format. * @return * - 0: Success. * - Negative on error. */ #define RTE_LOG(l, t, ...) \ - (void)((RTE_LOG_ ## l <= RTE_LOG_LEVEL) ? \ + rte_log(RTE_LOG_ ## l, \ + RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) + +/** + * Generates a log message for data path. + * + * Similar to RTE_LOG(), except that it is removed at compilation time + * if the RTE_LOG_DP_LEVEL configuration option is lower than the log + * level argument. + * + * @param l + * Log level. A value between EMERG (1) and DEBUG (8). The short name is + * expanded by the macro, so it cannot be an integer value. + * @param t + * The log type, for example, EAL. The short name is expanded by the + * macro, so it cannot be an integer value. + * @param ...
+ * The fmt string, as in printf(3), followed by the variable arguments + * required by the format. + * @return + * - 0: Success. + * - Negative on error. + */ +#define RTE_LOG_DP(l, t, ...) \ + (void)((RTE_LOG_ ## l <= RTE_LOG_DP_LEVEL) ? \ rte_log(RTE_LOG_ ## l, \ RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) : \ 0) diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h index 9ce88472..ab64c63c 100644 --- a/lib/librte_eal/common/include/rte_pci.h +++ b/lib/librte_eal/common/include/rte_pci.h @@ -85,12 +85,7 @@ extern "C" { #include <rte_debug.h> #include <rte_interrupts.h> #include <rte_dev.h> - -TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */ -TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */ - -extern struct pci_driver_list pci_driver_list; /**< Global list of PCI drivers. */ -extern struct pci_device_list pci_device_list; /**< Global list of PCI devices. */ +#include <rte_bus.h> /** Pathname of PCI devices directory. */ const char *pci_get_sysfs_path(void); @@ -111,6 +106,25 @@ const char *pci_get_sysfs_path(void); /** Maximum number of PCI resources. */ #define PCI_MAX_RESOURCE 6 +/** Name of PCI Bus */ +#define PCI_BUS_NAME "PCI" + +/* Forward declarations */ +struct rte_pci_device; +struct rte_pci_driver; + +/** List of PCI devices */ +TAILQ_HEAD(rte_pci_device_list, rte_pci_device); +/** List of PCI drivers */ +TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver); + +/* PCI Bus iterators */ +#define FOREACH_DEVICE_ON_PCIBUS(p) \ + TAILQ_FOREACH(p, &(rte_pci_bus.device_list), next) + +#define FOREACH_DRIVER_ON_PCIBUS(p) \ + TAILQ_FOREACH(p, &(rte_pci_bus.driver_list), next) + /** * A structure describing an ID for a PCI driver. Each driver provides a * table of these IDs for each device that it supports. @@ -158,8 +172,15 @@ struct rte_pci_device { struct rte_pci_driver *driver; /**< Associated driver */ uint16_t max_vfs; /**< sriov enable if not zero */ enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ + char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */ }; +/** + * @internal + * Helper macro for drivers that need to convert to struct rte_pci_device. + */ +#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device) + /** Any PCI device identifier (vendor, device, ...) */ #define PCI_ANY_ID (0xffff) #define RTE_CLASS_ANY_ID (0xffffff) @@ -182,8 +203,6 @@ struct rte_pci_device { .subsystem_device_id = PCI_ANY_ID #endif -struct rte_pci_driver; - /** * Initialisation function for the driver called during PCI probing. */ @@ -200,20 +219,28 @@ typedef int (pci_remove_t)(struct rte_pci_device *); struct rte_pci_driver { TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ struct rte_driver driver; /**< Inherit core driver. */ + struct rte_pci_bus *bus; /**< PCI bus reference. */ pci_probe_t *probe; /**< Device Probe function. */ pci_remove_t *remove; /**< Device Remove function. */ const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ uint32_t drv_flags; /**< Flags contolling handling of device. 
*/ }; +/** + * Structure describing the PCI bus + */ +struct rte_pci_bus { + struct rte_bus bus; /**< Inherit the generic class */ + struct rte_pci_device_list device_list; /**< List of PCI devices */ + struct rte_pci_driver_list driver_list; /**< List of PCI drivers */ +}; + /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ #define RTE_PCI_DRV_NEED_MAPPING 0x0001 -/** Device needs to be unbound even if no module is provided */ -#define RTE_PCI_DRV_FORCE_UNBIND 0x0004 /** Device driver supports link state interrupt */ #define RTE_PCI_DRV_INTR_LSC 0x0008 -/** Device driver supports detaching capability */ -#define RTE_PCI_DRV_DETACHABLE 0x0010 +/** Device driver supports device removal interrupt */ +#define RTE_PCI_DRV_INTR_RMV 0x0010 /** * A structure describing a PCI mapping. @@ -315,8 +342,8 @@ eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr) * The output buffer size */ static inline void -rte_eal_pci_device_name(const struct rte_pci_addr *addr, - char *output, size_t size) +rte_pci_device_name(const struct rte_pci_addr *addr, + char *output, size_t size) { RTE_VERIFY(size >= PCI_PRI_STR_SIZE); RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT, @@ -366,20 +393,17 @@ rte_eal_compare_pci_addr(const struct rte_pci_addr *addr, * @return * 0 on success, negative on error */ -int rte_eal_pci_scan(void); +int rte_pci_scan(void); /** - * Probe the PCI bus for registered drivers. - * - * Scan the content of the PCI bus, and call the probe() function for - * all registered drivers that have a matching entry in its id_table - * for discovered devices. + * Probe the PCI bus * * @return * - 0 on success. - * - Negative on error. + * - !0 on error. */ -int rte_eal_pci_probe(void); +int +rte_pci_probe(void); /** * Map the PCI device resources in user space virtual memory address @@ -396,7 +420,7 @@ int rte_eal_pci_probe(void); * 0 on success, negative on error and positive if no driver * is found for the device. */ -int rte_eal_pci_map_device(struct rte_pci_device *dev); +int rte_pci_map_device(struct rte_pci_device *dev); /** * Unmap this device @@ -405,7 +429,7 @@ int rte_eal_pci_map_device(struct rte_pci_device *dev); * A pointer to a rte_pci_device structure describing the device * to use */ -void rte_eal_pci_unmap_device(struct rte_pci_device *dev); +void rte_pci_unmap_device(struct rte_pci_device *dev); /** * @internal @@ -452,7 +476,7 @@ void pci_unmap_resource(void *requested_addr, size_t size); * - 0 on success. * - Negative on error. */ -int rte_eal_pci_probe_one(const struct rte_pci_addr *addr); +int rte_pci_probe_one(const struct rte_pci_addr *addr); /** * Close the single PCI device. @@ -467,7 +491,7 @@ int rte_eal_pci_probe_one(const struct rte_pci_addr *addr); * - 0 on success. * - Negative on error. */ -int rte_eal_pci_detach(const struct rte_pci_addr *addr); +int rte_pci_detach(const struct rte_pci_addr *addr); /** * Dump the content of the PCI bus. @@ -475,7 +499,7 @@ int rte_eal_pci_detach(const struct rte_pci_addr *addr); * @param f * A pointer to a file for output */ -void rte_eal_pci_dump(FILE *f); +void rte_pci_dump(FILE *f); /** * Register a PCI driver. @@ -484,7 +508,7 @@ void rte_eal_pci_dump(FILE *f); * A pointer to a rte_pci_driver structure describing the driver * to be registered. 
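With the PCI bus now modeled as a struct rte_pci_bus, devices found by rte_pci_scan() can be walked through the FOREACH_DEVICE_ON_PCIBUS() iterator. A sketch, assuming the global rte_pci_bus instance referenced by that macro is exposed by the PCI bus implementation:

#include <stdio.h>
#include <rte_pci.h>

extern struct rte_pci_bus rte_pci_bus;

static void
dump_scanned_devices(void)
{
	struct rte_pci_device *dev;

	FOREACH_DEVICE_ON_PCIBUS(dev)
		printf("found PCI device %s\n", dev->name);
}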
*/ -void rte_eal_pci_register(struct rte_pci_driver *driver); +void rte_pci_register(struct rte_pci_driver *driver); /** Helper for PCI device registration from driver (eth, crypto) instance */ #define RTE_PMD_REGISTER_PCI(nm, pci_drv) \ @@ -492,7 +516,7 @@ RTE_INIT(pciinitfn_ ##nm); \ static void pciinitfn_ ##nm(void) \ {\ (pci_drv).driver.name = RTE_STR(nm);\ - rte_eal_pci_register(&pci_drv); \ + rte_pci_register(&pci_drv); \ } \ RTE_PMD_EXPORT_NAME(nm, __COUNTER__) @@ -503,7 +527,7 @@ RTE_PMD_EXPORT_NAME(nm, __COUNTER__) * A pointer to a rte_pci_driver structure describing the driver * to be unregistered. */ -void rte_eal_pci_unregister(struct rte_pci_driver *driver); +void rte_pci_unregister(struct rte_pci_driver *driver); /** * Read PCI config space. @@ -518,8 +542,8 @@ void rte_eal_pci_unregister(struct rte_pci_driver *driver); * @param offset * The offset into PCI config space */ -int rte_eal_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset); +int rte_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset); /** * Write PCI config space. @@ -534,8 +558,8 @@ int rte_eal_pci_read_config(const struct rte_pci_device *device, * @param offset * The offset into PCI config space */ -int rte_eal_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset); +int rte_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset); /** * A structure used to access io resources for a pci device. @@ -563,8 +587,8 @@ struct rte_pci_ioport { * @return * 0 on success, negative on error. */ -int rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p); +int rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); /** * Release any resources used in a rte_pci_ioport object. @@ -574,7 +598,7 @@ int rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, * @return * 0 on success, negative on error. */ -int rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p); +int rte_pci_ioport_unmap(struct rte_pci_ioport *p); /** * Read from a io pci resource. @@ -588,8 +612,8 @@ int rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p); * @param offset * The offset into the pci io resource. */ -void rte_eal_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset); +void rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); /** * Write to a io pci resource. @@ -603,8 +627,8 @@ void rte_eal_pci_ioport_read(struct rte_pci_ioport *p, * @param offset * The offset into the pci io resource. 
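
Registration itself is unchanged apart from the rename; a skeleton PMD wiring the new flag and callbacks together might look like this (the net_dummy name and the 8086:10fb ID pair are purely illustrative):

#include <rte_common.h>
#include <rte_pci.h>

static int
dummy_probe(struct rte_pci_driver *drv __rte_unused,
            struct rte_pci_device *dev __rte_unused)
{
        return 0;       /* claim the device */
}

static int
dummy_remove(struct rte_pci_device *dev __rte_unused)
{
        return 0;
}

static const struct rte_pci_id dummy_ids[] = {
        { RTE_PCI_DEVICE(0x8086, 0x10fb) },
        { .vendor_id = 0 },     /* sentinel */
};

static struct rte_pci_driver dummy_pmd = {
        .id_table = dummy_ids,
        .probe = dummy_probe,
        .remove = dummy_remove,
        .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_RMV,
};

RTE_PMD_REGISTER_PCI(net_dummy, dummy_pmd);
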
*/ -void rte_eal_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset); +void rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h index 784e837d..e6b678ea 100644 --- a/lib/librte_eal/common/include/rte_vdev.h +++ b/lib/librte_eal/common/include/rte_vdev.h @@ -39,6 +39,28 @@ extern "C" { #include <sys/queue.h> #include <rte_dev.h> +#include <rte_devargs.h> + +struct rte_vdev_device { + TAILQ_ENTRY(rte_vdev_device) next; /**< Next attached vdev */ + struct rte_device device; /**< Inherit core device */ +}; + +static inline const char * +rte_vdev_device_name(const struct rte_vdev_device *dev) +{ + if (dev && dev->device.devargs) + return dev->device.devargs->virt.drv_name; + return NULL; +} + +static inline const char * +rte_vdev_device_args(const struct rte_vdev_device *dev) +{ + if (dev && dev->device.devargs) + return dev->device.devargs->args; + return ""; +} /** Double linked list of virtual device drivers. */ TAILQ_HEAD(vdev_driver_list, rte_vdev_driver); @@ -46,12 +68,12 @@ TAILQ_HEAD(vdev_driver_list, rte_vdev_driver); /** * Probe function called for each virtual device driver once. */ -typedef int (rte_vdev_probe_t)(const char *name, const char *args); +typedef int (rte_vdev_probe_t)(struct rte_vdev_device *dev); /** * Remove function called for each virtual device driver once. */ -typedef int (rte_vdev_remove_t)(const char *name); +typedef int (rte_vdev_remove_t)(struct rte_vdev_device *dev); /** * A virtual device driver abstraction. @@ -70,7 +92,7 @@ struct rte_vdev_driver { * A pointer to a rte_vdev_driver structure describing the driver * to be registered. */ -void rte_eal_vdrv_register(struct rte_vdev_driver *driver); +void rte_vdev_register(struct rte_vdev_driver *driver); /** * Unregister a virtual device driver. @@ -79,7 +101,7 @@ void rte_eal_vdrv_register(struct rte_vdev_driver *driver); * A pointer to a rte_vdev_driver structure describing the driver * to be unregistered. */ -void rte_eal_vdrv_unregister(struct rte_vdev_driver *driver); +void rte_vdev_unregister(struct rte_vdev_driver *driver); #define RTE_PMD_REGISTER_VDEV(nm, vdrv)\ RTE_INIT(vdrvinitfn_ ##vdrv);\ @@ -88,7 +110,7 @@ static void vdrvinitfn_ ##vdrv(void)\ {\ (vdrv).driver.name = RTE_STR(nm);\ (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\ - rte_eal_vdrv_register(&vdrv);\ + rte_vdev_register(&vdrv);\ } \ RTE_PMD_EXPORT_NAME(nm, __COUNTER__) diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index 0de35fb7..07a085eb 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -56,17 +56,17 @@ extern "C" { /** * Major version/year number i.e. the yy in yy.mm.z */ -#define RTE_VER_YEAR 16 +#define RTE_VER_YEAR 17 /** * Minor version/month number i.e. the mm in yy.mm.z */ -#define RTE_VER_MONTH 11 +#define RTE_VER_MONTH 5 /** * Patch level number i.e. 
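
The vdev side mirrors this: probe/remove now receive a struct rte_vdev_device instead of raw name/args strings, with the accessors above recovering both. A hypothetical driver ported to the new signatures:

#include <rte_common.h>
#include <rte_log.h>
#include <rte_vdev.h>

static int
dummy_vdev_probe(struct rte_vdev_device *dev)
{
        RTE_LOG(INFO, PMD, "probing %s with args '%s'\n",
                rte_vdev_device_name(dev), rte_vdev_device_args(dev));
        return 0;
}

static int
dummy_vdev_remove(struct rte_vdev_device *dev __rte_unused)
{
        return 0;
}

static struct rte_vdev_driver dummy_vdev_drv = {
        .probe = dummy_vdev_probe,
        .remove = dummy_vdev_remove,
};

RTE_PMD_REGISTER_VDEV(net_dummy_vdev, dummy_vdev_drv);
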
the z in yy.mm.z */ -#define RTE_VER_MINOR 1 +#define RTE_VER_MINOR 0 /** * Extra string to be appended to version number diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile index 20d2a916..4794696b 100644 --- a/lib/librte_eal/linuxapp/Makefile +++ b/lib/librte_eal/linuxapp/Makefile @@ -34,6 +34,8 @@ include $(RTE_SDK)/mk/rte.vars.mk DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio DIRS-$(CONFIG_RTE_KNI_KMOD) += kni +DEPDIRS-kni := eal DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0 +DEPDIRS-xen_dom0 := eal include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 4e206f09..640afd08 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -37,7 +37,7 @@ ARCH_DIR ?= $(RTE_ARCH) EXPORT_MAP := rte_eal_version.map VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) -LIBABIVER := 3 +LIBABIVER := 4 VPATH += $(RTE_SDK)/lib/librte_eal/common @@ -87,6 +87,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_cpuflags.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_string_fns.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hexdump.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_devargs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_bus.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c @@ -130,7 +131,4 @@ INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \ $(addprefix include/exec-env/,$(INC)) -DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common -DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common/arch/$(ARCH_DIR) - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 2075282e..7c78f2dc 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -61,6 +61,7 @@ #include <rte_launch.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> +#include <rte_errno.h> #include <rte_per_lcore.h> #include <rte_lcore.h> #include <rte_log.h> @@ -69,6 +70,7 @@ #include <rte_string_fns.h> #include <rte_cpuflags.h> #include <rte_interrupts.h> +#include <rte_bus.h> #include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> @@ -210,7 +212,7 @@ rte_eal_config_create(void) rte_panic("Cannot mmap memory for rte_config\n"); } memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); - rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; + rte_config.mem_config = rte_mem_cfg_addr; /* store address of the config in the config itself so that secondary * processes could later map the config into this exact location */ @@ -490,8 +492,6 @@ eal_log_level_parse(int argc, char **argv) argvopt = argv; optind = 1; - eal_reset_internal_config(&internal_config); - while ((opt = getopt_long(argc, argvopt, eal_short_options, eal_long_options, &option_index)) != EOF) { @@ -739,6 +739,12 @@ static int rte_eal_vfio_setup(void) } #endif +static void rte_eal_init_alert(const char *msg) +{ + fprintf(stderr, "EAL: FATAL: %s\n", msg); + RTE_LOG(ERR, EAL, "%s\n", msg); +} + /* Launch threads, called at application init(). 
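
The practical consequence of the rte_eal_init() rework below is that applications can finally recover from EAL setup failures instead of being rte_panic()'d. A minimal sketch of the new calling convention:

#include <stdio.h>
#include <rte_eal.h>
#include <rte_errno.h>

int
main(int argc, char **argv)
{
        if (rte_eal_init(argc, argv) < 0) {
                /* rte_errno now carries the failure reason */
                fprintf(stderr, "EAL init failed: %s\n",
                        rte_strerror(rte_errno));
                return 1;
        }
        return 0;
}
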
*/ int rte_eal_init(int argc, char **argv) @@ -751,33 +757,51 @@ rte_eal_init(int argc, char **argv) char thread_name[RTE_MAX_THREAD_NAME_LEN]; /* checks if the machine is adequate */ - rte_cpu_check_supported(); + if (!rte_cpu_is_supported()) { + rte_eal_init_alert("unsupported cpu type."); + rte_errno = ENOTSUP; + return -1; + } - if (!rte_atomic32_test_and_set(&run_once)) + if (!rte_atomic32_test_and_set(&run_once)) { + rte_eal_init_alert("already called initialization."); + rte_errno = EALREADY; return -1; + } logid = strrchr(argv[0], '/'); logid = strdup(logid ? logid + 1: argv[0]); thread_id = pthread_self(); - eal_log_level_parse(argc, argv); + eal_reset_internal_config(&internal_config); /* set log level as early as possible */ - rte_set_log_level(internal_config.log_level); + eal_log_level_parse(argc, argv); - if (rte_eal_cpu_init() < 0) - rte_panic("Cannot detect lcores\n"); + if (rte_eal_cpu_init() < 0) { + rte_eal_init_alert("Cannot detect lcores."); + rte_errno = ENOTSUP; + return -1; + } fctret = eal_parse_args(argc, argv); - if (fctret < 0) - exit(1); + if (fctret < 0) { + rte_eal_init_alert("Invalid 'command line' arguments."); + rte_errno = EINVAL; + rte_atomic32_clear(&run_once); + return -1; + } if (internal_config.no_hugetlbfs == 0 && internal_config.process_type != RTE_PROC_SECONDARY && internal_config.xen_dom0_support == 0 && - eal_hugepage_info_init() < 0) - rte_panic("Cannot get hugepage information\n"); + eal_hugepage_info_init() < 0) { + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); + return -1; + } if (internal_config.memory == 0 && internal_config.force_sockets == 0) { if (internal_config.no_hugetlbfs) @@ -799,39 +823,59 @@ rte_eal_init(int argc, char **argv) rte_config_init(); - if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) - rte_panic("Cannot init logs\n"); - - if (rte_eal_pci_init() < 0) - rte_panic("Cannot init PCI\n"); + if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) { + rte_eal_init_alert("Cannot init logging."); + rte_errno = ENOMEM; + rte_atomic32_clear(&run_once); + return -1; + } #ifdef VFIO_PRESENT - if (rte_eal_vfio_setup() < 0) - rte_panic("Cannot init VFIO\n"); + if (rte_eal_vfio_setup() < 0) { + rte_eal_init_alert("Cannot init VFIO\n"); + rte_errno = EAGAIN; + rte_atomic32_clear(&run_once); + return -1; + } #endif - if (rte_eal_memory_init() < 0) - rte_panic("Cannot init memory\n"); + if (rte_eal_memory_init() < 0) { + rte_eal_init_alert("Cannot init memory\n"); + rte_errno = ENOMEM; + return -1; + } /* the directories are locked during eal_hugepage_info_init */ eal_hugedirs_unlock(); - if (rte_eal_memzone_init() < 0) - rte_panic("Cannot init memzone\n"); + if (rte_eal_memzone_init() < 0) { + rte_eal_init_alert("Cannot init memzone\n"); + rte_errno = ENODEV; + return -1; + } - if (rte_eal_tailqs_init() < 0) - rte_panic("Cannot init tail queues for objects\n"); + if (rte_eal_tailqs_init() < 0) { + rte_eal_init_alert("Cannot init tail queues for objects\n"); + rte_errno = EFAULT; + return -1; + } - if (rte_eal_alarm_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); + if (rte_eal_alarm_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + /* rte_eal_alarm_init sets rte_errno on failure. 
*/ + return -1; + } - if (rte_eal_timer_init() < 0) - rte_panic("Cannot init HPET or TSC timers\n"); + if (rte_eal_timer_init() < 0) { + rte_eal_init_alert("Cannot init HPET or TSC timers\n"); + rte_errno = ENOTSUP; + return -1; + } eal_check_mem_on_local_socket(); if (eal_plugins_init() < 0) - rte_panic("Cannot init plugins\n"); + rte_eal_init_alert("Cannot init plugins\n"); eal_thread_init_master(rte_config.master_lcore); @@ -841,11 +885,16 @@ rte_eal_init(int argc, char **argv) rte_config.master_lcore, (int)thread_id, cpuset, ret == 0 ? "" : "..."); - if (rte_eal_dev_init() < 0) - rte_panic("Cannot init pmd devices\n"); + if (rte_eal_intr_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + return -1; + } - if (rte_eal_intr_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); + if (rte_bus_scan()) { + rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_errno = ENODEV; + return -1; + } RTE_LCORE_FOREACH_SLAVE(i) { @@ -883,9 +932,12 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); - /* Probe & Initialize PCI devices */ - if (rte_eal_pci_probe()) - rte_panic("Cannot probe PCI\n"); + /* Probe all the buses and devices/drivers on them */ + if (rte_bus_probe()) { + rte_eal_init_alert("Cannot probe devices\n"); + rte_errno = ENOTSUP; + return -1; + } rte_eal_mcfg_complete(); diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c index 8b042abc..fbae4613 100644 --- a/lib/librte_eal/linuxapp/eal/eal_alarm.c +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c @@ -83,7 +83,7 @@ static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER; static struct rte_intr_handle intr_handle = {.fd = -1 }; static int handler_registered = 0; -static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg); +static void eal_alarm_callback(void *arg); int rte_eal_alarm_init(void) @@ -102,8 +102,7 @@ error: } static void -eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused, - void *arg __rte_unused) +eal_alarm_callback(void *arg __rte_unused) { struct timespec now; struct alarm_entry *ap; diff --git a/lib/librte_eal/linuxapp/eal/eal_debug.c b/lib/librte_eal/linuxapp/eal/eal_debug.c index 5fbc17c5..e1c75548 100644 --- a/lib/librte_eal/linuxapp/eal/eal_debug.c +++ b/lib/librte_eal/linuxapp/eal/eal_debug.c @@ -31,7 +31,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
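
Alongside the bus rework, rte_intr_callback_fn loses its intr_handle argument: handlers receive only the opaque cb_arg they were registered with. A sketch of a callback ported to the new prototype (my_ctx is a made-up carrier for whatever state the handler needs):

#include <rte_interrupts.h>

struct my_ctx {
        int port_id;
};

static void
my_intr_cb(void *cb_arg)
{
        struct my_ctx *ctx = cb_arg;

        /* handle the event for ctx->port_id */
        (void)ctx;
}

static int
install_cb(const struct rte_intr_handle *ih, struct my_ctx *ctx)
{
        /* the handle argument is const now, matching the new API */
        return rte_intr_callback_register(ih, my_intr_cb, ctx);
}
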
*/ +#ifdef RTE_BACKTRACE #include <execinfo.h> +#endif #include <stdarg.h> #include <signal.h> #include <stdlib.h> @@ -47,6 +49,7 @@ /* dump the stack of the calling core */ void rte_dump_stack(void) { +#ifdef RTE_BACKTRACE void *func[BACKTRACE_SIZE]; char **symb = NULL; int size; @@ -64,6 +67,7 @@ void rte_dump_stack(void) } free(symb); +#endif /* RTE_BACKTRACE */ } /* not implemented in this environment */ diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 18858e2d..7a21e8f6 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -283,9 +283,12 @@ eal_hugepage_info_init(void) struct dirent *dirent; dir = opendir(sys_dir_path); - if (dir == NULL) - rte_panic("Cannot open directory %s to read system hugepage " - "info\n", sys_dir_path); + if (dir == NULL) { + RTE_LOG(ERR, EAL, + "Cannot open directory %s to read system hugepage info\n", + sys_dir_path); + return -1; + } for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) { struct hugepage_info *hpi; diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 47a3b20a..2e3bd12a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -46,6 +46,7 @@ #include <sys/ioctl.h> #include <sys/eventfd.h> #include <assert.h> +#include <stdbool.h> #include <rte_common.h> #include <rte_interrupts.h> @@ -136,7 +137,7 @@ static pthread_t intr_thread; /* enable legacy (INTx) interrupts */ static int -vfio_enable_intx(struct rte_intr_handle *intr_handle) { +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -183,7 +184,7 @@ vfio_enable_intx(struct rte_intr_handle *intr_handle) { /* disable legacy (INTx) interrupts */ static int -vfio_disable_intx(struct rte_intr_handle *intr_handle) { +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -194,14 +195,14 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) { irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; irq_set->count = 1; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; irq_set->start = 0; ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { - RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", intr_handle->fd); return -1; } @@ -226,7 +227,7 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) { /* enable MSI interrupts */ static int -vfio_enable_msi(struct rte_intr_handle *intr_handle) { +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { int len, ret; char irq_set_buf[IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; @@ -255,7 +256,7 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) { /* disable MSI interrupts */ static int -vfio_disable_msi(struct rte_intr_handle *intr_handle) { +vfio_disable_msi(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -280,7 +281,7 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) { /* enable MSI-X interrupts */ static int -vfio_enable_msix(struct rte_intr_handle *intr_handle) { 
+vfio_enable_msix(const struct rte_intr_handle *intr_handle) { int len, ret; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; @@ -290,12 +291,10 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) { irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; - if (!intr_handle->max_intr) - intr_handle->max_intr = 1; - else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID) - intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1; - - irq_set->count = intr_handle->max_intr; + /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ + irq_set->count = intr_handle->max_intr ? + (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? + RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; irq_set->start = 0; @@ -318,7 +317,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) { /* disable MSI-X interrupts */ static int -vfio_disable_msix(struct rte_intr_handle *intr_handle) { +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; int len, ret; @@ -343,7 +342,7 @@ vfio_disable_msix(struct rte_intr_handle *intr_handle) { #endif static int -uio_intx_intr_disable(struct rte_intr_handle *intr_handle) +uio_intx_intr_disable(const struct rte_intr_handle *intr_handle) { unsigned char command_high; @@ -367,7 +366,7 @@ uio_intx_intr_disable(struct rte_intr_handle *intr_handle) } static int -uio_intx_intr_enable(struct rte_intr_handle *intr_handle) +uio_intx_intr_enable(const struct rte_intr_handle *intr_handle) { unsigned char command_high; @@ -391,7 +390,7 @@ uio_intx_intr_enable(struct rte_intr_handle *intr_handle) } static int -uio_intr_disable(struct rte_intr_handle *intr_handle) +uio_intr_disable(const struct rte_intr_handle *intr_handle) { const int value = 0; @@ -405,7 +404,7 @@ uio_intr_disable(struct rte_intr_handle *intr_handle) } static int -uio_intr_enable(struct rte_intr_handle *intr_handle) +uio_intr_enable(const struct rte_intr_handle *intr_handle) { const int value = 1; @@ -419,7 +418,7 @@ uio_intr_enable(struct rte_intr_handle *intr_handle) } int -rte_intr_callback_register(struct rte_intr_handle *intr_handle, +rte_intr_callback_register(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg) { int ret, wake_thread; @@ -491,7 +490,7 @@ rte_intr_callback_register(struct rte_intr_handle *intr_handle, } int -rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, +rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb_fn, void *cb_arg) { int ret; @@ -555,8 +554,11 @@ rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, } int -rte_intr_enable(struct rte_intr_handle *intr_handle) +rte_intr_enable(const struct rte_intr_handle *intr_handle) { + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) return -1; @@ -599,8 +601,11 @@ rte_intr_enable(struct rte_intr_handle *intr_handle) } int -rte_intr_disable(struct rte_intr_handle *intr_handle) +rte_intr_disable(const struct rte_intr_handle *intr_handle) { + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) return -1; @@ -645,6 +650,7 @@ rte_intr_disable(struct rte_intr_handle *intr_handle) static int eal_intr_process_interrupts(struct 
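
The new RTE_INTR_HANDLE_VDEV type makes rte_intr_enable()/rte_intr_disable() silent no-ops, since a virtual device has no uio/vfio config fd to poke. A sketch of how a vdev PMD would set up its handle, assuming it sources events from an eventfd it owns:

#include <rte_interrupts.h>

static void
vdev_intr_setup(struct rte_intr_handle *ih, int efd)
{
        ih->fd = efd;                   /* e.g. from eventfd(2) */
        ih->type = RTE_INTR_HANDLE_VDEV;
        /* rte_intr_enable()/disable() now return 0 without touching fds */
}
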
epoll_event *events, int nfds) { + bool call = false; int n, bytes_read; struct rte_intr_source *src; struct rte_intr_callback *cb; @@ -693,13 +699,18 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) bytes_read = sizeof(buf.vfio_intr_count); break; #endif + case RTE_INTR_HANDLE_VDEV: case RTE_INTR_HANDLE_EXT: + bytes_read = 0; + call = true; + break; + default: bytes_read = 1; break; } - if (src->intr_handle.type != RTE_INTR_HANDLE_EXT) { + if (bytes_read > 0) { /** * read out to clear the ready-to-be-read flag * for epoll_wait. @@ -716,12 +727,14 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) } else if (bytes_read == 0) RTE_LOG(ERR, EAL, "Read nothing from file " "descriptor %d\n", events[n].data.fd); + else + call = true; } /* grab a lock, again to call callbacks and update status. */ rte_spinlock_lock(&intr_lock); - if (bytes_read > 0) { + if (call) { /* Finally, call all callbacks. */ TAILQ_FOREACH(cb, &src->callbacks, next) { @@ -731,8 +744,7 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) rte_spinlock_unlock(&intr_lock); /* call the actual callback */ - active_cb.cb_fn(&src->intr_handle, - active_cb.cb_arg); + active_cb.cb_fn(active_cb.cb_arg); /*get the lock back. */ rte_spinlock_lock(&intr_lock); @@ -832,7 +844,7 @@ eal_intr_thread_main(__rte_unused void *arg) TAILQ_FOREACH(src, &intr_sources, next) { if (src->callbacks.tqh_first == NULL) continue; /* skip those with no callbacks */ - ev.events = EPOLLIN | EPOLLPRI; + ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP; ev.data.fd = src->intr_handle.fd; /** @@ -872,13 +884,16 @@ rte_eal_intr_init(void) * create a pipe which will be waited by epoll and notified to * rebuild the wait list of epoll. */ - if (pipe(intr_pipe.pipefd) < 0) + if (pipe(intr_pipe.pipefd) < 0) { + rte_errno = errno; return -1; + } /* create the host thread to wait/handle the interrupt */ ret = pthread_create(&intr_thread, NULL, eal_intr_thread_main, NULL); if (ret != 0) { + rte_errno = ret; RTE_LOG(ERR, EAL, "Failed to create thread for interrupt handling\n"); } else { @@ -913,6 +928,14 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) bytes_read = sizeof(buf.vfio_intr_count); break; #endif + case RTE_INTR_HANDLE_VDEV: + /* for vdev, fd points to: + * a. eventfd which does not need to read out; + * b. datapath fd which needs PMD to read out. 
+ */ + return; + case RTE_INTR_HANDLE_EXT: + return; default: bytes_read = 1; RTE_LOG(INFO, EAL, "unexpected intr type\n"); @@ -1141,6 +1164,24 @@ rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd, return rc; } +void +rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle) +{ + uint32_t i; + struct rte_epoll_event *rev; + + for (i = 0; i < intr_handle->nb_efd; i++) { + rev = &intr_handle->elist[i]; + if (rev->status == RTE_EPOLL_INVALID) + continue; + if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) { + /* force free if the entry valid */ + eal_epoll_data_safe_free(rev); + rev->status = RTE_EPOLL_INVALID; + } + } +} + int rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) { @@ -1157,12 +1198,14 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) RTE_LOG(ERR, EAL, "can't setup eventfd, error %i (%s)\n", errno, strerror(errno)); - return -1; + return -errno; } intr_handle->efds[i] = fd; } intr_handle->nb_efd = n; intr_handle->max_intr = NB_OTHER_INTR + n; + } else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) { + /* do nothing, and let vdev driver to initialize this struct */ } else { intr_handle->efds[0] = intr_handle->fd; intr_handle->nb_efd = RTE_MIN(nb_efd, 1U); @@ -1176,19 +1219,8 @@ void rte_intr_efd_disable(struct rte_intr_handle *intr_handle) { uint32_t i; - struct rte_epoll_event *rev; - - for (i = 0; i < intr_handle->nb_efd; i++) { - rev = &intr_handle->elist[i]; - if (rev->status == RTE_EPOLL_INVALID) - continue; - if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) { - /* force free if the entry valid */ - eal_epoll_data_safe_free(rev); - rev->status = RTE_EPOLL_INVALID; - } - } + rte_intr_free_epoll_fd(intr_handle); if (intr_handle->max_intr > intr_handle->nb_efd) { for (i = 0; i < intr_handle->nb_efd; i++) close(intr_handle->efds[i]); @@ -1218,5 +1250,8 @@ rte_intr_cap_multiple(struct rte_intr_handle *intr_handle) if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) return 1; + if (intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 1; + return 0; } diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index a956bb22..ebe06833 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -64,6 +64,7 @@ #define _FILE_OFFSET_BITS 64 #include <errno.h> #include <stdarg.h> +#include <stdbool.h> #include <stdlib.h> #include <stdio.h> #include <stdint.h> @@ -122,26 +123,28 @@ int rte_xen_dom0_supported(void) static uint64_t baseaddr_offset; -static unsigned proc_pagemap_readable; +static bool phys_addrs_available = true; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" static void -test_proc_pagemap_readable(void) +test_phys_addrs_available(void) { - int fd = open("/proc/self/pagemap", O_RDONLY); + uint64_t tmp; + phys_addr_t physaddr; - if (fd < 0) { + /* For dom0, phys addresses can always be available */ + if (rte_xen_dom0_supported()) + return; + + physaddr = rte_mem_virt2phy(&tmp); + if (physaddr == RTE_BAD_PHYS_ADDR) { RTE_LOG(ERR, EAL, - "Cannot open /proc/self/pagemap: %s. " - "virt2phys address translation will not work\n", + "Cannot obtain physical addresses: %s. " + "Only vfio will function.\n", strerror(errno)); - return; + phys_addrs_available = false; } - - /* Is readable */ - close(fd); - proc_pagemap_readable = 1; } /* Lock page in physical memory and prevent from swapping. 
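
rte_intr_free_epoll_fd() is split out of rte_intr_efd_disable() so that callers, vdev drivers in particular, can drop the epoll references to their eventfds without also closing them (efd_disable keeps its old close-everything behaviour). A teardown sketch:

#include <rte_interrupts.h>

static void
dummy_dev_stop(struct rte_intr_handle *ih)
{
        /* remove epoll references only; the fds stay open and
         * remain owned by the driver
         */
        rte_intr_free_epoll_fd(ih);
}
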
*/ @@ -190,7 +193,7 @@ rte_mem_virt2phy(const void *virtaddr) } /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ - if (!proc_pagemap_readable) + if (!phys_addrs_available) return RTE_BAD_PHYS_ADDR; /* standard page size */ @@ -229,6 +232,9 @@ rte_mem_virt2phy(const void *virtaddr) * the pfn (page frame number) are bits 0-54 (see * pagemap.txt in linux Documentation) */ + if ((page & 0x7fffffffffffffULL) == 0) + return RTE_BAD_PHYS_ADDR; + physaddr = ((page & 0x7fffffffffffffULL) * page_size) + ((unsigned long)virtaddr % page_size); @@ -242,7 +248,7 @@ rte_mem_virt2phy(const void *virtaddr) static int find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) { - unsigned i; + unsigned int i; phys_addr_t addr; for (i = 0; i < hpi->num_pages[0]; i++) { @@ -255,6 +261,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) } /* + * For each hugepage in hugepg_tbl, fill the physaddr value sequentially. + */ +static int +set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + unsigned int i; + static phys_addr_t addr; + + for (i = 0; i < hpi->num_pages[0]; i++) { + hugepg_tbl[i].physaddr = addr; + addr += hugepg_tbl[i].size; + } + return 0; +} + +/* * Check whether address-space layout randomization is enabled in * the kernel. This is important for multi-process as it can prevent * two processes mapping data to the same virtual address @@ -313,7 +335,13 @@ get_virtual_area(size_t *size, size_t hugepage_sz) } do { addr = mmap(addr, - (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0); + (*size) + hugepage_sz, PROT_READ, +#ifdef RTE_ARCH_PPC_64 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, +#else + MAP_PRIVATE, +#endif + fd, 0); if (addr == MAP_FAILED) *size -= hugepage_sz; } while (addr == MAP_FAILED && *size > 0); @@ -592,12 +620,12 @@ static int cmp_physaddr(const void *a, const void *b) { #ifndef RTE_ARCH_PPC_64 - const struct hugepage_file *p1 = (const struct hugepage_file *)a; - const struct hugepage_file *p2 = (const struct hugepage_file *)b; + const struct hugepage_file *p1 = a; + const struct hugepage_file *p2 = b; #else /* PowerPC needs memory sorted in reverse order from x86 */ - const struct hugepage_file *p1 = (const struct hugepage_file *)b; - const struct hugepage_file *p2 = (const struct hugepage_file *)a; + const struct hugepage_file *p1 = b; + const struct hugepage_file *p2 = a; #endif if (p1->physaddr < p2->physaddr) return -1; @@ -951,7 +979,7 @@ rte_eal_hugepage_init(void) int nr_hugefiles, nr_hugepages = 0; void *addr; - test_proc_pagemap_readable(); + test_phys_addrs_available(); memset(used_hp, 0, sizeof(used_hp)); @@ -1043,11 +1071,22 @@ rte_eal_hugepage_init(void) continue; } - /* find physical addresses and sockets for each hugepage */ - if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){ - RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; + if (phys_addrs_available) { + /* find physical addresses for each hugepage */ + if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) { + RTE_LOG(DEBUG, EAL, "Failed to find phys addr " + "for %u MB pages\n", + (unsigned int)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + } else { + /* set physical addresses for each hugepage */ + if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) { + RTE_LOG(DEBUG, EAL, "Failed to set phys addr " + "for %u MB pages\n", + (unsigned int)(hpi->hugepage_sz / 0x100000)); + goto fail; + } } if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){ @@ 
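
Callers of rte_mem_virt2phy() should now treat RTE_BAD_PHYS_ADDR as an expected outcome: it is returned both when /proc/self/pagemap is unreadable and, with the new check above, when the kernel reports a zero page frame number to unprivileged readers. A small guard sketch:

#include <rte_memory.h>

static int
buf_is_dma_addressable(const void *va)
{
        return rte_mem_virt2phy(va) != RTE_BAD_PHYS_ADDR;
}
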
-1289,7 +1328,7 @@ rte_eal_hugepage_attach(void) "into secondary processes\n"); } - test_proc_pagemap_readable(); + test_phys_addrs_available(); if (internal_config.xen_dom0_support) { #ifdef RTE_LIBRTE_XEN_DOM0 @@ -1330,7 +1369,13 @@ rte_eal_hugepage_attach(void) * use mmap to get identical addresses as the primary process. */ base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, - PROT_READ, MAP_PRIVATE, fd_zero, 0); + PROT_READ, +#ifdef RTE_ARCH_PPC_64 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, +#else + MAP_PRIVATE, +#endif + fd_zero, 0); if (base_addr == MAP_FAILED || base_addr != mcfg->memseg[s].addr) { max_seg = s; @@ -1426,3 +1471,9 @@ error: close(fd_hugepage); return -1; } + +bool +rte_eal_using_phys_addrs(void) +{ + return phys_addrs_available; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index 876ba381..595622b2 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -35,6 +35,7 @@ #include <dirent.h> #include <rte_log.h> +#include <rte_bus.h> #include <rte_pci.h> #include <rte_eal_memconfig.h> #include <rte_malloc.h> @@ -54,44 +55,7 @@ * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). */ -/* unbind kernel driver for this device */ -int -pci_unbind_kernel_driver(struct rte_pci_device *dev) -{ - int n; - FILE *f; - char filename[PATH_MAX]; - char buf[BUFSIZ]; - struct rte_pci_addr *loc = &dev->addr; - - /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */ - snprintf(filename, sizeof(filename), - "%s/" PCI_PRI_FMT "/driver/unbind", pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, loc->function); - - f = fopen(filename, "w"); - if (f == NULL) /* device was not bound */ - return 0; - - n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n", - loc->domain, loc->bus, loc->devid, loc->function); - if ((n < 0) || (n >= (int)sizeof(buf))) { - RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__); - goto error; - } - if (fwrite(buf, n, 1, f) == 0) { - RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__, - filename); - goto error; - } - - fclose(f); - return 0; - -error: - fclose(f); - return -1; -} +extern struct rte_pci_bus rte_pci_bus; static int pci_get_kernel_driver_by_path(const char *filename, char *dri_name) @@ -124,7 +88,7 @@ pci_get_kernel_driver_by_path(const char *filename, char *dri_name) /* Map pci device */ int -rte_eal_pci_map_device(struct rte_pci_device *dev) +rte_pci_map_device(struct rte_pci_device *dev) { int ret = -1; @@ -138,8 +102,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev) break; case RTE_KDRV_IGB_UIO: case RTE_KDRV_UIO_GENERIC: - /* map resources for devices that use uio */ - ret = pci_uio_map_resource(dev); + if (rte_eal_using_phys_addrs()) { + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + } break; default: RTE_LOG(DEBUG, EAL, @@ -153,12 +119,15 @@ rte_eal_pci_map_device(struct rte_pci_device *dev) /* Unmap pci device */ void -rte_eal_pci_unmap_device(struct rte_pci_device *dev) +rte_pci_unmap_device(struct rte_pci_device *dev) { /* try unmapping the NIC resources using VFIO if it exists */ switch (dev->kdrv) { case RTE_KDRV_VFIO: - RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n"); +#ifdef VFIO_PRESENT + if (pci_vfio_is_enabled()) + pci_vfio_unmap_resource(dev); +#endif break; case RTE_KDRV_IGB_UIO: case RTE_KDRV_UIO_GENERIC: @@ -267,8 +236,7 @@ error: /* Scan one pci sysfs entry, and fill the devices list from it. 
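
The new rte_eal_using_phys_addrs() predicate lets code ask up front whether real physical addresses exist, which is how rte_pci_map_device() above decides to skip UIO mapping. A sketch, assuming the prototype is exposed through rte_memory.h in this release (hedged: only the definition in eal_memory.c is shown here):

#include <stdbool.h>
#include <rte_memory.h>

static bool
can_use_uio(void)
{
        /* false when pagemap is unreadable, e.g. in unprivileged
         * containers; only VFIO-backed DMA works in that case
         */
        return rte_eal_using_phys_addrs();
}
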
*/ static int -pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, - uint8_t devid, uint8_t function) +pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) { char filename[PATH_MAX]; unsigned long tmp; @@ -281,10 +249,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, return -1; memset(dev, 0, sizeof(*dev)); - dev->addr.domain = domain; - dev->addr.bus = bus; - dev->addr.devid = devid; - dev->addr.function = function; + dev->addr = *addr; /* get vendor id */ snprintf(filename, sizeof(filename), "%s/vendor", dirname); @@ -359,6 +324,9 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, dev->device.numa_node = tmp; } + rte_pci_device_name(addr, dev->name, sizeof(dev->name)); + dev->device.name = dev->name; + /* parse resources */ snprintf(filename, sizeof(filename), "%s/resource", dirname); if (pci_parse_sysfs_resource(filename, dev) < 0) { @@ -389,21 +357,19 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, dev->kdrv = RTE_KDRV_NONE; /* device is valid, add in list (sorted) */ - if (TAILQ_EMPTY(&pci_device_list)) { - rte_eal_device_insert(&dev->device); - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { + rte_pci_add_device(dev); } else { struct rte_pci_device *dev2; int ret; - TAILQ_FOREACH(dev2, &pci_device_list, next) { + TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); if (ret > 0) continue; if (ret < 0) { - TAILQ_INSERT_BEFORE(dev2, dev, next); - rte_eal_device_insert(&dev->device); + rte_pci_insert_device(dev2, dev); } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; @@ -413,8 +379,8 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, } return 0; } - rte_eal_device_insert(&dev->device); - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + + rte_pci_add_device(dev); } return 0; @@ -429,16 +395,14 @@ pci_update_device(const struct rte_pci_addr *addr) pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, addr->function); - return pci_scan_one(filename, addr->domain, addr->bus, addr->devid, - addr->function); + return pci_scan_one(filename, addr); } /* * split up a pci address into its constituent parts. 
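
pci_scan_one() now derives the device name with rte_pci_device_name(), the same helper applications can use to render a canonical DomBDF string. For instance:

#include <rte_pci.h>

static void
name_example(void)
{
        struct rte_pci_addr a = {
                .domain = 0, .bus = 0x01, .devid = 0x00, .function = 0,
        };
        char name[PCI_PRI_STR_SIZE];

        rte_pci_device_name(&a, name, sizeof(name));
        /* name now holds "0000:01:00.0" */
}
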
*/ static int -parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, - uint8_t *bus, uint8_t *devid, uint8_t *function) +parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) { /* first split on ':' */ union splitaddr { @@ -466,10 +430,10 @@ parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, /* now convert to int values */ errno = 0; - *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); - *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); - *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); - *function = (uint8_t)strtoul(splitaddr.function, NULL, 10); + addr->domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); + addr->bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); + addr->devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); + addr->function = (uint8_t)strtoul(splitaddr.function, NULL, 10); if (errno != 0) goto error; @@ -485,13 +449,16 @@ error: * list */ int -rte_eal_pci_scan(void) +rte_pci_scan(void) { struct dirent *e; DIR *dir; char dirname[PATH_MAX]; - uint16_t domain; - uint8_t bus, devid, function; + struct rte_pci_addr addr; + + /* for debug purposes, PCI can be disabled */ + if (internal_config.no_pci) + return 0; dir = opendir(pci_get_sysfs_path()); if (dir == NULL) { @@ -504,13 +471,13 @@ rte_eal_pci_scan(void) if (e->d_name[0] == '.') continue; - if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain, - &bus, &devid, &function) != 0) + if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0) continue; snprintf(dirname, sizeof(dirname), "%s/%s", pci_get_sysfs_path(), e->d_name); - if (pci_scan_one(dirname, domain, bus, devid, function) < 0) + + if (pci_scan_one(dirname, &addr) < 0) goto error; } closedir(dir); @@ -522,8 +489,8 @@ error: } /* Read PCI config space. */ -int rte_eal_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset) +int rte_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset) { const struct rte_intr_handle *intr_handle = &device->intr_handle; @@ -547,8 +514,8 @@ int rte_eal_pci_read_config(const struct rte_pci_device *device, } /* Write PCI config space. 
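
Config-space access keeps its semantics under the shorter names. A sketch reading the 16-bit vendor ID at offset 0 (negative return means error, per the documentation above):

#include <stdint.h>
#include <rte_pci.h>

static int
read_vendor_id(const struct rte_pci_device *dev, uint16_t *vendor)
{
        if (rte_pci_read_config(dev, vendor, sizeof(*vendor), 0) < 0)
                return -1;
        return 0;
}
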
*/ -int rte_eal_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset) +int rte_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset) { const struct rte_intr_handle *intr_handle = &device->intr_handle; @@ -574,7 +541,7 @@ int rte_eal_pci_write_config(const struct rte_pci_device *device, #if defined(RTE_ARCH_X86) static int pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, - struct rte_pci_ioport *p) + struct rte_pci_ioport *p) { uint16_t start, end; FILE *fp; @@ -632,8 +599,8 @@ pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, #endif int -rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) +rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) { int ret = -1; @@ -670,8 +637,8 @@ rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, } void -rte_eal_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) +rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { #ifdef VFIO_PRESENT @@ -696,8 +663,8 @@ rte_eal_pci_ioport_read(struct rte_pci_ioport *p, } void -rte_eal_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) +rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { #ifdef VFIO_PRESENT @@ -722,7 +689,7 @@ rte_eal_pci_ioport_write(struct rte_pci_ioport *p, } int -rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) +rte_pci_ioport_unmap(struct rte_pci_ioport *p) { int ret = -1; @@ -754,19 +721,3 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) return ret; } - -/* Init the PCI EAL subsystem */ -int -rte_eal_pci_init(void) -{ - /* for debug purposes, PCI can be disabled */ - if (internal_config.no_pci) - return 0; - - if (rte_eal_pci_scan() < 0) { - RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); - return -1; - } - - return 0; -} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h index 6a960d1b..ae2980d6 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h +++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h @@ -88,8 +88,9 @@ void pci_vfio_ioport_write(struct rte_pci_ioport *p, const void *data, size_t len, off_t offset); int pci_vfio_ioport_unmap(struct rte_pci_ioport *p); -/* map VFIO resource prototype */ +/* map/unmap VFIO resource prototype */ int pci_vfio_map_resource(struct rte_pci_device *dev); +int pci_vfio_unmap_resource(struct rte_pci_device *dev); #endif diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index 3e4ffb57..fa10329f 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -38,6 +38,7 @@ #include <inttypes.h> #include <sys/stat.h> #include <sys/mman.h> +#include <sys/sysmacros.h> #include <linux/pci_regs.h> #if defined(RTE_ARCH_X86) @@ -230,7 +231,7 @@ pci_uio_free_resource(struct rte_pci_device *dev, close(dev->intr_handle.uio_cfg_fd); dev->intr_handle.uio_cfg_fd = -1; } - if (dev->intr_handle.fd) { + if (dev->intr_handle.fd >= 0) { close(dev->intr_handle.fd); dev->intr_handle.fd = -1; dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c index 5f478c59..2be13195 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ 
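
The ioport helpers follow the same rename. A sketch in the style of a legacy virtio driver, the main consumer of this API; the BAR number and the offset 19 for the ISR register follow the legacy virtio layout and are only illustrative here:

#include <stdint.h>
#include <rte_pci.h>

static int
read_legacy_isr(struct rte_pci_device *dev, uint8_t *isr)
{
        struct rte_pci_ioport p;

        if (rte_pci_ioport_map(dev, 0 /* BAR */, &p) < 0)
                return -1;
        rte_pci_ioport_read(&p, isr, sizeof(*isr), 19 /* ISR offset */);
        return rte_pci_ioport_unmap(&p);
}
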
b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -38,6 +38,7 @@ #include <sys/socket.h> #include <sys/ioctl.h> #include <sys/mman.h> +#include <stdbool.h> #include <rte_log.h> #include <rte_pci.h> @@ -172,7 +173,7 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset, /* set PCI bus mastering */ static int -pci_vfio_set_bus_master(int dev_fd) +pci_vfio_set_bus_master(int dev_fd, bool op) { uint16_t reg; int ret; @@ -185,8 +186,11 @@ pci_vfio_set_bus_master(int dev_fd) return -1; } - /* set the master bit */ - reg |= PCI_COMMAND_MASTER; + if (op) + /* set the master bit */ + reg |= PCI_COMMAND_MASTER; + else + reg &= ~(PCI_COMMAND_MASTER); ret = pwrite64(dev_fd, ®, sizeof(reg), VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + @@ -355,7 +359,8 @@ pci_vfio_map_resource(struct rte_pci_device *dev) } else { /* if we're in a secondary process, just find our tailq entry */ TAILQ_FOREACH(vfio_res, vfio_res_list, next) { - if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) + if (rte_eal_compare_pci_addr(&vfio_res->pci_addr, + &dev->addr)) continue; break; } @@ -517,7 +522,7 @@ pci_vfio_map_resource(struct rte_pci_device *dev) } /* set bus mastering for the device */ - if (pci_vfio_set_bus_master(vfio_dev_fd)) { + if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr); close(vfio_dev_fd); rte_free(vfio_res); @@ -535,6 +540,79 @@ pci_vfio_map_resource(struct rte_pci_device *dev) } int +pci_vfio_unmap_resource(struct rte_pci_device *dev) +{ + char pci_addr[PATH_MAX] = {0}; + struct rte_pci_addr *loc = &dev->addr; + int i, ret; + struct mapped_pci_resource *vfio_res = NULL; + struct mapped_pci_res_list *vfio_res_list; + + struct pci_map *maps; + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + + + if (close(dev->intr_handle.fd) < 0) { + RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n", + pci_addr); + return -1; + } + + if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) { + RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n", + pci_addr); + return -1; + } + + ret = vfio_release_device(pci_get_sysfs_path(), pci_addr, + dev->intr_handle.vfio_dev_fd); + if (ret < 0) { + RTE_LOG(ERR, EAL, + "%s(): cannot release device\n", __func__); + return ret; + } + + vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + /* Get vfio_res */ + TAILQ_FOREACH(vfio_res, vfio_res_list, next) { + if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) + continue; + break; + } + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", + pci_addr); + return -1; + } + + /* unmap BARs */ + maps = vfio_res->maps; + + RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n", + pci_addr); + for (i = 0; i < (int) vfio_res->nb_maps; i++) { + + /* + * We do not need to be aware of MSI-X table BAR mappings as + * when mapping. 
Just using current maps array is enough + */ + if (maps[i].addr) { + RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n", + pci_addr, maps[i].addr); + pci_unmap_resource(maps[i].addr, maps[i].size); + } + } + + TAILQ_REMOVE(vfio_res_list, vfio_res, next); + + return 0; +} + +int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) { diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 702f7a2e..53ac725d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -50,12 +50,15 @@ static struct vfio_config vfio_cfg; static int vfio_type1_dma_map(int); +static int vfio_spapr_dma_map(int); static int vfio_noiommu_dma_map(int); /* IOMMU types we support */ static const struct vfio_iommu_type iommu_types[] = { /* x86 IOMMU, otherwise known as type 1 */ { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map}, + /* ppc64 IOMMU, otherwise known as spapr */ + { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map}, /* IOMMU-less mode */ { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map}, }; @@ -65,13 +68,32 @@ vfio_get_group_fd(int iommu_group_no) { int i; int vfio_group_fd; + int group_idx = -1; char filename[PATH_MAX]; /* check if we already have the group descriptor open */ - for (i = 0; i < vfio_cfg.vfio_group_idx; i++) + for (i = 0; i < VFIO_MAX_GROUPS; i++) if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no) return vfio_cfg.vfio_groups[i].fd; + /* Lets see first if there is room for a new group */ + if (vfio_cfg.vfio_active_groups == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); + return -1; + } + + /* Now lets get an index for the new group */ + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg.vfio_groups[i].group_no == -1) { + group_idx = i; + break; + } + + /* This should not happen */ + if (group_idx == -1) { + RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); + return -1; + } /* if primary, try to open the group */ if (internal_config.process_type == RTE_PROC_PRIMARY) { /* try regular group format */ @@ -101,14 +123,9 @@ vfio_get_group_fd(int iommu_group_no) /* noiommu group found */ } - /* if the fd is valid, create a new group for it */ - if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) { - RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); - close(vfio_group_fd); - return -1; - } - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; + vfio_cfg.vfio_groups[group_idx].group_no = iommu_group_no; + vfio_cfg.vfio_groups[group_idx].fd = vfio_group_fd; + vfio_cfg.vfio_active_groups++; return vfio_group_fd; } /* if we're in a secondary process, request group fd from the primary @@ -155,14 +172,115 @@ vfio_get_group_fd(int iommu_group_no) return -1; } + +static int +get_vfio_group_idx(int vfio_group_fd) +{ + int i; + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg.vfio_groups[i].fd == vfio_group_fd) + return i; + return -1; +} + +static void +vfio_group_device_get(int vfio_group_fd) +{ + int i; + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0 || i > VFIO_MAX_GROUPS) + RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); + else + vfio_cfg.vfio_groups[i].devices++; +} + static void -clear_current_group(void) +vfio_group_device_put(int vfio_group_fd) { - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0; - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1; + int i; + + i = get_vfio_group_idx(vfio_group_fd); + 
if (i < 0 || i > VFIO_MAX_GROUPS) + RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); + else + vfio_cfg.vfio_groups[i].devices--; +} + +static int +vfio_group_device_count(int vfio_group_fd) +{ + int i; + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0 || i > VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); + return -1; + } + + return vfio_cfg.vfio_groups[i].devices; +} + +int +clear_group(int vfio_group_fd) +{ + int i; + int socket_fd, ret; + + if (internal_config.process_type == RTE_PROC_PRIMARY) { + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0) + return -1; + vfio_cfg.vfio_groups[i].group_no = -1; + vfio_cfg.vfio_groups[i].fd = -1; + vfio_cfg.vfio_groups[i].devices = 0; + vfio_cfg.vfio_active_groups--; + return 0; + } + + /* This is just for SECONDARY processes */ + socket_fd = vfio_mp_sync_connect_to_primary(); + + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); + return -1; + } + + if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) { + RTE_LOG(ERR, EAL, " cannot request container fd!\n"); + close(socket_fd); + return -1; + } + + if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) { + RTE_LOG(ERR, EAL, " cannot send group fd!\n"); + close(socket_fd); + return -1; + } + + ret = vfio_mp_sync_receive_request(socket_fd); + switch (ret) { + case SOCKET_NO_FD: + RTE_LOG(ERR, EAL, " BAD VFIO group fd!\n"); + close(socket_fd); + break; + case SOCKET_OK: + close(socket_fd); + return 0; + case SOCKET_ERR: + RTE_LOG(ERR, EAL, " Socket error\n"); + close(socket_fd); + break; + default: + RTE_LOG(ERR, EAL, " UNKNOWN reply, %d\n", ret); + close(socket_fd); + } + return -1; } -int vfio_setup_device(const char *sysfs_base, const char *dev_addr, +int +vfio_setup_device(const char *sysfs_base, const char *dev_addr, int *vfio_dev_fd, struct vfio_device_info *device_info) { struct vfio_group_status group_status = { @@ -189,18 +307,10 @@ int vfio_setup_device(const char *sysfs_base, const char *dev_addr, if (vfio_group_fd < 0) return -1; - /* store group fd */ - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; - /* if group_fd == 0, that means the device isn't managed by VFIO */ if (vfio_group_fd == 0) { - RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", dev_addr); - /* we store 0 as group fd to distinguish between existing but - * unbound VFIO groups, and groups that don't exist at all. 
- */ - vfio_cfg.vfio_group_idx++; return 1; } @@ -215,12 +325,12 @@ int vfio_setup_device(const char *sysfs_base, const char *dev_addr, RTE_LOG(ERR, EAL, " %s cannot get group status, " "error %i (%s)\n", dev_addr, errno, strerror(errno)); close(vfio_group_fd); - clear_current_group(); + clear_group(vfio_group_fd); return -1; } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", dev_addr); close(vfio_group_fd); - clear_current_group(); + clear_group(vfio_group_fd); return -1; } @@ -234,60 +344,131 @@ int vfio_setup_device(const char *sysfs_base, const char *dev_addr, RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, " "error %i (%s)\n", dev_addr, errno, strerror(errno)); close(vfio_group_fd); - clear_current_group(); + clear_group(vfio_group_fd); return -1; } + /* - * at this point we know that this group has been successfully - * initialized, so we increment vfio_group_idx to indicate that we can - * add new groups. + * pick an IOMMU type and set up DMA mappings for container + * + * needs to be done only once, only when first group is + * assigned to a container and only in primary process. + * Note this can happen several times with the hotplug + * functionality. */ - vfio_cfg.vfio_group_idx++; - } - - /* - * pick an IOMMU type and set up DMA mappings for container - * - * needs to be done only once, only when at least one group is assigned to - * a container and only in primary process - */ - if (internal_config.process_type == RTE_PROC_PRIMARY && - vfio_cfg.vfio_container_has_dma == 0) { - /* select an IOMMU type which we will be using */ - const struct vfio_iommu_type *t = + if (internal_config.process_type == RTE_PROC_PRIMARY && + vfio_cfg.vfio_active_groups == 1) { + /* select an IOMMU type which we will be using */ + const struct vfio_iommu_type *t = vfio_set_iommu_type(vfio_cfg.vfio_container_fd); - if (!t) { - RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", dev_addr); - return -1; - } - ret = t->dma_map_func(vfio_cfg.vfio_container_fd); - if (ret) { - RTE_LOG(ERR, EAL, " %s DMA remapping failed, " - "error %i (%s)\n", dev_addr, errno, strerror(errno)); - return -1; + if (!t) { + RTE_LOG(ERR, EAL, + " %s failed to select IOMMU type\n", + dev_addr); + close(vfio_group_fd); + clear_group(vfio_group_fd); + return -1; + } + ret = t->dma_map_func(vfio_cfg.vfio_container_fd); + if (ret) { + RTE_LOG(ERR, EAL, + " %s DMA remapping failed, error %i (%s)\n", + dev_addr, errno, strerror(errno)); + close(vfio_group_fd); + clear_group(vfio_group_fd); + return -1; + } } - vfio_cfg.vfio_container_has_dma = 1; } /* get a file descriptor for the device */ *vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr); if (*vfio_dev_fd < 0) { - /* if we cannot get a device fd, this simply means that this - * particular port is not bound to VFIO - */ - RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + /* if we cannot get a device fd, this implies a problem with + * the VFIO group or the container not having IOMMU configured. 
+ */ + + RTE_LOG(WARNING, EAL, "Getting a vfio_dev_fd for %s failed\n", dev_addr); - return 1; + close(vfio_group_fd); + clear_group(vfio_group_fd); + return -1; } /* test and setup the device */ ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info); if (ret) { RTE_LOG(ERR, EAL, " %s cannot get device info, " - "error %i (%s)\n", dev_addr, errno, strerror(errno)); + "error %i (%s)\n", dev_addr, errno, + strerror(errno)); close(*vfio_dev_fd); + close(vfio_group_fd); + clear_group(vfio_group_fd); return -1; } + vfio_group_device_get(vfio_group_fd); + + return 0; +} + +int +vfio_release_device(const char *sysfs_base, const char *dev_addr, + int vfio_dev_fd) +{ + struct vfio_group_status group_status = { + .argsz = sizeof(group_status) + }; + int vfio_group_fd; + int iommu_group_no; + int ret; + + /* get group number */ + ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no); + if (ret <= 0) { + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver\n", + dev_addr); + /* This is an error at this point. */ + return -1; + } + + /* get the actual group fd */ + vfio_group_fd = vfio_get_group_fd(iommu_group_no); + if (vfio_group_fd <= 0) { + RTE_LOG(INFO, EAL, "vfio_get_group_fd failed for %s\n", + dev_addr); + return -1; + } + + /* At this point we got an active group. Closing it will make the + * container detachment. If this is the last active group, VFIO kernel + * code will unset the container and the IOMMU mappings. + */ + + /* Closing a device */ + if (close(vfio_dev_fd) < 0) { + RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n", + dev_addr); + return -1; + } + + /* An VFIO group can have several devices attached. Just when there is + * no devices remaining should the group be closed. + */ + vfio_group_device_put(vfio_group_fd); + if (!vfio_group_device_count(vfio_group_fd)) { + + if (close(vfio_group_fd) < 0) { + RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n", + dev_addr); + return -1; + } + + if (clear_group(vfio_group_fd) < 0) { + RTE_LOG(INFO, EAL, "Error when clearing group for %s\n", + dev_addr); + return -1; + } + } return 0; } @@ -302,6 +483,7 @@ vfio_enable(const char *modname) for (i = 0; i < VFIO_MAX_GROUPS; i++) { vfio_cfg.vfio_groups[i].fd = -1; vfio_cfg.vfio_groups[i].group_no = -1; + vfio_cfg.vfio_groups[i].devices = 0; } /* inform the user that we are probing for VFIO */ @@ -531,7 +713,8 @@ vfio_type1_dma_map(int vfio_container_fd) if (ret) { RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " - "error %i (%s)\n", errno, strerror(errno)); + "error %i (%s)\n", errno, + strerror(errno)); return -1; } } @@ -540,6 +723,93 @@ vfio_type1_dma_map(int vfio_container_fd) } static int +vfio_spapr_dma_map(int vfio_container_fd) +{ + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); + int i, ret; + + struct vfio_iommu_spapr_register_memory reg = { + .argsz = sizeof(reg), + .flags = 0 + }; + struct vfio_iommu_spapr_tce_info info = { + .argsz = sizeof(info), + }; + struct vfio_iommu_spapr_tce_create create = { + .argsz = sizeof(create), + }; + struct vfio_iommu_spapr_tce_remove remove = { + .argsz = sizeof(remove), + }; + + /* query spapr iommu info */ + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + if (ret) { + RTE_LOG(ERR, EAL, " cannot get iommu info, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* remove default DMA of 32 bit window */ + remove.start_addr = info.dma32_window_start; + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove); + if (ret) { + RTE_LOG(ERR, EAL, " 
cannot remove default DMA window, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* calculate window size based on number of hugepages configured */ + create.window_size = rte_eal_get_physmem_size(); + create.page_shift = __builtin_ctzll(ms->hugepage_sz); + create.levels = 2; + + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create); + if (ret) { + RTE_LOG(ERR, EAL, " cannot create new DMA window, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* map all DPDK segments for DMA. Use 1:1 PA to IOVA mapping */ + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + struct vfio_iommu_type1_dma_map dma_map; + + if (ms[i].addr == NULL) + break; + + reg.vaddr = (uintptr_t) ms[i].addr; + reg.size = ms[i].len; + ret = ioctl(vfio_container_fd, + VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg); + if (ret) { + RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + memset(&dma_map, 0, sizeof(dma_map)); + dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); + dma_map.vaddr = ms[i].addr_64; + dma_map.size = ms[i].len; + dma_map.iova = ms[i].phys_addr; + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | + VFIO_DMA_MAP_FLAG_WRITE; + + ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); + + if (ret) { + RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + } + + return 0; +} + +static int vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) { /* No-IOMMU mode does not need DMA mapping */
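
/* Illustrative arithmetic for the window created above (sizes assumed):
 * with 16 MB hugepages and 2 GB of configured hugepage memory,
 *   create.page_shift  = __builtin_ctzll(1ULL << 24)  = 24
 *   create.window_size = rte_eal_get_physmem_size()   = 0x80000000
 * i.e. the new DMA window covers the whole 1:1 PA-to-IOVA range that the
 * loop above then registers and maps segment by segment.
 */

diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 29f7f3ec..5ff63e5d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -54,6 +54,62 @@ #define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU +#ifndef VFIO_SPAPR_TCE_v2_IOMMU +#define RTE_VFIO_SPAPR 7 +#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) +#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) +#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) + +struct vfio_iommu_spapr_register_memory { + uint32_t argsz; + uint32_t flags; + uint64_t vaddr; + uint64_t size; +}; + +struct vfio_iommu_spapr_tce_create { + uint32_t argsz; + uint32_t flags; + /* in */ + uint32_t page_shift; + uint32_t __resv1; + uint64_t window_size; + uint32_t levels; + uint32_t __resv2; + /* out */ + uint64_t start_addr; +}; + +struct vfio_iommu_spapr_tce_remove { + uint32_t argsz; + uint32_t flags; + /* in */ + uint64_t start_addr; +}; + +struct vfio_iommu_spapr_tce_ddw_info { + uint64_t pgsizes; + uint32_t max_dynamic_windows_supported; + uint32_t levels; +}; + +/* SPAPR_v2 is not present, but SPAPR might be */ +#ifndef VFIO_SPAPR_TCE_IOMMU +#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + +struct vfio_iommu_spapr_tce_info { + uint32_t argsz; + uint32_t flags; + uint32_t dma32_window_start; + uint32_t dma32_window_size; + struct vfio_iommu_spapr_tce_ddw_info ddw; +}; +#endif /* VFIO_SPAPR_TCE_IOMMU */ + +#else /* VFIO_SPAPR_TCE_v2_IOMMU */ +#define RTE_VFIO_SPAPR VFIO_SPAPR_TCE_v2_IOMMU +#endif + #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) #define RTE_VFIO_NOIOMMU 8 #else @@ -78,13 +134,13 @@ int vfio_mp_sync_connect_to_primary(void); struct vfio_group { int group_no; int fd; + int devices; }; struct vfio_config { int vfio_enabled; int vfio_container_fd; - int vfio_container_has_dma; - int vfio_group_idx; + int vfio_active_groups; struct vfio_group 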
vfio_groups[VFIO_MAX_GROUPS]; }; @@ -130,6 +186,10 @@ vfio_get_group_no(const char *sysfs_base, int vfio_get_group_fd(int iommu_group_no); +/* remove group fd from internal VFIO group fd array */ +int +clear_group(int vfio_group_fd); + /** * Setup vfio_cfg for the device identified by its address. It discovers * the configured I/O MMU groups or sets a new one for the device. If a new @@ -140,6 +200,8 @@ vfio_get_group_fd(int iommu_group_no); int vfio_setup_device(const char *sysfs_base, const char *dev_addr, int *vfio_dev_fd, struct vfio_device_info *device_info); +int vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); + int vfio_enable(const char *modname); int vfio_is_enabled(const char *modname); @@ -150,6 +212,7 @@ int vfio_mp_sync_setup(void); #define SOCKET_REQ_CONTAINER 0x100 #define SOCKET_REQ_GROUP 0x200 +#define SOCKET_CLR_GROUP 0x300 #define SOCKET_OK 0x0 #define SOCKET_NO_FD 0x1 #define SOCKET_ERR 0xFF diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c index fb4a2f84..7e8095cb 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c @@ -267,7 +267,7 @@ vfio_mp_sync_connect_to_primary(void) static __attribute__((noreturn)) void * vfio_mp_sync_thread(void __rte_unused * arg) { - int ret, fd, vfio_group_no; + int ret, fd, vfio_data; /* wait for requests on the socket */ for (;;) { @@ -305,13 +305,13 @@ vfio_mp_sync_thread(void __rte_unused * arg) break; case SOCKET_REQ_GROUP: /* wait for group number */ - vfio_group_no = vfio_mp_sync_receive_request(conn_sock); - if (vfio_group_no < 0) { + vfio_data = vfio_mp_sync_receive_request(conn_sock); + if (vfio_data < 0) { close(conn_sock); continue; } - fd = vfio_get_group_fd(vfio_group_no); + fd = vfio_get_group_fd(vfio_data); if (fd < 0) vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); @@ -324,6 +324,21 @@ vfio_mp_sync_thread(void __rte_unused * arg) vfio_mp_sync_send_fd(conn_sock, fd); } break; + case SOCKET_CLR_GROUP: + /* wait for group fd */ + vfio_data = vfio_mp_sync_receive_request(conn_sock); + if (vfio_data < 0) { + close(conn_sock); + continue; + } + + ret = clear_group(vfio_data); + + if (ret < 0) + vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); + else + vfio_mp_sync_send_request(conn_sock, SOCKET_OK); + break; default: vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); break;
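
/* Illustrative sketch (not part of the patch): the secondary-process side
 * of the SOCKET_CLR_GROUP handler added above. The mp-sync helpers and the
 * SOCKET_* codes are the ones declared in eal_vfio.h in this diff; the
 * exact error-handling shape is an assumption that just mirrors the
 * existing SOCKET_REQ_GROUP pattern.
 */
static int
example_clear_group_remote(int vfio_group_fd)
{
	int socket_fd = vfio_mp_sync_connect_to_primary();

	if (socket_fd < 0)
		return -1;

	if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0 ||
			vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) {
		close(socket_fd);
		return -1;
	}

	/* primary answers SOCKET_OK on success, SOCKET_NO_FD otherwise */
	if (vfio_mp_sync_receive_request(socket_fd) != SOCKET_OK) {
		close(socket_fd);
		return -1;
	}

	close(socket_fd);
	return 0;
}

diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h index d459bf48..6daffebf 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h @@ -49,8 +49,9 @@ enum rte_intr_handle_type { RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */ RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */ RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */ - RTE_INTR_HANDLE_ALARM, /**< alarm handle */ - RTE_INTR_HANDLE_EXT, /**< external handler */ + RTE_INTR_HANDLE_ALARM, /**< alarm handle */ + RTE_INTR_HANDLE_EXT, /**< external handler */ + RTE_INTR_HANDLE_VDEV, /**< virtual device */ RTE_INTR_HANDLE_MAX }; @@ -171,6 +172,15 @@ rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd, int op, unsigned int vec, void *data); /** + * It deletes registered eventfds. + * + * @param intr_handle + * Pointer to the interrupt handle. 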
+ */ +void +rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle); + +/** * It enables the packet I/O interrupt event if it's necessary. * It creates event fd for each interrupt vector when MSIX is used, * otherwise it multiplexes a single event fd. diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h index 09713b0c..2ac879fd 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -116,11 +116,10 @@ struct rte_kni_fifo { struct rte_kni_mbuf { void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); uint64_t buf_physaddr; - char pad0[2]; uint16_t data_off; /**< Start address of data in segment buffer. */ char pad1[2]; - uint8_t nb_segs; /**< Number of segments. */ - char pad4[1]; + uint16_t nb_segs; /**< Number of segments. */ + char pad4[2]; uint64_t ol_flags; /**< Offload features. */ char pad2[4]; uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */ diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map index 83721ba5..670bab3a 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -6,8 +6,6 @@ DPDK_2.0 { eal_parse_sysfs_value; eal_timer_source; lcore_config; - pci_device_list; - pci_driver_list; per_lcore__lcore_id; per_lcore__rte_errno; rte_calloc; @@ -22,12 +20,9 @@ DPDK_2.0 { rte_dump_tailq; rte_eal_alarm_cancel; rte_eal_alarm_set; - rte_eal_dev_init; rte_eal_devargs_add; rte_eal_devargs_dump; rte_eal_devargs_type_count; - rte_eal_driver_register; - rte_eal_driver_unregister; rte_eal_get_configuration; rte_eal_get_lcore_state; rte_eal_get_physmem_layout; @@ -40,18 +35,10 @@ DPDK_2.0 { rte_eal_mp_remote_launch; rte_eal_mp_wait_lcore; rte_eal_parse_devargs_str; - rte_eal_pci_dump; - rte_eal_pci_probe; - rte_eal_pci_probe_one; - rte_eal_pci_register; - rte_eal_pci_scan; - rte_eal_pci_unregister; rte_eal_process_type; rte_eal_remote_launch; rte_eal_tailq_lookup; rte_eal_tailq_register; - rte_eal_vdev_init; - rte_eal_vdev_uninit; rte_eal_wait_lcore; rte_exit; rte_free; @@ -66,11 +53,8 @@ DPDK_2.0 { rte_intr_disable; rte_intr_enable; rte_log; - rte_log_add_in_history; rte_log_cur_msg_loglevel; rte_log_cur_msg_logtype; - rte_log_dump_history; - rte_log_set_history; rte_logs; rte_malloc; rte_malloc_dump_stats; @@ -114,9 +98,6 @@ DPDK_2.0 { DPDK_2.1 { global: - rte_eal_pci_detach; - rte_eal_pci_read_config; - rte_eal_pci_write_config; rte_epoll_ctl; rte_epoll_wait; rte_intr_allow_others; @@ -146,12 +127,6 @@ DPDK_16.04 { global: rte_cpu_get_flag_name; - rte_eal_pci_ioport_map; - rte_eal_pci_ioport_read; - rte_eal_pci_ioport_unmap; - rte_eal_pci_ioport_write; - rte_eal_pci_map_device; - rte_eal_pci_unmap_device; rte_eal_primary_proc_alive; } DPDK_2.2; @@ -174,7 +149,52 @@ DPDK_16.11 { rte_delay_us_callback_register; rte_eal_dev_attach; rte_eal_dev_detach; - rte_eal_vdrv_register; - rte_eal_vdrv_unregister; } DPDK_16.07; + +DPDK_17.02 { + global: + + rte_bus_dump; + rte_bus_probe; + rte_bus_register; + rte_bus_scan; + rte_bus_unregister; + +} DPDK_16.11; + +DPDK_17.05 { + global: + + rte_cpu_is_supported; + rte_intr_free_epoll_fd; + rte_log_dump; + rte_log_get_global_level; + rte_log_register; + rte_log_set_global_level; + rte_log_set_level; + rte_log_set_level_regexp; + rte_pci_detach; + rte_pci_dump; + rte_pci_ioport_map; + rte_pci_ioport_read; + rte_pci_ioport_unmap; + 
rte_pci_ioport_write; + rte_pci_map_device; + rte_pci_probe; + rte_pci_probe_one; + rte_pci_read_config; + rte_pci_register; + rte_pci_scan; + rte_pci_unmap_device; + rte_pci_unregister; + rte_pci_write_config; + rte_vdev_init; + rte_vdev_register; + rte_vdev_uninit; + rte_vdev_unregister; + vfio_get_container_fd; + vfio_get_group_fd; + vfio_get_group_no; + +} DPDK_17.02; diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h index 0d781e48..b800a53c 100644 --- a/lib/librte_eal/linuxapp/igb_uio/compat.h +++ b/lib/librte_eal/linuxapp/igb_uio/compat.h @@ -123,3 +123,7 @@ static bool pci_check_and_mask_intx(struct pci_dev *pdev) } #endif /* < 3.3.0 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) +#define HAVE_PCI_ENABLE_MSIX +#endif diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index df41e457..b9d427c5 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -314,7 +314,7 @@ igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info) } } - return (iom != 0) ? ret : -ENOENT; + return (iom != 0 || iop != 0) ? ret : -ENOENT; } #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) @@ -325,7 +325,11 @@ static int igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct rte_uio_pci_dev *udev; +#ifdef HAVE_PCI_ENABLE_MSIX struct msix_entry msix_entry; +#endif + dma_addr_t map_dma_addr; + void *map_addr; int err; udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL); @@ -379,18 +383,28 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) switch (igbuio_intr_mode_preferred) { case RTE_INTR_MODE_MSIX: /* Only 1 msi-x vector needed */ +#ifdef HAVE_PCI_ENABLE_MSIX msix_entry.entry = 0; if (pci_enable_msix(dev, &msix_entry, 1) == 0) { dev_dbg(&dev->dev, "using MSI-X"); + udev->info.irq_flags = IRQF_NO_THREAD; udev->info.irq = msix_entry.vector; udev->mode = RTE_INTR_MODE_MSIX; break; } +#else + if (pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_MSIX) == 1) { + dev_dbg(&dev->dev, "using MSI-X"); + udev->info.irq = pci_irq_vector(dev, 0); + udev->mode = RTE_INTR_MODE_MSIX; + break; + } +#endif /* fall back to INTX */ case RTE_INTR_MODE_LEGACY: if (pci_intx_mask_supported(dev)) { dev_dbg(&dev->dev, "using INTX"); - udev->info.irq_flags = IRQF_SHARED; + udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD; udev->info.irq = dev->irq; udev->mode = RTE_INTR_MODE_LEGACY; break; @@ -423,6 +437,27 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_info(&dev->dev, "uio device registered with irq %lx\n", udev->info.irq); + /* + * Do a harmless DMA mapping to attach the device to the IOMMU + * identity mapping when the kernel boots with iommu=pt. + * Note this is not a problem if there is no IOMMU at all. 
+ */ + map_addr = dma_alloc_coherent(&dev->dev, 1024, &map_dma_addr, + GFP_KERNEL); + if (map_addr) + memset(map_addr, 0, 1024); + + if (!map_addr) + dev_info(&dev->dev, "dma mapping failed\n"); + else { + dev_info(&dev->dev, "mapping 1K dma=%#llx host=%p\n", + (unsigned long long)map_dma_addr, map_addr); + + dma_free_coherent(&dev->dev, 1024, map_addr, map_dma_addr); + dev_info(&dev->dev, "unmapping 1K dma=%#llx host=%p\n", + (unsigned long long)map_dma_addr, map_addr); + } + return 0; fail_remove_group: diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile index 4e99e07e..154c528d 100644 --- a/lib/librte_eal/linuxapp/kni/Makefile +++ b/lib/librte_eal/linuxapp/kni/Makefile @@ -44,45 +44,43 @@ MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -I$(SRCDIR)/ethtool/ixgbe -I$(SRCDIR)/e MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h MODULE_CFLAGS += -Wall -Werror -ifeq ($(shell lsb_release -si 2>/dev/null),Ubuntu) -MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(shell lsb_release -sr | tr -d .) +-include /etc/lsb-release + +ifeq ($(DISTRIB_ID),Ubuntu) +MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(subst .,,$(DISTRIB_RELEASE)) UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \ | cut -d '"' -f2 | cut -d- -f1,2 | tr .- ,`,1) MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))" endif -# this lib needs main eal -DEPDIRS-y += lib/librte_eal/linuxapp/eal - # # all source are stored in SRCS-y # -SRCS-y := ethtool/ixgbe/ixgbe_main.c -SRCS-y += ethtool/ixgbe/ixgbe_api.c -SRCS-y += ethtool/ixgbe/ixgbe_common.c -SRCS-y += ethtool/ixgbe/ixgbe_ethtool.c -SRCS-y += ethtool/ixgbe/ixgbe_82599.c -SRCS-y += ethtool/ixgbe/ixgbe_82598.c -SRCS-y += ethtool/ixgbe/ixgbe_x540.c -SRCS-y += ethtool/ixgbe/ixgbe_phy.c -SRCS-y += ethtool/ixgbe/kcompat.c +SRCS-y := kni_misc.c +SRCS-y += kni_net.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += kni_ethtool.c -SRCS-y += ethtool/igb/e1000_82575.c -SRCS-y += ethtool/igb/e1000_i210.c -SRCS-y += ethtool/igb/e1000_api.c -SRCS-y += ethtool/igb/e1000_mac.c -SRCS-y += ethtool/igb/e1000_manage.c -SRCS-y += ethtool/igb/e1000_mbx.c -SRCS-y += ethtool/igb/e1000_nvm.c -SRCS-y += ethtool/igb/e1000_phy.c -SRCS-y += ethtool/igb/igb_ethtool.c -SRCS-y += ethtool/igb/igb_main.c -SRCS-y += ethtool/igb/igb_param.c -SRCS-y += ethtool/igb/igb_vmdq.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_main.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_api.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_common.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_ethtool.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_82599.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_82598.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_x540.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_phy.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/kcompat.c -SRCS-y += kni_misc.c -SRCS-y += kni_net.c -SRCS-y += kni_ethtool.c -SRCS-$(CONFIG_RTE_KNI_VHOST) += kni_vhost.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_82575.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_i210.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_api.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_mac.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_manage.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_mbx.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += 
ethtool/igb/e1000_nvm.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_phy.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_ethtool.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_main.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_param.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_vmdq.c include $(RTE_SDK)/mk/rte.module.mk diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h index 78da08e5..d96275af 100644 --- a/lib/librte_eal/linuxapp/kni/compat.h +++ b/lib/librte_eal/linuxapp/kni/compat.h @@ -2,6 +2,8 @@ * Minimal wrappers to allow compiling kni on older kernels. */ +#include <linux/version.h> + #ifndef RHEL_RELEASE_VERSION #define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) #endif @@ -67,3 +69,7 @@ (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))) #undef NET_NAME_UNKNOWN #endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c index d7a987d5..95e262b7 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c @@ -1126,7 +1126,7 @@ static int igb_eeprom_test(struct igb_adapter *adapter, u64 *data) static irqreturn_t igb_test_intr(int irq, void *data) { - struct igb_adapter *adapter = (struct igb_adapter *) data; + struct igb_adapter *adapter = data; struct e1000_hw *hw = &adapter->hw; adapter->test_icr |= E1000_READ_REG(hw, E1000_ICR); diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c index f4dca5a3..5f1f3a6b 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c @@ -1031,8 +1031,15 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) for (i = 0; i < numvecs; i++) adapter->msix_entries[i].entry = i; +#ifdef HAVE_PCI_ENABLE_MSIX err = pci_enable_msix(pdev, adapter->msix_entries, numvecs); +#else + err = pci_enable_msix_range(pdev, + adapter->msix_entries, + numvecs, + numvecs); +#endif if (err == 0) break; } @@ -1629,7 +1636,7 @@ static void igb_check_swap_media(struct igb_adapter *adapter) */ static int igb_get_i2c_data(void *data) { - struct igb_adapter *adapter = (struct igb_adapter *)data; + struct igb_adapter *adapter = data; struct e1000_hw *hw = &adapter->hw; s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); @@ -1644,7 +1651,7 @@ static int igb_get_i2c_data(void *data) */ static void igb_set_i2c_data(void *data, int state) { - struct igb_adapter *adapter = (struct igb_adapter *)data; + struct igb_adapter *adapter = data; struct e1000_hw *hw = &adapter->hw; s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); @@ -1669,7 +1676,7 @@ static void igb_set_i2c_data(void *data, int state) */ static void igb_set_i2c_clk(void *data, int state) { - struct igb_adapter *adapter = (struct igb_adapter *)data; + struct igb_adapter *adapter = data; struct e1000_hw *hw = &adapter->hw; s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); @@ -1691,7 +1698,7 @@ static void igb_set_i2c_clk(void *data, int state) */ static int igb_get_i2c_clk(void *data) { - struct igb_adapter *adapter = (struct igb_adapter *)data; + struct igb_adapter *adapter = data; struct e1000_hw *hw = &adapter->hw; s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h 
b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h index 84826b26..4c52da3c 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h @@ -710,6 +710,9 @@ struct _kc_ethtool_pauseparam { #elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) ) /* SLES12 is at least 3.12.28+ based */ #define SLE_VERSION_CODE SLE_VERSION(12,0,0) +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57)) +/* SLES12SP3 is at least 4.4.57+ based */ +#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0) #endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */ #endif /* CONFIG_SUSE_KERNEL */ #ifndef SLE_VERSION_CODE @@ -3929,8 +3932,13 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) #define vlan_tx_tag_present skb_vlan_tag_present #endif -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) ) +#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)) || \ + (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(12, 3, 0))) #define HAVE_VF_VLAN_PROTO -#endif /* >= 4.9.0 */ +#endif /* >= 4.9.0, >= SLES12SP3 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) +#define HAVE_PCI_ENABLE_MSIX +#endif #endif /* _KCOMPAT_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c index bc3cb2f4..cdfcb959 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c @@ -1462,7 +1462,7 @@ static int ixgbe_eeprom_test(struct ixgbe_adapter *adapter, u64 *data) static irqreturn_t ixgbe_test_intr(int irq, void *data) { - struct net_device *netdev = (struct net_device *) data; + struct net_device *netdev = data; struct ixgbe_adapter *adapter = netdev_priv(netdev); adapter->test_icr |= IXGBE_READ_REG(&adapter->hw, IXGBE_EICR); @@ -2447,7 +2447,7 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, break; case ETHTOOL_GRXCLSRLALL: ret = ixgbe_get_ethtool_fdir_all(adapter, cmd, - (u32 *)rule_locs); + rule_locs); break; case ETHTOOL_GRXFH: ret = ixgbe_get_rss_hash_opts(adapter, cmd); diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h index 58cbadd3..72385ab4 100644 --- a/lib/librte_eal/linuxapp/kni/kni_dev.h +++ b/lib/librte_eal/linuxapp/kni/kni_dev.h @@ -30,17 +30,19 @@ #endif #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include "compat.h" + #include <linux/if.h> #include <linux/wait.h> +#ifdef HAVE_SIGNAL_FUNCTIONS_OWN_HEADER +#include <linux/sched/signal.h> +#else #include <linux/sched.h> +#endif #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/list.h> -#ifdef RTE_KNI_VHOST -#include <net/sock.h> -#endif - #include <exec-env/rte_kni_common.h> #define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */ @@ -102,15 +104,6 @@ struct kni_dev { /* synchro for request processing */ unsigned long synchro; -#ifdef RTE_KNI_VHOST - struct kni_vhost_queue *vhost_queue; - - volatile enum { - BE_STOP = 0x1, - BE_START = 0x2, - BE_FINISH = 0x4, - } vq_status; -#endif /* buffers */ void *pa[MBUF_BURST_SZ]; void *va[MBUF_BURST_SZ]; @@ -118,26 +111,6 @@ struct kni_dev { void *alloc_va[MBUF_BURST_SZ]; }; -#ifdef RTE_KNI_VHOST -uint32_t -kni_poll(struct file *file, struct socket *sock, poll_table * wait); -int kni_chk_vhost_rx(struct kni_dev *kni); -int kni_vhost_init(struct kni_dev *kni); -int kni_vhost_backend_release(struct kni_dev *kni); - -struct kni_vhost_queue { - struct sock sk; - struct socket *sock; - int vnet_hdr_sz; - struct kni_dev *kni; 
- int sockfd; - uint32_t flags; - struct sk_buff *cache; - struct rte_kni_fifo *fifo; -}; - -#endif - void kni_net_rx(struct kni_dev *kni); void kni_net_init(struct net_device *dev); void kni_net_config_lo_mode(char *lo_str); diff --git a/lib/librte_eal/linuxapp/kni/kni_fifo.h b/lib/librte_eal/linuxapp/kni/kni_fifo.h index 025ec1c9..14f4141f 100644 --- a/lib/librte_eal/linuxapp/kni/kni_fifo.h +++ b/lib/librte_eal/linuxapp/kni/kni_fifo.h @@ -91,18 +91,4 @@ kni_fifo_free_count(struct rte_kni_fifo *fifo) return (fifo->read - fifo->write - 1) & (fifo->len - 1); } -#ifdef RTE_KNI_VHOST -/** - * Initializes the kni fifo structure - */ -static inline void -kni_fifo_init(struct rte_kni_fifo *fifo, uint32_t size) -{ - fifo->write = 0; - fifo->read = 0; - fifo->len = size; - fifo->elem_size = sizeof(void *); -} -#endif - #endif /* _KNI_FIFO_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c index 497db9bd..7590f1fd 100644 --- a/lib/librte_eal/linuxapp/kni/kni_misc.c +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c @@ -140,11 +140,7 @@ kni_thread_single(void *data) down_read(&knet->kni_list_lock); for (j = 0; j < KNI_RX_LOOP_NUM; j++) { list_for_each_entry(dev, &knet->kni_list_head, list) { -#ifdef RTE_KNI_VHOST - kni_chk_vhost_rx(dev); -#else kni_net_rx(dev); -#endif kni_net_poll_resp(dev); } } @@ -163,15 +159,11 @@ static int kni_thread_multiple(void *param) { int j; - struct kni_dev *dev = (struct kni_dev *)param; + struct kni_dev *dev = param; while (!kthread_should_stop()) { for (j = 0; j < KNI_RX_LOOP_NUM; j++) { -#ifdef RTE_KNI_VHOST - kni_chk_vhost_rx(dev); -#else kni_net_rx(dev); -#endif kni_net_poll_resp(dev); } #ifdef RTE_KNI_PREEMPT_DEFAULT @@ -205,12 +197,14 @@ kni_dev_remove(struct kni_dev *dev) if (!dev) return -ENODEV; +#ifdef RTE_KNI_KMOD_ETHTOOL if (dev->pci_dev) { if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev)) ixgbe_kni_remove(dev->pci_dev); else if (pci_match_id(igb_pci_tbl, dev->pci_dev)) igb_kni_remove(dev->pci_dev); } +#endif if (dev->net_dev) { unregister_netdev(dev->net_dev); @@ -246,9 +240,6 @@ kni_release(struct inode *inode, struct file *file) dev->pthread = NULL; } -#ifdef RTE_KNI_VHOST - kni_vhost_backend_release(dev); -#endif kni_dev_remove(dev); list_del(&dev->list); } @@ -326,11 +317,13 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, struct kni_net *knet = net_generic(net, kni_net_id); int ret; struct rte_kni_device_info dev_info; - struct pci_dev *pci = NULL; - struct pci_dev *found_pci = NULL; struct net_device *net_dev = NULL; - struct net_device *lad_dev = NULL; struct kni_dev *kni, *dev, *n; +#ifdef RTE_KNI_KMOD_ETHTOOL + struct pci_dev *found_pci = NULL; + struct net_device *lad_dev = NULL; + struct pci_dev *pci = NULL; +#endif pr_info("Creating kni...\n"); /* Check the buffer size, to avoid warning */ @@ -344,6 +337,12 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, return -EIO; } + /* Check if name is zero-terminated */ + if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) { + pr_err("kni.name not zero-terminated\n"); + return -EINVAL; + } + /** * Check if the cpu core id is valid for binding. 
*/ @@ -363,8 +362,8 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, up_read(&knet->kni_list_lock); net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name, -#ifdef NET_NAME_UNKNOWN - NET_NAME_UNKNOWN, +#ifdef NET_NAME_USER + NET_NAME_USER, #endif kni_net_init); if (net_dev == NULL) { @@ -392,10 +391,6 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, kni->sync_va = dev_info.sync_va; kni->sync_kva = phys_to_virt(dev_info.sync_phys); -#ifdef RTE_KNI_VHOST - kni->vhost_queue = NULL; - kni->vq_status = BE_STOP; -#endif kni->mbuf_size = dev_info.mbuf_size; pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", @@ -418,7 +413,7 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, dev_info.function, dev_info.vendor_id, dev_info.device_id); - +#ifdef RTE_KNI_KMOD_ETHTOOL pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL); /* Support Ethtool */ @@ -459,6 +454,7 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, } if (pci) pci_dev_put(pci); +#endif if (kni->lad_dev) ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr); @@ -479,10 +475,6 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, return -ENODEV; } -#ifdef RTE_KNI_VHOST - kni_vhost_init(kni); -#endif - ret = kni_run_thread(knet, kni, dev_info.force_bind); if (ret != 0) return ret; @@ -526,9 +518,6 @@ kni_ioctl_release(struct net *net, uint32_t ioctl_num, dev->pthread = NULL; } -#ifdef RTE_KNI_VHOST - kni_vhost_backend_release(dev); -#endif kni_dev_remove(dev); list_del(&dev->list); ret = 0; diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c index 4ac99cfe..db9f4898 100644 --- a/lib/librte_eal/linuxapp/kni/kni_net.c +++ b/lib/librte_eal/linuxapp/kni/kni_net.c @@ -198,18 +198,6 @@ kni_net_config(struct net_device *dev, struct ifmap *map) /* * Transmit a packet (called by the kernel) */ -#ifdef RTE_KNI_VHOST -static int -kni_net_tx(struct sk_buff *skb, struct net_device *dev) -{ - struct kni_dev *kni = netdev_priv(dev); - - dev_kfree_skb(skb); - kni->stats.tx_dropped++; - - return NETDEV_TX_OK; -} -#else static int kni_net_tx(struct sk_buff *skb, struct net_device *dev) { @@ -289,7 +277,6 @@ drop: return NETDEV_TX_OK; } -#endif /* * RX: normal working mode diff --git a/lib/librte_eal/linuxapp/kni/kni_vhost.c b/lib/librte_eal/linuxapp/kni/kni_vhost.c deleted file mode 100644 index f54c34b1..00000000 --- a/lib/librte_eal/linuxapp/kni/kni_vhost.c +++ /dev/null @@ -1,842 +0,0 @@ -/*- - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. 
- * - * Contact Information: - * Intel Corporation - */ - -#include <linux/module.h> -#include <linux/net.h> -#include <net/sock.h> -#include <linux/virtio_net.h> -#include <linux/wait.h> -#include <linux/mm.h> -#include <linux/nsproxy.h> -#include <linux/sched.h> -#include <linux/if_tun.h> -#include <linux/version.h> -#include <linux/file.h> - -#include "compat.h" -#include "kni_dev.h" -#include "kni_fifo.h" - -#define RX_BURST_SZ 4 - -#ifdef HAVE_STATIC_SOCK_MAP_FD -static int kni_sock_map_fd(struct socket *sock) -{ - struct file *file; - int fd = get_unused_fd_flags(0); - - if (fd < 0) - return fd; - - file = sock_alloc_file(sock, 0, NULL); - if (IS_ERR(file)) { - put_unused_fd(fd); - return PTR_ERR(file); - } - fd_install(fd, file); - return fd; -} -#endif - -static struct proto kni_raw_proto = { - .name = "kni_vhost", - .owner = THIS_MODULE, - .obj_size = sizeof(struct kni_vhost_queue), -}; - -static inline int -kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, - uint32_t offset, uint32_t len) -{ - struct rte_kni_mbuf *pkt_kva = NULL; - struct rte_kni_mbuf *pkt_va = NULL; - int ret; - - pr_debug("tx offset=%d, len=%d, iovlen=%d\n", -#ifdef HAVE_IOV_ITER_MSGHDR - offset, len, (int)m->msg_iter.iov->iov_len); -#else - offset, len, (int)m->msg_iov->iov_len); -#endif - - /** - * Check if it has at least one free entry in tx_q and - * one entry in alloc_q. - */ - if (kni_fifo_free_count(kni->tx_q) == 0 || - kni_fifo_count(kni->alloc_q) == 0) { - /** - * If no free entry in tx_q or no entry in alloc_q, - * drops skb and goes out. - */ - goto drop; - } - - /* dequeue a mbuf from alloc_q */ - ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1); - if (likely(ret == 1)) { - void *data_kva; - - pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva; - data_kva = pkt_kva->buf_addr + pkt_kva->data_off - - kni->mbuf_va + kni->mbuf_kva; - -#ifdef HAVE_IOV_ITER_MSGHDR - copy_from_iter(data_kva, len, &m->msg_iter); -#else - memcpy_fromiovecend(data_kva, m->msg_iov, offset, len); -#endif - - if (unlikely(len < ETH_ZLEN)) { - memset(data_kva + len, 0, ETH_ZLEN - len); - len = ETH_ZLEN; - } - pkt_kva->pkt_len = len; - pkt_kva->data_len = len; - - /* enqueue mbuf into tx_q */ - ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1); - if (unlikely(ret != 1)) { - /* Failing should not happen */ - pr_err("Fail to enqueue mbuf into tx_q\n"); - goto drop; - } - } else { - /* Failing should not happen */ - pr_err("Fail to dequeue mbuf from alloc_q\n"); - goto drop; - } - - /* update statistics */ - kni->stats.tx_bytes += len; - kni->stats.tx_packets++; - - return 0; - -drop: - /* update statistics */ - kni->stats.tx_dropped++; - - return 0; -} - -static inline int -kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, - uint32_t offset, uint32_t len) -{ - uint32_t pkt_len; - struct rte_kni_mbuf *kva; - struct rte_kni_mbuf *va; - void *data_kva; - struct sk_buff *skb; - struct kni_vhost_queue *q = kni->vhost_queue; - - if (unlikely(q == NULL)) - return 0; - - /* ensure at least one entry in free_q */ - if (unlikely(kni_fifo_free_count(kni->free_q) == 0)) - return 0; - - skb = skb_dequeue(&q->sk.sk_receive_queue); - if (unlikely(skb == NULL)) - return 0; - - kva = (struct rte_kni_mbuf *)skb->data; - - /* free skb to cache */ - skb->data = NULL; - if (unlikely(kni_fifo_put(q->fifo, (void **)&skb, 1) != 1)) - /* Failing should not happen */ - pr_err("Fail to enqueue entries into rx cache fifo\n"); - - pkt_len = kva->data_len; - if (unlikely(pkt_len > len)) - goto drop; - - pr_debug("rx offset=%d, len=%d, 
pkt_len=%d, iovlen=%d\n", -#ifdef HAVE_IOV_ITER_MSGHDR - offset, len, pkt_len, (int)m->msg_iter.iov->iov_len); -#else - offset, len, pkt_len, (int)m->msg_iov->iov_len); -#endif - - data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva; -#ifdef HAVE_IOV_ITER_MSGHDR - if (unlikely(copy_to_iter(data_kva, pkt_len, &m->msg_iter))) -#else - if (unlikely(memcpy_toiovecend(m->msg_iov, data_kva, offset, pkt_len))) -#endif - goto drop; - - /* Update statistics */ - kni->stats.rx_bytes += pkt_len; - kni->stats.rx_packets++; - - /* enqueue mbufs into free_q */ - va = (void *)kva - kni->mbuf_kva + kni->mbuf_va; - if (unlikely(kni_fifo_put(kni->free_q, (void **)&va, 1) != 1)) - /* Failing should not happen */ - pr_err("Fail to enqueue entries into free_q\n"); - - pr_debug("receive done %d\n", pkt_len); - - return pkt_len; - -drop: - /* Update drop statistics */ - kni->stats.rx_dropped++; - - return 0; -} - -static uint32_t -kni_sock_poll(struct file *file, struct socket *sock, poll_table *wait) -{ - struct kni_vhost_queue *q = - container_of(sock->sk, struct kni_vhost_queue, sk); - struct kni_dev *kni; - uint32_t mask = 0; - - if (unlikely(q == NULL || q->kni == NULL)) - return POLLERR; - - kni = q->kni; -#ifdef HAVE_SOCKET_WQ - pr_debug("start kni_poll on group %d, wq 0x%16llx\n", - kni->group_id, (uint64_t)sock->wq); - poll_wait(file, &sock->wq->wait, wait); -#else - pr_debug("start kni_poll on group %d, wait at 0x%16llx\n", - kni->group_id, (uint64_t)&sock->wait); - poll_wait(file, &sock->wait, wait); -#endif - - if (kni_fifo_count(kni->rx_q) > 0) - mask |= POLLIN | POLLRDNORM; - - if (sock_writeable(&q->sk) || -#ifdef SOCKWQ_ASYNC_NOSPACE - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) && - sock_writeable(&q->sk))) -#else - (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) && - sock_writeable(&q->sk))) -#endif - mask |= POLLOUT | POLLWRNORM; - - return mask; -} - -static inline void -kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q, - struct sk_buff *skb, struct rte_kni_mbuf *va) -{ - struct rte_kni_mbuf *kva; - - kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva; - (skb)->data = (unsigned char *)kva; - (skb)->len = kva->data_len; - skb_queue_tail(&q->sk.sk_receive_queue, skb); -} - -static inline void -kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q, - struct sk_buff **skb, struct rte_kni_mbuf **va) -{ - int i; - - for (i = 0; i < RX_BURST_SZ; skb++, va++, i++) - kni_vhost_enqueue(kni, q, *skb, *va); -} - -int -kni_chk_vhost_rx(struct kni_dev *kni) -{ - struct kni_vhost_queue *q = kni->vhost_queue; - uint32_t nb_in, nb_mbuf, nb_skb; - const uint32_t BURST_MASK = RX_BURST_SZ - 1; - uint32_t nb_burst, nb_backlog, i; - struct sk_buff *skb[RX_BURST_SZ]; - struct rte_kni_mbuf *va[RX_BURST_SZ]; - - if (unlikely(BE_STOP & kni->vq_status)) { - kni->vq_status |= BE_FINISH; - return 0; - } - - if (unlikely(q == NULL)) - return 0; - - nb_skb = kni_fifo_count(q->fifo); - nb_mbuf = kni_fifo_count(kni->rx_q); - - nb_in = min(nb_mbuf, nb_skb); - nb_in = min_t(uint32_t, nb_in, RX_BURST_SZ); - nb_burst = (nb_in & ~BURST_MASK); - nb_backlog = (nb_in & BURST_MASK); - - /* enqueue skb_queue per BURST_SIZE bulk */ - if (nb_burst != 0) { - if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, RX_BURST_SZ) - != RX_BURST_SZ)) - goto except; - - if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, RX_BURST_SZ) - != RX_BURST_SZ)) - goto except; - - kni_vhost_enqueue_burst(kni, q, skb, va); - } - - /* all leftover, do one by one */ - for (i = 0; i < 
nb_backlog; ++i) { - if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, 1) != 1)) - goto except; - - if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, 1) != 1)) - goto except; - - kni_vhost_enqueue(kni, q, *skb, *va); - } - - /* Ondemand wake up */ - if ((nb_in == RX_BURST_SZ) || (nb_skb == 0) || - ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) { - wake_up_interruptible_poll(sk_sleep(&q->sk), - POLLIN | POLLRDNORM | POLLRDBAND); - pr_debug("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n", - nb_mbuf, nb_skb, nb_in); - } - - return 0; - -except: - /* Failing should not happen */ - pr_err("Fail to enqueue fifo, it shouldn't happen\n"); - BUG_ON(1); - - return 0; -} - -static int -#ifdef HAVE_KIOCB_MSG_PARAM -kni_sock_sndmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) -#else -kni_sock_sndmsg(struct socket *sock, - struct msghdr *m, size_t total_len) -#endif /* HAVE_KIOCB_MSG_PARAM */ -{ - struct kni_vhost_queue *q = - container_of(sock->sk, struct kni_vhost_queue, sk); - int vnet_hdr_len = 0; - unsigned long len = total_len; - - if (unlikely(q == NULL || q->kni == NULL)) - return 0; - - pr_debug("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n", -#ifdef HAVE_IOV_ITER_MSGHDR - len, q->flags, (int)m->msg_iter.iov->iov_len); -#else - len, q->flags, (int)m->msg_iovlen); -#endif - -#ifdef RTE_KNI_VHOST_VNET_HDR_EN - if (likely(q->flags & IFF_VNET_HDR)) { - vnet_hdr_len = q->vnet_hdr_sz; - if (unlikely(len < vnet_hdr_len)) - return -EINVAL; - len -= vnet_hdr_len; - } -#endif - - if (unlikely(len < ETH_HLEN + q->vnet_hdr_sz)) - return -EINVAL; - - return kni_vhost_net_tx(q->kni, m, vnet_hdr_len, len); -} - -static int -#ifdef HAVE_KIOCB_MSG_PARAM -kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len, int flags) -#else -kni_sock_rcvmsg(struct socket *sock, - struct msghdr *m, size_t len, int flags) -#endif /* HAVE_KIOCB_MSG_PARAM */ -{ - int vnet_hdr_len = 0; - int pkt_len = 0; - struct kni_vhost_queue *q = - container_of(sock->sk, struct kni_vhost_queue, sk); - static struct virtio_net_hdr - __attribute__ ((unused)) vnet_hdr = { - .flags = 0, - .gso_type = VIRTIO_NET_HDR_GSO_NONE - }; - - if (unlikely(q == NULL || q->kni == NULL)) - return 0; - -#ifdef RTE_KNI_VHOST_VNET_HDR_EN - if (likely(q->flags & IFF_VNET_HDR)) { - vnet_hdr_len = q->vnet_hdr_sz; - len -= vnet_hdr_len; - if (len < 0) - return -EINVAL; - } -#endif - - pkt_len = kni_vhost_net_rx(q->kni, m, vnet_hdr_len, len); - if (unlikely(pkt_len == 0)) - return 0; - -#ifdef RTE_KNI_VHOST_VNET_HDR_EN - /* no need to copy hdr when no pkt received */ -#ifdef HAVE_IOV_ITER_MSGHDR - if (unlikely(copy_to_iter((void *)&vnet_hdr, vnet_hdr_len, - &m->msg_iter))) -#else - if (unlikely(memcpy_toiovecend(m->msg_iov, - (void *)&vnet_hdr, 0, vnet_hdr_len))) -#endif /* HAVE_IOV_ITER_MSGHDR */ - return -EFAULT; -#endif /* RTE_KNI_VHOST_VNET_HDR_EN */ - pr_debug("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n", - (unsigned long)len, q->flags, pkt_len); - - return pkt_len + vnet_hdr_len; -} - -/* dummy tap like ioctl */ -static int -kni_sock_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - struct ifreq __user *ifr = argp; - uint32_t __user *up = argp; - struct kni_vhost_queue *q = - container_of(sock->sk, struct kni_vhost_queue, sk); - struct kni_dev *kni; - uint32_t u; - int __user *sp = argp; - int s; - int ret; - - pr_debug("tap ioctl cmd 0x%08x\n", cmd); - - switch (cmd) { - case TUNSETIFF: - pr_debug("TUNSETIFF\n"); - /* ignore 
the name, just look at flags */ - if (get_user(u, &ifr->ifr_flags)) - return -EFAULT; - - ret = 0; - if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP)) - ret = -EINVAL; - else - q->flags = u; - - return ret; - - case TUNGETIFF: - pr_debug("TUNGETIFF\n"); - rcu_read_lock_bh(); - kni = rcu_dereference_bh(q->kni); - if (kni) - dev_hold(kni->net_dev); - rcu_read_unlock_bh(); - - if (!kni) - return -ENOLINK; - - ret = 0; - if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) - || put_user(q->flags, &ifr->ifr_flags)) - ret = -EFAULT; - dev_put(kni->net_dev); - return ret; - - case TUNGETFEATURES: - pr_debug("TUNGETFEATURES\n"); - u = IFF_TAP | IFF_NO_PI; -#ifdef RTE_KNI_VHOST_VNET_HDR_EN - u |= IFF_VNET_HDR; -#endif - if (put_user(u, up)) - return -EFAULT; - return 0; - - case TUNSETSNDBUF: - pr_debug("TUNSETSNDBUF\n"); - if (get_user(u, up)) - return -EFAULT; - - q->sk.sk_sndbuf = u; - return 0; - - case TUNGETVNETHDRSZ: - s = q->vnet_hdr_sz; - if (put_user(s, sp)) - return -EFAULT; - pr_debug("TUNGETVNETHDRSZ %d\n", s); - return 0; - - case TUNSETVNETHDRSZ: - if (get_user(s, sp)) - return -EFAULT; - if (s < (int)sizeof(struct virtio_net_hdr)) - return -EINVAL; - - pr_debug("TUNSETVNETHDRSZ %d\n", s); - q->vnet_hdr_sz = s; - return 0; - - case TUNSETOFFLOAD: - pr_debug("TUNSETOFFLOAD %lx\n", arg); -#ifdef RTE_KNI_VHOST_VNET_HDR_EN - /* not support any offload yet */ - if (!(q->flags & IFF_VNET_HDR)) - return -EINVAL; - - return 0; -#else - return -EINVAL; -#endif - - default: - pr_debug("NOT SUPPORT\n"); - return -EINVAL; - } -} - -static int -kni_sock_compat_ioctl(struct socket *sock, uint32_t cmd, - unsigned long arg) -{ - /* 32 bits app on 64 bits OS to be supported later */ - pr_debug("Not implemented.\n"); - - return -EINVAL; -} - -#define KNI_VHOST_WAIT_WQ_SAFE() \ -do { \ - while ((BE_FINISH | BE_STOP) == kni->vq_status) \ - msleep(1); \ -} while (0) \ - - -static int -kni_sock_release(struct socket *sock) -{ - struct kni_vhost_queue *q = - container_of(sock->sk, struct kni_vhost_queue, sk); - struct kni_dev *kni; - - if (q == NULL) - return 0; - - kni = q->kni; - if (kni != NULL) { - kni->vq_status = BE_STOP; - KNI_VHOST_WAIT_WQ_SAFE(); - kni->vhost_queue = NULL; - q->kni = NULL; - } - - if (q->sockfd != -1) - q->sockfd = -1; - - sk_set_socket(&q->sk, NULL); - sock->sk = NULL; - - sock_put(&q->sk); - - pr_debug("dummy sock release done\n"); - - return 0; -} - -int -kni_sock_getname(struct socket *sock, struct sockaddr *addr, - int *sockaddr_len, int peer) -{ - pr_debug("dummy sock getname\n"); - ((struct sockaddr_ll *)addr)->sll_family = AF_PACKET; - return 0; -} - -static const struct proto_ops kni_socket_ops = { - .getname = kni_sock_getname, - .sendmsg = kni_sock_sndmsg, - .recvmsg = kni_sock_rcvmsg, - .release = kni_sock_release, - .poll = kni_sock_poll, - .ioctl = kni_sock_ioctl, - .compat_ioctl = kni_sock_compat_ioctl, -}; - -static void -kni_sk_write_space(struct sock *sk) -{ - wait_queue_head_t *wqueue; - - if (!sock_writeable(sk) || -#ifdef SOCKWQ_ASYNC_NOSPACE - !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) -#else - !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) -#endif - return; - wqueue = sk_sleep(sk); - if (wqueue && waitqueue_active(wqueue)) - wake_up_interruptible_poll( - wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); -} - -static void -kni_sk_destruct(struct sock *sk) -{ - struct kni_vhost_queue *q = - container_of(sk, struct kni_vhost_queue, sk); - - if (!q) - return; - - /* make sure there's no packet in buffer */ - while 
(skb_dequeue(&sk->sk_receive_queue) != NULL) - ; - - mb(); - - if (q->fifo != NULL) { - kfree(q->fifo); - q->fifo = NULL; - } - - if (q->cache != NULL) { - kfree(q->cache); - q->cache = NULL; - } -} - -static int -kni_vhost_backend_init(struct kni_dev *kni) -{ - struct kni_vhost_queue *q; - struct net *net = current->nsproxy->net_ns; - int err, i, sockfd; - struct rte_kni_fifo *fifo; - struct sk_buff *elem; - - if (kni->vhost_queue != NULL) - return -1; - -#ifdef HAVE_SK_ALLOC_KERN_PARAM - q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, - &kni_raw_proto, 0); -#else - q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, - &kni_raw_proto); -#endif - if (!q) - return -ENOMEM; - - err = sock_create_lite(AF_UNSPEC, SOCK_RAW, IPPROTO_RAW, &q->sock); - if (err) - goto free_sk; - - sockfd = kni_sock_map_fd(q->sock); - if (sockfd < 0) { - err = sockfd; - goto free_sock; - } - - /* cache init */ - q->cache = kzalloc( - RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff), - GFP_KERNEL); - if (!q->cache) - goto free_fd; - - fifo = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(void *) - + sizeof(struct rte_kni_fifo), GFP_KERNEL); - if (!fifo) - goto free_cache; - - kni_fifo_init(fifo, RTE_KNI_VHOST_MAX_CACHE_SIZE); - - for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) { - elem = &q->cache[i]; - kni_fifo_put(fifo, (void **)&elem, 1); - } - q->fifo = fifo; - - /* store sockfd in vhost_queue */ - q->sockfd = sockfd; - - /* init socket */ - q->sock->type = SOCK_RAW; - q->sock->state = SS_CONNECTED; - q->sock->ops = &kni_socket_ops; - sock_init_data(q->sock, &q->sk); - - /* init sock data */ - q->sk.sk_write_space = kni_sk_write_space; - q->sk.sk_destruct = kni_sk_destruct; - q->flags = IFF_NO_PI | IFF_TAP; - q->vnet_hdr_sz = sizeof(struct virtio_net_hdr); -#ifdef RTE_KNI_VHOST_VNET_HDR_EN - q->flags |= IFF_VNET_HDR; -#endif - - /* bind kni_dev with vhost_queue */ - q->kni = kni; - kni->vhost_queue = q; - - wmb(); - - kni->vq_status = BE_START; - -#ifdef HAVE_SOCKET_WQ - pr_debug("backend init sockfd=%d, sock->wq=0x%16llx,sk->sk_wq=0x%16llx", - q->sockfd, (uint64_t)q->sock->wq, - (uint64_t)q->sk.sk_wq); -#else - pr_debug("backend init sockfd=%d, sock->wait at 0x%16llx,sk->sk_sleep=0x%16llx", - q->sockfd, (uint64_t)&q->sock->wait, - (uint64_t)q->sk.sk_sleep); -#endif - - return 0; - -free_cache: - kfree(q->cache); - q->cache = NULL; - -free_fd: - put_unused_fd(sockfd); - -free_sock: - q->kni = NULL; - kni->vhost_queue = NULL; - kni->vq_status |= BE_FINISH; - sock_release(q->sock); - q->sock->ops = NULL; - q->sock = NULL; - -free_sk: - sk_free((struct sock *)q); - - return err; -} - -/* kni vhost sock sysfs */ -static ssize_t -show_sock_fd(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct net_device *net_dev = container_of(dev, struct net_device, dev); - struct kni_dev *kni = netdev_priv(net_dev); - int sockfd = -1; - - if (kni->vhost_queue != NULL) - sockfd = kni->vhost_queue->sockfd; - return snprintf(buf, 10, "%d\n", sockfd); -} - -static ssize_t -show_sock_en(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct net_device *net_dev = container_of(dev, struct net_device, dev); - struct kni_dev *kni = netdev_priv(net_dev); - - return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 
0 : 1)); -} - -static ssize_t -set_sock_en(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct net_device *net_dev = container_of(dev, struct net_device, dev); - struct kni_dev *kni = netdev_priv(net_dev); - unsigned long en; - int err = 0; - - if (kstrtoul(buf, 0, &en) != 0) - return -EINVAL; - - if (en) - err = kni_vhost_backend_init(kni); - - return err ? err : count; -} - -static DEVICE_ATTR(sock_fd, S_IRUGO | S_IRUSR, show_sock_fd, NULL); -static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en); -static struct attribute *dev_attrs[] = { - &dev_attr_sock_fd.attr, - &dev_attr_sock_en.attr, - NULL, -}; - -static const struct attribute_group dev_attr_grp = { - .attrs = dev_attrs, -}; - -int -kni_vhost_backend_release(struct kni_dev *kni) -{ - struct kni_vhost_queue *q = kni->vhost_queue; - - if (q == NULL) - return 0; - - /* dettach from kni */ - q->kni = NULL; - - pr_debug("release backend done\n"); - - return 0; -} - -int -kni_vhost_init(struct kni_dev *kni) -{ - struct net_device *dev = kni->net_dev; - - if (sysfs_create_group(&dev->dev.kobj, &dev_attr_grp)) - sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); - - kni->vq_status = BE_STOP; - - pr_debug("kni_vhost_init done\n"); - - return 0; -} diff --git a/lib/librte_eal/linuxapp/xen_dom0/Makefile b/lib/librte_eal/linuxapp/xen_dom0/Makefile index 9d22fb97..be51a82a 100644 --- a/lib/librte_eal/linuxapp/xen_dom0/Makefile +++ b/lib/librte_eal/linuxapp/xen_dom0/Makefile @@ -44,9 +44,6 @@ MODULE_CFLAGS += -I$(RTE_OUTPUT)/include MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h MODULE_CFLAGS += -Wall -Werror -# this lib needs main eal -DEPDIRS-y += lib/librte_eal/linuxapp/eal - # # all source are stored in SRCS-y # diff --git a/lib/librte_efd/Makefile b/lib/librte_efd/Makefile new file mode 100644 index 00000000..b9277bc5 --- /dev/null +++ b/lib/librte_efd/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2016-2017 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_efd.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := rte_efd_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_EFD) := rte_efd.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_EFD)-include := rte_efd.h + +include $(RTE_SDK)/mk/rte.lib.mk
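
/* Usage sketch for the new EFD library (not part of the patch; the
 * function signatures are assumed from this release's rte_efd.h, which is
 * not shown in this diff): insert one key/value pair and look the value
 * back up on the caller's socket. EFD returns only the small efd_value_t,
 * never the full key, which is what keeps the online table compact.
 */
#include <rte_efd.h>
#include <rte_lcore.h>

static int
example_efd_usage(void)
{
	uint32_t key = 42;
	efd_value_t value = 5;
	struct rte_efd_table *table;

	/* online lookups on this socket; offline updates handled on it too */
	table = rte_efd_create("efd_example", 1024, sizeof(key),
			1ULL << rte_socket_id(), rte_socket_id());
	if (table == NULL)
		return -1;

	if (rte_efd_update(table, rte_socket_id(), &key, value) < 0)
		return -1;

	return rte_efd_lookup(table, rte_socket_id(), &key) == value ? 0 : -1;
}

diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c new file mode 100644 index 00000000..f601d62e --- /dev/null +++ b/lib/librte_efd/rte_efd.c @@ -0,0 +1,1343 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 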
+ */ +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <inttypes.h> +#include <errno.h> +#include <stdarg.h> +#include <sys/queue.h> + +#include <rte_log.h> +#include <rte_eal_memconfig.h> +#include <rte_errno.h> +#include <rte_malloc.h> +#include <rte_memzone.h> +#include <rte_prefetch.h> +#include <rte_branch_prediction.h> +#include <rte_memcpy.h> +#include <rte_ring.h> +#include <rte_jhash.h> +#include <rte_hash_crc.h> + +#include "rte_efd.h" +#if defined(RTE_ARCH_X86) +#include "rte_efd_x86.h" +#endif + +#define EFD_KEY(key_idx, table) (table->keys + ((key_idx) * table->key_len)) +/** Hash function used to determine chunk_id and bin_id for a group */ +#define EFD_HASH(key, table) \ + (uint32_t)(rte_jhash(key, table->key_len, 0xbc9f1d34)) +/** Hash function used as constant component of perfect hash search */ +#define EFD_HASHFUNCA(key, table) \ + (uint32_t)(rte_hash_crc(key, table->key_len, 0xbc9f1d35)) +/** Hash function used as multiplicative component of perfect hash search */ +#define EFD_HASHFUNCB(key, table) \ + (uint32_t)(rte_hash_crc(key, table->key_len, 0xbc9f1d36)) + +/************************************************************************* + * Fixed constants + *************************************************************************/ + +/* These parameters are fixed by the efd_bin_to_group balancing table */ +#define EFD_CHUNK_NUM_GROUPS (64) +#define EFD_CHUNK_NUM_BINS (256) +#define EFD_CHUNK_NUM_BIN_TO_GROUP_SETS \ + (EFD_CHUNK_NUM_BINS / EFD_CHUNK_NUM_GROUPS) + +/* + * Target number of rules that each chunk is created to handle. + * Used when initially allocating the table + */ +#define EFD_TARGET_CHUNK_NUM_RULES \ + (EFD_CHUNK_NUM_GROUPS * EFD_TARGET_GROUP_NUM_RULES) +/* + * Max number of rules that each chunk is created to handle. 
+ * Used when initially allocating the table + */ +#define EFD_TARGET_CHUNK_MAX_NUM_RULES \ + (EFD_CHUNK_NUM_GROUPS * EFD_MAX_GROUP_NUM_RULES) + +/** This is fixed based on the bin_to_group permutation array */ +#define EFD_MAX_GROUP_NUM_BINS (16) + +/** + * The end of the chunks array needs some extra padding to ensure + * that vectorization over-reads on the last online chunk stay within +allocated memory + */ +#define EFD_NUM_CHUNK_PADDING_BYTES (256) + +/* All different internal lookup functions */ +enum efd_lookup_internal_function { + EFD_LOOKUP_SCALAR = 0, + EFD_LOOKUP_AVX2, + EFD_LOOKUP_NUM +}; + +TAILQ_HEAD(rte_efd_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_efd_tailq = { + .name = "RTE_EFD", +}; +EAL_REGISTER_TAILQ(rte_efd_tailq); + +/** Internal permutation array used to shuffle bins into pseudorandom groups */ +const uint32_t efd_bin_to_group[EFD_CHUNK_NUM_BIN_TO_GROUP_SETS][EFD_CHUNK_NUM_BINS] = { + { + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, + 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, + 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, + 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, + 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, + 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, + 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, + 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, + 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, + 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, + 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, + 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, + 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63 + }, + { + 34, 33, 48, 59, 0, 21, 36, 18, 9, 49, 54, 38, 51, 23, 31, 5, + 44, 23, 37, 52, 11, 4, 58, 20, 38, 40, 38, 22, 26, 28, 42, 6, + 46, 16, 31, 28, 46, 14, 60, 0, 35, 53, 16, 58, 16, 29, 39, 7, + 1, 54, 15, 11, 48, 3, 62, 9, 58, 5, 30, 43, 17, 7, 36, 34, + 6, 36, 2, 14, 10, 1, 47, 47, 20, 45, 62, 56, 34, 25, 39, 18, + 51, 41, 61, 25, 56, 40, 41, 37, 52, 35, 30, 57, 11, 42, 37, 27, + 54, 19, 26, 13, 48, 31, 46, 15, 12, 10, 16, 20, 43, 17, 12, 55, + 45, 18, 8, 41, 7, 31, 42, 63, 12, 14, 21, 57, 24, 40, 5, 41, + 13, 44, 23, 59, 25, 57, 52, 50, 62, 1, 2, 49, 32, 57, 26, 43, + 56, 60, 55, 5, 49, 6, 3, 50, 46, 39, 27, 33, 17, 4, 53, 13, + 2, 19, 36, 51, 63, 0, 22, 33, 59, 28, 29, 23, 45, 33, 53, 27, + 22, 21, 40, 56, 4, 18, 44, 47, 28, 17, 4, 50, 21, 62, 8, 39, + 0, 8, 15, 24, 29, 24, 9, 11, 48, 61, 35, 55, 43, 1, 54, 42, + 53, 60, 22, 3, 32, 52, 25, 8, 15, 60, 7, 55, 27, 63, 19, 10, + 63, 24, 61, 19, 12, 38, 6, 29, 13, 37, 10, 3, 45, 32, 32, 30, + 49, 61, 44, 14, 20, 58, 35, 30, 2, 26, 34, 51, 9, 59, 47, 50 + }, + { + 32, 35, 32, 34, 55, 5, 6, 23, 49, 11, 6, 23, 52, 37, 29, 54, + 55, 40, 63, 50, 29, 52, 61, 25, 12, 56, 39, 38, 29, 11, 46, 1, + 40, 11, 19, 56, 7, 28, 51, 16, 15, 48, 21, 51, 60, 31, 14, 22, + 41, 47, 59, 56, 53, 28, 58, 26, 43, 27, 41, 33, 24, 52, 44, 38, + 13, 59, 48, 51, 60, 15, 3, 30, 15, 0, 10, 62, 44, 14, 28, 51, + 38, 2, 41, 26, 25, 49, 10, 12, 55, 57, 27, 35, 19, 33, 0, 30, + 5, 36, 47, 53, 5, 53, 20, 43, 34, 37, 52, 41, 21, 63, 59, 9, + 24, 1, 45, 24, 39, 44, 45, 16, 9, 17, 7, 50, 57, 22, 18, 28, + 25, 45, 2, 40, 58, 15, 17, 3, 1, 27, 61, 39, 
19, 0, 19, 21,
+	57, 62, 54, 60, 54, 40, 48, 33, 36, 37, 4, 42, 1, 43, 58, 8,
+	13, 42, 10, 56, 35, 22, 48, 61, 63, 10, 49, 9, 24, 9, 25, 57,
+	33, 18, 13, 31, 42, 36, 36, 55, 30, 37, 53, 34, 59, 4, 4, 23,
+	8, 16, 58, 14, 30, 11, 12, 63, 49, 62, 2, 39, 47, 22, 2, 60,
+	18, 8, 46, 31, 6, 20, 32, 29, 46, 42, 20, 31, 32, 61, 34, 4,
+	47, 26, 20, 43, 26, 21, 7, 3, 16, 35, 18, 44, 27, 62, 13, 23,
+	6, 50, 12, 8, 45, 17, 3, 46, 50, 7, 14, 5, 17, 54, 38, 0
+	},
+	{
+	29, 56, 5, 7, 54, 48, 23, 37, 35, 44, 52, 40, 33, 49, 60, 0,
+	59, 51, 28, 12, 41, 26, 2, 23, 34, 5, 59, 40, 3, 19, 6, 26,
+	35, 53, 45, 49, 29, 57, 28, 62, 58, 59, 19, 53, 59, 62, 6, 54,
+	13, 15, 48, 50, 45, 21, 41, 12, 34, 40, 24, 56, 19, 21, 35, 18,
+	55, 45, 9, 61, 47, 61, 19, 15, 16, 39, 17, 31, 3, 51, 21, 50,
+	17, 25, 25, 11, 44, 16, 18, 28, 14, 2, 37, 61, 58, 27, 62, 4,
+	14, 17, 1, 9, 46, 28, 37, 0, 53, 43, 57, 7, 57, 46, 21, 41,
+	39, 14, 52, 60, 44, 53, 49, 60, 49, 63, 13, 11, 29, 1, 55, 47,
+	55, 12, 60, 43, 54, 37, 13, 6, 42, 10, 36, 13, 9, 8, 34, 51,
+	31, 32, 12, 7, 57, 2, 26, 14, 3, 30, 63, 3, 32, 1, 5, 11,
+	27, 24, 26, 44, 31, 23, 56, 38, 62, 0, 40, 30, 6, 23, 38, 2,
+	47, 5, 15, 27, 16, 10, 31, 25, 22, 63, 30, 25, 20, 33, 32, 50,
+	29, 43, 55, 10, 50, 45, 56, 20, 4, 7, 27, 46, 11, 16, 22, 52,
+	35, 20, 41, 54, 46, 33, 42, 18, 63, 8, 22, 58, 36, 4, 51, 42,
+	38, 32, 38, 22, 17, 0, 47, 8, 48, 8, 48, 1, 61, 36, 33, 20,
+	24, 39, 39, 18, 30, 36, 9, 43, 42, 24, 10, 58, 4, 15, 34, 52
+	},
+};
+
+/*************************************************************************
+ * Offline region structures
+ *************************************************************************/
+
+/** Offline group containing number of rules, values, keys and their bins
+ * for EFD_MAX_GROUP_NUM_RULES rules.
+ */
+struct efd_offline_group_rules {
+	uint32_t num_rules;
+	/**< Sum of the number of rules in all bins assigned to this group. */
+
+	uint32_t key_idx[EFD_MAX_GROUP_NUM_RULES];
+	/**< Array with all keys of the group. */
+	efd_value_t value[EFD_MAX_GROUP_NUM_RULES];
+	/**< Array with all values of the keys of the group. */
+
+	uint8_t bin_id[EFD_MAX_GROUP_NUM_RULES];
+	/**< Stores the bin for each corresponding key to
+	 * avoid having to recompute it
+	 */
+};
+
+/** Offline chunk record, containing EFD_TARGET_CHUNK_NUM_RULES rules.
+ * Those rules are split into EFD_CHUNK_NUM_GROUPS groups per chunk.
+ */
+struct efd_offline_chunk_rules {
+	uint16_t num_rules;
+	/**< Number of rules in the entire chunk;
+	 * used to detect unbalanced groups
+	 */
+
+	struct efd_offline_group_rules group_rules[EFD_CHUNK_NUM_GROUPS];
+	/**< Array of all groups in the chunk. */
+};
+
+/*************************************************************************
+ * Online region structures
+ *************************************************************************/
+
+/** Online group containing values for EFD_MAX_GROUP_NUM_RULES rules. */
+struct efd_online_group_entry {
+	efd_hashfunc_t hash_idx[RTE_EFD_VALUE_NUM_BITS];
+	efd_lookuptbl_t lookup_table[RTE_EFD_VALUE_NUM_BITS];
+} __attribute__((__packed__));
+
+/**
+ * A single chunk record, containing EFD_TARGET_CHUNK_NUM_RULES rules.
+ * Those rules are split into EFD_CHUNK_NUM_GROUPS groups per chunk.
+ */
+struct efd_online_chunk {
+	uint8_t bin_choice_list[(EFD_CHUNK_NUM_BINS * 2 + 7) / 8];
+	/**< This is a packed indirection index into the 'groups' array.
+	 * Each byte contains four two-bit values which index into
+	 * the efd_bin_to_group array.
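+	 * (Choice 0 selects the identity-style set above, under which bins
+	 * 4k..4k+3 all map to group k.)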
+	 * The efd_bin_to_group array returns the index into the groups array.
+	 */
+
+	struct efd_online_group_entry groups[EFD_CHUNK_NUM_GROUPS];
+	/**< Array of all the groups in the chunk. */
+} __attribute__((__packed__));
+
+/**
+ * EFD table structure
+ */
+struct rte_efd_table {
+	char name[RTE_EFD_NAMESIZE]; /**< Name of the efd table. */
+
+	uint32_t key_len; /**< Length of the key stored offline */
+
+	uint32_t max_num_rules;
+	/**< Static maximum number of entries the table was constructed to hold. */
+
+	uint32_t num_rules;
+	/**< Number of entries currently in the table. */
+
+	uint32_t num_chunks;
+	/**< Number of chunks in the table needed to support num_rules. */
+
+	uint32_t num_chunks_shift;
+	/**< Bits to shift to get chunk id, instead of dividing by num_chunks. */
+
+	enum efd_lookup_internal_function lookup_fn;
+	/**< Indicates which lookup function to use. */
+
+	struct efd_online_chunk *chunks[RTE_MAX_NUMA_NODES];
+	/**< Dynamic array of size num_chunks of chunk records. */
+
+	struct efd_offline_chunk_rules *offline_chunks;
+	/**< Dynamic array of size num_chunks of key-value pairs. */
+
+	struct rte_ring *free_slots;
+	/**< Ring that stores all indexes of the free slots in the key table */
+
+	uint8_t *keys; /**< Dynamic array of size max_num_rules of keys */
+};
+
+/**
+ * Computes the chunk ID for a given key hash
+ *
+ * @param table
+ *   EFD table to reference
+ * @param hashed_key
+ *   32-bit key hash returned by EFD_HASH
+ *
+ * @return
+ *   chunk ID containing this key hash
+ */
+static inline uint32_t
+efd_get_chunk_id(const struct rte_efd_table * const table,
+		const uint32_t hashed_key)
+{
+	return hashed_key & (table->num_chunks - 1);
+}
+
+/**
+ * Computes the bin ID for a given key hash
+ *
+ * @param table
+ *   EFD table to reference
+ * @param hashed_key
+ *   32-bit key hash returned by EFD_HASH
+ *
+ * @return bin ID containing this key hash
+ */
+static inline uint32_t
+efd_get_bin_id(const struct rte_efd_table * const table,
+		const uint32_t hashed_key)
+{
+	return (hashed_key >> table->num_chunks_shift) & (EFD_CHUNK_NUM_BINS - 1);
+}
+
+/**
+ * Looks up the current permutation choice for a particular bin in the online table
+ *
+ * @param table
+ *   EFD table to reference
+ * @param socket_id
+ *   Socket ID to use to look up existing values (ideally caller's socket id)
+ * @param chunk_id
+ *   Chunk ID of bin to look up
+ * @param bin_id
+ *   Bin ID to look up
+ *
+ * @return
+ *   Currently active permutation choice in the online table
+ */
+static inline uint8_t
+efd_get_choice(const struct rte_efd_table * const table,
+		const unsigned int socket_id, const uint32_t chunk_id,
+		const uint32_t bin_id)
+{
+	struct efd_online_chunk *chunk = &table->chunks[socket_id][chunk_id];
+
+	/*
+	 * Grab the chunk (byte) that contains the choices
+	 * for four neighboring bins.
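+	 * For example (illustrative values): bin_id = 6 selects
+	 * bin_choice_list[1], and its choice occupies bits 4-5 of that
+	 * byte, since offset = (6 & 0x3) * 2 = 4.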
+	 */
+	uint8_t choice_chunk =
+		chunk->bin_choice_list[bin_id / EFD_CHUNK_NUM_BIN_TO_GROUP_SETS];
+
+	/*
+	 * Compute the offset into the chunk that contains
+	 * the group_id lookup position
+	 */
+	int offset = (bin_id & 0x3) * 2;
+
+	/* Extract from the byte just the desired lookup position */
+	return (uint8_t) ((choice_chunk >> offset) & 0x3);
+}
+
+/**
+ * Compute the chunk_id and bin_id for a given key
+ *
+ * @param table
+ *   EFD table to reference
+ * @param key
+ *   Key to hash and find location of
+ * @param chunk_id
+ *   Computed chunk ID
+ * @param bin_id
+ *   Computed bin ID
+ *
+ */
+static inline void
+efd_compute_ids(const struct rte_efd_table * const table,
+		const void *key, uint32_t * const chunk_id, uint32_t * const bin_id)
+{
+	/* Compute the position of the entry in the hash table */
+	uint32_t h = EFD_HASH(key, table);
+
+	/* Compute the chunk_id where that entry can be found */
+	*chunk_id = efd_get_chunk_id(table, h);
+
+	/*
+	 * Compute the bin within that chunk where the entry
+	 * can be found (0 - 255)
+	 */
+	*bin_id = efd_get_bin_id(table, h);
+}
+
+/**
+ * Search for a hash function for a group that satisfies all group results
+ */
+static inline int
+efd_search_hash(struct rte_efd_table * const table,
+		const struct efd_offline_group_rules * const off_group,
+		struct efd_online_group_entry * const on_group)
+{
+	efd_hashfunc_t hash_idx;
+	efd_hashfunc_t start_hash_idx[RTE_EFD_VALUE_NUM_BITS];
+	efd_lookuptbl_t start_lookup_table[RTE_EFD_VALUE_NUM_BITS];
+
+	uint32_t i, j, rule_id;
+	uint32_t hash_val_a[EFD_MAX_GROUP_NUM_RULES];
+	uint32_t hash_val_b[EFD_MAX_GROUP_NUM_RULES];
+	uint32_t hash_val[EFD_MAX_GROUP_NUM_RULES];
+
+
+	rte_prefetch0(off_group->value);
+
+	/*
+	 * Prepopulate the hash_val tables by running the two hash functions
+	 * for each provided rule
+	 */
+	for (i = 0; i < off_group->num_rules; i++) {
+		void *key_stored = EFD_KEY(off_group->key_idx[i], table);
+		hash_val_b[i] = EFD_HASHFUNCB(key_stored, table);
+		hash_val_a[i] = EFD_HASHFUNCA(key_stored, table);
+	}
+
+	for (i = 0; i < RTE_EFD_VALUE_NUM_BITS; i++) {
+		hash_idx = on_group->hash_idx[i];
+		start_hash_idx[i] = hash_idx;
+		start_lookup_table[i] = on_group->lookup_table[i];
+
+		do {
+			efd_lookuptbl_t lookup_table = 0;
+			efd_lookuptbl_t lookup_table_complement = 0;
+
+			for (rule_id = 0; rule_id < off_group->num_rules; rule_id++)
+				hash_val[rule_id] = hash_val_a[rule_id] + (hash_idx *
+					hash_val_b[rule_id]);
+
+			/*
+			 * The goal here is to find a hash function for this
+			 * particular bit entry that meets the following criteria:
+			 * the most significant bits of the hash result define a
+			 * shift into the lookup table where the bit will be stored
+			 */
+
+			/* Iterate over each provided rule */
+			for (rule_id = 0; rule_id < off_group->num_rules;
+					rule_id++) {
+				/*
+				 * Use the few most significant bits (number based on
+				 * EFD_LOOKUPTBL_SIZE) to see what position the
+				 * expected bit should be set in the lookup_table
+				 */
+				uint32_t bucket_idx = hash_val[rule_id] >>
+					EFD_LOOKUPTBL_SHIFT;
+
+				/*
+				 * Get the current bit of interest.
+				 * This only finds an appropriate hash function
+				 * for one bit of the rule at a time
+				 */
+				efd_lookuptbl_t expected =
+					(off_group->value[rule_id] >> i) & 0x1;
+
+				/*
+				 * Add the expected bit (if set) to a map
+				 * (lookup_table). Also set its complement
+				 * in lookup_table_complement.
+				 */
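+				/*
+				 * Worked example (values chosen purely for
+				 * illustration): with EFD_LOOKUPTBL_SHIFT = 28,
+				 * a hash_val of 0x50000000 gives bucket_idx 5;
+				 * a value bit of 1 then sets bit 5 of
+				 * lookup_table, while a value bit of 0 sets
+				 * bit 5 of lookup_table_complement.
+				 */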
+				lookup_table |= expected << bucket_idx;
+				lookup_table_complement |= (1 - expected)
+					<< bucket_idx;
+
+				/*
+				 * If ever the hash function of two different
+				 * elements result in different values at the
+				 * same location in the lookup_table,
+				 * the current hash_idx is not valid.
+				 */
+				if (lookup_table & lookup_table_complement)
+					break;
+			}
+
+			/*
+			 * Check if the previous loop completed without
+			 * breaking early
+			 */
+			if (rule_id == off_group->num_rules) {
+				/*
+				 * Current hash function worked, store it
+				 * for the current group
+				 */
+				on_group->hash_idx[i] = hash_idx;
+				on_group->lookup_table[i] = lookup_table;
+
+				/*
+				 * Make sure that the hash function has changed
+				 * from the starting value
+				 */
+				hash_idx = start_hash_idx[i] + 1;
+				break;
+			}
+			hash_idx++;
+
+		} while (hash_idx != start_hash_idx[i]);
+
+		/* Failed to find perfect hash for this group */
+		if (hash_idx == start_hash_idx[i]) {
+			/*
+			 * Restore previous hash_idx and lookup_table
+			 * for all value bits
+			 */
+			for (j = 0; j < i; j++) {
+				on_group->hash_idx[j] = start_hash_idx[j];
+				on_group->lookup_table[j] = start_lookup_table[j];
+			}
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+struct rte_efd_table *
+rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len,
+		uint8_t online_cpu_socket_bitmask, uint8_t offline_cpu_socket)
+{
+	struct rte_efd_table *table = NULL;
+	uint8_t *key_array = NULL;
+	uint32_t num_chunks, num_chunks_shift;
+	uint8_t socket_id;
+	struct rte_efd_list *efd_list = NULL;
+	struct rte_tailq_entry *te;
+	uint64_t offline_table_size;
+	char ring_name[RTE_RING_NAMESIZE];
+	struct rte_ring *r = NULL;
+	unsigned int i;
+
+	efd_list = RTE_TAILQ_CAST(rte_efd_tailq.head, rte_efd_list);
+
+	if (online_cpu_socket_bitmask == 0) {
+		RTE_LOG(ERR, EFD, "At least one CPU socket must be enabled "
+				"in the bitmask\n");
+		return NULL;
+	}
+
+	if (max_num_rules == 0) {
+		RTE_LOG(ERR, EFD, "Max num rules must be greater than 0\n");
+		return NULL;
+	}
+
+	/*
+	 * Compute the minimum number of chunks (smallest power of 2)
+	 * that can hold all of the rules
+	 */
+	if (max_num_rules % EFD_TARGET_CHUNK_NUM_RULES == 0)
+		num_chunks = rte_align32pow2(max_num_rules /
+			EFD_TARGET_CHUNK_NUM_RULES);
+	else
+		num_chunks = rte_align32pow2((max_num_rules /
+			EFD_TARGET_CHUNK_NUM_RULES) + 1);
+
+	num_chunks_shift = rte_bsf32(num_chunks);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/*
+	 * Guarantee there's no existing table with this name: the name is
+	 * checked again when the free-slot ring is created below
+	 */
+	TAILQ_FOREACH(te, efd_list, next)
+	{
+		table = (struct rte_efd_table *) te->data;
+		if (strncmp(name, table->name, RTE_EFD_NAMESIZE) == 0)
+			break;
+	}
+
+	table = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		te = NULL;
+		goto error_unlock_exit;
+	}
+
+	te = rte_zmalloc("EFD_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, EFD, "tailq entry allocation failed\n");
+		goto error_unlock_exit;
+	}
+
+	/* Create a new EFD table management structure */
+	table = (struct rte_efd_table *) rte_zmalloc_socket(NULL,
+			sizeof(struct rte_efd_table),
+			RTE_CACHE_LINE_SIZE,
+			offline_cpu_socket);
+	if (table == NULL) {
+		RTE_LOG(ERR, EFD, "Allocating EFD table management structure"
+				" on socket %u failed\n",
+				offline_cpu_socket);
+		goto error_unlock_exit;
+	}
+
+
+	RTE_LOG(DEBUG, EFD, "Allocated EFD table management structure "
+			"on socket %u\n", offline_cpu_socket);
+
+	table->max_num_rules = num_chunks * EFD_TARGET_CHUNK_MAX_NUM_RULES;
+	table->num_rules = 0;
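+	/*
+	 * Illustrative sizing (assuming the default constants): a request
+	 * for max_num_rules = 100000 gives ceil(100000 / 1408) = 72 target
+	 * chunks, rounded up to the next power of two: num_chunks = 128
+	 * (num_chunks_shift = 7), and max_num_rules is re-derived above as
+	 * 128 * 1792 = 229376.
+	 */
+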
table->num_chunks = num_chunks; + table->num_chunks_shift = num_chunks_shift; + table->key_len = key_len; + + /* key_array */ + key_array = (uint8_t *) rte_zmalloc_socket(NULL, + table->max_num_rules * table->key_len, + RTE_CACHE_LINE_SIZE, + offline_cpu_socket); + if (key_array == NULL) { + RTE_LOG(ERR, EFD, "Allocating key array" + " on socket %u failed\n", + offline_cpu_socket); + goto error_unlock_exit; + } + table->keys = key_array; + snprintf(table->name, sizeof(table->name), "%s", name); + + RTE_LOG(DEBUG, EFD, "Creating an EFD table with %u chunks," + " which potentially supports %u entries\n", + num_chunks, table->max_num_rules); + + /* Make sure all the allocatable table pointers are NULL initially */ + for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES; socket_id++) + table->chunks[socket_id] = NULL; + table->offline_chunks = NULL; + + /* + * Allocate one online table per socket specified + * in the user-supplied bitmask + */ + uint64_t online_table_size = num_chunks * sizeof(struct efd_online_chunk) + + EFD_NUM_CHUNK_PADDING_BYTES; + + for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES; socket_id++) { + if ((online_cpu_socket_bitmask >> socket_id) & 0x01) { + /* + * Allocate all of the EFD table chunks (the online portion) + * as a continuous block + */ + table->chunks[socket_id] = + (struct efd_online_chunk *) rte_zmalloc_socket( + NULL, + online_table_size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (table->chunks[socket_id] == NULL) { + RTE_LOG(ERR, EFD, + "Allocating EFD online table on " + "socket %u failed\n", + socket_id); + goto error_unlock_exit; + } + RTE_LOG(DEBUG, EFD, + "Allocated EFD online table of size " + "%"PRIu64" bytes (%.2f MB) on socket %u\n", + online_table_size, + (float) online_table_size / + (1024.0F * 1024.0F), + socket_id); + } + } + +#if defined(RTE_ARCH_X86) + /* + * For less than 4 bits, scalar function performs better + * than vectorised version + */ + if (RTE_EFD_VALUE_NUM_BITS > 3 && rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) + table->lookup_fn = EFD_LOOKUP_AVX2; + else +#endif + table->lookup_fn = EFD_LOOKUP_SCALAR; + + /* + * Allocate the EFD table offline portion (with the actual rules + * mapping keys to values) as a continuous block. + * This could be several gigabytes of memory. + */ + offline_table_size = num_chunks * sizeof(struct efd_offline_chunk_rules); + table->offline_chunks = + (struct efd_offline_chunk_rules *) rte_zmalloc_socket(NULL, + offline_table_size, + RTE_CACHE_LINE_SIZE, + offline_cpu_socket); + if (table->offline_chunks == NULL) { + RTE_LOG(ERR, EFD, "Allocating EFD offline table on socket %u " + "failed\n", offline_cpu_socket); + goto error_unlock_exit; + } + + RTE_LOG(DEBUG, EFD, + "Allocated EFD offline table of size %"PRIu64" bytes " + " (%.2f MB) on socket %u\n", offline_table_size, + (float) offline_table_size / (1024.0F * 1024.0F), + offline_cpu_socket); + + te->data = (void *) table; + TAILQ_INSERT_TAIL(efd_list, te, next); + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + snprintf(ring_name, sizeof(ring_name), "HT_%s", table->name); + /* Create ring (Dummy slot index is not enqueued) */ + r = rte_ring_create(ring_name, rte_align32pow2(table->max_num_rules), + offline_cpu_socket, 0); + if (r == NULL) { + RTE_LOG(ERR, EFD, "memory allocation failed\n"); + goto error_unlock_exit; + } + + /* Populate free slots ring. Entry zero is reserved for key misses. 
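+	 * Each slot index taken from this ring addresses one key_len-sized
+	 * entry in the flat 'keys' array allocated earlier; the update path
+	 * dequeues one slot for each newly inserted key.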
*/ + for (i = 0; i < table->max_num_rules; i++) + rte_ring_sp_enqueue(r, (void *) ((uintptr_t) i)); + + table->free_slots = r; + return table; + +error_unlock_exit: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + rte_efd_free(table); + + return NULL; +} + +struct rte_efd_table * +rte_efd_find_existing(const char *name) +{ + struct rte_efd_table *table = NULL; + struct rte_tailq_entry *te; + struct rte_efd_list *efd_list; + + efd_list = RTE_TAILQ_CAST(rte_efd_tailq.head, rte_efd_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + + TAILQ_FOREACH(te, efd_list, next) + { + table = (struct rte_efd_table *) te->data; + if (strncmp(name, table->name, RTE_EFD_NAMESIZE) == 0) + break; + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + return table; +} + +void +rte_efd_free(struct rte_efd_table *table) +{ + uint8_t socket_id; + + if (table == NULL) + return; + + for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES; socket_id++) + rte_free(table->chunks[socket_id]); + + rte_ring_free(table->free_slots); + rte_free(table->offline_chunks); + rte_free(table->keys); + rte_free(table); +} + +/** + * Applies a previously computed table entry to the specified table for all + * socket-local copies of the online table. + * Intended to apply an update for only a single change + * to a key/value pair at a time + * + * @param table + * EFD table to reference + * @param socket_id + * Socket ID to use to lookup existing values (ideally caller's socket id) + * @param chunk_id + * Chunk index to update + * @param group_id + * Group index to update + * @param bin_id + * Bin within the group that this update affects + * @param new_bin_choice + * Newly chosen permutation which this bin should use - only lower 2 bits + * @param new_group_entry + * Previously computed updated chunk/group entry + */ +static inline void +efd_apply_update(struct rte_efd_table * const table, const unsigned int socket_id, + const uint32_t chunk_id, const uint32_t group_id, + const uint32_t bin_id, const uint8_t new_bin_choice, + const struct efd_online_group_entry * const new_group_entry) +{ + int i; + struct efd_online_chunk *chunk = &table->chunks[socket_id][chunk_id]; + uint8_t bin_index = bin_id / EFD_CHUNK_NUM_BIN_TO_GROUP_SETS; + + /* + * Grab the current byte that contains the choices + * for four neighboring bins + */ + uint8_t choice_chunk = + chunk->bin_choice_list[bin_index]; + + + /* Compute the offset into the chunk that needs to be updated */ + int offset = (bin_id & 0x3) * 2; + + /* Zero the two bits of interest and set them to new_bin_choice */ + choice_chunk = (choice_chunk & (~(0x03 << offset))) + | ((new_bin_choice & 0x03) << offset); + + /* Update the online table with the new data across all sockets */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { + if (table->chunks[i] != NULL) { + memcpy(&(table->chunks[i][chunk_id].groups[group_id]), + new_group_entry, + sizeof(struct efd_online_group_entry)); + table->chunks[i][chunk_id].bin_choice_list[bin_index] = + choice_chunk; + } + } +} + +/* + * Move the bin from prev group to the new group + */ +static inline void +move_groups(uint32_t bin_id, uint8_t bin_size, + struct efd_offline_group_rules *new_group, + struct efd_offline_group_rules * const current_group) +{ + + uint8_t empty_idx = 0; + unsigned int i; + + if (new_group == current_group) + return; + + for (i = 0; i < current_group->num_rules; i++) { + /* + * Move keys that belong to the same bin + * to the new group + */ + if (current_group->bin_id[i] 
== bin_id) { + new_group->key_idx[new_group->num_rules] = + current_group->key_idx[i]; + new_group->value[new_group->num_rules] = + current_group->value[i]; + new_group->bin_id[new_group->num_rules] = + current_group->bin_id[i]; + new_group->num_rules++; + } else { + if (i != empty_idx) { + /* + * Need to move this key towards + * the top of the array + */ + current_group->key_idx[empty_idx] = + current_group->key_idx[i]; + current_group->value[empty_idx] = + current_group->value[i]; + current_group->bin_id[empty_idx] = + current_group->bin_id[i]; + } + empty_idx++; + } + + } + current_group->num_rules -= bin_size; +} + +/* + * Revert group/s to their previous state before + * trying to insert/add a new key + */ +static inline void +revert_groups(struct efd_offline_group_rules *previous_group, + struct efd_offline_group_rules *current_group, uint8_t bin_size) +{ + unsigned int i; + + if (current_group == previous_group) + return; + + /* Move keys back to previous group */ + for (i = current_group->num_rules - bin_size; + i < current_group->num_rules; i++) { + previous_group->key_idx[previous_group->num_rules] = + current_group->key_idx[i]; + previous_group->value[previous_group->num_rules] = + current_group->value[i]; + previous_group->bin_id[previous_group->num_rules] = + current_group->bin_id[i]; + previous_group->num_rules++; + } + + /* + * Decrease number of rules after the move + * in the new group + */ + current_group->num_rules -= bin_size; +} + +/** + * Computes an updated table entry where the supplied key points to a new host. + * If no entry exists, one is inserted. + * + * This function does NOT modify the online table(s) + * This function DOES modify the offline table + * + * @param table + * EFD table to reference + * @param socket_id + * Socket ID to use to lookup existing values (ideally caller's socket id) + * @param key + * Key to insert + * @param value + * Value to associate with key + * @param chunk_id + * Chunk ID of the chunk that was modified + * @param group_id + * Group ID of the group that was modified + * @param bin_id + * Bin ID that was modified + * @param new_bin_choice + * Newly chosen permutation which this bin will use + * @param entry + * Newly computed online entry to apply later with efd_apply_update + * + * @return + * RTE_EFD_UPDATE_WARN_GROUP_FULL + * Operation is insert, and the last available space in the + * key's group was just used. Future inserts may fail as groups fill up. 
+ *     This operation was still successful, and entry contains a valid update
+ *   RTE_EFD_UPDATE_FAILED
+ *     Either the EFD failed to find a suitable perfect hash or the group was full.
+ *     This is a fatal error, and the table is now in an indeterminate state
+ *   RTE_EFD_UPDATE_NO_CHANGE
+ *     Operation resulted in no change to the table (same value already exists)
+ *   0
+ *     Insert or update was successful, and the new efd_online_group_entry
+ *     is stored in *entry
+ *
+ * @warning
+ *   Note that entry will be UNCHANGED if the update has no effect, and thus any
+ *   subsequent use of the entry content will likely be invalid
+ */
+static inline int
+efd_compute_update(struct rte_efd_table * const table,
+		const unsigned int socket_id, const void *key,
+		const efd_value_t value, uint32_t * const chunk_id,
+		uint32_t * const group_id, uint32_t * const bin_id,
+		uint8_t * const new_bin_choice,
+		struct efd_online_group_entry * const entry)
+{
+	unsigned int i;
+	int ret;
+	uint32_t new_idx;
+	void *new_k, *slot_id = NULL;
+	int status = EXIT_SUCCESS;
+	unsigned int found = 0;
+
+	efd_compute_ids(table, key, chunk_id, bin_id);
+
+	struct efd_offline_chunk_rules * const chunk =
+			&table->offline_chunks[*chunk_id];
+	struct efd_offline_group_rules *new_group;
+
+	uint8_t current_choice = efd_get_choice(table, socket_id,
+			*chunk_id, *bin_id);
+	uint32_t current_group_id = efd_bin_to_group[current_choice][*bin_id];
+	struct efd_offline_group_rules * const current_group =
+			&chunk->group_rules[current_group_id];
+	uint8_t bin_size = 0;
+	uint8_t key_changed_index = 0;
+	efd_value_t key_changed_previous_value = 0;
+	uint32_t key_idx_previous = 0;
+
+	/* Scan the current group and see if the key is already present */
+	for (i = 0; i < current_group->num_rules; i++) {
+		if (current_group->bin_id[i] == *bin_id)
+			bin_size++;
+		else
+			continue;
+
+		void *key_stored = EFD_KEY(current_group->key_idx[i], table);
+		if (found == 0 && unlikely(memcmp(key_stored, key,
+				table->key_len) == 0)) {
+			/* Key is already present */
+
+			/*
+			 * If previous value is same as new value,
+			 * no additional work is required
+			 */
+			if (current_group->value[i] == value)
+				return RTE_EFD_UPDATE_NO_CHANGE;
+
+			key_idx_previous = current_group->key_idx[i];
+			key_changed_previous_value = current_group->value[i];
+			key_changed_index = i;
+			current_group->value[i] = value;
+			found = 1;
+		}
+	}
+
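+	/*
+	 * At this point bin_size counts the rules already hashed to this
+	 * bin; if the key was matched, its previous value has been saved
+	 * in key_changed_previous_value so that a failed update can be
+	 * rolled back at the end of this function.
+	 */
+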
+	if (found == 0) {
+		/* Key does not exist; insert the rule into the bin/group */
+		if (unlikely(current_group->num_rules >= EFD_MAX_GROUP_NUM_RULES)) {
+			RTE_LOG(ERR, EFD,
+					"Fatal: No room remaining for insert into "
+					"chunk %u group %u bin %u\n",
+					*chunk_id,
+					current_group_id, *bin_id);
+			return RTE_EFD_UPDATE_FAILED;
+		}
+
+		if (unlikely(current_group->num_rules ==
+				(EFD_MAX_GROUP_NUM_RULES - 1))) {
+			RTE_LOG(INFO, EFD, "Warn: Insert into last "
+					"available slot in chunk %u "
+					"group %u bin %u\n", *chunk_id,
+					current_group_id, *bin_id);
+			status = RTE_EFD_UPDATE_WARN_GROUP_FULL;
+		}
+
+		if (rte_ring_sc_dequeue(table->free_slots, &slot_id) != 0)
+			return RTE_EFD_UPDATE_FAILED;
+
+		new_k = RTE_PTR_ADD(table->keys, (uintptr_t) slot_id *
+				table->key_len);
+		rte_prefetch0(new_k);
+		new_idx = (uint32_t) ((uintptr_t) slot_id);
+
+		rte_memcpy(EFD_KEY(new_idx, table), key, table->key_len);
+		current_group->key_idx[current_group->num_rules] = new_idx;
+		current_group->value[current_group->num_rules] = value;
+		current_group->bin_id[current_group->num_rules] = *bin_id;
+		current_group->num_rules++;
+		table->num_rules++;
+		bin_size++;
+	} else {
+		uint32_t last = current_group->num_rules - 1;
+		/* Swap the key with the last key inserted */
+		current_group->key_idx[key_changed_index] =
+				current_group->key_idx[last];
+		current_group->value[key_changed_index] =
+				current_group->value[last];
+		current_group->bin_id[key_changed_index] =
+				current_group->bin_id[last];
+
+		/*
+		 * Key to be updated will always be available
+		 * at the end of the group
+		 */
+		current_group->key_idx[last] = key_idx_previous;
+		current_group->value[last] = value;
+		current_group->bin_id[last] = *bin_id;
+	}
+
+	*new_bin_choice = current_choice;
+	*group_id = current_group_id;
+	new_group = current_group;
+
+	/* The group needs to be rebalanced when it starts to get loaded */
+	if (current_group->num_rules > EFD_MIN_BALANCED_NUM_RULES) {
+
+		/*
+		 * Subtract the number of entries in the bin from
+		 * the original group
+		 */
+		current_group->num_rules -= bin_size;
+
+		/*
+		 * Figure out which of the available groups that this bin
+		 * can map to is the smallest (using the current group
+		 * as baseline)
+		 */
+		uint8_t smallest_choice = current_choice;
+		uint8_t smallest_size = current_group->num_rules;
+		uint32_t smallest_group_id = current_group_id;
+		unsigned char choice;
+
+		for (choice = 0; choice < EFD_CHUNK_NUM_BIN_TO_GROUP_SETS;
+				choice++) {
+			uint32_t test_group_id =
+					efd_bin_to_group[choice][*bin_id];
+			uint32_t num_rules =
+					chunk->group_rules[test_group_id].num_rules;
+			if (num_rules < smallest_size) {
+				smallest_choice = choice;
+				smallest_size = num_rules;
+				smallest_group_id = test_group_id;
+			}
+		}
+
+		*new_bin_choice = smallest_choice;
+		*group_id = smallest_group_id;
+		new_group = &chunk->group_rules[smallest_group_id];
+		current_group->num_rules += bin_size;
+
+	}
+
+	uint8_t choice = 0;
+	for (;;) {
+		if (current_group != new_group &&
+				new_group->num_rules + bin_size >
+					EFD_MAX_GROUP_NUM_RULES) {
+			RTE_LOG(DEBUG, EFD,
+					"Unable to move_groups to dest group "
+					"containing %u entries. "
+					"bin_size:%u choice:%02x\n",
+					new_group->num_rules, bin_size,
+					choice - 1);
+			goto next_choice;
+		}
+		move_groups(*bin_id, bin_size, new_group, current_group);
+		/*
+		 * Recompute the hash function for the modified group,
+		 * and return it to the caller
+		 */
+		ret = efd_search_hash(table, new_group, entry);
+
+		if (!ret)
+			return status;
+
+		RTE_LOG(DEBUG, EFD,
+				"Failed to find perfect hash for group "
+				"containing %u entries. 
bin_size:%u choice:%02x\n", + new_group->num_rules, bin_size, choice - 1); + /* Restore groups modified to their previous state */ + revert_groups(current_group, new_group, bin_size); + +next_choice: + if (choice == EFD_CHUNK_NUM_BIN_TO_GROUP_SETS) + break; + *new_bin_choice = choice; + *group_id = efd_bin_to_group[choice][*bin_id]; + new_group = &chunk->group_rules[*group_id]; + choice++; + } + + if (!found) { + current_group->num_rules--; + table->num_rules--; + } else + current_group->value[current_group->num_rules - 1] = + key_changed_previous_value; + return RTE_EFD_UPDATE_FAILED; +} + +int +rte_efd_update(struct rte_efd_table * const table, const unsigned int socket_id, + const void *key, const efd_value_t value) +{ + uint32_t chunk_id = 0, group_id = 0, bin_id = 0; + uint8_t new_bin_choice = 0; + struct efd_online_group_entry entry; + + int status = efd_compute_update(table, socket_id, key, value, + &chunk_id, &group_id, &bin_id, + &new_bin_choice, &entry); + + if (status == RTE_EFD_UPDATE_NO_CHANGE) + return EXIT_SUCCESS; + + if (status == RTE_EFD_UPDATE_FAILED) + return status; + + efd_apply_update(table, socket_id, chunk_id, group_id, bin_id, + new_bin_choice, &entry); + return status; +} + +int +rte_efd_delete(struct rte_efd_table * const table, const unsigned int socket_id, + const void *key, efd_value_t * const prev_value) +{ + unsigned int i; + uint32_t chunk_id, bin_id; + uint8_t not_found = 1; + + efd_compute_ids(table, key, &chunk_id, &bin_id); + + struct efd_offline_chunk_rules * const chunk = + &table->offline_chunks[chunk_id]; + + uint8_t current_choice = efd_get_choice(table, socket_id, + chunk_id, bin_id); + uint32_t current_group_id = efd_bin_to_group[current_choice][bin_id]; + struct efd_offline_group_rules * const current_group = + &chunk->group_rules[current_group_id]; + + /* + * Search the current group for the specified key. 
+ * If it exists, remove it and re-pack the other values + */ + for (i = 0; i < current_group->num_rules; i++) { + if (not_found) { + /* Found key that needs to be removed */ + if (memcmp(EFD_KEY(current_group->key_idx[i], table), + key, table->key_len) == 0) { + /* Store previous value if requested by caller */ + if (prev_value != NULL) + *prev_value = current_group->value[i]; + + not_found = 0; + rte_ring_sp_enqueue(table->free_slots, + (void *)((uintptr_t)current_group->key_idx[i])); + } + } else { + /* + * If the desired key has been found, + * need to shift other values up one + */ + + /* Need to shift this entry back up one index */ + current_group->key_idx[i - 1] = current_group->key_idx[i]; + current_group->value[i - 1] = current_group->value[i]; + current_group->bin_id[i - 1] = current_group->bin_id[i]; + } + } + + if (not_found == 0) { + table->num_rules--; + current_group->num_rules--; + } + + return not_found; +} + +static inline efd_value_t +efd_lookup_internal_scalar(const efd_hashfunc_t *group_hash_idx, + const efd_lookuptbl_t *group_lookup_table, + const uint32_t hash_val_a, const uint32_t hash_val_b) +{ + efd_value_t value = 0; + uint32_t i; + + for (i = 0; i < RTE_EFD_VALUE_NUM_BITS; i++) { + value <<= 1; + uint32_t h = hash_val_a + (hash_val_b * + group_hash_idx[RTE_EFD_VALUE_NUM_BITS - i - 1]); + uint16_t bucket_idx = h >> EFD_LOOKUPTBL_SHIFT; + value |= (group_lookup_table[ + RTE_EFD_VALUE_NUM_BITS - i - 1] >> + bucket_idx) & 0x1; + } + + return value; +} + + +static inline efd_value_t +efd_lookup_internal(const struct efd_online_group_entry * const group, + const uint32_t hash_val_a, const uint32_t hash_val_b, + enum efd_lookup_internal_function lookup_fn) +{ + efd_value_t value = 0; + + switch (lookup_fn) { + +#if defined(RTE_ARCH_X86) + case EFD_LOOKUP_AVX2: + return efd_lookup_internal_avx2(group->hash_idx, + group->lookup_table, + hash_val_a, + hash_val_b); +#endif + case EFD_LOOKUP_SCALAR: + /* Fall-through */ + default: + return efd_lookup_internal_scalar(group->hash_idx, + group->lookup_table, + hash_val_a, + hash_val_b); + } + + return value; +} + +efd_value_t +rte_efd_lookup(const struct rte_efd_table * const table, + const unsigned int socket_id, const void *key) +{ + uint32_t chunk_id, group_id, bin_id; + uint8_t bin_choice; + const struct efd_online_group_entry *group; + const struct efd_online_chunk * const chunks = table->chunks[socket_id]; + + /* Determine the chunk and group location for the given key */ + efd_compute_ids(table, key, &chunk_id, &bin_id); + bin_choice = efd_get_choice(table, socket_id, chunk_id, bin_id); + group_id = efd_bin_to_group[bin_choice][bin_id]; + group = &chunks[chunk_id].groups[group_id]; + + return efd_lookup_internal(group, + EFD_HASHFUNCA(key, table), + EFD_HASHFUNCB(key, table), + table->lookup_fn); +} + +void rte_efd_lookup_bulk(const struct rte_efd_table * const table, + const unsigned int socket_id, const int num_keys, + const void **key_list, efd_value_t * const value_list) +{ + int i; + uint32_t chunk_id_list[RTE_EFD_BURST_MAX]; + uint32_t bin_id_list[RTE_EFD_BURST_MAX]; + uint8_t bin_choice_list[RTE_EFD_BURST_MAX]; + uint32_t group_id_list[RTE_EFD_BURST_MAX]; + struct efd_online_group_entry *group; + + struct efd_online_chunk *chunks = table->chunks[socket_id]; + + for (i = 0; i < num_keys; i++) { + efd_compute_ids(table, key_list[i], &chunk_id_list[i], + &bin_id_list[i]); + rte_prefetch0(&chunks[chunk_id_list[i]].bin_choice_list); + } + + for (i = 0; i < num_keys; i++) { + bin_choice_list[i] = efd_get_choice(table, 
socket_id, + chunk_id_list[i], bin_id_list[i]); + group_id_list[i] = + efd_bin_to_group[bin_choice_list[i]][bin_id_list[i]]; + group = &chunks[chunk_id_list[i]].groups[group_id_list[i]]; + rte_prefetch0(group); + } + + for (i = 0; i < num_keys; i++) { + group = &chunks[chunk_id_list[i]].groups[group_id_list[i]]; + value_list[i] = efd_lookup_internal(group, + EFD_HASHFUNCA(key_list[i], table), + EFD_HASHFUNCB(key_list[i], table), + table->lookup_fn); + } +} diff --git a/lib/librte_efd/rte_efd.h b/lib/librte_efd/rte_efd.h new file mode 100644 index 00000000..15968635 --- /dev/null +++ b/lib/librte_efd/rte_efd.h @@ -0,0 +1,308 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_EFD_H_ +#define _RTE_EFD_H_ + +/** + * @file + * + * RTE EFD Table + */ + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/************************************************************************* + * User selectable constants + *************************************************************************/ + +/* + * If possible, best lookup performance will be achieved by ensuring that + * the entire table fits in the L3 cache. + * + * Some formulas for calculating various sizes are listed below: + * + * # of chunks = + * 2 ^ (ceiling(log2((requested # of rules) / + * (EFD_CHUNK_NUM_GROUPS * EFD_TARGET_GROUP_NUM_RULES)))) + * + * Target # of rules = (# of chunks) * EFD_CHUNK_NUM_GROUPS * + * EFD_TARGET_GROUP_NUM_RULES + * + * Group Size (in bytes) = 4 (per value bit) + * + * Table size (in bytes) = RTE_EFD_VALUE_NUM_BITS * (# of chunks) * + * EFD_CHUNK_NUM_GROUPS * (group size) + */ + +/** + * !!! This parameter should be adjusted for your application !!! + * + * This parameter adjusts the number of bits of value that can be + * stored in the table. + * For example, setting the number of bits to 3 will allow storing 8 values + * in the table (between 0 and 7). 
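+ *
+ * The default below is only applied under #ifndef, so a different width
+ * can be selected at build time (one illustrative approach, not the only
+ * one) by defining the macro consistently for both the library and the
+ * application, e.g. compiling with -DRTE_EFD_VALUE_NUM_BITS=16.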
+ * + * This number directly affects the performance of both lookups and insertion. + * In general, performance decreases as more bits are stored in the table. + * + * This number is directly proportional to the size of the online region + * used for lookups. + * + * Note that due to the way the CPU operates on memory, best lookup performance + * will be achieved when RTE_EFD_VALUE_NUM_BITS is a multiple of 8. + * These values align the hash indexes on 16-byte boundaries. + * The greatest performance drop is moving from 8->9 bits, 16->17 bits, etc. + * + * This value must be between 1 and 32 + */ +#ifndef RTE_EFD_VALUE_NUM_BITS +#define RTE_EFD_VALUE_NUM_BITS (8) +#endif + +/* + * EFD_TARGET_GROUP_NUM_RULES: + * Adjusts how many groups/chunks are allocated at table creation time + * to support the requested number of rules. Higher values pack entries + * more tightly in memory, resulting in a smaller memory footprint + * for the online table. + * This comes at the cost of lower insert/update performance. + * + * EFD_MAX_GROUP_NUM_RULES: + * This adjusts the amount of offline memory allocated to store key/value + * pairs for the table. The recommended numbers are upper-bounds for + * this parameter + * - any higher and it becomes very unlikely that a perfect hash function + * can be found for that group size. This value should be at + * least 40% larger than EFD_TARGET_GROUP_NUM_RULES + * + * Recommended values for various lookuptable and hashfunc sizes are: + * + * HASH_FUNC_SIZE = 16, LOOKUPTBL_SIZE = 16: + * EFD_TARGET_GROUP_NUM_RULES = 22 + * EFD_MAX_GROUP_NUM_RULES = 28 + */ +#define EFD_TARGET_GROUP_NUM_RULES (22) +#define EFD_MAX_GROUP_NUM_RULES (28LU) + +#define EFD_MIN_BALANCED_NUM_RULES 5 + +/** + * Maximum number of keys that can be looked up in one call to efd_lookup_bulk + */ +#ifndef RTE_EFD_BURST_MAX +#define RTE_EFD_BURST_MAX (32) +#endif + +/** Maximum number of characters in efd name.*/ +#define RTE_EFD_NAMESIZE 32 + +#if (RTE_EFD_VALUE_NUM_BITS > 0 && RTE_EFD_VALUE_NUM_BITS <= 8) +typedef uint8_t efd_value_t; +#elif (RTE_EFD_VALUE_NUM_BITS > 8 && RTE_EFD_VALUE_NUM_BITS <= 16) +typedef uint16_t efd_value_t; +#elif (RTE_EFD_VALUE_NUM_BITS > 16 && RTE_EFD_VALUE_NUM_BITS <= 32) +typedef uint32_t efd_value_t; +#else +#error("RTE_EFD_VALUE_NUM_BITS must be in the range [1:32]") +#endif + +#define EFD_LOOKUPTBL_SHIFT (32 - 4) +typedef uint16_t efd_lookuptbl_t; +typedef uint16_t efd_hashfunc_t; + +/** + * Creates an EFD table with a single offline region and multiple per-socket + * internally-managed copies of the online table used for lookups + * + * @param name + * EFD table name + * @param max_num_rules + * Minimum number of rules the table should be sized to hold. + * Will be rounded up to the next smallest valid table size + * @param key_len + * Length of the key + * @param online_cpu_socket_bitmask + * Bitmask specifying which sockets should get a copy of the online table. + * LSB = socket 0, etc. 
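+ *   (For example, a bitmask of 0x3 requests online copies on
+ *   sockets 0 and 1.)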
+ * @param offline_cpu_socket
+ *   Identifies the socket where the offline table will be allocated
+ *   (and most efficiently accessed in the case of updates/insertions)
+ *
+ * @return
+ *   EFD table, or NULL if table allocation failed or the bitmask is invalid
+ */
+struct rte_efd_table *
+rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len,
+		uint8_t online_cpu_socket_bitmask, uint8_t offline_cpu_socket);
+
+/**
+ * Releases the resources from an EFD table
+ *
+ * @param table
+ *   Table to free
+ */
+void
+rte_efd_free(struct rte_efd_table *table);
+
+/**
+ * Find an existing EFD table object and return a pointer to it.
+ *
+ * @param name
+ *   Name of the EFD table as passed to rte_efd_create()
+ * @return
+ *   Pointer to EFD table or NULL if object not found
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - ENOENT - value not available for return
+ */
+struct rte_efd_table *
+rte_efd_find_existing(const char *name);
+
+#define RTE_EFD_UPDATE_WARN_GROUP_FULL (1)
+#define RTE_EFD_UPDATE_NO_CHANGE (2)
+#define RTE_EFD_UPDATE_FAILED (3)
+
+/**
+ * Computes an updated table entry for the supplied key/value pair.
+ * The update is then immediately applied to the provided table and
+ * all socket-local copies of the chunks are updated.
+ * This operation is not multi-thread safe
+ * and should only be called from one thread.
+ *
+ * @param table
+ *   EFD table to reference
+ * @param socket_id
+ *   Socket ID to use to look up existing value (ideally caller's socket id)
+ * @param key
+ *   EFD table key to modify
+ * @param value
+ *   Value to associate with the key
+ *
+ * @return
+ *   RTE_EFD_UPDATE_WARN_GROUP_FULL
+ *     Operation is insert, and the last available space in the
+ *     key's group was just used.
+ *     Future inserts may fail as groups fill up.
+ *     This operation was still successful, and the entry contains a valid update
+ *   RTE_EFD_UPDATE_FAILED
+ *     Either the EFD failed to find a suitable perfect hash or the group was full.
+ *     This is a fatal error, and the table is now in an indeterminate state
+ *   RTE_EFD_UPDATE_NO_CHANGE
+ *     Operation resulted in no change to the table (same value already exists)
+ *   0 - success
+ */
+int
+rte_efd_update(struct rte_efd_table *table, unsigned int socket_id,
+		const void *key, efd_value_t value);
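+
+/*
+ * Illustrative usage sketch (hypothetical names, error handling trimmed;
+ * rte_socket_id() comes from rte_lcore.h):
+ *
+ *	uint32_t flow_key = 42;
+ *	struct rte_efd_table *t = rte_efd_create("flows", 1024 * 1024,
+ *			sizeof(flow_key), 0x3, 0);
+ *	if (t != NULL && rte_efd_update(t, rte_socket_id(), &flow_key,
+ *			(efd_value_t) 3) != RTE_EFD_UPDATE_FAILED)
+ *		(void) rte_efd_lookup(t, rte_socket_id(), &flow_key);
+ */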
+
+/**
+ * Removes any value currently associated with the specified key from the table.
+ * This operation is not multi-thread safe
+ * and should only be called from one thread.
+ *
+ * @param table
+ *   EFD table to reference
+ * @param socket_id
+ *   Socket ID to use to look up existing value (ideally caller's socket id)
+ * @param key
+ *   EFD table key to delete
+ * @param prev_value
+ *   If not NULL, will store the previous value here before deleting it
+ *
+ * @return
+ *   0 - successfully found and deleted the key
+ *   nonzero otherwise
+ */
+int
+rte_efd_delete(struct rte_efd_table *table, unsigned int socket_id,
+		const void *key, efd_value_t *prev_value);
+
+/**
+ * Looks up the value associated with a key.
+ * This operation is multi-thread safe.
+ *
+ * NOTE: Lookups will *always* succeed - this is a property of
+ * using a perfect hash table.
+ * If the specified key was never inserted, a pseudorandom answer will be returned.
+ * There is no way to know based on the lookup if the key was ever inserted
+ * originally, so this must be tracked elsewhere.
+ *
+ * @param table
+ *   EFD table to reference
+ * @param socket_id
+ *   Socket ID to use to look up existing value (ideally caller's socket id)
+ * @param key
+ *   EFD table key to look up
+ *
+ * @return
+ *   Value associated with the key, or a pseudorandom value if the key was
+ *   never inserted
+ */
+efd_value_t
+rte_efd_lookup(const struct rte_efd_table *table, unsigned int socket_id,
+		const void *key);
+
+/**
+ * Looks up the value associated with several keys.
+ * This operation is multi-thread safe.
+ *
+ * NOTE: Lookups will *always* succeed - this is a property of
+ * using a perfect hash table.
+ * If the specified key was never inserted, a pseudorandom answer will be returned.
+ * There is no way to know based on the lookup if the key was ever inserted
+ * originally, so this must be tracked elsewhere.
+ *
+ * @param table
+ *   EFD table to reference
+ * @param socket_id
+ *   Socket ID to use to look up existing values (ideally caller's socket id)
+ * @param num_keys
+ *   Number of keys in the key_list array, must be less than RTE_EFD_BURST_MAX
+ * @param key_list
+ *   Array of num_keys pointers which point to keys to look up
+ * @param value_list
+ *   Array of size num_keys where lookup values will be stored
+ */
+void
+rte_efd_lookup_bulk(const struct rte_efd_table *table, unsigned int socket_id,
+		int num_keys, const void **key_list,
+		efd_value_t *value_list);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_EFD_H_ */
diff --git a/lib/librte_efd/rte_efd_version.map b/lib/librte_efd/rte_efd_version.map
new file mode 100644
index 00000000..ae60a641
--- /dev/null
+++ b/lib/librte_efd/rte_efd_version.map
@@ -0,0 +1,13 @@
+DPDK_17.02 {
+	global:
+
+	rte_efd_create;
+	rte_efd_delete;
+	rte_efd_find_existing;
+	rte_efd_free;
+	rte_efd_lookup;
+	rte_efd_lookup_bulk;
+	rte_efd_update;
+
+	local: *;
+};
diff --git a/lib/librte_efd/rte_efd_x86.h b/lib/librte_efd/rte_efd_x86.h
new file mode 100644
index 00000000..34f37d73
--- /dev/null
+++ b/lib/librte_efd/rte_efd_x86.h
@@ -0,0 +1,86 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* rte_efd_x86.h + * This file holds all x86 specific EFD functions + */ +#include <immintrin.h> + +#if (RTE_EFD_VALUE_NUM_BITS == 8 || RTE_EFD_VALUE_NUM_BITS == 16 || \ + RTE_EFD_VALUE_NUM_BITS == 24 || RTE_EFD_VALUE_NUM_BITS == 32) +#define EFD_LOAD_SI128(val) _mm_load_si128(val) +#else +#define EFD_LOAD_SI128(val) _mm_lddqu_si128(val) +#endif + +static inline efd_value_t +efd_lookup_internal_avx2(const efd_hashfunc_t *group_hash_idx, + const efd_lookuptbl_t *group_lookup_table, + const uint32_t hash_val_a, const uint32_t hash_val_b) +{ +#ifdef RTE_MACHINE_CPUFLAG_AVX2 + efd_value_t value = 0; + uint32_t i = 0; + __m256i vhash_val_a = _mm256_set1_epi32(hash_val_a); + __m256i vhash_val_b = _mm256_set1_epi32(hash_val_b); + + for (; i < RTE_EFD_VALUE_NUM_BITS; i += 8) { + __m256i vhash_idx = + _mm256_cvtepu16_epi32(EFD_LOAD_SI128( + (__m128i const *) &group_hash_idx[i])); + __m256i vlookup_table = _mm256_cvtepu16_epi32( + EFD_LOAD_SI128((__m128i const *) + &group_lookup_table[i])); + __m256i vhash = _mm256_add_epi32(vhash_val_a, + _mm256_mullo_epi32(vhash_idx, vhash_val_b)); + __m256i vbucket_idx = _mm256_srli_epi32(vhash, + EFD_LOOKUPTBL_SHIFT); + __m256i vresult = _mm256_srlv_epi32(vlookup_table, + vbucket_idx); + + value |= (_mm256_movemask_ps( + (__m256) _mm256_slli_epi32(vresult, 31)) + & ((1 << (RTE_EFD_VALUE_NUM_BITS - i)) - 1)) << i; + } + + return value; +#else + RTE_SET_USED(group_hash_idx); + RTE_SET_USED(group_lookup_table); + RTE_SET_USED(hash_val_a); + RTE_SET_USED(hash_val_b); + /* Return dummy value, only to avoid compilation breakage */ + return 0; +#endif + +} diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile index efe1e5fe..93fdde10 100644 --- a/lib/librte_ether/Makefile +++ b/lib/librte_ether/Makefile @@ -1,6 +1,6 @@ # BSD LICENSE # -# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# Copyright(c) 2010-2016 Intel Corporation. All rights reserved. # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -41,18 +41,20 @@ CFLAGS += $(WERROR_FLAGS) EXPORT_MAP := rte_ether_version.map -LIBABIVER := 5 +LIBABIVER := 6 SRCS-y += rte_ethdev.c +SRCS-y += rte_flow.c # # Export include files # SYMLINK-y-include += rte_ethdev.h +SYMLINK-y-include += rte_ethdev_pci.h +SYMLINK-y-include += rte_ethdev_vdev.h SYMLINK-y-include += rte_eth_ctrl.h SYMLINK-y-include += rte_dev_info.h - -# this lib depends upon: -DEPDIRS-y += lib/librte_net lib/librte_eal lib/librte_mempool lib/librte_ring lib/librte_mbuf +SYMLINK-y-include += rte_flow.h +SYMLINK-y-include += rte_flow_driver.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h index fe80eb01..83869042 100644 --- a/lib/librte_ether/rte_eth_ctrl.h +++ b/lib/librte_ether/rte_eth_ctrl.h @@ -99,6 +99,7 @@ enum rte_filter_type { RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_HASH, RTE_ETH_FILTER_L2_TUNNEL, + RTE_ETH_FILTER_GENERIC, RTE_ETH_FILTER_MAX }; diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 5a317594..83898a8f 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -138,10 +138,18 @@ enum { STAT_QMAP_RX }; -enum { - DEV_DETACHED = 0, - DEV_ATTACHED -}; +uint8_t +rte_eth_find_next(uint8_t port_id) +{ + while (port_id < RTE_MAX_ETHPORTS && + rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED) + port_id++; + + if (port_id >= RTE_MAX_ETHPORTS) + return RTE_MAX_ETHPORTS; + + return port_id; +} static void rte_eth_dev_data_alloc(void) @@ -170,7 +178,7 @@ rte_eth_dev_allocated(const char *name) unsigned i; for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - if ((rte_eth_devices[i].attached == DEV_ATTACHED) && + if ((rte_eth_devices[i].state == RTE_ETH_DEV_ATTACHED) && strcmp(rte_eth_devices[i].data->name, name) == 0) return &rte_eth_devices[i]; } @@ -183,7 +191,7 @@ rte_eth_dev_find_free_port(void) unsigned i; for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - if (rte_eth_devices[i].attached == DEV_DETACHED) + if (rte_eth_devices[i].state == RTE_ETH_DEV_UNUSED) return i; } return RTE_MAX_ETHPORTS; @@ -195,7 +203,8 @@ eth_dev_get(uint8_t port_id) struct rte_eth_dev *eth_dev = &rte_eth_devices[port_id]; eth_dev->data = &rte_eth_dev_data[port_id]; - eth_dev->attached = DEV_ATTACHED; + eth_dev->state = RTE_ETH_DEV_ATTACHED; + TAILQ_INIT(&(eth_dev->link_intr_cbs)); eth_dev_last_created_port = port_id; nb_ports++; @@ -224,9 +233,11 @@ rte_eth_dev_allocate(const char *name) return NULL; } + memset(&rte_eth_dev_data[port_id], 0, sizeof(struct rte_eth_dev_data)); eth_dev = eth_dev_get(port_id); snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name); eth_dev->data->port_id = port_id; + eth_dev->data->mtu = ETHER_MTU; return eth_dev; } @@ -236,8 +247,8 @@ rte_eth_dev_allocate(const char *name) * makes sure that the same device would have the same port id both * in the primary and secondary process. 
*/ -static struct rte_eth_dev * -eth_dev_attach_secondary(const char *name) +struct rte_eth_dev * +rte_eth_dev_attach_secondary(const char *name) { uint8_t i; struct rte_eth_dev *eth_dev; @@ -268,121 +279,16 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) if (eth_dev == NULL) return -EINVAL; - eth_dev->attached = DEV_DETACHED; + eth_dev->state = RTE_ETH_DEV_UNUSED; nb_ports--; return 0; } int -rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv, - struct rte_pci_device *pci_dev) -{ - struct eth_driver *eth_drv; - struct rte_eth_dev *eth_dev; - char ethdev_name[RTE_ETH_NAME_MAX_LEN]; - - int diag; - - eth_drv = (struct eth_driver *)pci_drv; - - rte_eal_pci_device_name(&pci_dev->addr, ethdev_name, - sizeof(ethdev_name)); - - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - eth_dev = rte_eth_dev_allocate(ethdev_name); - if (eth_dev == NULL) - return -ENOMEM; - - eth_dev->data->dev_private = rte_zmalloc("ethdev private structure", - eth_drv->dev_private_size, - RTE_CACHE_LINE_SIZE); - if (eth_dev->data->dev_private == NULL) - rte_panic("Cannot allocate memzone for private port data\n"); - } else { - eth_dev = eth_dev_attach_secondary(ethdev_name); - if (eth_dev == NULL) { - /* - * if we failed to attach a device, it means the - * device is skipped in primary process, due to - * some errors. If so, we return a positive value, - * to let EAL skip it for the secondary process - * as well. - */ - return 1; - } - } - eth_dev->pci_dev = pci_dev; - eth_dev->driver = eth_drv; - eth_dev->data->rx_mbuf_alloc_failed = 0; - - /* init user callbacks */ - TAILQ_INIT(&(eth_dev->link_intr_cbs)); - - /* - * Set the default MTU. - */ - eth_dev->data->mtu = ETHER_MTU; - - /* Invoke PMD device initialization function */ - diag = (*eth_drv->eth_dev_init)(eth_dev); - if (diag == 0) - return 0; - - RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%x device_id=0x%x) failed\n", - pci_drv->driver.name, - (unsigned) pci_dev->id.vendor_id, - (unsigned) pci_dev->id.device_id); - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - rte_free(eth_dev->data->dev_private); - rte_eth_dev_release_port(eth_dev); - return diag; -} - -int -rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev) -{ - const struct eth_driver *eth_drv; - struct rte_eth_dev *eth_dev; - char ethdev_name[RTE_ETH_NAME_MAX_LEN]; - int ret; - - if (pci_dev == NULL) - return -EINVAL; - - rte_eal_pci_device_name(&pci_dev->addr, ethdev_name, - sizeof(ethdev_name)); - - eth_dev = rte_eth_dev_allocated(ethdev_name); - if (eth_dev == NULL) - return -ENODEV; - - eth_drv = (const struct eth_driver *)pci_dev->driver; - - /* Invoke PMD device uninit function */ - if (*eth_drv->eth_dev_uninit) { - ret = (*eth_drv->eth_dev_uninit)(eth_dev); - if (ret) - return ret; - } - - /* free ether device */ - rte_eth_dev_release_port(eth_dev); - - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - rte_free(eth_dev->data->dev_private); - - eth_dev->pci_dev = NULL; - eth_dev->driver = NULL; - eth_dev->data = NULL; - - return 0; -} - -int rte_eth_dev_is_valid_port(uint8_t port_id) { if (port_id >= RTE_MAX_ETHPORTS || - rte_eth_devices[port_id].attached != DEV_ATTACHED) + rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED) return 0; else return 1; @@ -434,9 +340,7 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) return -ENODEV; *port_id = RTE_MAX_ETHPORTS; - - for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - + RTE_ETH_FOREACH_DEV(i) { if (!strncmp(name, rte_eth_dev_data[i].name, strlen(name))) { @@ -460,8 +364,8 @@ rte_eth_dev_is_detachable(uint8_t 
port_id) case RTE_KDRV_UIO_GENERIC: case RTE_KDRV_NIC_UIO: case RTE_KDRV_NONE: - break; case RTE_KDRV_VFIO: + break; default: return -ENOTSUP; } @@ -588,6 +492,9 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) for (i = nb_queues; i < old_nb_queues; i++) (*dev->dev_ops->rx_queue_release)(rxq[i]); + + rte_free(dev->data->rx_queues); + dev->data->rx_queues = NULL; } dev->data->nb_rx_queues = nb_queues; return 0; @@ -739,6 +646,9 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) for (i = nb_queues; i < old_nb_queues; i++) (*dev->dev_ops->tx_queue_release)(txq[i]); + + rte_free(dev->data->tx_queues); + dev->data->tx_queues = NULL; } dev->data->nb_tx_queues = nb_queues; return 0; @@ -839,16 +749,19 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, return -EINVAL; } - /* - * If link state interrupt is enabled, check that the - * device supports it. - */ + /* Check that the device supports requested interrupts */ if ((dev_conf->intr_conf.lsc == 1) && (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) { RTE_PMD_DEBUG_TRACE("driver %s does not support lsc\n", dev->data->drv_name); return -EINVAL; } + if ((dev_conf->intr_conf.rmv == 1) && + (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) { + RTE_PMD_DEBUG_TRACE("driver %s does not support rmv\n", + dev->data->drv_name); + return -EINVAL; + } /* * If jumbo frames are enabled, check that the maximum RX packet @@ -909,39 +822,61 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, return 0; } +void +_rte_eth_dev_reset(struct rte_eth_dev *dev) +{ + if (dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be stopped to allow reset\n", + dev->data->port_id); + return; + } + + rte_eth_dev_rx_queue_config(dev, 0); + rte_eth_dev_tx_queue_config(dev, 0); + + memset(&dev->data->dev_conf, 0, sizeof(dev->data->dev_conf)); +} + static void rte_eth_dev_config_restore(uint8_t port_id) { struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; - struct ether_addr addr; + struct ether_addr *addr; uint16_t i; uint32_t pool = 0; + uint64_t pool_mask; dev = &rte_eth_devices[port_id]; rte_eth_dev_info_get(port_id, &dev_info); - if (RTE_ETH_DEV_SRIOV(dev).active) - pool = RTE_ETH_DEV_SRIOV(dev).def_vmdq_idx; - - /* replay MAC address configuration */ - for (i = 0; i < dev_info.max_mac_addrs; i++) { - addr = dev->data->mac_addrs[i]; - - /* skip zero address */ - if (is_zero_ether_addr(&addr)) - continue; - - /* add address to the hardware */ - if (*dev->dev_ops->mac_addr_add && - (dev->data->mac_pool_sel[i] & (1ULL << pool))) - (*dev->dev_ops->mac_addr_add)(dev, &addr, i, pool); - else { - RTE_PMD_DEBUG_TRACE("port %d: MAC address array not supported\n", - port_id); - /* exit the loop but not return an error */ - break; + /* replay MAC address configuration including default MAC */ + addr = &dev->data->mac_addrs[0]; + if (*dev->dev_ops->mac_addr_set != NULL) + (*dev->dev_ops->mac_addr_set)(dev, addr); + else if (*dev->dev_ops->mac_addr_add != NULL) + (*dev->dev_ops->mac_addr_add)(dev, addr, 0, pool); + + if (*dev->dev_ops->mac_addr_add != NULL) { + for (i = 1; i < dev_info.max_mac_addrs; i++) { + addr = &dev->data->mac_addrs[i]; + + /* skip zero address */ + if (is_zero_ether_addr(addr)) + continue; + + pool = 0; + pool_mask = dev->data->mac_pool_sel[i]; + + do { + if (pool_mask & 1ULL) + (*dev->dev_ops->mac_addr_add)(dev, + addr, i, pool); + pool_mask >>= 1; + pool++; + } while (pool_mask); } } @@ -1051,8 +986,10 @@ rte_eth_dev_close(uint8_t port_id) 
dev->data->dev_started = 0; (*dev->dev_ops->dev_close)(dev); + dev->data->nb_rx_queues = 0; rte_free(dev->data->rx_queues); dev->data->rx_queues = NULL; + dev->data->nb_tx_queues = 0; rte_free(dev->data->tx_queues); dev->data->tx_queues = NULL; } @@ -1067,6 +1004,7 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, uint32_t mbp_buf_size; struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; + void **rxq; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -1125,6 +1063,14 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, return -EINVAL; } + rxq = dev->data->rx_queues; + if (rxq[rx_queue_id]) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, + -ENOTSUP); + (*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]); + rxq[rx_queue_id] = NULL; + } + if (rx_conf == NULL) rx_conf = &dev_info.default_rxconf; @@ -1146,6 +1092,7 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, { struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; + void **txq; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -1178,6 +1125,14 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, return -EINVAL; } + txq = dev->data->tx_queues; + if (txq[tx_queue_id]) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, + -ENOTSUP); + (*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]); + txq[tx_queue_id] = NULL; + } + if (tx_conf == NULL) tx_conf = &dev_info.default_txconf; @@ -1234,6 +1189,20 @@ rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size) return ret; } +int +rte_eth_tx_done_cleanup(uint8_t port_id, uint16_t queue_id, uint32_t free_cnt) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + + /* Validate Input Data. Bail if not valid or not supported. */ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_done_cleanup, -ENOTSUP); + + /* Call driver to free pending mbufs. 
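The rte_eth_tx_done_cleanup() implementation above (its body is completed just below) only validates the port and forwards to the PMD's tx_done_cleanup op. From the application side, a minimal usage sketch; port and queue ids are illustrative:

#include <rte_ethdev.h>

/* Ask the driver to release mbufs whose transmission has completed.
 * free_cnt == 0 means "free as many as possible". */
static void
reclaim_tx_mbufs(uint8_t port_id, uint16_t queue_id)
{
        int nb_freed = rte_eth_tx_done_cleanup(port_id, queue_id, 0);

        if (nb_freed < 0)
                return; /* -ENODEV (bad port) or -ENOTSUP (no PMD support) */
        /* nb_freed packets were released; more may still be in flight. */
}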
*/ + return (*dev->dev_ops->tx_done_cleanup)(dev->data->tx_queues[queue_id], + free_cnt); +} + void rte_eth_promiscuous_enable(uint8_t port_id) { @@ -1393,12 +1362,19 @@ get_xstats_count(uint8_t port_id) RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); dev = &rte_eth_devices[port_id]; + if (dev->dev_ops->xstats_get_names_by_id != NULL) { + count = (*dev->dev_ops->xstats_get_names_by_id)(dev, NULL, + NULL, 0); + if (count < 0) + return count; + } if (dev->dev_ops->xstats_get_names != NULL) { count = (*dev->dev_ops->xstats_get_names)(dev, NULL, 0); if (count < 0) return count; } else count = 0; + count += RTE_NB_STATS; count += RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS) * RTE_NB_RXQ_STATS; @@ -1408,9 +1384,170 @@ get_xstats_count(uint8_t port_id) } int +rte_eth_xstats_get_id_by_name(uint8_t port_id, const char *xstat_name, + uint64_t *id) +{ + int cnt_xstats, idx_xstat; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (!id) { + RTE_PMD_DEBUG_TRACE("Error: id pointer is NULL\n"); + return -ENOMEM; + } + + if (!xstat_name) { + RTE_PMD_DEBUG_TRACE("Error: xstat_name pointer is NULL\n"); + return -ENOMEM; + } + + /* Get count */ + cnt_xstats = rte_eth_xstats_get_names_by_id(port_id, NULL, 0, NULL); + if (cnt_xstats < 0) { + RTE_PMD_DEBUG_TRACE("Error: Cannot get count of xstats\n"); + return -ENODEV; + } + + /* Get id-name lookup table */ + struct rte_eth_xstat_name xstats_names[cnt_xstats]; + + if (cnt_xstats != rte_eth_xstats_get_names_by_id( + port_id, xstats_names, cnt_xstats, NULL)) { + RTE_PMD_DEBUG_TRACE("Error: Cannot get xstats lookup\n"); + return -1; + } + + for (idx_xstat = 0; idx_xstat < cnt_xstats; idx_xstat++) { + if (!strcmp(xstats_names[idx_xstat].name, xstat_name)) { + *id = idx_xstat; + return 0; + }; + } + + return -EINVAL; +} + +int +rte_eth_xstats_get_names_by_id(uint8_t port_id, + struct rte_eth_xstat_name *xstats_names, unsigned int size, + uint64_t *ids) +{ + /* Get all xstats */ + if (!ids) { + struct rte_eth_dev *dev; + int cnt_used_entries; + int cnt_expected_entries; + int cnt_driver_entries; + uint32_t idx, id_queue; + uint16_t num_q; + + cnt_expected_entries = get_xstats_count(port_id); + if (xstats_names == NULL || cnt_expected_entries < 0 || + (int)size < cnt_expected_entries) + return cnt_expected_entries; + + /* port_id checked in get_xstats_count() */ + dev = &rte_eth_devices[port_id]; + cnt_used_entries = 0; + + for (idx = 0; idx < RTE_NB_STATS; idx++) { + snprintf(xstats_names[cnt_used_entries].name, + sizeof(xstats_names[0].name), + "%s", rte_stats_strings[idx].name); + cnt_used_entries++; + } + num_q = RTE_MIN(dev->data->nb_rx_queues, + RTE_ETHDEV_QUEUE_STAT_CNTRS); + for (id_queue = 0; id_queue < num_q; id_queue++) { + for (idx = 0; idx < RTE_NB_RXQ_STATS; idx++) { + snprintf(xstats_names[cnt_used_entries].name, + sizeof(xstats_names[0].name), + "rx_q%u%s", + id_queue, + rte_rxq_stats_strings[idx].name); + cnt_used_entries++; + } + + } + num_q = RTE_MIN(dev->data->nb_tx_queues, + RTE_ETHDEV_QUEUE_STAT_CNTRS); + for (id_queue = 0; id_queue < num_q; id_queue++) { + for (idx = 0; idx < RTE_NB_TXQ_STATS; idx++) { + snprintf(xstats_names[cnt_used_entries].name, + sizeof(xstats_names[0].name), + "tx_q%u%s", + id_queue, + rte_txq_stats_strings[idx].name); + cnt_used_entries++; + } + } + + if (dev->dev_ops->xstats_get_names_by_id != NULL) { + /* If there are any driver-specific xstats, append them + * to end of list. 
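From the caller's perspective, the ids == NULL path of rte_eth_xstats_get_names_by_id() shown above (it continues just below) doubles as a size query: pass a NULL buffer and the required element count comes back. A sketch of the resulting two-call pattern, with error handling trimmed:

#include <stdio.h>
#include <stdlib.h>
#include <rte_ethdev.h>

static void
dump_xstat_names(uint8_t port_id)
{
        struct rte_eth_xstat_name *names;
        int len, i;

        /* With ids == NULL and no buffer, the call reports the count. */
        len = rte_eth_xstats_get_names_by_id(port_id, NULL, 0, NULL);
        if (len <= 0)
                return;
        names = malloc(sizeof(*names) * len);
        if (names == NULL)
                return;
        if (rte_eth_xstats_get_names_by_id(port_id, names, len, NULL) == len)
                for (i = 0; i < len; i++)
                        printf("%d: %s\n", i, names[i].name);
        free(names);
}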
+ */ + cnt_driver_entries = + (*dev->dev_ops->xstats_get_names_by_id)( + dev, + xstats_names + cnt_used_entries, + NULL, + size - cnt_used_entries); + if (cnt_driver_entries < 0) + return cnt_driver_entries; + cnt_used_entries += cnt_driver_entries; + + } else if (dev->dev_ops->xstats_get_names != NULL) { + /* If there are any driver-specific xstats, append them + * to end of list. + */ + cnt_driver_entries = (*dev->dev_ops->xstats_get_names)( + dev, + xstats_names + cnt_used_entries, + size - cnt_used_entries); + if (cnt_driver_entries < 0) + return cnt_driver_entries; + cnt_used_entries += cnt_driver_entries; + } + + return cnt_used_entries; + } + /* Get only xstats given by IDS */ + else { + uint16_t len, i; + struct rte_eth_xstat_name *xstats_names_copy; + + len = rte_eth_xstats_get_names_by_id(port_id, NULL, 0, NULL); + + xstats_names_copy = + malloc(sizeof(struct rte_eth_xstat_name) * len); + if (!xstats_names_copy) { + RTE_PMD_DEBUG_TRACE( + "ERROR: can't allocate memory for values_copy\n"); + free(xstats_names_copy); + return -1; + } + + rte_eth_xstats_get_names_by_id(port_id, xstats_names_copy, + len, NULL); + + for (i = 0; i < size; i++) { + if (ids[i] >= len) { + RTE_PMD_DEBUG_TRACE( + "ERROR: id value isn't valid\n"); + return -1; + } + strcpy(xstats_names[i].name, + xstats_names_copy[ids[i]].name); + } + free(xstats_names_copy); + return size; + } +} + +int rte_eth_xstats_get_names(uint8_t port_id, struct rte_eth_xstat_name *xstats_names, - unsigned size) + unsigned int size) { struct rte_eth_dev *dev; int cnt_used_entries; @@ -1474,13 +1611,139 @@ rte_eth_xstats_get_names(uint8_t port_id, /* retrieve ethdev extended statistics */ int +rte_eth_xstats_get_by_id(uint8_t port_id, const uint64_t *ids, uint64_t *values, + unsigned int n) +{ + /* If need all xstats */ + if (!ids) { + struct rte_eth_stats eth_stats; + struct rte_eth_dev *dev; + unsigned int count = 0, i, q; + signed int xcount = 0; + uint64_t val, *stats_ptr; + uint16_t nb_rxqs, nb_txqs; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, + RTE_ETHDEV_QUEUE_STAT_CNTRS); + nb_txqs = RTE_MIN(dev->data->nb_tx_queues, + RTE_ETHDEV_QUEUE_STAT_CNTRS); + + /* Return generic statistics */ + count = RTE_NB_STATS + (nb_rxqs * RTE_NB_RXQ_STATS) + + (nb_txqs * RTE_NB_TXQ_STATS); + + + /* implemented by the driver */ + if (dev->dev_ops->xstats_get_by_id != NULL) { + /* Retrieve the xstats from the driver at the end of the + * xstats struct. Retrieve all xstats. + */ + xcount = (*dev->dev_ops->xstats_get_by_id)(dev, + NULL, + values ? values + count : NULL, + (n > count) ? n - count : 0); + + if (xcount < 0) + return xcount; + /* implemented by the driver */ + } else if (dev->dev_ops->xstats_get != NULL) { + /* Retrieve the xstats from the driver at the end of the + * xstats struct. Retrieve all xstats. + * Compatibility for PMD without xstats_get_by_ids + */ + unsigned int size = (n > count) ? n - count : 1; + struct rte_eth_xstat xstats[size]; + + xcount = (*dev->dev_ops->xstats_get)(dev, + values ? 
xstats : NULL, size); + + if (xcount < 0) + return xcount; + + if (values != NULL) + for (i = 0 ; i < (unsigned int)xcount; i++) + values[i + count] = xstats[i].value; + } + + if (n < count + xcount || values == NULL) + return count + xcount; + + /* now fill the xstats structure */ + count = 0; + rte_eth_stats_get(port_id, ð_stats); + + /* global stats */ + for (i = 0; i < RTE_NB_STATS; i++) { + stats_ptr = RTE_PTR_ADD(ð_stats, + rte_stats_strings[i].offset); + val = *stats_ptr; + values[count++] = val; + } + + /* per-rxq stats */ + for (q = 0; q < nb_rxqs; q++) { + for (i = 0; i < RTE_NB_RXQ_STATS; i++) { + stats_ptr = RTE_PTR_ADD(ð_stats, + rte_rxq_stats_strings[i].offset + + q * sizeof(uint64_t)); + val = *stats_ptr; + values[count++] = val; + } + } + + /* per-txq stats */ + for (q = 0; q < nb_txqs; q++) { + for (i = 0; i < RTE_NB_TXQ_STATS; i++) { + stats_ptr = RTE_PTR_ADD(ð_stats, + rte_txq_stats_strings[i].offset + + q * sizeof(uint64_t)); + val = *stats_ptr; + values[count++] = val; + } + } + + return count + xcount; + } + /* Need only xstats given by IDS array */ + else { + uint16_t i, size; + uint64_t *values_copy; + + size = rte_eth_xstats_get_by_id(port_id, NULL, NULL, 0); + + values_copy = malloc(sizeof(*values_copy) * size); + if (!values_copy) { + RTE_PMD_DEBUG_TRACE( + "ERROR: can't allocate memory for values_copy\n"); + return -1; + } + + rte_eth_xstats_get_by_id(port_id, NULL, values_copy, size); + + for (i = 0; i < n; i++) { + if (ids[i] >= size) { + RTE_PMD_DEBUG_TRACE( + "ERROR: id value isn't valid\n"); + return -1; + } + values[i] = values_copy[ids[i]]; + } + free(values_copy); + return n; + } +} + +int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, - unsigned n) + unsigned int n) { struct rte_eth_stats eth_stats; struct rte_eth_dev *dev; - unsigned count = 0, i, q; - signed xcount = 0; + unsigned int count = 0, i, q; + signed int xcount = 0; uint64_t val, *stats_ptr; uint16_t nb_rxqs, nb_txqs; @@ -1606,6 +1869,18 @@ rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id, STAT_QMAP_RX); } +int +rte_eth_dev_fw_version_get(uint8_t port_id, char *fw_version, size_t fw_size) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->fw_version_get, -ENOTSUP); + return (*dev->dev_ops->fw_version_get)(dev, fw_version, fw_size); +} + void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info) { @@ -1625,7 +1900,6 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info) RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); (*dev->dev_ops->dev_infos_get)(dev, dev_info); - dev_info->pci_dev = dev->pci_dev; dev_info->driver_name = dev->data->drv_name; dev_info->nb_rx_queues = dev->data->nb_rx_queues; dev_info->nb_tx_queues = dev->data->nb_tx_queues; @@ -1883,13 +2157,7 @@ rte_eth_check_reta_mask(struct rte_eth_rss_reta_entry64 *reta_conf, if (!reta_conf) return -EINVAL; - if (reta_size != RTE_ALIGN(reta_size, RTE_RETA_GROUP_SIZE)) { - RTE_PMD_DEBUG_TRACE("Invalid reta size, should be %u aligned\n", - RTE_RETA_GROUP_SIZE); - return -EINVAL; - } - - num = reta_size / RTE_RETA_GROUP_SIZE; + num = (reta_size + RTE_RETA_GROUP_SIZE - 1) / RTE_RETA_GROUP_SIZE; for (i = 0; i < num; i++) { if (reta_conf[i].mask) return 0; @@ -2101,6 +2369,7 @@ rte_eth_dev_mac_addr_add(uint8_t port_id, struct ether_addr *addr, struct rte_eth_dev *dev; int index; uint64_t pool_mask; + int ret; 
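Stepping back from the rte_eth_dev_mac_addr_add() body that continues below: together, rte_eth_xstats_get_id_by_name() and the by-ID getter above enable a resolve-once, poll-often pattern. A sketch; "rx_good_packets" is one of the generic counters in rte_stats_strings, but any name the port reports works:

#include <inttypes.h>
#include <stdio.h>
#include <rte_ethdev.h>

static void
poll_one_xstat(uint8_t port_id)
{
        uint64_t id, value;

        /* Slow path, done once: string-compare lookup of the index. */
        if (rte_eth_xstats_get_id_by_name(port_id, "rx_good_packets", &id) != 0)
                return;

        /* Cheap path, done per poll: fetch only that counter. */
        if (rte_eth_xstats_get_by_id(port_id, &id, &value, 1) == 1)
                printf("rx_good_packets = %" PRIu64 "\n", value);
}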
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); dev = &rte_eth_devices[port_id]; @@ -2133,15 +2402,17 @@ rte_eth_dev_mac_addr_add(uint8_t port_id, struct ether_addr *addr, } /* Update NIC */ - (*dev->dev_ops->mac_addr_add)(dev, addr, index, pool); + ret = (*dev->dev_ops->mac_addr_add)(dev, addr, index, pool); - /* Update address in NIC data structure */ - ether_addr_copy(addr, &dev->data->mac_addrs[index]); + if (ret == 0) { + /* Update address in NIC data structure */ + ether_addr_copy(addr, &dev->data->mac_addrs[index]); - /* Update pool bitmap in NIC data structure */ - dev->data->mac_pool_sel[index] |= (1ULL << pool); + /* Update pool bitmap in NIC data structure */ + dev->data->mac_pool_sel[index] |= (1ULL << pool); + } - return 0; + return ret; } int @@ -2194,32 +2465,6 @@ rte_eth_dev_default_mac_addr_set(uint8_t port_id, struct ether_addr *addr) return 0; } -int -rte_eth_dev_set_vf_rxmode(uint8_t port_id, uint16_t vf, - uint16_t rx_mode, uint8_t on) -{ - uint16_t num_vfs; - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - - num_vfs = dev_info.max_vfs; - if (vf > num_vfs) { - RTE_PMD_DEBUG_TRACE("set VF RX mode:invalid VF id %d\n", vf); - return -EINVAL; - } - - if (rx_mode == 0) { - RTE_PMD_DEBUG_TRACE("set VF RX mode:mode mask ca not be zero\n"); - return -EINVAL; - } - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx_mode, -ENOTSUP); - return (*dev->dev_ops->set_vf_rx_mode)(dev, vf, rx_mode, on); -} /* * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find @@ -2309,76 +2554,6 @@ rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on) return (*dev->dev_ops->uc_all_hash_table_set)(dev, on); } -int -rte_eth_dev_set_vf_rx(uint8_t port_id, uint16_t vf, uint8_t on) -{ - uint16_t num_vfs; - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - - num_vfs = dev_info.max_vfs; - if (vf > num_vfs) { - RTE_PMD_DEBUG_TRACE("port %d: invalid vf id\n", port_id); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx, -ENOTSUP); - return (*dev->dev_ops->set_vf_rx)(dev, vf, on); -} - -int -rte_eth_dev_set_vf_tx(uint8_t port_id, uint16_t vf, uint8_t on) -{ - uint16_t num_vfs; - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - - num_vfs = dev_info.max_vfs; - if (vf > num_vfs) { - RTE_PMD_DEBUG_TRACE("set pool tx:invalid pool id=%d\n", vf); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_tx, -ENOTSUP); - return (*dev->dev_ops->set_vf_tx)(dev, vf, on); -} - -int -rte_eth_dev_set_vf_vlan_filter(uint8_t port_id, uint16_t vlan_id, - uint64_t vf_mask, uint8_t vlan_on) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - if (vlan_id > ETHER_MAX_VLAN_ID) { - RTE_PMD_DEBUG_TRACE("VF VLAN filter:invalid VLAN id=%d\n", - vlan_id); - return -EINVAL; - } - - if (vf_mask == 0) { - RTE_PMD_DEBUG_TRACE("VF VLAN filter:pool_mask can not be 0\n"); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_vlan_filter, -ENOTSUP); - return (*dev->dev_ops->set_vf_vlan_filter)(dev, vlan_id, - vf_mask, vlan_on); -} - int 
rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, uint16_t tx_rate) { @@ -2409,45 +2584,12 @@ int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, return (*dev->dev_ops->set_queue_rate_limit)(dev, queue_idx, tx_rate); } -int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, uint16_t tx_rate, - uint64_t q_msk) -{ - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - struct rte_eth_link link; - - if (q_msk == 0) - return 0; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - link = dev->data->dev_link; - - if (vf > dev_info.max_vfs) { - RTE_PMD_DEBUG_TRACE("set VF rate limit:port %d: " - "invalid vf id=%d\n", port_id, vf); - return -EINVAL; - } - - if (tx_rate > link.link_speed) { - RTE_PMD_DEBUG_TRACE("set VF rate limit:invalid tx_rate=%d, " - "bigger than link speed= %d\n", - tx_rate, link.link_speed); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rate_limit, -ENOTSUP); - return (*dev->dev_ops->set_vf_rate_limit)(dev, vf, tx_rate, q_msk); -} - int rte_eth_mirror_rule_set(uint8_t port_id, struct rte_eth_mirror_conf *mirror_conf, uint8_t rule_id, uint8_t on) { - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_dev *dev; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); if (mirror_conf->rule_type == 0) { @@ -2483,7 +2625,7 @@ rte_eth_mirror_rule_set(uint8_t port_id, int rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id) { - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_dev *dev; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); @@ -2590,7 +2732,7 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev, dev_cb = *cb_lst; cb_lst->active = 1; if (cb_arg != NULL) - dev_cb.cb_arg = (void *) cb_arg; + dev_cb.cb_arg = cb_arg; rte_spinlock_unlock(&rte_eth_dev_cb_lock); dev_cb.cb_fn(dev->data->port_id, dev_cb.event, @@ -2613,7 +2755,13 @@ rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data) RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); dev = &rte_eth_devices[port_id]; - intr_handle = &dev->pci_dev->intr_handle; + + if (!dev->intr_handle) { + RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n"); + return -ENOTSUP; + } + + intr_handle = dev->intr_handle; if (!intr_handle->intr_vec) { RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); return -EPERM; @@ -2641,7 +2789,7 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, const struct rte_memzone *mz; snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - dev->driver->pci_drv.driver.name, ring_name, + dev->data->drv_name, ring_name, dev->data->port_id, queue_id); mz = rte_memzone_lookup(z_name); @@ -2673,7 +2821,12 @@ rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id, return -EINVAL; } - intr_handle = &dev->pci_dev->intr_handle; + if (!dev->intr_handle) { + RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n"); + return -ENOTSUP; + } + + intr_handle = dev->intr_handle; if (!intr_handle->intr_vec) { RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); return -EPERM; @@ -3266,26 +3419,6 @@ rte_eth_dev_get_dcb_info(uint8_t port_id, return (*dev->dev_ops->get_dcb_info)(dev, dcb_info); } -void -rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, struct rte_pci_device *pci_dev) -{ - if ((eth_dev == NULL) || (pci_dev == NULL)) { - RTE_PMD_DEBUG_TRACE("NULL pointer eth_dev=%p pci_dev=%p\n", - eth_dev, pci_dev); - return; - } - - eth_dev->data->dev_flags = 0; - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) - 
eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_DETACHABLE) - eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE; - - eth_dev->data->kdrv = pci_dev->kdrv; - eth_dev->data->numa_node = pci_dev->device.numa_node; - eth_dev->data->drv_name = pci_dev->driver->driver.name; -} - int rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id, struct rte_eth_l2_tunnel_conf *l2_tunnel) diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 96781792..0f38b45f 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -179,9 +179,9 @@ extern "C" { #include <rte_log.h> #include <rte_interrupts.h> -#include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> +#include <rte_errno.h> #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -564,7 +564,7 @@ struct rte_eth_rss_reta_entry64 { /** * This enum indicates the possible number of traffic classes - * in DCB configratioins + * in DCB configurations */ enum rte_eth_nb_tcs { ETH_4_TCS = 4, /**< 4 TCs with DCB. */ @@ -702,6 +702,29 @@ struct rte_eth_desc_lim { uint16_t nb_max; /**< Max allowed number of descriptors. */ uint16_t nb_min; /**< Min allowed number of descriptors. */ uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + + /** + * Max allowed number of segments per whole packet. + * + * - For TSO packet this is the total number of data descriptors allowed + * by device. + * + * @see nb_mtu_seg_max + */ + uint16_t nb_seg_max; + + /** + * Max number of segments per one MTU. + * + * - For non-TSO packet, this is the maximum allowed number of segments + * in a single transmit packet. + * + * - For TSO packet each segment within the TSO may span up to this + * value. + * + * @see nb_seg_max + */ + uint16_t nb_mtu_seg_max; }; /** @@ -792,9 +815,11 @@ struct rte_eth_udp_tunnel { */ struct rte_intr_conf { /** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */ - uint16_t lsc; + uint32_t lsc:1; /** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */ - uint16_t rxq; + uint32_t rxq:1; + /** enable/disable rmv interrupt. 0 (default) - disable, 1 enable */ + uint32_t rmv:1; }; /** @@ -857,6 +882,7 @@ struct rte_eth_conf { #define DEV_RX_OFFLOAD_TCP_LRO 0x00000010 #define DEV_RX_OFFLOAD_QINQ_STRIP 0x00000020 #define DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000040 +#define DEV_RX_OFFLOAD_MACSEC_STRIP 0x00000080 /** * TX offload capabilities of a device. @@ -874,6 +900,9 @@ struct rte_eth_conf { #define DEV_TX_OFFLOAD_GRE_TNL_TSO 0x00000400 /**< Used for tunneling packet. */ #define DEV_TX_OFFLOAD_IPIP_TNL_TSO 0x00000800 /**< Used for tunneling packet. */ #define DEV_TX_OFFLOAD_GENEVE_TNL_TSO 0x00001000 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_MACSEC_INSERT 0x00002000 + +struct rte_pci_device; /** * Ethernet device information @@ -938,23 +967,26 @@ struct rte_eth_txq_info { /** * An Ethernet device extended statistic structure * - * This structure is used by ethdev->eth_xstats_get() to provide - * statistics that are not provided in the generic rte_eth_stats + * This structure is used by rte_eth_xstats_get() to provide + * statistics that are not provided in the generic *rte_eth_stats* * structure. + * It maps a name id, corresponding to an index in the array returned + * by rte_eth_xstats_get_names(), to a statistic value. */ struct rte_eth_xstat { - uint64_t id; - uint64_t value; + uint64_t id; /**< The index in xstats name array. 
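The new nb_seg_max/nb_mtu_seg_max descriptor limits above give applications a portable way to validate segment counts before handing packets to the PMD. A hedged sketch; it assumes a PMD that fills in the limit, and treats 0 as "not reported":

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Return non-zero if a non-TSO packet respects nb_mtu_seg_max. */
static int
seg_count_ok(uint8_t port_id, const struct rte_mbuf *pkt)
{
        struct rte_eth_dev_info info;

        rte_eth_dev_info_get(port_id, &info);
        if (info.tx_desc_lim.nb_mtu_seg_max == 0)
                return 1; /* limit not reported by this PMD */
        return pkt->nb_segs <= info.tx_desc_lim.nb_mtu_seg_max;
}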
*/ + uint64_t value; /**< The statistic counter value. */ }; /** - * A name-key lookup element for extended statistics. + * A name element for extended statistics. * - * This structure is used to map between names and ID numbers - * for extended ethernet statistics. + * An array of this structure is returned by rte_eth_xstats_get_names(). + * It lists the names of extended statistics for a PMD. The *rte_eth_xstat* + * structure references these names by their array index. */ struct rte_eth_xstat_name { - char name[RTE_ETH_XSTATS_NAME_SIZE]; + char name[RTE_ETH_XSTATS_NAME_SIZE]; /**< The statistic name. */ }; #define ETH_DCB_NUM_TCS 8 @@ -1001,15 +1033,6 @@ struct rte_eth_dev_callback; /** @internal Structure to keep track of registered callbacks */ TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback); - -#ifdef RTE_LIBRTE_ETHDEV_DEBUG -#define RTE_PMD_DEBUG_TRACE(...) \ - rte_pmd_debug_trace(__func__, __VA_ARGS__) -#else -#define RTE_PMD_DEBUG_TRACE(...) -#endif - - /* Macros to check for valid port */ #define RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, retval) do { \ if (!rte_eth_dev_is_valid_port(port_id)) { \ @@ -1089,6 +1112,12 @@ typedef int (*eth_xstats_get_t)(struct rte_eth_dev *dev, struct rte_eth_xstat *stats, unsigned n); /**< @internal Get extended stats of an Ethernet device. */ +typedef int (*eth_xstats_get_by_id_t)(struct rte_eth_dev *dev, + const uint64_t *ids, + uint64_t *values, + unsigned int n); +/**< @internal Get extended stats of an Ethernet device. */ + typedef void (*eth_xstats_reset_t)(struct rte_eth_dev *dev); /**< @internal Reset extended stats of an Ethernet device. */ @@ -1096,6 +1125,11 @@ typedef int (*eth_xstats_get_names_t)(struct rte_eth_dev *dev, struct rte_eth_xstat_name *xstats_names, unsigned size); /**< @internal Get names of extended stats of an Ethernet device. */ +typedef int (*eth_xstats_get_names_by_id_t)(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, const uint64_t *ids, + unsigned int size); +/**< @internal Get names of extended stats of an Ethernet device. */ + typedef int (*eth_queue_stats_mapping_set_t)(struct rte_eth_dev *dev, uint16_t queue_id, uint8_t stat_idx, @@ -1145,11 +1179,24 @@ typedef void (*eth_queue_release_t)(void *queue); typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev, uint16_t rx_queue_id); -/**< @internal Get number of available descriptors on a receive queue of an Ethernet device. */ +/**< @internal Get number of used descriptors on a receive queue. */ typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset); /**< @internal Check DD bit of specific RX descriptor */ +typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset); +/**< @internal Check the status of a Rx descriptor */ + +typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset); +/**< @internal Check the status of a Tx descriptor */ + +typedef int (*eth_fw_version_get_t)(struct rte_eth_dev *dev, + char *fw_version, size_t fw_size); +/**< @internal Get firmware information of an Ethernet device. */ + +typedef int (*eth_tx_done_cleanup_t)(void *txq, uint32_t free_cnt); +/**< @internal Force mbufs to be from TX ring. */ + typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev, uint16_t rx_queue_id, struct rte_eth_rxq_info *qinfo); @@ -1191,6 +1238,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq, uint16_t nb_pkts); /**< @internal Send output packets on a transmit queue of an Ethernet device. 
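The fw_version_get hook declared above backs the public rte_eth_dev_fw_version_get(); per its contract, a positive return value is the buffer size the untruncated string would need. A usage sketch (the 64-byte buffer is an arbitrary choice):

#include <stdio.h>
#include <rte_ethdev.h>

static void
print_fw_version(uint8_t port_id)
{
        char fw[64];
        int ret = rte_eth_dev_fw_version_get(port_id, fw, sizeof(fw));

        if (ret == 0)
                printf("port %u firmware: %s\n", port_id, fw);
        else if (ret > 0)
                printf("port %u: firmware string needs %d bytes\n",
                       port_id, ret);
        /* ret < 0: -ENOTSUP or -ENODEV */
}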
*/ +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */ + typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); /**< @internal Get current flow control parameter on an Ethernet device */ @@ -1230,7 +1282,7 @@ typedef int (*eth_dev_led_off_t)(struct rte_eth_dev *dev); typedef void (*eth_mac_addr_remove_t)(struct rte_eth_dev *dev, uint32_t index); /**< @internal Remove MAC address from receive address register */ -typedef void (*eth_mac_addr_add_t)(struct rte_eth_dev *dev, +typedef int (*eth_mac_addr_add_t)(struct rte_eth_dev *dev, struct ether_addr *mac_addr, uint32_t index, uint32_t vmdq); @@ -1249,39 +1301,11 @@ typedef int (*eth_uc_all_hash_table_set_t)(struct rte_eth_dev *dev, uint8_t on); /**< @internal Set all Unicast Hash bitmap */ -typedef int (*eth_set_vf_rx_mode_t)(struct rte_eth_dev *dev, - uint16_t vf, - uint16_t rx_mode, - uint8_t on); -/**< @internal Set a VF receive mode */ - -typedef int (*eth_set_vf_rx_t)(struct rte_eth_dev *dev, - uint16_t vf, - uint8_t on); -/**< @internal Set a VF receive mode */ - -typedef int (*eth_set_vf_tx_t)(struct rte_eth_dev *dev, - uint16_t vf, - uint8_t on); -/**< @internal Enable or disable a VF transmit */ - -typedef int (*eth_set_vf_vlan_filter_t)(struct rte_eth_dev *dev, - uint16_t vlan, - uint64_t vf_mask, - uint8_t vlan_on); -/**< @internal Set VF VLAN pool filter */ - typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t tx_rate); /**< @internal Set queue TX rate */ -typedef int (*eth_set_vf_rate_limit_t)(struct rte_eth_dev *dev, - uint16_t vf, - uint16_t tx_rate, - uint64_t q_msk); -/**< @internal Set VF TX rate */ - typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev, struct rte_eth_mirror_conf *mirror_conf, uint8_t rule_id, @@ -1431,11 +1455,18 @@ struct eth_dev_ops { eth_dev_set_link_up_t dev_set_link_up; /**< Device link up. */ eth_dev_set_link_down_t dev_set_link_down; /**< Device link down. */ eth_dev_close_t dev_close; /**< Close device. */ + eth_link_update_t link_update; /**< Get device link state. */ + eth_promiscuous_enable_t promiscuous_enable; /**< Promiscuous ON. */ eth_promiscuous_disable_t promiscuous_disable;/**< Promiscuous OFF. */ eth_allmulticast_enable_t allmulticast_enable;/**< RX multicast ON. */ eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OF. */ - eth_link_update_t link_update; /**< Get device link state. */ + eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address. */ + eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address. */ + eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address. */ + eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs. */ + mtu_set_t mtu_set; /**< Set MTU. */ + eth_stats_get_t stats_get; /**< Get generic device statistics. */ eth_stats_reset_t stats_reset; /**< Reset generic device statistics. */ eth_xstats_get_t xstats_get; /**< Get extended device statistics. */ @@ -1444,109 +1475,104 @@ struct eth_dev_ops { /**< Get names of extended statistics. */ eth_queue_stats_mapping_set_t queue_stats_mapping_set; /**< Configure per queue stat counter mapping. */ + eth_dev_infos_get_t dev_infos_get; /**< Get device info. */ + eth_rxq_info_get_t rxq_info_get; /**< retrieve RX queue information. */ + eth_txq_info_get_t txq_info_get; /**< retrieve TX queue information. */ + eth_fw_version_get_t fw_version_get; /**< Get firmware version. 
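Note the eth_mac_addr_add_t return type change above, from void to int: a PMD can now report a hardware programming failure instead of having it silently ignored, and rte_eth_dev_mac_addr_add() only updates its shadow state on success. A hypothetical driver-side sketch; the mydrv name and the 128-slot limit are invented for illustration:

#include <errno.h>
#include <rte_ethdev.h>
#include <rte_ether.h>

static int
mydrv_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
                   uint32_t index, uint32_t vmdq)
{
        (void)dev; (void)mac_addr; (void)vmdq;
        /* ... program hardware filter slot 'index' here ... */
        if (index >= 128)       /* pretend the hardware has 128 slots */
                return -ENOSPC; /* now propagated to the application */
        return 0;
}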
*/ eth_dev_supported_ptypes_get_t dev_supported_ptypes_get; - /**< Get packet types supported and identified by device*/ - mtu_set_t mtu_set; /**< Set MTU. */ - vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */ - vlan_tpid_set_t vlan_tpid_set; /**< Outer/Inner VLAN TPID Setup. */ + /**< Get packet types supported and identified by device. */ + + vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */ + vlan_tpid_set_t vlan_tpid_set; /**< Outer/Inner VLAN TPID Setup. */ vlan_strip_queue_set_t vlan_strip_queue_set; /**< VLAN Stripping on queue. */ vlan_offload_set_t vlan_offload_set; /**< Set VLAN Offload. */ - vlan_pvid_set_t vlan_pvid_set; /**< Set port based TX VLAN insertion */ - eth_queue_start_t rx_queue_start;/**< Start RX for a queue.*/ - eth_queue_stop_t rx_queue_stop;/**< Stop RX for a queue.*/ - eth_queue_start_t tx_queue_start;/**< Start TX for a queue.*/ - eth_queue_stop_t tx_queue_stop;/**< Stop TX for a queue.*/ - eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue.*/ - eth_queue_release_t rx_queue_release;/**< Release RX queue.*/ - eth_rx_queue_count_t rx_queue_count; /**< Get Rx queue count. */ - eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit */ - /**< Enable Rx queue interrupt. */ - eth_rx_enable_intr_t rx_queue_intr_enable; - /**< Disable Rx queue interrupt.*/ - eth_rx_disable_intr_t rx_queue_intr_disable; - eth_tx_queue_setup_t tx_queue_setup;/**< Set up device TX queue.*/ - eth_queue_release_t tx_queue_release;/**< Release TX queue.*/ + vlan_pvid_set_t vlan_pvid_set; /**< Set port based TX VLAN insertion. */ + + eth_queue_start_t rx_queue_start;/**< Start RX for a queue. */ + eth_queue_stop_t rx_queue_stop; /**< Stop RX for a queue. */ + eth_queue_start_t tx_queue_start;/**< Start TX for a queue. */ + eth_queue_stop_t tx_queue_stop; /**< Stop TX for a queue. */ + eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue. */ + eth_queue_release_t rx_queue_release; /**< Release RX queue. */ + eth_rx_queue_count_t rx_queue_count; + /**< Get the number of used RX descriptors. */ + eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit. */ + eth_rx_descriptor_status_t rx_descriptor_status; + /**< Check the status of a Rx descriptor. */ + eth_tx_descriptor_status_t tx_descriptor_status; + /**< Check the status of a Tx descriptor. */ + eth_rx_enable_intr_t rx_queue_intr_enable; /**< Enable Rx queue interrupt. */ + eth_rx_disable_intr_t rx_queue_intr_disable; /**< Disable Rx queue interrupt. */ + eth_tx_queue_setup_t tx_queue_setup;/**< Set up device TX queue. */ + eth_queue_release_t tx_queue_release; /**< Release TX queue. */ + eth_tx_done_cleanup_t tx_done_cleanup;/**< Free tx ring mbufs */ + eth_dev_led_on_t dev_led_on; /**< Turn on LED. */ eth_dev_led_off_t dev_led_off; /**< Turn off LED. */ + flow_ctrl_get_t flow_ctrl_get; /**< Get flow control. */ flow_ctrl_set_t flow_ctrl_set; /**< Setup flow control. 
*/ - priority_flow_ctrl_set_t priority_flow_ctrl_set; /**< Setup priority flow control.*/ - eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address */ - eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address */ - eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address */ - eth_uc_hash_table_set_t uc_hash_table_set; /**< Set Unicast Table Array */ - eth_uc_all_hash_table_set_t uc_all_hash_table_set; /**< Set Unicast hash bitmap */ - eth_mirror_rule_set_t mirror_rule_set; /**< Add a traffic mirror rule.*/ - eth_mirror_rule_reset_t mirror_rule_reset; /**< reset a traffic mirror rule.*/ - eth_set_vf_rx_mode_t set_vf_rx_mode; /**< Set VF RX mode */ - eth_set_vf_rx_t set_vf_rx; /**< enable/disable a VF receive */ - eth_set_vf_tx_t set_vf_tx; /**< enable/disable a VF transmit */ - eth_set_vf_vlan_filter_t set_vf_vlan_filter; /**< Set VF VLAN filter */ - /** Add UDP tunnel port. */ - eth_udp_tunnel_port_add_t udp_tunnel_port_add; - /** Del UDP tunnel port. */ - eth_udp_tunnel_port_del_t udp_tunnel_port_del; - eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit */ - eth_set_vf_rate_limit_t set_vf_rate_limit; /**< Set VF rate limit */ - /** Update redirection table. */ - reta_update_t reta_update; - /** Query redirection table. */ - reta_query_t reta_query; - - eth_get_reg_t get_reg; - /**< Get registers */ - eth_get_eeprom_length_t get_eeprom_length; - /**< Get eeprom length */ - eth_get_eeprom_t get_eeprom; - /**< Get eeprom data */ - eth_set_eeprom_t set_eeprom; - /**< Set eeprom */ - /* bypass control */ + priority_flow_ctrl_set_t priority_flow_ctrl_set; /**< Setup priority flow control. */ + + eth_uc_hash_table_set_t uc_hash_table_set; /**< Set Unicast Table Array. */ + eth_uc_all_hash_table_set_t uc_all_hash_table_set; /**< Set Unicast hash bitmap. */ + + eth_mirror_rule_set_t mirror_rule_set; /**< Add a traffic mirror rule. */ + eth_mirror_rule_reset_t mirror_rule_reset; /**< reset a traffic mirror rule. */ + + eth_udp_tunnel_port_add_t udp_tunnel_port_add; /** Add UDP tunnel port. */ + eth_udp_tunnel_port_del_t udp_tunnel_port_del; /** Del UDP tunnel port. */ + eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf; + /** Config ether type of l2 tunnel. */ + eth_l2_tunnel_offload_set_t l2_tunnel_offload_set; + /** Enable/disable l2 tunnel offload functions. */ + + eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit. */ + + rss_hash_update_t rss_hash_update; /** Configure RSS hash protocols. */ + rss_hash_conf_get_t rss_hash_conf_get; /** Get current RSS hash configuration. */ + reta_update_t reta_update; /** Update redirection table. */ + reta_query_t reta_query; /** Query redirection table. */ + + eth_get_reg_t get_reg; /**< Get registers. */ + eth_get_eeprom_length_t get_eeprom_length; /**< Get eeprom length. */ + eth_get_eeprom_t get_eeprom; /**< Get eeprom data. */ + eth_set_eeprom_t set_eeprom; /**< Set eeprom. 
*/ + + /* bypass control */ #ifdef RTE_NIC_BYPASS - bypass_init_t bypass_init; - bypass_state_set_t bypass_state_set; - bypass_state_show_t bypass_state_show; - bypass_event_set_t bypass_event_set; - bypass_event_show_t bypass_event_show; - bypass_wd_timeout_set_t bypass_wd_timeout_set; - bypass_wd_timeout_show_t bypass_wd_timeout_show; - bypass_ver_show_t bypass_ver_show; - bypass_wd_reset_t bypass_wd_reset; + bypass_init_t bypass_init; + bypass_state_set_t bypass_state_set; + bypass_state_show_t bypass_state_show; + bypass_event_set_t bypass_event_set; + bypass_event_show_t bypass_event_show; + bypass_wd_timeout_set_t bypass_wd_timeout_set; + bypass_wd_timeout_show_t bypass_wd_timeout_show; + bypass_ver_show_t bypass_ver_show; + bypass_wd_reset_t bypass_wd_reset; #endif - /** Configure RSS hash protocols. */ - rss_hash_update_t rss_hash_update; - /** Get current RSS hash configuration. */ - rss_hash_conf_get_t rss_hash_conf_get; - eth_filter_ctrl_t filter_ctrl; - /**< common filter control. */ - eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs */ - eth_rxq_info_get_t rxq_info_get; - /**< retrieve RX queue information. */ - eth_txq_info_get_t txq_info_get; - /**< retrieve TX queue information. */ + eth_filter_ctrl_t filter_ctrl; /**< common filter control. */ + + eth_get_dcb_info get_dcb_info; /** Get DCB information. */ + + eth_timesync_enable_t timesync_enable; /** Turn IEEE1588/802.1AS timestamping on. */ - eth_timesync_enable_t timesync_enable; + eth_timesync_disable_t timesync_disable; /** Turn IEEE1588/802.1AS timestamping off. */ - eth_timesync_disable_t timesync_disable; - /** Read the IEEE1588/802.1AS RX timestamp. */ eth_timesync_read_rx_timestamp_t timesync_read_rx_timestamp; - /** Read the IEEE1588/802.1AS TX timestamp. */ + /** Read the IEEE1588/802.1AS RX timestamp. */ eth_timesync_read_tx_timestamp_t timesync_read_tx_timestamp; - - /** Get DCB information */ - eth_get_dcb_info get_dcb_info; - /** Adjust the device clock.*/ - eth_timesync_adjust_time timesync_adjust_time; - /** Get the device clock time. */ - eth_timesync_read_time timesync_read_time; - /** Set the device clock time. */ - eth_timesync_write_time timesync_write_time; - /** Config ether type of l2 tunnel */ - eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf; - /** Enable/disable l2 tunnel offload functions */ - eth_l2_tunnel_offload_set_t l2_tunnel_offload_set; + /** Read the IEEE1588/802.1AS TX timestamp. */ + eth_timesync_adjust_time timesync_adjust_time; /** Adjust the device clock. */ + eth_timesync_read_time timesync_read_time; /** Get the device clock time. */ + eth_timesync_write_time timesync_write_time; /** Set the device clock time. */ + + eth_xstats_get_by_id_t xstats_get_by_id; + /**< Get extended device statistic values by ID. */ + eth_xstats_get_names_by_id_t xstats_get_names_by_id; + /**< Get name of extended device statistics by ID. */ }; /** @@ -1613,6 +1639,14 @@ struct rte_eth_rxtx_callback { }; /** + * A set of values to describe the possible states of an eth device. + */ +enum rte_eth_dev_state { + RTE_ETH_DEV_UNUSED = 0, + RTE_ETH_DEV_ATTACHED, +}; + +/** * @internal * The generic data structure associated with each ethernet device. * @@ -1625,10 +1659,11 @@ struct rte_eth_rxtx_callback { struct rte_eth_dev { eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. 
*/ struct rte_eth_dev_data *data; /**< Pointer to device data */ - const struct eth_driver *driver;/**< Driver for this device */ const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ - struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */ + struct rte_device *device; /**< Backing device */ + struct rte_intr_handle *intr_handle; /**< Device interrupt handle */ /** User application callbacks for NIC interrupts */ struct rte_eth_dev_cb_list link_intr_cbs; /** @@ -1641,7 +1676,7 @@ struct rte_eth_dev { * received packets before passing them to the driver for transmission. */ struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; - uint8_t attached; /**< Flag indicating the port is attached */ + enum rte_eth_dev_state state; /**< Flag indicating the port state */ } __rte_cache_aligned; struct rte_eth_dev_sriov { @@ -1711,6 +1746,8 @@ struct rte_eth_dev_data { #define RTE_ETH_DEV_INTR_LSC 0x0002 /** Device is a bonded slave */ #define RTE_ETH_DEV_BONDED_SLAVE 0x0004 +/** Device supports device removal interrupt */ +#define RTE_ETH_DEV_INTR_RMV 0x0008 /** * @internal @@ -1720,6 +1757,25 @@ struct rte_eth_dev_data { extern struct rte_eth_dev rte_eth_devices[]; /** + * Iterates over valid ethdev ports. + * + * @param port_id + * The id of the next possible valid port. + * @return + * Next valid port id, RTE_MAX_ETHPORTS if there is none. + */ +uint8_t rte_eth_find_next(uint8_t port_id); + +/** + * Macro to iterate over all enabled ethdev ports. + */ +#define RTE_ETH_FOREACH_DEV(p) \ + for (p = rte_eth_find_next(0); \ + (unsigned int)p < (unsigned int)RTE_MAX_ETHPORTS; \ + p = rte_eth_find_next(p + 1)) + + +/** * Get the total number of Ethernet devices that have been successfully * initialized by the [matching] Ethernet driver during the PCI probing phase. * All devices whose port identifier is in the range @@ -1727,7 +1783,7 @@ extern struct rte_eth_dev rte_eth_devices[]; * immediately after invoking rte_eal_init(). * If the application unplugs a port using hotplug function, The enabled port * numbers may be noncontiguous. In the case, the applications need to manage - * enabled port by themselves. + * enabled port by using the ``RTE_ETH_FOREACH_DEV()`` macro. * * @return * - The total number of usable Ethernet devices. @@ -1759,6 +1815,19 @@ struct rte_eth_dev *rte_eth_dev_allocate(const char *name); /** * @internal + * Attach to the ethdev already initialized by the primary + * process. + * + * @param name Ethernet device's name. + * @return + * - Success: Slot in the rte_dev_devices array for attached + * device. + * - Error: Null pointer. + */ +struct rte_eth_dev *rte_eth_dev_attach_secondary(const char *name); + +/** + * @internal * Release the specified ethdev port. * * @param eth_dev @@ -1769,7 +1838,7 @@ struct rte_eth_dev *rte_eth_dev_allocate(const char *name); int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev); /** - * Attach a new Ethernet device specified by aruguments. + * Attach a new Ethernet device specified by arguments. * * @param devargs * A pointer to a strings array describing the new device @@ -1796,78 +1865,6 @@ int rte_eth_dev_attach(const char *devargs, uint8_t *port_id); */ int rte_eth_dev_detach(uint8_t port_id, char *devname); -struct eth_driver; -/** - * @internal - * Initialization function of an Ethernet driver invoked for each matching - * Ethernet PCI device detected during the PCI probing phase. 
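The RTE_ETH_FOREACH_DEV() iterator above replaces open-coded "for (p = 0; p < rte_eth_dev_count(); p++)" loops, which silently break once hotplug leaves holes in the port numbering. A minimal sketch:

#include <stdio.h>
#include <rte_ethdev.h>

static void
list_ports(void)
{
        uint8_t port_id;

        /* Visits only ports in the RTE_ETH_DEV_ATTACHED state, skipping
         * slots freed by detached devices. */
        RTE_ETH_FOREACH_DEV(port_id) {
                struct rte_eth_dev_info info;

                rte_eth_dev_info_get(port_id, &info);
                printf("port %u: driver %s\n", port_id, info.driver_name);
        }
}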
- * - * @param eth_dev - * The *eth_dev* pointer is the address of the *rte_eth_dev* structure - * associated with the matching device and which have been [automatically] - * allocated in the *rte_eth_devices* array. - * The *eth_dev* structure is supplied to the driver initialization function - * with the following fields already initialized: - * - * - *pci_dev*: Holds the pointers to the *rte_pci_device* structure which - * contains the generic PCI information of the matching device. - * - * - *driver*: Holds the pointer to the *eth_driver* structure. - * - * - *dev_private*: Holds a pointer to the device private data structure. - * - * - *mtu*: Contains the default Ethernet maximum frame length (1500). - * - * - *port_id*: Contains the port index of the device (actually the index - * of the *eth_dev* structure in the *rte_eth_devices* array). - * - * @return - * - 0: Success, the device is properly initialized by the driver. - * In particular, the driver MUST have set up the *dev_ops* pointer - * of the *eth_dev* structure. - * - <0: Error code of the device initialization failure. - */ -typedef int (*eth_dev_init_t)(struct rte_eth_dev *eth_dev); - -/** - * @internal - * Finalization function of an Ethernet driver invoked for each matching - * Ethernet PCI device detected during the PCI closing phase. - * - * @param eth_dev - * The *eth_dev* pointer is the address of the *rte_eth_dev* structure - * associated with the matching device and which have been [automatically] - * allocated in the *rte_eth_devices* array. - * @return - * - 0: Success, the device is properly finalized by the driver. - * In particular, the driver MUST free the *dev_ops* pointer - * of the *eth_dev* structure. - * - <0: Error code of the device initialization failure. - */ -typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev); - -/** - * @internal - * The structure associated with a PMD Ethernet driver. - * - * Each Ethernet driver acts as a PCI driver and is represented by a generic - * *eth_driver* structure that holds: - * - * - An *rte_pci_driver* structure (which must be the first field). - * - * - The *eth_dev_init* function invoked for each matching PCI device. - * - * - The *eth_dev_uninit* function invoked for each matching PCI device. - * - * - The size of the private data to allocate for each matching device. - */ -struct eth_driver { - struct rte_pci_driver pci_drv; /**< The PMD is also a PCI driver. */ - eth_dev_init_t eth_dev_init; /**< Device init function. */ - eth_dev_uninit_t eth_dev_uninit; /**< Device uninit function. */ - unsigned int dev_private_size; /**< Size of device private data. */ -}; - /** * Convert a numerical speed in Mbps to a bitmap flag that can be used in * the bitmap link_speeds of the struct rte_eth_conf @@ -1914,6 +1911,19 @@ int rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_queue, uint16_t nb_tx_queue, const struct rte_eth_conf *eth_conf); /** + * @internal + * Release device queues and clear its configuration to force the user + * application to reconfigure it. It is for internal use only. + * + * @param dev + * Pointer to struct rte_eth_dev. + * + * @return + * void + */ +void _rte_eth_dev_reset(struct rte_eth_dev *dev); + +/** * Allocate and set up a receive queue for an Ethernet device. * * The function allocates a contiguous block of memory for *nb_rx_desc* @@ -2272,22 +2282,23 @@ void rte_eth_stats_reset(uint8_t port_id); * @param port_id * The port identifier of the Ethernet device. * @param xstats_names - * Block of memory to insert names into. 
Must be at least size in capacity. - * If set to NULL, function returns required capacity. + * An rte_eth_xstat_name array of at least *size* elements to + * be filled. If set to NULL, the function returns the required number + * of elements. * @param size - * Capacity of xstats_names (number of names). + * The size of the xstats_names array (number of elements). * @return - * - positive value lower or equal to size: success. The return value + * - A positive value lower or equal to size: success. The return value * is the number of entries filled in the stats table. - * - positive value higher than size: error, the given statistics table + * - A positive value higher than size: error, the given statistics table * is too small. The return value corresponds to the size that should * be given to succeed. The entries in the table are not valid and * shall not be used by the caller. - * - negative value on error (invalid port id) + * - A negative value on error (invalid port id). */ int rte_eth_xstats_get_names(uint8_t port_id, struct rte_eth_xstat_name *xstats_names, - unsigned size); + unsigned int size); /** * Retrieve extended statistics of an Ethernet device. @@ -2296,22 +2307,96 @@ int rte_eth_xstats_get_names(uint8_t port_id, * The port identifier of the Ethernet device. * @param xstats * A pointer to a table of structure of type *rte_eth_xstat* - * to be filled with device statistics ids and values. + * to be filled with device statistics ids and values: id is the + * index of the name string in xstats_names (see rte_eth_xstats_get_names()), + * and value is the statistic counter. * This parameter can be set to NULL if n is 0. * @param n - * The size of the stats table, which should be large enough to store - * all the statistics of the device. + * The size of the xstats array (number of elements). * @return - * - positive value lower or equal to n: success. The return value + * - A positive value lower or equal to n: success. The return value * is the number of entries filled in the stats table. - * - positive value higher than n: error, the given statistics table + * - A positive value higher than n: error, the given statistics table * is too small. The return value corresponds to the size that should * be given to succeed. The entries in the table are not valid and * shall not be used by the caller. - * - negative value on error (invalid port id) + * - A negative value on error (invalid port id). */ int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, - unsigned n); + unsigned int n); + +/** + * Retrieve names of extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param xstats_names + * An rte_eth_xstat_name array of at least *size* elements to + * be filled. If set to NULL, the function returns the required number + * of elements. + * @param ids + * IDs array given by app to retrieve specific statistics + * @param size + * The size of the xstats_names array (number of elements). + * @return + * - A positive value lower or equal to size: success. The return value + * is the number of entries filled in the stats table. + * - A positive value higher than size: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * - A negative value on error (invalid port id). 
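The clarified id semantics above (xstats[i].id indexes the name array) make a full statistics dump a simple join of the two calls documented here. A sketch with error handling trimmed:

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <rte_ethdev.h>

static void
dump_xstats(uint8_t port_id)
{
        struct rte_eth_xstat_name *names;
        struct rte_eth_xstat *xstats;
        int len, n, i;

        len = rte_eth_xstats_get_names(port_id, NULL, 0);
        if (len <= 0)
                return;
        names = calloc(len, sizeof(*names));
        xstats = calloc(len, sizeof(*xstats));
        if (names != NULL && xstats != NULL &&
            rte_eth_xstats_get_names(port_id, names, len) == len) {
                n = rte_eth_xstats_get(port_id, xstats, len);
                for (i = 0; i < n; i++) /* id indexes names[] */
                        printf("%s: %" PRIu64 "\n",
                               names[xstats[i].id].name, xstats[i].value);
        }
        free(names);
        free(xstats);
}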
+ */ +int +rte_eth_xstats_get_names_by_id(uint8_t port_id, + struct rte_eth_xstat_name *xstats_names, unsigned int size, + uint64_t *ids); + +/** + * Retrieve extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param ids + * A pointer to an ids array passed by the application. This tells which + * statistics values the function should retrieve. This parameter + * can be set to NULL if n is 0. In this case the function will retrieve + * all available statistics. + * @param values + * A pointer to a table to be filled with device statistics values. + * @param n + * The size of the ids array (number of elements). + * @return + * - A positive value lower or equal to n: success. The return value + * is the number of entries filled in the stats table. + * - A positive value higher than n: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * - A negative value on error (invalid port id). + */ +int rte_eth_xstats_get_by_id(uint8_t port_id, const uint64_t *ids, + uint64_t *values, unsigned int n); + +/** + * Gets the ID of a statistic from its name. + * + * This function searches for the statistic using string comparison, and + * as such should not be used on the fast-path. For fast-path retrieval of + * specific statistics, store the ID as provided in *id* from this function, + * and pass the ID to rte_eth_xstats_get(). + * + * @param port_id The port to look up statistics from + * @param xstat_name The name of the statistic to return + * @param[out] id A pointer to an app-supplied uint64_t which should be + * set to the ID of the stat if the stat exists. + * @return + * 0 on success + * -ENODEV for invalid port_id, + * -EINVAL if the xstat_name doesn't exist in port_id + */ +int rte_eth_xstats_get_id_by_name(uint8_t port_id, const char *xstat_name, + uint64_t *id); /** * Reset extended statistics of an Ethernet device. @@ -2385,6 +2470,27 @@ void rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr); void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info); /** + * Retrieve the firmware version of a device. + * + * @param port_id + * The port identifier of the device. + * @param fw_version + * A pointer to a string array storing the firmware version of a device; + * the string includes the terminating null. This pointer is allocated by the caller. + * @param fw_size + * The size of the string array pointed to by fw_version, which should be + * large enough to store the firmware version of the device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if operation is not supported. + * - (-ENODEV) if *port_id* invalid. + * - (>0) if *fw_size* is not enough to store the firmware version, return + * the size of the non-truncated string. + */ +int rte_eth_dev_fw_version_get(uint8_t port_id, + char *fw_version, size_t fw_size); + +/** * Retrieve the supported packet types of an Ethernet device. * * When a packet type is announced as supported, it *must* be recognized by @@ -2413,7 +2519,7 @@ void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info); * @param ptype_mask * A hint of what kind of packet type which the caller is interested in. * @param ptypes - * An array pointer to store adequent packet types, allocated by caller. + * An array pointer to store adequate packet types, allocated by caller. * @param num * Size of the array pointed by param ptypes.
* @return @@ -2553,12 +2659,12 @@ int rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask); int rte_eth_dev_get_vlan_offload(uint8_t port_id); /** - * Set port based TX VLAN insersion on or off. + * Set port based TX VLAN insertion on or off. * * @param port_id * The port identifier of the Ethernet device. * @param pvid - * Port based TX VLAN identifier togeth with user priority. + * Port based TX VLAN identifier together with user priority. * @param on * Turn on or off the port based TX VLAN insertion. * @@ -2615,7 +2721,7 @@ int rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on); * method to retrieve bursts of received packets and to immediately * queue them for further parallel processing by another logical core, * for instance. However, instead of having received packets being - * individually queued by the driver, this approach allows the invoker + * individually queued by the driver, this approach allows the caller * of the rte_eth_rx_burst() function to queue a burst of retrieved * packets at a time and therefore dramatically reduce the cost of * enqueue/dequeue operations per packet. @@ -2684,7 +2790,7 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id, } /** - * Get the number of used descriptors in a specific queue + * Get the number of used descriptors of an Rx queue * * @param port_id * The port identifier of the Ethernet device. @@ -2692,16 +2798,21 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id, * The queue id on the specific port. * @return * The number of used descriptors in the specific queue, or: - * (-EINVAL) if *port_id* is invalid + * (-EINVAL) if *port_id* or *queue_id* is invalid * (-ENOTSUP) if the device does not support this function */ static inline int rte_eth_rx_queue_count(uint8_t port_id, uint16_t queue_id) { - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_dev *dev; + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + dev = &rte_eth_devices[port_id]; RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_count, -ENOTSUP); - return (*dev->dev_ops->rx_queue_count)(dev, queue_id); + if (queue_id >= dev->data->nb_rx_queues) + return -EINVAL; + + return (*dev->dev_ops->rx_queue_count)(dev, queue_id); } /** @@ -2729,6 +2840,121 @@ rte_eth_rx_descriptor_done(uint8_t port_id, uint16_t queue_id, uint16_t offset) dev->data->rx_queues[queue_id], offset); } +#define RTE_ETH_RX_DESC_AVAIL 0 /**< Desc available for hw. */ +#define RTE_ETH_RX_DESC_DONE 1 /**< Desc done, filled by hw. */ +#define RTE_ETH_RX_DESC_UNAVAIL 2 /**< Desc used by driver or hw. */ + +/** + * Check the status of a Rx descriptor in the queue. + * + * It should be called in a similar context to the Rx function: + * - on a dataplane core + * - not concurrently on the same queue + * + * Since it's a dataplane function, no check is performed on port_id and + * queue_id. The caller must therefore ensure that the port is enabled + * and the queue is configured and running. + * + * Note: accessing a random descriptor in the ring may trigger cache + * misses and have a performance impact. + * + * @param port_id + * A valid port identifier of the Ethernet device. + * @param queue_id + * A valid Rx queue identifier on this port. + * @param offset + * The offset of the descriptor starting from tail (0 is the next + * packet to be received by the driver). + * + * @return + * - (RTE_ETH_RX_DESC_AVAIL): Descriptor is available for the hardware to + * receive a packet.
+ * - (RTE_ETH_RX_DESC_DONE): Descriptor is done, it is filled by hw, but + * not yet processed by the driver (i.e. in the receive queue). + * - (RTE_ETH_RX_DESC_UNAVAIL): Descriptor is unavailable, either held by + * the driver and not yet returned to hw, or reserved by the hw. + * - (-EINVAL) bad descriptor offset. + * - (-ENOTSUP) if the device does not support this function. + * - (-ENODEV) bad port or queue (only if compiled with debug). + */ +static inline int +rte_eth_rx_descriptor_status(uint8_t port_id, uint16_t queue_id, + uint16_t offset) +{ + struct rte_eth_dev *dev; + void *rxq; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); +#endif + dev = &rte_eth_devices[port_id]; +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (queue_id >= dev->data->nb_rx_queues) + return -ENODEV; +#endif + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_descriptor_status, -ENOTSUP); + rxq = dev->data->rx_queues[queue_id]; + + return (*dev->dev_ops->rx_descriptor_status)(rxq, offset); +} + +#define RTE_ETH_TX_DESC_FULL 0 /**< Desc filled for hw, waiting xmit. */ +#define RTE_ETH_TX_DESC_DONE 1 /**< Desc done, packet is transmitted. */ +#define RTE_ETH_TX_DESC_UNAVAIL 2 /**< Desc used by driver or hw. */ + +/** + * Check the status of a Tx descriptor in the queue. + * + * It should be called in a similar context to the Tx function: + * - on a dataplane core + * - not concurrently on the same queue + * + * Since it's a dataplane function, no check is performed on port_id and + * queue_id. The caller must therefore ensure that the port is enabled + * and the queue is configured and running. + * + * Note: accessing a random descriptor in the ring may trigger cache + * misses and have a performance impact. + * + * @param port_id + * A valid port identifier of the Ethernet device. + * @param queue_id + * A valid Tx queue identifier on this port. + * @param offset + * The offset of the descriptor starting from tail (0 is the place where + * the next packet will be sent). + * + * @return + * - (RTE_ETH_TX_DESC_FULL) Descriptor is being processed by the hw, i.e. + * in the transmit queue. + * - (RTE_ETH_TX_DESC_DONE) Hardware is done with this descriptor, it can + * be reused by the driver. + * - (RTE_ETH_TX_DESC_UNAVAIL): Descriptor is unavailable, reserved by the + * driver or the hardware. + * - (-EINVAL) bad descriptor offset. + * - (-ENOTSUP) if the device does not support this function. + * - (-ENODEV) bad port or queue (only if compiled with debug). + */ +static inline int rte_eth_tx_descriptor_status(uint8_t port_id, + uint16_t queue_id, uint16_t offset) +{ + struct rte_eth_dev *dev; + void *txq; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); +#endif + dev = &rte_eth_devices[port_id]; +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (queue_id >= dev->data->nb_tx_queues) + return -ENODEV; +#endif + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_descriptor_status, -ENOTSUP); + txq = dev->data->tx_queues[queue_id]; + + return (*dev->dev_ops->tx_descriptor_status)(txq, offset); +} + /** * Send a burst of output packets on a transmit queue of an Ethernet device. * @@ -2819,6 +3045,115 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); } +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Process a burst of output packets on a transmit queue of an Ethernet device.
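Before the rte_eth_tx_prepare() documentation continues below, a usage sketch for the two descriptor status helpers just defined. They are dataplane calls, so the port is assumed started and the queue configured; the offsets (0 and 32) are arbitrary probe points, not part of the API:

#include <rte_ethdev.h>

/* Is there at least one received packet waiting in the Rx ring? */
static int
rx_has_backlog(uint8_t port_id, uint16_t queue_id)
{
        return rte_eth_rx_descriptor_status(port_id, queue_id, 0) ==
               RTE_ETH_RX_DESC_DONE;
}

/* Has the slot 32 descriptors ahead of the Tx tail been completed? */
static int
tx_slot_done(uint8_t port_id, uint16_t queue_id)
{
        return rte_eth_tx_descriptor_status(port_id, queue_id, 32) ==
               RTE_ETH_TX_DESC_DONE;
}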
+
 /**
  * Send a burst of output packets on a transmit queue of an Ethernet device.
  *
@@ -2819,6 +3045,115 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
 	return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prepare() function is invoked to prepare output packets to be
+ * transmitted on the output queue *queue_id* of the Ethernet device designated
+ * by its *port_id*.
+ * The *nb_pkts* parameter is the number of packets to be prepared which are
+ * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
+ * allocated from a pool created with rte_pktmbuf_pool_create().
+ * For each packet to send, the rte_eth_tx_prepare() function performs
+ * the following operations:
+ *
+ * - Check if the packet meets the device's requirements for Tx offloads.
+ *
+ * - Check limitations on the number of segments.
+ *
+ * - Check additional requirements when debug is enabled.
+ *
+ * - Update and/or reset required checksums when Tx offload is set for packet.
+ *
+ * Since this function can modify packet data, provided mbufs must be safely
+ * writable (e.g. modified data cannot be in a shared segment).
+ *
+ * The rte_eth_tx_prepare() function returns the number of packets ready to be
+ * sent. A return value equal to *nb_pkts* means that all packets are valid and
+ * ready to be sent; otherwise it stops processing on the first invalid packet
+ * and leaves the remaining packets untouched.
+ *
+ * When this functionality is not implemented in the driver, all packets are
+ * returned untouched.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ *   The value must be a valid port id.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param tx_pkts
+ *   The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
+ *   which contain the output packets.
+ * @param nb_pkts
+ *   The maximum number of packets to process.
+ * @return
+ *   The number of packets correct and ready to be sent. The return value can
+ *   be less than the value of the *tx_pkts* parameter when some packet doesn't
+ *   meet the device's requirements with rte_errno set appropriately:
+ *   - -EINVAL: offload flags are not correctly set
+ *   - -ENOTSUP: the offload feature is not supported by the hardware
+ *
+ */
+
+#ifndef RTE_ETHDEV_TX_PREPARE_NOOP
+
+static inline uint16_t
+rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id,
+		struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct rte_eth_dev *dev;
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		RTE_PMD_DEBUG_TRACE("Invalid TX port_id=%d\n", port_id);
+		rte_errno = -EINVAL;
+		return 0;
+	}
+#endif
+
+	dev = &rte_eth_devices[port_id];
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	if (queue_id >= dev->data->nb_tx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+		rte_errno = -EINVAL;
+		return 0;
+	}
+#endif
+
+	if (!dev->tx_pkt_prepare)
+		return nb_pkts;
+
+	return (*dev->tx_pkt_prepare)(dev->data->tx_queues[queue_id],
+			tx_pkts, nb_pkts);
+}
+
+#else
+
+/*
+ * Native NOOP operation for compilation targets which do not require any
+ * preparation steps, and where a functional NOOP may introduce an
+ * unnecessary performance drop.
+ *
+ * Generally it is not a good idea to turn this on globally, and it should
+ * not be used if the behavior of tx_prepare can change.
+ */
+
+static inline uint16_t
+rte_eth_tx_prepare(__rte_unused uint8_t port_id, __rte_unused uint16_t queue_id,
+		__rte_unused struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	return nb_pkts;
+}
+
+#endif
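A sketch of the intended call pattern around the experimental rte_eth_tx_prepare() (illustrative only; the function name and the drop policy are choices made here, not mandated by the API): prepare the burst first, then transmit only the packets that passed the checks.

#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static uint16_t
send_checked_burst(uint8_t port_id, uint16_t queue_id,
		   struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t nb_prep;

	nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
	if (nb_prep != nb_pkts) {
		/* pkts[nb_prep] failed the device checks; rte_errno says
		 * why (-EINVAL: bad offload flags, -ENOTSUP: unsupported
		 * offload). This sketch simply drops the offender; a real
		 * application would also re-queue or free the untouched
		 * packets after it instead of leaking them. */
		rte_pktmbuf_free(pkts[nb_prep]);
	}
	return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}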
+
 typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count,
 		void *userdata);
 
@@ -3024,6 +3359,33 @@ rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent,
 		void *userdata);
 
 /**
+ * Request the driver to free mbufs currently cached by the driver. The
+ * driver will only free the mbuf if it is no longer in use. It is the
+ * application's responsibility to ensure rte_eth_tx_buffer_flush(..) is
+ * called if needed.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param free_cnt
+ *   Maximum number of packets to free. Use 0 to indicate all possible packets
+ *   should be freed. Note that a packet may be using multiple mbufs.
+ * @return
+ *   Failure: < 0
+ *     -ENODEV: Invalid interface
+ *     -ENOTSUP: Driver does not support function
+ *   Success: >= 0
+ *     0-n: Number of packets freed. More packets may still remain in the ring
+ *     that are in use.
+ */
+int
+rte_eth_tx_done_cleanup(uint8_t port_id, uint16_t queue_id, uint32_t free_cnt);
+
+/**
  * The eth device event type for interrupt, and maybe others in the future.
  */
 enum rte_eth_event_type {
@@ -3034,6 +3396,8 @@ enum rte_eth_event_type {
 	RTE_ETH_EVENT_INTR_RESET,
 			/**< reset interrupt event, sent to VF on PF reset */
 	RTE_ETH_EVENT_VF_MBOX,  /**< message from the VF received by PF */
+	RTE_ETH_EVENT_MACSEC,   /**< MACsec offload related event */
+	RTE_ETH_EVENT_INTR_RMV, /**< device removal event */
 	RTE_ETH_EVENT_MAX       /**< max value of this enum */
 };
 
@@ -3112,7 +3476,7 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 /**
  * When there is no rx packet coming in Rx Queue for a long time, we can
  * sleep lcore related to RX Queue for power saving, and enable rx interrupt
- * to be triggered when rx packect arrives.
+ * to be triggered when Rx packet arrives.
  *
  * The rte_eth_dev_rx_intr_enable() function enables rx queue
  * interrupt on specific rx queue of a port.
@@ -3403,93 +3767,6 @@ int rte_eth_dev_uc_hash_table_set(uint8_t port,struct ether_addr *addr,
  */
 int rte_eth_dev_uc_all_hash_table_set(uint8_t port,uint8_t on);
 
- /**
- * Set RX L2 Filtering mode of a VF of an Ethernet device.
- *
- * @param port
- *   The port identifier of the Ethernet device.
- * @param vf
- *   VF id.
- * @param rx_mode
- *   The RX mode mask, which is one or more of accepting Untagged Packets,
- *   packets that match the PFUTA table, Broadcast and Multicast Promiscuous.
- *   ETH_VMDQ_ACCEPT_UNTAG,ETH_VMDQ_ACCEPT_HASH_UC,
- *   ETH_VMDQ_ACCEPT_BROADCAST and ETH_VMDQ_ACCEPT_MULTICAST will be used
- *   in rx_mode.
- * @param on
- *   1 - Enable a VF RX mode.
- *   0 - Disable a VF RX mode.
- * @return
- *   - (0) if successful.
- *   - (-ENOTSUP) if hardware doesn't support.
- *   - (-ENOTSUP) if hardware doesn't support.
- *   - (-EINVAL) if bad parameter.
- */
-int rte_eth_dev_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mode,
-				uint8_t on);
-
-/**
-* Enable or disable a VF traffic transmit of the Ethernet device.
-*
-* @param port
-*   The port identifier of the Ethernet device.
-* @param vf
-*   VF id.
-* @param on
-*    1 - Enable a VF traffic transmit.
-*    0 - Disable a VF traffic transmit.
-* @return -* - (0) if successful. -* - (-ENODEV) if *port_id* invalid. -* - (-ENOTSUP) if hardware doesn't support. -* - (-EINVAL) if bad parameter. -*/ -int -rte_eth_dev_set_vf_tx(uint8_t port,uint16_t vf, uint8_t on); - -/** -* Enable or disable a VF traffic receive of an Ethernet device. -* -* @param port -* The port identifier of the Ethernet device. -* @param vf -* VF id. -* @param on -* 1 - Enable a VF traffic receive. -* 0 - Disable a VF traffic receive. -* @return -* - (0) if successful. -* - (-ENOTSUP) if hardware doesn't support. -* - (-ENODEV) if *port_id* invalid. -* - (-EINVAL) if bad parameter. -*/ -int -rte_eth_dev_set_vf_rx(uint8_t port,uint16_t vf, uint8_t on); - -/** -* Enable/Disable hardware VF VLAN filtering by an Ethernet device of -* received VLAN packets tagged with a given VLAN Tag Identifier. -* -* @param port id -* The port identifier of the Ethernet device. -* @param vlan_id -* The VLAN Tag Identifier whose filtering must be enabled or disabled. -* @param vf_mask -* Bitmap listing which VFs participate in the VLAN filtering. -* @param vlan_on -* 1 - Enable VFs VLAN filtering. -* 0 - Disable VFs VLAN filtering. -* @return -* - (0) if successful. -* - (-ENOTSUP) if hardware doesn't support. -* - (-ENODEV) if *port_id* invalid. -* - (-EINVAL) if bad parameter. -*/ -int -rte_eth_dev_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id, - uint64_t vf_mask, - uint8_t vlan_on); - /** * Set a traffic mirroring rule on an Ethernet device * @@ -3551,26 +3828,6 @@ int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, uint16_t tx_rate); /** - * Set the rate limitation for a vf on an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param vf - * VF id. - * @param tx_rate - * The tx rate allocated from the total link speed for this VF id. - * @param q_msk - * The queue mask which need to set the rate. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support this feature. - * - (-ENODEV) if *port_id* invalid. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, - uint16_t tx_rate, uint64_t q_msk); - -/** * Initialize bypass logic. This function needs to be called before * executing any other bypass API. * @@ -3773,7 +4030,7 @@ rte_eth_dev_rss_hash_conf_get(uint8_t port_id, * The packets with this UDP port will be identified as this type of tunnel. * Before enabling any offloading function for a tunnel, users can call this API * to change or add more UDP port for the tunnel. So the offloading function - * can take effect on the packets with the sepcific UDP port. + * can take effect on the packets with the specific UDP port. * * @param port_id * The port identifier of the Ethernet device. @@ -3795,7 +4052,7 @@ rte_eth_dev_udp_tunnel_port_add(uint8_t port_id, * any more. * Before enabling any offloading function for a tunnel, users can call this API * to delete a UDP port for the tunnel. So the offloading function will not take - * effect on the packets with the sepcific UDP port. + * effect on the packets with the specific UDP port. * * @param port_id * The port identifier of the Ethernet device. @@ -3889,31 +4146,31 @@ int rte_eth_dev_get_dcb_info(uint8_t port_id, void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id, rte_rx_callback_fn fn, void *user_param); -/* -* Add a callback that must be called first on packet RX on a given port -* and queue. 
-* -* This API configures a first function to be called for each burst of -* packets received on a given NIC port queue. The return value is a pointer -* that can be used to later remove the callback using -* rte_eth_remove_rx_callback(). -* -* Multiple functions are called in the order that they are added. -* -* @param port_id -* The port identifier of the Ethernet device. -* @param queue_id -* The queue on the Ethernet device on which the callback is to be added. -* @param fn -* The callback function -* @param user_param -* A generic pointer parameter which will be passed to each invocation of the -* callback function on this port and queue. -* -* @return -* NULL on error. -* On success, a pointer value which can later be used to remove the callback. -*/ +/** + * Add a callback that must be called first on packet RX on a given port + * and queue. + * + * This API configures a first function to be called for each burst of + * packets received on a given NIC port queue. The return value is a pointer + * that can be used to later remove the callback using + * rte_eth_remove_rx_callback(). + * + * Multiple functions are called in the order that they are added. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device on which the callback is to be added. + * @param fn + * The callback function + * @param user_param + * A generic pointer parameter which will be passed to each invocation of the + * callback function on this port and queue. + * + * @return + * NULL on error. + * On success, a pointer value which can later be used to remove the callback. + */ void *rte_eth_add_first_rx_callback(uint8_t port_id, uint16_t queue_id, rte_rx_callback_fn fn, void *user_param); @@ -4251,20 +4508,6 @@ int rte_eth_timesync_read_time(uint8_t port_id, struct timespec *time); int rte_eth_timesync_write_time(uint8_t port_id, const struct timespec *time); /** - * Copy pci device info to the Ethernet device data. - * - * @param eth_dev - * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. - * @param pci_dev - * The *pci_dev* pointer is the address of the *rte_pci_device* structure. - * - * @return - * - 0 on success, negative on error - */ -void rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, - struct rte_pci_device *pci_dev); - -/** * Create memzone for HW rings. * malloc can't be used as the physical address is needed. * If the memzone is already created, then this function returns a ptr @@ -4336,7 +4579,7 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id, uint8_t en); /** -* Get the port id from pci adrress or device name +* Get the port id from pci address or device name * Ex: 0000:2:00.0 or vdev name net_pcap0 * * @param name @@ -4364,21 +4607,6 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id); int rte_eth_dev_get_name_by_port(uint8_t port_id, char *name); -/** - * @internal - * Wrapper for use by pci drivers as a .probe function to attach to a ethdev - * interface. - */ -int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv, - struct rte_pci_device *pci_dev); - -/** - * @internal - * Wrapper for use by pci drivers as a .remove function to detach a ethdev - * interface. 
- */
-int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_ether/rte_ethdev_pci.h b/lib/librte_ether/rte_ethdev_pci.h
new file mode 100644
index 00000000..d3bc03cf
--- /dev/null
+++ b/lib/librte_ether/rte_ethdev_pci.h
@@ -0,0 +1,193 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Brocade Communications Systems, Inc.
+ *   Author: Jan Blunck <jblunck@infradead.org>
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of the copyright holder nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETHDEV_PCI_H_
+#define _RTE_ETHDEV_PCI_H_
+
+#include <rte_malloc.h>
+#include <rte_pci.h>
+#include <rte_ethdev.h>
+
+/**
+ * Copy pci device info to the Ethernet device data.
+ *
+ * @param eth_dev
+ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
+ * @param pci_dev
+ * The *pci_dev* pointer is the address of the *rte_pci_device* structure.
+ */
+static inline void
+rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev,
+	struct rte_pci_device *pci_dev)
+{
+	if ((eth_dev == NULL) || (pci_dev == NULL)) {
+		RTE_PMD_DEBUG_TRACE("NULL pointer eth_dev=%p pci_dev=%p\n",
+				eth_dev, pci_dev);
+		return;
+	}
+
+	eth_dev->intr_handle = &pci_dev->intr_handle;
+
+	eth_dev->data->dev_flags = 0;
+	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_RMV)
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV;
+
+	eth_dev->data->kdrv = pci_dev->kdrv;
+	eth_dev->data->numa_node = pci_dev->device.numa_node;
+	eth_dev->data->drv_name = pci_dev->driver->driver.name;
+}
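To illustrate how these helpers are meant to be consumed (a hypothetical sketch, not code from this patch: my_adapter, my_dev_init, my_dev_uninit and the PCI ID table are invented names), a PCI PMD can reduce its .probe/.remove callbacks to thin wrappers around rte_eth_dev_pci_generic_probe() and rte_eth_dev_pci_generic_remove(), both defined further below in this header:

#include <rte_ethdev_pci.h>

struct my_adapter { uint64_t flags; };	/* per-port private data */

static int
my_dev_init(struct rte_eth_dev *eth_dev)
{
	(void)eth_dev;	/* would set eth_dev->dev_ops, rx/tx burst fns, ... */
	return 0;
}

static int
my_dev_uninit(struct rte_eth_dev *eth_dev)
{
	(void)eth_dev;	/* would undo my_dev_init() */
	return 0;
}

static const struct rte_pci_id my_pci_id_map[] = {
	{ RTE_PCI_DEVICE(0x8086, 0x1234) },	/* hypothetical IDs */
	{ .vendor_id = 0 },			/* sentinel */
};

static int
my_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
	     struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_probe(pci_dev,
			sizeof(struct my_adapter), my_dev_init);
}

static int
my_pci_remove(struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_remove(pci_dev, my_dev_uninit);
}

static struct rte_pci_driver my_pmd = {
	.id_table = my_pci_id_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
	.probe = my_pci_probe,
	.remove = my_pci_remove,
};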
+
+/**
+ * @internal
+ * Allocates a new ethdev slot for an Ethernet device and returns the pointer
+ * to that slot for the driver to use.
+ *
+ * @param dev
+ *	Pointer to the PCI device
+ *
+ * @param private_data_size
+ *	Size of private data structure
+ *
+ * @return
+ *	A pointer to a rte_eth_dev or NULL if allocation failed.
+ */
+static inline struct rte_eth_dev *
+rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size)
+{
+	struct rte_eth_dev *eth_dev;
+	const char *name;
+
+	if (!dev)
+		return NULL;
+
+	name = dev->device.name;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		eth_dev = rte_eth_dev_allocate(name);
+		if (!eth_dev)
+			return NULL;
+
+		if (private_data_size) {
+			eth_dev->data->dev_private = rte_zmalloc_socket(name,
+				private_data_size, RTE_CACHE_LINE_SIZE,
+				dev->device.numa_node);
+			if (!eth_dev->data->dev_private) {
+				rte_eth_dev_release_port(eth_dev);
+				return NULL;
+			}
+		}
+	} else {
+		eth_dev = rte_eth_dev_attach_secondary(name);
+		if (!eth_dev)
+			return NULL;
+	}
+
+	eth_dev->device = &dev->device;
+	eth_dev->intr_handle = &dev->intr_handle;
+	rte_eth_copy_pci_info(eth_dev, dev);
+	return eth_dev;
+}
+
+static inline void
+rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev)
+{
+	/* free ether device */
+	rte_eth_dev_release_port(eth_dev);
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		rte_free(eth_dev->data->dev_private);
+
+	eth_dev->data->dev_private = NULL;
+
+	eth_dev->device = NULL;
+	eth_dev->intr_handle = NULL;
+}
+
+typedef int (*eth_dev_pci_callback_t)(struct rte_eth_dev *eth_dev);
+
+/**
+ * @internal
+ * Wrapper for use by pci drivers in a .probe function to attach to an ethdev
+ * interface.
+ */
+static inline int
+rte_eth_dev_pci_generic_probe(struct rte_pci_device *pci_dev,
+	size_t private_data_size, eth_dev_pci_callback_t dev_init)
+{
+	struct rte_eth_dev *eth_dev;
+	int ret;
+
+	eth_dev = rte_eth_dev_pci_allocate(pci_dev, private_data_size);
+	if (!eth_dev)
+		return -ENOMEM;
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev_init, -EINVAL);
+	ret = dev_init(eth_dev);
+	if (ret)
+		rte_eth_dev_pci_release(eth_dev);
+
+	return ret;
+}
+
+/**
+ * @internal
+ * Wrapper for use by pci drivers in a .remove function to detach an ethdev
+ * interface.
+ */
+static inline int
+rte_eth_dev_pci_generic_remove(struct rte_pci_device *pci_dev,
+	eth_dev_pci_callback_t dev_uninit)
+{
+	struct rte_eth_dev *eth_dev;
+	int ret;
+
+	eth_dev = rte_eth_dev_allocated(pci_dev->device.name);
+	if (!eth_dev)
+		return -ENODEV;
+
+	if (dev_uninit) {
+		ret = dev_uninit(eth_dev);
+		if (ret)
+			return ret;
+	}
+
+	rte_eth_dev_pci_release(eth_dev);
+	return 0;
+}
+
+#endif /* _RTE_ETHDEV_PCI_H_ */
diff --git a/lib/librte_ether/rte_ethdev_vdev.h b/lib/librte_ether/rte_ethdev_vdev.h
new file mode 100644
index 00000000..fa2cb61e
--- /dev/null
+++ b/lib/librte_ether/rte_ethdev_vdev.h
@@ -0,0 +1,84 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Brocade Communications Systems, Inc.
+ *   Author: Jan Blunck <jblunck@infradead.org>
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of the copyright holder nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETHDEV_VDEV_H_ +#define _RTE_ETHDEV_VDEV_H_ + +#include <rte_malloc.h> +#include <rte_vdev.h> +#include <rte_ethdev.h> + +/** + * @internal + * Allocates a new ethdev slot for an ethernet device and returns the pointer + * to that slot for the driver to use. + * + * @param dev + * Pointer to virtual device + * + * @param private_data_size + * Size of private data structure + * + * @return + * A pointer to a rte_eth_dev or NULL if allocation failed. + */ +static inline struct rte_eth_dev * +rte_eth_vdev_allocate(struct rte_vdev_device *dev, size_t private_data_size) +{ + struct rte_eth_dev *eth_dev; + const char *name = rte_vdev_device_name(dev); + + eth_dev = rte_eth_dev_allocate(name); + if (!eth_dev) + return NULL; + + if (private_data_size) { + eth_dev->data->dev_private = rte_zmalloc_socket(name, + private_data_size, RTE_CACHE_LINE_SIZE, + dev->device.numa_node); + if (!eth_dev->data->dev_private) { + rte_eth_dev_release_port(eth_dev); + return NULL; + } + } + + eth_dev->device = &dev->device; + eth_dev->intr_handle = NULL; + + eth_dev->data->kdrv = RTE_KDRV_NONE; + eth_dev->data->numa_node = dev->device.numa_node; + eth_dev->data->drv_name = dev->device.driver->name; + return eth_dev; +} + +#endif /* _RTE_ETHDEV_VDEV_H_ */ diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map index fd622635..d6726bb1 100644 --- a/lib/librte_ether/rte_ether_version.map +++ b/lib/librte_ether/rte_ether_version.map @@ -7,7 +7,6 @@ DPDK_2.2 { rte_eth_allmulticast_disable; rte_eth_allmulticast_enable; rte_eth_allmulticast_get; - rte_eth_copy_pci_info; rte_eth_dev_allocate; rte_eth_dev_allocated; rte_eth_dev_attach; @@ -60,10 +59,6 @@ DPDK_2.2 { rte_eth_dev_set_mtu; rte_eth_dev_set_rx_queue_stats_mapping; rte_eth_dev_set_tx_queue_stats_mapping; - rte_eth_dev_set_vf_rx; - rte_eth_dev_set_vf_rxmode; - rte_eth_dev_set_vf_tx; - rte_eth_dev_set_vf_vlan_filter; rte_eth_dev_set_vlan_offload; rte_eth_dev_set_vlan_pvid; rte_eth_dev_set_vlan_strip_on_queue; @@ -93,7 +88,6 @@ DPDK_2.2 { rte_eth_rx_queue_info_get; rte_eth_rx_queue_setup; rte_eth_set_queue_rate_limit; - rte_eth_set_vf_rate_limit; rte_eth_stats; rte_eth_stats_get; rte_eth_stats_reset; @@ -139,10 +133,26 @@ DPDK_16.07 { } DPDK_16.04; -DPDK_16.11 { +DPDK_17.02 { global: - rte_eth_dev_pci_probe; - rte_eth_dev_pci_remove; + _rte_eth_dev_reset; + rte_eth_dev_fw_version_get; + rte_flow_create; + rte_flow_destroy; + rte_flow_flush; + rte_flow_query; + rte_flow_validate; } DPDK_16.07; + +DPDK_17.05 { + global: + + rte_eth_dev_attach_secondary; + rte_eth_find_next; + rte_eth_xstats_get_by_id; + rte_eth_xstats_get_id_by_name; + rte_eth_xstats_get_names_by_id; + +} DPDK_17.02; diff --git a/lib/librte_ether/rte_flow.c 
b/lib/librte_ether/rte_flow.c new file mode 100644 index 00000000..aaa70d68 --- /dev/null +++ b/lib/librte_ether/rte_flow.c @@ -0,0 +1,159 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> + +#include <rte_errno.h> +#include <rte_branch_prediction.h> +#include "rte_ethdev.h" +#include "rte_flow_driver.h" +#include "rte_flow.h" + +/* Get generic flow operations structure from a port. */ +const struct rte_flow_ops * +rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops; + int code; + + if (unlikely(!rte_eth_dev_is_valid_port(port_id))) + code = ENODEV; + else if (unlikely(!dev->dev_ops->filter_ctrl || + dev->dev_ops->filter_ctrl(dev, + RTE_ETH_FILTER_GENERIC, + RTE_ETH_FILTER_GET, + &ops) || + !ops)) + code = ENOSYS; + else + return ops; + rte_flow_error_set(error, code, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(code)); + return NULL; +} + +/* Check whether a flow rule can be created on a given port. */ +int +rte_flow_validate(uint8_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->validate)) + return ops->validate(dev, attr, pattern, actions, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Create a flow rule on a given port. 
*/ +struct rte_flow * +rte_flow_create(uint8_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return NULL; + if (likely(!!ops->create)) + return ops->create(dev, attr, pattern, actions, error); + rte_flow_error_set(error, ENOSYS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); + return NULL; +} + +/* Destroy a flow rule on a given port. */ +int +rte_flow_destroy(uint8_t port_id, + struct rte_flow *flow, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->destroy)) + return ops->destroy(dev, flow, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Destroy all flow rules associated with a port. */ +int +rte_flow_flush(uint8_t port_id, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->flush)) + return ops->flush(dev, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Query an existing flow rule. */ +int +rte_flow_query(uint8_t port_id, + struct rte_flow *flow, + enum rte_flow_action_type action, + void *data, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (!ops) + return -rte_errno; + if (likely(!!ops->query)) + return ops->query(dev, flow, action, data, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h new file mode 100644 index 00000000..c47edbc9 --- /dev/null +++ b/lib/librte_ether/rte_flow.h @@ -0,0 +1,1198 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_FLOW_H_ +#define RTE_FLOW_H_ + +/** + * @file + * RTE generic flow API + * + * This interface provides the ability to program packet matching and + * associated actions in hardware through flow rules. + */ + +#include <rte_arp.h> +#include <rte_ether.h> +#include <rte_icmp.h> +#include <rte_ip.h> +#include <rte_sctp.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_byteorder.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Flow rule attributes. + * + * Priorities are set on two levels: per group and per rule within groups. + * + * Lower values denote higher priority, the highest priority for both levels + * is 0, so that a rule with priority 0 in group 8 is always matched after a + * rule with priority 8 in group 0. + * + * Although optional, applications are encouraged to group similar rules as + * much as possible to fully take advantage of hardware capabilities + * (e.g. optimized matching) and work around limitations (e.g. a single + * pattern type possibly allowed in a given group). + * + * Group and priority levels are arbitrary and up to the application, they + * do not need to be contiguous nor start from 0, however the maximum number + * varies between devices and may be affected by existing flow rules. + * + * If a packet is matched by several rules of a given group for a given + * priority level, the outcome is undefined. It can take any path, may be + * duplicated or even cause unrecoverable errors. + * + * Note that support for more than a single group and priority level is not + * guaranteed. + * + * Flow rules can apply to inbound and/or outbound traffic (ingress/egress). + * + * Several pattern items and actions are valid and can be used in both + * directions. Those valid for only one direction are described as such. + * + * At least one direction must be specified. + * + * Specifying both directions at once for a given rule is not recommended + * but may be valid in a few cases (e.g. shared counter). + */ +struct rte_flow_attr { + uint32_t group; /**< Priority group. */ + uint32_t priority; /**< Priority level within group. */ + uint32_t ingress:1; /**< Rule applies to ingress traffic. */ + uint32_t egress:1; /**< Rule applies to egress traffic. */ + uint32_t reserved:30; /**< Reserved, must be zero. */ +}; + +/** + * Matching pattern item types. + * + * Pattern items fall in two categories: + * + * - Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4, + * IPV6, ICMP, UDP, TCP, SCTP, VXLAN and so on), usually associated with a + * specification structure. These must be stacked in the same order as the + * protocol layers to match, starting from the lowest. + * + * - Matching meta-data or affecting pattern processing (END, VOID, INVERT, + * PF, VF, PORT and so on), often without a specification structure. Since + * they do not match packet contents, these can be specified anywhere + * within item lists without affecting others. 
+ * + * See the description of individual types for more information. Those + * marked with [META] fall into the second category. + */ +enum rte_flow_item_type { + /** + * [META] + * + * End marker for item lists. Prevents further processing of items, + * thereby ending the pattern. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_END, + + /** + * [META] + * + * Used as a placeholder for convenience. It is ignored and simply + * discarded by PMDs. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_VOID, + + /** + * [META] + * + * Inverted matching, i.e. process packets that do not match the + * pattern. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_INVERT, + + /** + * Matches any protocol in place of the current layer, a single ANY + * may also stand for several protocol layers. + * + * See struct rte_flow_item_any. + */ + RTE_FLOW_ITEM_TYPE_ANY, + + /** + * [META] + * + * Matches packets addressed to the physical function of the device. + * + * If the underlying device function differs from the one that would + * normally receive the matched traffic, specifying this item + * prevents it from reaching that device unless the flow rule + * contains a PF action. Packets are not duplicated between device + * instances by default. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_PF, + + /** + * [META] + * + * Matches packets addressed to a virtual function ID of the device. + * + * If the underlying device function differs from the one that would + * normally receive the matched traffic, specifying this item + * prevents it from reaching that device unless the flow rule + * contains a VF action. Packets are not duplicated between device + * instances by default. + * + * See struct rte_flow_item_vf. + */ + RTE_FLOW_ITEM_TYPE_VF, + + /** + * [META] + * + * Matches packets coming from the specified physical port of the + * underlying device. + * + * The first PORT item overrides the physical port normally + * associated with the specified DPDK input port (port_id). This + * item can be provided several times to match additional physical + * ports. + * + * See struct rte_flow_item_port. + */ + RTE_FLOW_ITEM_TYPE_PORT, + + /** + * Matches a byte string of a given length at a given offset. + * + * See struct rte_flow_item_raw. + */ + RTE_FLOW_ITEM_TYPE_RAW, + + /** + * Matches an Ethernet header. + * + * See struct rte_flow_item_eth. + */ + RTE_FLOW_ITEM_TYPE_ETH, + + /** + * Matches an 802.1Q/ad VLAN tag. + * + * See struct rte_flow_item_vlan. + */ + RTE_FLOW_ITEM_TYPE_VLAN, + + /** + * Matches an IPv4 header. + * + * See struct rte_flow_item_ipv4. + */ + RTE_FLOW_ITEM_TYPE_IPV4, + + /** + * Matches an IPv6 header. + * + * See struct rte_flow_item_ipv6. + */ + RTE_FLOW_ITEM_TYPE_IPV6, + + /** + * Matches an ICMP header. + * + * See struct rte_flow_item_icmp. + */ + RTE_FLOW_ITEM_TYPE_ICMP, + + /** + * Matches a UDP header. + * + * See struct rte_flow_item_udp. + */ + RTE_FLOW_ITEM_TYPE_UDP, + + /** + * Matches a TCP header. + * + * See struct rte_flow_item_tcp. + */ + RTE_FLOW_ITEM_TYPE_TCP, + + /** + * Matches a SCTP header. + * + * See struct rte_flow_item_sctp. + */ + RTE_FLOW_ITEM_TYPE_SCTP, + + /** + * Matches a VXLAN header. + * + * See struct rte_flow_item_vxlan. + */ + RTE_FLOW_ITEM_TYPE_VXLAN, + + /** + * Matches a E_TAG header. + * + * See struct rte_flow_item_e_tag. + */ + RTE_FLOW_ITEM_TYPE_E_TAG, + + /** + * Matches a NVGRE header. + * + * See struct rte_flow_item_nvgre. 
+ */ + RTE_FLOW_ITEM_TYPE_NVGRE, + + /** + * Matches a MPLS header. + * + * See struct rte_flow_item_mpls. + */ + RTE_FLOW_ITEM_TYPE_MPLS, + + /** + * Matches a GRE header. + * + * See struct rte_flow_item_gre. + */ + RTE_FLOW_ITEM_TYPE_GRE, +}; + +/** + * RTE_FLOW_ITEM_TYPE_ANY + * + * Matches any protocol in place of the current layer, a single ANY may also + * stand for several protocol layers. + * + * This is usually specified as the first pattern item when looking for a + * protocol anywhere in a packet. + * + * A zeroed mask stands for any number of layers. + */ +struct rte_flow_item_any { + uint32_t num; /**< Number of layers covered. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ANY. */ +#ifndef __cplusplus +static const struct rte_flow_item_any rte_flow_item_any_mask = { + .num = 0x00000000, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_VF + * + * Matches packets addressed to a virtual function ID of the device. + * + * If the underlying device function differs from the one that would + * normally receive the matched traffic, specifying this item prevents it + * from reaching that device unless the flow rule contains a VF + * action. Packets are not duplicated between device instances by default. + * + * - Likely to return an error or never match any traffic if this causes a + * VF device to match traffic addressed to a different VF. + * - Can be specified multiple times to match traffic addressed to several + * VF IDs. + * - Can be combined with a PF item to match both PF and VF traffic. + * + * A zeroed mask can be used to match any VF ID. + */ +struct rte_flow_item_vf { + uint32_t id; /**< Destination VF ID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VF. */ +#ifndef __cplusplus +static const struct rte_flow_item_vf rte_flow_item_vf_mask = { + .id = 0x00000000, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_PORT + * + * Matches packets coming from the specified physical port of the underlying + * device. + * + * The first PORT item overrides the physical port normally associated with + * the specified DPDK input port (port_id). This item can be provided + * several times to match additional physical ports. + * + * Note that physical ports are not necessarily tied to DPDK input ports + * (port_id) when those are not under DPDK control. Possible values are + * specific to each device, they are not necessarily indexed from zero and + * may not be contiguous. + * + * As a device property, the list of allowed values as well as the value + * associated with a port_id should be retrieved by other means. + * + * A zeroed mask can be used to match any port index. + */ +struct rte_flow_item_port { + uint32_t index; /**< Physical port index. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_PORT. */ +#ifndef __cplusplus +static const struct rte_flow_item_port rte_flow_item_port_mask = { + .index = 0x00000000, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_RAW + * + * Matches a byte string of a given length at a given offset. + * + * Offset is either absolute (using the start of the packet) or relative to + * the end of the previous matched item in the stack, in which case negative + * values are allowed. + * + * If search is enabled, offset is used as the starting point. The search + * area can be delimited by setting limit to a nonzero value, which is the + * maximum number of bytes after offset where the pattern may start. + * + * Matching a zero-length pattern is allowed, doing so resets the relative + * offset for subsequent items. 
+ * + * This type does not support ranges (struct rte_flow_item.last). + */ +struct rte_flow_item_raw { + uint32_t relative:1; /**< Look for pattern after the previous item. */ + uint32_t search:1; /**< Search pattern from offset (see also limit). */ + uint32_t reserved:30; /**< Reserved, must be set to zero. */ + int32_t offset; /**< Absolute or relative offset for pattern. */ + uint16_t limit; /**< Search area limit for start of pattern. */ + uint16_t length; /**< Pattern length. */ + uint8_t pattern[]; /**< Byte string to look for. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_RAW. */ +#ifndef __cplusplus +static const struct rte_flow_item_raw rte_flow_item_raw_mask = { + .relative = 1, + .search = 1, + .reserved = 0x3fffffff, + .offset = 0xffffffff, + .limit = 0xffff, + .length = 0xffff, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ETH + * + * Matches an Ethernet header. + */ +struct rte_flow_item_eth { + struct ether_addr dst; /**< Destination MAC. */ + struct ether_addr src; /**< Source MAC. */ + uint16_t type; /**< EtherType. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ETH. */ +#ifndef __cplusplus +static const struct rte_flow_item_eth rte_flow_item_eth_mask = { + .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .type = 0x0000, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_VLAN + * + * Matches an 802.1Q/ad VLAN tag. + * + * This type normally follows either RTE_FLOW_ITEM_TYPE_ETH or + * RTE_FLOW_ITEM_TYPE_VLAN. + */ +struct rte_flow_item_vlan { + uint16_t tpid; /**< Tag protocol identifier. */ + uint16_t tci; /**< Tag control information. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VLAN. */ +#ifndef __cplusplus +static const struct rte_flow_item_vlan rte_flow_item_vlan_mask = { + .tpid = 0x0000, + .tci = 0xffff, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_IPV4 + * + * Matches an IPv4 header. + * + * Note: IPv4 options are handled by dedicated pattern items. + */ +struct rte_flow_item_ipv4 { + struct ipv4_hdr hdr; /**< IPv4 header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_IPV4. */ +#ifndef __cplusplus +static const struct rte_flow_item_ipv4 rte_flow_item_ipv4_mask = { + .hdr = { + .src_addr = 0xffffffff, + .dst_addr = 0xffffffff, + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_IPV6. + * + * Matches an IPv6 header. + * + * Note: IPv6 options are handled by dedicated pattern items. + */ +struct rte_flow_item_ipv6 { + struct ipv6_hdr hdr; /**< IPv6 header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_IPV6. */ +#ifndef __cplusplus +static const struct rte_flow_item_ipv6 rte_flow_item_ipv6_mask = { + .hdr = { + .src_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + .dst_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP. + * + * Matches an ICMP header. + */ +struct rte_flow_item_icmp { + struct icmp_hdr hdr; /**< ICMP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP. */ +#ifndef __cplusplus +static const struct rte_flow_item_icmp rte_flow_item_icmp_mask = { + .hdr = { + .icmp_type = 0xff, + .icmp_code = 0xff, + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_UDP. + * + * Matches a UDP header. + */ +struct rte_flow_item_udp { + struct udp_hdr hdr; /**< UDP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_UDP. 
*/ +#ifndef __cplusplus +static const struct rte_flow_item_udp rte_flow_item_udp_mask = { + .hdr = { + .src_port = 0xffff, + .dst_port = 0xffff, + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_TCP. + * + * Matches a TCP header. + */ +struct rte_flow_item_tcp { + struct tcp_hdr hdr; /**< TCP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_TCP. */ +#ifndef __cplusplus +static const struct rte_flow_item_tcp rte_flow_item_tcp_mask = { + .hdr = { + .src_port = 0xffff, + .dst_port = 0xffff, + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_SCTP. + * + * Matches a SCTP header. + */ +struct rte_flow_item_sctp { + struct sctp_hdr hdr; /**< SCTP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_SCTP. */ +#ifndef __cplusplus +static const struct rte_flow_item_sctp rte_flow_item_sctp_mask = { + .hdr = { + .src_port = 0xffff, + .dst_port = 0xffff, + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_VXLAN. + * + * Matches a VXLAN header (RFC 7348). + */ +struct rte_flow_item_vxlan { + uint8_t flags; /**< Normally 0x08 (I flag). */ + uint8_t rsvd0[3]; /**< Reserved, normally 0x000000. */ + uint8_t vni[3]; /**< VXLAN identifier. */ + uint8_t rsvd1; /**< Reserved, normally 0x00. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VXLAN. */ +#ifndef __cplusplus +static const struct rte_flow_item_vxlan rte_flow_item_vxlan_mask = { + .vni = "\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_E_TAG. + * + * Matches a E-tag header. + */ +struct rte_flow_item_e_tag { + uint16_t tpid; /**< Tag protocol identifier (0x893F). */ + /** + * E-Tag control information (E-TCI). + * E-PCP (3b), E-DEI (1b), ingress E-CID base (12b). + */ + uint16_t epcp_edei_in_ecid_b; + /** Reserved (2b), GRP (2b), E-CID base (12b). */ + uint16_t rsvd_grp_ecid_b; + uint8_t in_ecid_e; /**< Ingress E-CID ext. */ + uint8_t ecid_e; /**< E-CID ext. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_E_TAG. */ +#ifndef __cplusplus +static const struct rte_flow_item_e_tag rte_flow_item_e_tag_mask = { +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN + .rsvd_grp_ecid_b = 0x3fff, +#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + .rsvd_grp_ecid_b = 0xff3f, +#else +#error Unsupported endianness. +#endif +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_NVGRE. + * + * Matches a NVGRE header. + */ +struct rte_flow_item_nvgre { + /** + * Checksum (1b), undefined (1b), key bit (1b), sequence number (1b), + * reserved 0 (9b), version (3b). + * + * c_k_s_rsvd0_ver must have value 0x2000 according to RFC 7637. + */ + uint16_t c_k_s_rsvd0_ver; + uint16_t protocol; /**< Protocol type (0x6558). */ + uint8_t tni[3]; /**< Virtual subnet ID. */ + uint8_t flow_id; /**< Flow ID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_NVGRE. */ +#ifndef __cplusplus +static const struct rte_flow_item_nvgre rte_flow_item_nvgre_mask = { + .tni = "\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_MPLS. + * + * Matches a MPLS header. + */ +struct rte_flow_item_mpls { + /** + * Label (20b), TC (3b), Bottom of Stack (1b). + */ + uint8_t label_tc_s[3]; + uint8_t ttl; /** Time-to-Live. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_MPLS. */ +#ifndef __cplusplus +static const struct rte_flow_item_mpls rte_flow_item_mpls_mask = { + .label_tc_s = "\xff\xff\xf0", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_GRE. + * + * Matches a GRE header. + */ +struct rte_flow_item_gre { + /** + * Checksum (1b), reserved 0 (12b), version (3b). + * Refer to RFC 2784. + */ + uint16_t c_rsvd0_ver; + uint16_t protocol; /**< Protocol type. 
*/ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_GRE. */ +#ifndef __cplusplus +static const struct rte_flow_item_gre rte_flow_item_gre_mask = { + .protocol = 0xffff, +}; +#endif + +/** + * Matching pattern item definition. + * + * A pattern is formed by stacking items starting from the lowest protocol + * layer to match. This stacking restriction does not apply to meta items + * which can be placed anywhere in the stack without affecting the meaning + * of the resulting pattern. + * + * Patterns are terminated by END items. + * + * The spec field should be a valid pointer to a structure of the related + * item type. It may remain unspecified (NULL) in many cases to request + * broad (nonspecific) matching. In such cases, last and mask must also be + * set to NULL. + * + * Optionally, last can point to a structure of the same type to define an + * inclusive range. This is mostly supported by integer and address fields, + * may cause errors otherwise. Fields that do not support ranges must be set + * to 0 or to the same value as the corresponding fields in spec. + * + * Only the fields defined to nonzero values in the default masks (see + * rte_flow_item_{name}_mask constants) are considered relevant by + * default. This can be overridden by providing a mask structure of the + * same type with applicable bits set to one. It can also be used to + * partially filter out specific fields (e.g. as an alternate mean to match + * ranges of IP addresses). + * + * Mask is a simple bit-mask applied before interpreting the contents of + * spec and last, which may yield unexpected results if not used + * carefully. For example, if for an IPv4 address field, spec provides + * 10.1.2.3, last provides 10.3.4.5 and mask provides 255.255.0.0, the + * effective range becomes 10.1.0.0 to 10.3.255.255. + */ +struct rte_flow_item { + enum rte_flow_item_type type; /**< Item type. */ + const void *spec; /**< Pointer to item specification structure. */ + const void *last; /**< Defines an inclusive range (spec to last). */ + const void *mask; /**< Bit-mask applied to spec and last. */ +}; + +/** + * Action types. + * + * Each possible action is represented by a type. Some have associated + * configuration structures. Several actions combined in a list can be + * affected to a flow rule. That list is not ordered. + * + * They fall in three categories: + * + * - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent + * processing matched packets by subsequent flow rules, unless overridden + * with PASSTHRU. + * + * - Non terminating actions (PASSTHRU, DUP) that leave matched packets up + * for additional processing by subsequent flow rules. + * + * - Other non terminating meta actions that do not affect the fate of + * packets (END, VOID, MARK, FLAG, COUNT). + * + * When several actions are combined in a flow rule, they should all have + * different types (e.g. dropping a packet twice is not possible). + * + * Only the last action of a given type is taken into account. PMDs still + * perform error checking on the entire list. + * + * Note that PASSTHRU is the only action able to override a terminating + * rule. + */ +enum rte_flow_action_type { + /** + * [META] + * + * End marker for action lists. Prevents further processing of + * actions, thereby ending the list. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_END, + + /** + * [META] + * + * Used as a placeholder for convenience. It is ignored and simply + * discarded by PMDs. + * + * No associated configuration structure. 
+ */ + RTE_FLOW_ACTION_TYPE_VOID, + + /** + * Leaves packets up for additional processing by subsequent flow + * rules. This is the default when a rule does not contain a + * terminating action, but can be specified to force a rule to + * become non-terminating. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_PASSTHRU, + + /** + * [META] + * + * Attaches an integer value to packets and sets PKT_RX_FDIR and + * PKT_RX_FDIR_ID mbuf flags. + * + * See struct rte_flow_action_mark. + */ + RTE_FLOW_ACTION_TYPE_MARK, + + /** + * [META] + * + * Flags packets. Similar to MARK without a specific value; only + * sets the PKT_RX_FDIR mbuf flag. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_FLAG, + + /** + * Assigns packets to a given queue index. + * + * See struct rte_flow_action_queue. + */ + RTE_FLOW_ACTION_TYPE_QUEUE, + + /** + * Drops packets. + * + * PASSTHRU overrides this action if both are specified. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_DROP, + + /** + * [META] + * + * Enables counters for this rule. + * + * These counters can be retrieved and reset through rte_flow_query(), + * see struct rte_flow_query_count. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_COUNT, + + /** + * Duplicates packets to a given queue index. + * + * This is normally combined with QUEUE, however when used alone, it + * is actually similar to QUEUE + PASSTHRU. + * + * See struct rte_flow_action_dup. + */ + RTE_FLOW_ACTION_TYPE_DUP, + + /** + * Similar to QUEUE, except RSS is additionally performed on packets + * to spread them among several queues according to the provided + * parameters. + * + * See struct rte_flow_action_rss. + */ + RTE_FLOW_ACTION_TYPE_RSS, + + /** + * Redirects packets to the physical function (PF) of the current + * device. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_PF, + + /** + * Redirects packets to the virtual function (VF) of the current + * device with the specified ID. + * + * See struct rte_flow_action_vf. + */ + RTE_FLOW_ACTION_TYPE_VF, +}; + +/** + * RTE_FLOW_ACTION_TYPE_MARK + * + * Attaches an integer value to packets and sets PKT_RX_FDIR and + * PKT_RX_FDIR_ID mbuf flags. + * + * This value is arbitrary and application-defined. Maximum allowed value + * depends on the underlying implementation. It is returned in the + * hash.fdir.hi mbuf field. + */ +struct rte_flow_action_mark { + uint32_t id; /**< Integer value to return with packets. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_QUEUE + * + * Assign packets to a given queue index. + * + * Terminating by default. + */ +struct rte_flow_action_queue { + uint16_t index; /**< Queue index to use. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_COUNT (query) + * + * Query structure to retrieve and reset flow rule counters. + */ +struct rte_flow_query_count { + uint32_t reset:1; /**< Reset counters after query [in]. */ + uint32_t hits_set:1; /**< hits field is set [out]. */ + uint32_t bytes_set:1; /**< bytes field is set [out]. */ + uint32_t reserved:29; /**< Reserved, must be zero [in, out]. */ + uint64_t hits; /**< Number of hits for this rule [out]. */ + uint64_t bytes; /**< Number of bytes through this rule [out]. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_DUP + * + * Duplicates packets to a given queue index. + * + * This is normally combined with QUEUE, however when used alone, it is + * actually similar to QUEUE + PASSTHRU. + * + * Non-terminating by default. 
+ */ +struct rte_flow_action_dup { + uint16_t index; /**< Queue index to duplicate packets to. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_RSS + * + * Similar to QUEUE, except RSS is additionally performed on packets to + * spread them among several queues according to the provided parameters. + * + * Note: RSS hash result is stored in the hash.rss mbuf field which overlaps + * hash.fdir.lo. Since the MARK action sets the hash.fdir.hi field only, + * both can be requested simultaneously. + * + * Terminating by default. + */ +struct rte_flow_action_rss { + const struct rte_eth_rss_conf *rss_conf; /**< RSS parameters. */ + uint16_t num; /**< Number of entries in queue[]. */ + uint16_t queue[]; /**< Queues indices to use. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_VF + * + * Redirects packets to a virtual function (VF) of the current device. + * + * Packets matched by a VF pattern item can be redirected to their original + * VF ID instead of the specified one. This parameter may not be available + * and is not guaranteed to work properly if the VF part is matched by a + * prior flow rule or if packets are not addressed to a VF in the first + * place. + * + * Terminating by default. + */ +struct rte_flow_action_vf { + uint32_t original:1; /**< Use original VF ID if possible. */ + uint32_t reserved:31; /**< Reserved, must be zero. */ + uint32_t id; /**< VF ID to redirect packets to. */ +}; + +/** + * Definition of a single action. + * + * A list of actions is terminated by a END action. + * + * For simple actions without a configuration structure, conf remains NULL. + */ +struct rte_flow_action { + enum rte_flow_action_type type; /**< Action type. */ + const void *conf; /**< Pointer to action configuration structure. */ +}; + +/** + * Opaque type returned after successfully creating a flow. + * + * This handle can be used to manage and query the related flow (e.g. to + * destroy it or retrieve counters). + */ +struct rte_flow; + +/** + * Verbose error types. + * + * Most of them provide the type of the object referenced by struct + * rte_flow_error.cause. + */ +enum rte_flow_error_type { + RTE_FLOW_ERROR_TYPE_NONE, /**< No error. */ + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ + RTE_FLOW_ERROR_TYPE_HANDLE, /**< Flow rule (handle). */ + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, /**< Group field. */ + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, /**< Priority field. */ + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, /**< Ingress field. */ + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */ + RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */ + RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */ + RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. */ + RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. */ + RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */ +}; + +/** + * Verbose error structure definition. + * + * This object is normally allocated by applications and set by PMDs, the + * message points to a constant string which does not need to be freed by + * the application, however its pointer can be considered valid only as long + * as its associated DPDK port remains configured. Closing the underlying + * device or unloading the PMD invalidates it. + * + * Both cause and message may be NULL regardless of the error type. + */ +struct rte_flow_error { + enum rte_flow_error_type type; /**< Cause field and error types. */ + const void *cause; /**< Object responsible for the error. */ + const char *message; /**< Human-readable error message. 
*/ +}; + +/** + * Check whether a flow rule can be created on a given port. + * + * The flow rule is validated for correctness and whether it could be accepted + * by the device given sufficient resources. The rule is checked against the + * current device mode and queue configuration. The flow rule may also + * optionally be validated against existing flow rules and device resources. + * This function has no effect on the target device. + * + * The returned value is guaranteed to remain valid only as long as no + * successful calls to rte_flow_create() or rte_flow_destroy() are made in + * the meantime and no device parameters affecting flow rules are modified, + * due to possible collisions or resource limitations (although in such + * cases EINVAL should not be returned). + * + * @param port_id + * Port identifier of Ethernet device. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 if flow rule is valid and can be created. A negative errno value + * otherwise (rte_errno is also set); the following errors are defined: + * + * -ENOSYS: underlying device does not support this functionality. + * + * -EINVAL: unknown or invalid rule specification. + * + * -ENOTSUP: valid but unsupported rule specification (e.g. partial + * bit-masks are unsupported). + * + * -EEXIST: collision with an existing rule. Only returned if device + * supports flow rule collision checking and there was a flow rule + * collision. Not receiving this return code is no guarantee that creating + * the rule will not fail due to a collision. + * + * -ENOMEM: not enough memory to execute the function, or if the device + * supports resource validation, resource limitation on the device. + * + * -EBUSY: action cannot be performed due to busy device resources, may + * succeed if the affected queues or even the entire port are in a stopped + * state (see rte_eth_dev_rx_queue_stop() and rte_eth_dev_stop()). + */ +int +rte_flow_validate(uint8_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); + +/** + * Create a flow rule on a given port. + * + * @param port_id + * Port identifier of Ethernet device. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * A valid handle in case of success, NULL otherwise and rte_errno is set + * to the positive version of one of the error codes defined for + * rte_flow_validate(). + */ +struct rte_flow * +rte_flow_create(uint8_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); + +/** + * Destroy a flow rule on a given port. + * + * Failure to destroy a flow rule handle may occur when other flow rules + * depend on it, and destroying it would result in an inconsistent state.
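+ * + * For instance, an application that stores its handles in creation order + * can tear them down last-created-first; a minimal sketch, assuming a + * hypothetical handles[] array of n rules returned by rte_flow_create(): + * + * \code + * // handles[0] is the oldest rule, handles[n - 1] the newest + * while (n--) + *     rte_flow_destroy(port_id, handles[n], NULL); + * \endcode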
+ * + * This function is only guaranteed to succeed if handles are destroyed in + * reverse order of their creation. + * + * @param port_id + * Port identifier of Ethernet device. + * @param flow + * Flow rule handle to destroy. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_destroy(uint8_t port_id, + struct rte_flow *flow, + struct rte_flow_error *error); + +/** + * Destroy all flow rules associated with a port. + * + * In the unlikely event of failure, handles are still considered destroyed + * and no longer valid but the port must be assumed to be in an inconsistent + * state. + * + * @param port_id + * Port identifier of Ethernet device. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_flush(uint8_t port_id, + struct rte_flow_error *error); + +/** + * Query an existing flow rule. + * + * This function allows retrieving flow-specific data such as counters. + * Data is gathered by special actions which must be present in the flow + * rule definition. + * + * \see RTE_FLOW_ACTION_TYPE_COUNT + * + * @param port_id + * Port identifier of Ethernet device. + * @param flow + * Flow rule handle to query. + * @param action + * Action type to query. + * @param[in, out] data + * Pointer to storage for the associated query data type. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_query(uint8_t port_id, + struct rte_flow *flow, + enum rte_flow_action_type action, + void *data, + struct rte_flow_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_FLOW_H_ */ diff --git a/lib/librte_ether/rte_flow_driver.h b/lib/librte_ether/rte_flow_driver.h new file mode 100644 index 00000000..da5749d5 --- /dev/null +++ b/lib/librte_ether/rte_flow_driver.h @@ -0,0 +1,182 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_FLOW_DRIVER_H_ +#define RTE_FLOW_DRIVER_H_ + +/** + * @file + * RTE generic flow API (driver side) + * + * This file provides implementation helpers for internal use by PMDs, they + * are not intended to be exposed to applications and are not subject to ABI + * versioning. + */ + +#include <stdint.h> + +#include <rte_errno.h> +#include "rte_ethdev.h" +#include "rte_flow.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Generic flow operations structure implemented and returned by PMDs. + * + * To implement this API, PMDs must handle the RTE_ETH_FILTER_GENERIC filter + * type in their .filter_ctrl callback function (struct eth_dev_ops) as well + * as the RTE_ETH_FILTER_GET filter operation. + * + * If successful, this operation must result in a pointer to a PMD-specific + * struct rte_flow_ops written to the argument address as described below: + * + * \code + * + * // PMD filter_ctrl callback + * + * static const struct rte_flow_ops pmd_flow_ops = { ... }; + * + * switch (filter_type) { + * case RTE_ETH_FILTER_GENERIC: + * if (filter_op != RTE_ETH_FILTER_GET) + * return -EINVAL; + * *(const void **)arg = &pmd_flow_ops; + * return 0; + * } + * + * \endcode + * + * See also rte_flow_ops_get(). + * + * These callback functions are not supposed to be used by applications + * directly, which must rely on the API defined in rte_flow.h. + * + * Public-facing wrapper functions perform a few consistency checks so that + * unimplemented (i.e. NULL) callbacks simply return -ENOTSUP. These + * callbacks otherwise only differ by their first argument (with port ID + * already resolved to a pointer to struct rte_eth_dev). + */ +struct rte_flow_ops { + /** See rte_flow_validate(). */ + int (*validate) + (struct rte_eth_dev *, + const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); + /** See rte_flow_create(). */ + struct rte_flow *(*create) + (struct rte_eth_dev *, + const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); + /** See rte_flow_destroy(). */ + int (*destroy) + (struct rte_eth_dev *, + struct rte_flow *, + struct rte_flow_error *); + /** See rte_flow_flush(). */ + int (*flush) + (struct rte_eth_dev *, + struct rte_flow_error *); + /** See rte_flow_query(). */ + int (*query) + (struct rte_eth_dev *, + struct rte_flow *, + enum rte_flow_action_type, + void *, + struct rte_flow_error *); +}; + +/** + * Initialize generic flow error structure. + * + * This function also sets rte_errno to a given value. + * + * @param[out] error + * Pointer to flow error structure (may be NULL). + * @param code + * Related error code (rte_errno). + * @param type + * Cause field and error types. + * @param cause + * Object responsible for the error. + * @param message + * Human-readable error message. + * + * @return + * Error code. 
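+ * + * As an illustrative sketch (the attr pointer is hypothetical), a PMD + * rejecting an unsupported attribute from one of its callbacks might + * write: + * + * \code + * if (attr->egress) { + *     rte_flow_error_set(error, ENOTSUP, + *                        RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + *                        NULL, "egress is not supported"); + *     return -rte_errno; + * } + * \endcode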
+ */ +static inline int +rte_flow_error_set(struct rte_flow_error *error, + int code, + enum rte_flow_error_type type, + const void *cause, + const char *message) +{ + if (error) { + *error = (struct rte_flow_error){ + .type = type, + .cause = cause, + .message = message, + }; + } + rte_errno = code; + return code; +} + +/** + * Get generic flow operations structure from a port. + * + * @param port_id + * Port identifier to query. + * @param[out] error + * Pointer to flow error structure. + * + * @return + * The flow operations structure associated with port_id, NULL in case of + * error, in which case rte_errno is set and the error structure contains + * additional details. + */ +const struct rte_flow_ops * +rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_FLOW_DRIVER_H_ */ diff --git a/lib/librte_eventdev/Makefile b/lib/librte_eventdev/Makefile new file mode 100644 index 00000000..e06346a6 --- /dev/null +++ b/lib/librte_eventdev/Makefile @@ -0,0 +1,53 @@ +# BSD LICENSE +# +# Copyright(c) 2016 Cavium networks. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Cavium networks nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_eventdev.a + +# library version +LIBABIVER := 1 + +# build flags +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +# library source files +SRCS-y += rte_eventdev.c + +# export include files +SYMLINK-y-include += rte_eventdev.h +SYMLINK-y-include += rte_eventdev_pmd.h + +# versioning export map +EXPORT_MAP := rte_eventdev_version.map + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c new file mode 100644 index 00000000..20afc3f0 --- /dev/null +++ b/lib/librte_eventdev/rte_eventdev.c @@ -0,0 +1,1345 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2016 Cavium networks. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <errno.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/queue.h> + +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_debug.h> +#include <rte_dev.h> +#include <rte_pci.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_common.h> +#include <rte_malloc.h> +#include <rte_errno.h> + +#include "rte_eventdev.h" +#include "rte_eventdev_pmd.h" + +struct rte_eventdev rte_event_devices[RTE_EVENT_MAX_DEVS]; + +struct rte_eventdev *rte_eventdevs = &rte_event_devices[0]; + +static struct rte_eventdev_global eventdev_globals = { + .nb_devs = 0 +}; + +struct rte_eventdev_global *rte_eventdev_globals = &eventdev_globals; + +/* Event dev north bound API implementation */ + +uint8_t +rte_event_dev_count(void) +{ + return rte_eventdev_globals->nb_devs; +} + +int +rte_event_dev_get_dev_id(const char *name) +{ + int i; + + if (!name) + return -EINVAL; + + for (i = 0; i < rte_eventdev_globals->nb_devs; i++) + if ((strcmp(rte_event_devices[i].data->name, name) + == 0) && + (rte_event_devices[i].attached == + RTE_EVENTDEV_ATTACHED)) + return i; + return -ENODEV; +} + +int +rte_event_dev_socket_id(uint8_t dev_id) +{ + struct rte_eventdev *dev; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + + return dev->data->socket_id; +} + +int +rte_event_dev_info_get(uint8_t dev_id, struct rte_event_dev_info *dev_info) +{ + struct rte_eventdev *dev; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + + if (dev_info == NULL) + return -EINVAL; + + memset(dev_info, 0, sizeof(struct rte_event_dev_info)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + (*dev->dev_ops->dev_infos_get)(dev, 
dev_info); + + dev_info->dequeue_timeout_ns = dev->data->dev_conf.dequeue_timeout_ns; + + dev_info->dev = dev->dev; + if (dev->driver) + dev_info->driver_name = dev->driver->pci_drv.driver.name; + return 0; +} + +static inline int +rte_event_dev_queue_config(struct rte_eventdev *dev, uint8_t nb_queues) +{ + uint8_t old_nb_queues = dev->data->nb_queues; + uint8_t *queues_prio; + unsigned int i; + + RTE_EDEV_LOG_DEBUG("Setup %d queues on device %u", nb_queues, + dev->data->dev_id); + + /* First time configuration */ + if (dev->data->queues_prio == NULL && nb_queues != 0) { + /* Allocate memory to store queue priority */ + dev->data->queues_prio = rte_zmalloc_socket( + "eventdev->data->queues_prio", + sizeof(dev->data->queues_prio[0]) * nb_queues, + RTE_CACHE_LINE_SIZE, dev->data->socket_id); + if (dev->data->queues_prio == NULL) { + dev->data->nb_queues = 0; + RTE_EDEV_LOG_ERR("failed to get mem for queue priority," + "nb_queues %u", nb_queues); + return -(ENOMEM); + } + /* Re-configure */ + } else if (dev->data->queues_prio != NULL && nb_queues != 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_release, -ENOTSUP); + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->queue_release)(dev, i); + + /* Re allocate memory to store queue priority */ + queues_prio = dev->data->queues_prio; + queues_prio = rte_realloc(queues_prio, + sizeof(queues_prio[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (queues_prio == NULL) { + RTE_EDEV_LOG_ERR("failed to realloc queue priority," + " nb_queues %u", nb_queues); + return -(ENOMEM); + } + dev->data->queues_prio = queues_prio; + + if (nb_queues > old_nb_queues) { + uint8_t new_qs = nb_queues - old_nb_queues; + + memset(queues_prio + old_nb_queues, 0, + sizeof(queues_prio[0]) * new_qs); + } + } else if (dev->data->queues_prio != NULL && nb_queues == 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_release, -ENOTSUP); + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->queue_release)(dev, i); + } + + dev->data->nb_queues = nb_queues; + return 0; +} + +#define EVENT_QUEUE_SERVICE_PRIORITY_INVALID (0xdead) + +static inline int +rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports) +{ + uint8_t old_nb_ports = dev->data->nb_ports; + void **ports; + uint16_t *links_map; + uint8_t *ports_dequeue_depth; + uint8_t *ports_enqueue_depth; + unsigned int i; + + RTE_EDEV_LOG_DEBUG("Setup %d ports on device %u", nb_ports, + dev->data->dev_id); + + /* First time configuration */ + if (dev->data->ports == NULL && nb_ports != 0) { + dev->data->ports = rte_zmalloc_socket("eventdev->data->ports", + sizeof(dev->data->ports[0]) * nb_ports, + RTE_CACHE_LINE_SIZE, dev->data->socket_id); + if (dev->data->ports == NULL) { + dev->data->nb_ports = 0; + RTE_EDEV_LOG_ERR("failed to get mem for port meta data," + "nb_ports %u", nb_ports); + return -(ENOMEM); + } + + /* Allocate memory to store ports dequeue depth */ + dev->data->ports_dequeue_depth = + rte_zmalloc_socket("eventdev->ports_dequeue_depth", + sizeof(dev->data->ports_dequeue_depth[0]) * nb_ports, + RTE_CACHE_LINE_SIZE, dev->data->socket_id); + if (dev->data->ports_dequeue_depth == NULL) { + dev->data->nb_ports = 0; + RTE_EDEV_LOG_ERR("failed to get mem for port deq meta," + "nb_ports %u", nb_ports); + return -(ENOMEM); + } + + /* Allocate memory to store ports enqueue depth */ + dev->data->ports_enqueue_depth = + rte_zmalloc_socket("eventdev->ports_enqueue_depth", + sizeof(dev->data->ports_enqueue_depth[0]) * nb_ports, + RTE_CACHE_LINE_SIZE, dev->data->socket_id); + if 
(dev->data->ports_enqueue_depth == NULL) { + dev->data->nb_ports = 0; + RTE_EDEV_LOG_ERR("failed to get mem for port enq meta," + "nb_ports %u", nb_ports); + return -(ENOMEM); + } + + /* Allocate memory to store queue to port link connection */ + dev->data->links_map = + rte_zmalloc_socket("eventdev->links_map", + sizeof(dev->data->links_map[0]) * nb_ports * + RTE_EVENT_MAX_QUEUES_PER_DEV, + RTE_CACHE_LINE_SIZE, dev->data->socket_id); + if (dev->data->links_map == NULL) { + dev->data->nb_ports = 0; + RTE_EDEV_LOG_ERR("failed to get mem for port_map area," + "nb_ports %u", nb_ports); + return -(ENOMEM); + } + for (i = 0; i < nb_ports * RTE_EVENT_MAX_QUEUES_PER_DEV; i++) + dev->data->links_map[i] = + EVENT_QUEUE_SERVICE_PRIORITY_INVALID; + } else if (dev->data->ports != NULL && nb_ports != 0) {/* re-config */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_release, -ENOTSUP); + + ports = dev->data->ports; + ports_dequeue_depth = dev->data->ports_dequeue_depth; + ports_enqueue_depth = dev->data->ports_enqueue_depth; + links_map = dev->data->links_map; + + for (i = nb_ports; i < old_nb_ports; i++) + (*dev->dev_ops->port_release)(ports[i]); + + /* Realloc memory for ports */ + ports = rte_realloc(ports, sizeof(ports[0]) * nb_ports, + RTE_CACHE_LINE_SIZE); + if (ports == NULL) { + RTE_EDEV_LOG_ERR("failed to realloc port meta data," + " nb_ports %u", nb_ports); + return -(ENOMEM); + } + + /* Realloc memory for ports_dequeue_depth */ + ports_dequeue_depth = rte_realloc(ports_dequeue_depth, + sizeof(ports_dequeue_depth[0]) * nb_ports, + RTE_CACHE_LINE_SIZE); + if (ports_dequeue_depth == NULL) { + RTE_EDEV_LOG_ERR("failed to realloc port dequeue meta," + " nb_ports %u", nb_ports); + return -(ENOMEM); + } + + /* Realloc memory for ports_enqueue_depth */ + ports_enqueue_depth = rte_realloc(ports_enqueue_depth, + sizeof(ports_enqueue_depth[0]) * nb_ports, + RTE_CACHE_LINE_SIZE); + if (ports_enqueue_depth == NULL) { + RTE_EDEV_LOG_ERR("failed to realloc port enqueue meta," + " nb_ports %u", nb_ports); + return -(ENOMEM); + } + + /* Realloc memory to store queue to port link connection */ + links_map = rte_realloc(links_map, + sizeof(dev->data->links_map[0]) * nb_ports * + RTE_EVENT_MAX_QUEUES_PER_DEV, + RTE_CACHE_LINE_SIZE); + /* check the local realloc result, not the stale dev->data pointer */ + if (links_map == NULL) { + dev->data->nb_ports = 0; + RTE_EDEV_LOG_ERR("failed to realloc mem for port_map," + "nb_ports %u", nb_ports); + return -(ENOMEM); + } + + if (nb_ports > old_nb_ports) { + uint8_t new_ps = nb_ports - old_nb_ports; + unsigned int old_links_map_end = + old_nb_ports * RTE_EVENT_MAX_QUEUES_PER_DEV; + unsigned int links_map_end = + nb_ports * RTE_EVENT_MAX_QUEUES_PER_DEV; + + memset(ports + old_nb_ports, 0, + sizeof(ports[0]) * new_ps); + memset(ports_dequeue_depth + old_nb_ports, 0, + sizeof(ports_dequeue_depth[0]) * new_ps); + memset(ports_enqueue_depth + old_nb_ports, 0, + sizeof(ports_enqueue_depth[0]) * new_ps); + for (i = old_links_map_end; i < links_map_end; i++) + links_map[i] = + EVENT_QUEUE_SERVICE_PRIORITY_INVALID; + } + + dev->data->ports = ports; + dev->data->ports_dequeue_depth = ports_dequeue_depth; + dev->data->ports_enqueue_depth = ports_enqueue_depth; + dev->data->links_map = links_map; + } else if (dev->data->ports != NULL && nb_ports == 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_release, -ENOTSUP); + + ports = dev->data->ports; + for (i = nb_ports; i < old_nb_ports; i++) + (*dev->dev_ops->port_release)(ports[i]); + } + + dev->data->nb_ports = nb_ports; + return 0; +} + +int +rte_event_dev_configure(uint8_t dev_id, + const 
struct rte_event_dev_config *dev_conf) +{ + struct rte_eventdev *dev; + struct rte_event_dev_info info; + int diag; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP); + + if (dev->data->dev_started) { + RTE_EDEV_LOG_ERR( + "device %d must be stopped to allow configuration", dev_id); + return -EBUSY; + } + + if (dev_conf == NULL) + return -EINVAL; + + (*dev->dev_ops->dev_infos_get)(dev, &info); + + /* Check dequeue_timeout_ns value is in limit */ + if (!(dev_conf->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)) { + if (dev_conf->dequeue_timeout_ns < info.min_dequeue_timeout_ns + || dev_conf->dequeue_timeout_ns > + info.max_dequeue_timeout_ns) { + RTE_EDEV_LOG_ERR("dev%d invalid dequeue_timeout_ns=%d" + " min_dequeue_timeout_ns=%d max_dequeue_timeout_ns=%d", + dev_id, dev_conf->dequeue_timeout_ns, + info.min_dequeue_timeout_ns, + info.max_dequeue_timeout_ns); + return -EINVAL; + } + } + + /* Check nb_events_limit is in limit */ + if (dev_conf->nb_events_limit > info.max_num_events) { + RTE_EDEV_LOG_ERR("dev%d nb_events_limit=%d > max_num_events=%d", + dev_id, dev_conf->nb_events_limit, info.max_num_events); + return -EINVAL; + } + + /* Check nb_event_queues is in limit */ + if (!dev_conf->nb_event_queues) { + RTE_EDEV_LOG_ERR("dev%d nb_event_queues cannot be zero", + dev_id); + return -EINVAL; + } + if (dev_conf->nb_event_queues > info.max_event_queues) { + RTE_EDEV_LOG_ERR("%d nb_event_queues=%d > max_event_queues=%d", + dev_id, dev_conf->nb_event_queues, info.max_event_queues); + return -EINVAL; + } + + /* Check nb_event_ports is in limit */ + if (!dev_conf->nb_event_ports) { + RTE_EDEV_LOG_ERR("dev%d nb_event_ports cannot be zero", dev_id); + return -EINVAL; + } + if (dev_conf->nb_event_ports > info.max_event_ports) { + RTE_EDEV_LOG_ERR("id%d nb_event_ports=%d > max_event_ports= %d", + dev_id, dev_conf->nb_event_ports, info.max_event_ports); + return -EINVAL; + } + + /* Check nb_event_queue_flows is in limit */ + if (!dev_conf->nb_event_queue_flows) { + RTE_EDEV_LOG_ERR("dev%d nb_flows cannot be zero", dev_id); + return -EINVAL; + } + if (dev_conf->nb_event_queue_flows > info.max_event_queue_flows) { + RTE_EDEV_LOG_ERR("dev%d nb_flows=%x > max_flows=%x", + dev_id, dev_conf->nb_event_queue_flows, + info.max_event_queue_flows); + return -EINVAL; + } + + /* Check nb_event_port_dequeue_depth is in limit */ + if (!dev_conf->nb_event_port_dequeue_depth) { + RTE_EDEV_LOG_ERR("dev%d nb_dequeue_depth cannot be zero", + dev_id); + return -EINVAL; + } + if (dev_conf->nb_event_port_dequeue_depth > + info.max_event_port_dequeue_depth) { + RTE_EDEV_LOG_ERR("dev%d nb_dq_depth=%d > max_dq_depth=%d", + dev_id, dev_conf->nb_event_port_dequeue_depth, + info.max_event_port_dequeue_depth); + return -EINVAL; + } + + /* Check nb_event_port_enqueue_depth is in limit */ + if (!dev_conf->nb_event_port_enqueue_depth) { + RTE_EDEV_LOG_ERR("dev%d nb_enqueue_depth cannot be zero", + dev_id); + return -EINVAL; + } + if (dev_conf->nb_event_port_enqueue_depth > + info.max_event_port_enqueue_depth) { + RTE_EDEV_LOG_ERR("dev%d nb_enq_depth=%d > max_enq_depth=%d", + dev_id, dev_conf->nb_event_port_enqueue_depth, + info.max_event_port_enqueue_depth); + return -EINVAL; + } + + /* Copy the dev_conf parameter into the dev structure */ + memcpy(&dev->data->dev_conf, dev_conf, sizeof(dev->data->dev_conf)); + + /* Setup new number of queues and 
reconfigure device. */ + diag = rte_event_dev_queue_config(dev, dev_conf->nb_event_queues); + if (diag != 0) { + RTE_EDEV_LOG_ERR("dev%d rte_event_dev_queue_config = %d", + dev_id, diag); + return diag; + } + + /* Setup new number of ports and reconfigure device. */ + diag = rte_event_dev_port_config(dev, dev_conf->nb_event_ports); + if (diag != 0) { + rte_event_dev_queue_config(dev, 0); + RTE_EDEV_LOG_ERR("dev%d rte_event_dev_port_config = %d", + dev_id, diag); + return diag; + } + + /* Configure the device */ + diag = (*dev->dev_ops->dev_configure)(dev); + if (diag != 0) { + RTE_EDEV_LOG_ERR("dev%d dev_configure = %d", dev_id, diag); + rte_event_dev_queue_config(dev, 0); + rte_event_dev_port_config(dev, 0); + } + + dev->data->event_dev_cap = info.event_dev_cap; + return diag; +} + +static inline int +is_valid_queue(struct rte_eventdev *dev, uint8_t queue_id) +{ + if (queue_id < dev->data->nb_queues && queue_id < + RTE_EVENT_MAX_QUEUES_PER_DEV) + return 1; + else + return 0; +} + +int +rte_event_queue_default_conf_get(uint8_t dev_id, uint8_t queue_id, + struct rte_event_queue_conf *queue_conf) +{ + struct rte_eventdev *dev; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + + if (queue_conf == NULL) + return -EINVAL; + + if (!is_valid_queue(dev, queue_id)) { + RTE_EDEV_LOG_ERR("Invalid queue_id=%" PRIu8, queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_def_conf, -ENOTSUP); + memset(queue_conf, 0, sizeof(struct rte_event_queue_conf)); + (*dev->dev_ops->queue_def_conf)(dev, queue_id, queue_conf); + return 0; +} + +static inline int +is_valid_atomic_queue_conf(const struct rte_event_queue_conf *queue_conf) +{ + if (queue_conf && ( + ((queue_conf->event_queue_cfg & + RTE_EVENT_QUEUE_CFG_TYPE_MASK) + == RTE_EVENT_QUEUE_CFG_ALL_TYPES) || + ((queue_conf->event_queue_cfg & + RTE_EVENT_QUEUE_CFG_TYPE_MASK) + == RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY) + )) + return 1; + else + return 0; +} + +static inline int +is_valid_ordered_queue_conf(const struct rte_event_queue_conf *queue_conf) +{ + if (queue_conf && ( + ((queue_conf->event_queue_cfg & + RTE_EVENT_QUEUE_CFG_TYPE_MASK) + == RTE_EVENT_QUEUE_CFG_ALL_TYPES) || + ((queue_conf->event_queue_cfg & + RTE_EVENT_QUEUE_CFG_TYPE_MASK) + == RTE_EVENT_QUEUE_CFG_ORDERED_ONLY) + )) + return 1; + else + return 0; +} + + +int +rte_event_queue_setup(uint8_t dev_id, uint8_t queue_id, + const struct rte_event_queue_conf *queue_conf) +{ + struct rte_eventdev *dev; + struct rte_event_queue_conf def_conf; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + + if (!is_valid_queue(dev, queue_id)) { + RTE_EDEV_LOG_ERR("Invalid queue_id=%" PRIu8, queue_id); + return -EINVAL; + } + + /* Check nb_atomic_flows limit */ + if (is_valid_atomic_queue_conf(queue_conf)) { + if (queue_conf->nb_atomic_flows == 0 || + queue_conf->nb_atomic_flows > + dev->data->dev_conf.nb_event_queue_flows) { + RTE_EDEV_LOG_ERR( + "dev%d queue%d Invalid nb_atomic_flows=%d max_flows=%d", + dev_id, queue_id, queue_conf->nb_atomic_flows, + dev->data->dev_conf.nb_event_queue_flows); + return -EINVAL; + } + } + + /* Check nb_atomic_order_sequences limit */ + if (is_valid_ordered_queue_conf(queue_conf)) { + if (queue_conf->nb_atomic_order_sequences == 0 || + queue_conf->nb_atomic_order_sequences > + dev->data->dev_conf.nb_event_queue_flows) { + RTE_EDEV_LOG_ERR( + "dev%d queue%d Invalid nb_atomic_order_seq=%d max_flows=%d", + dev_id, queue_id, queue_conf->nb_atomic_order_sequences, + 
dev->data->dev_conf.nb_event_queue_flows); + return -EINVAL; + } + } + + if (dev->data->dev_started) { + RTE_EDEV_LOG_ERR( + "device %d must be stopped to allow queue setup", dev_id); + return -EBUSY; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_setup, -ENOTSUP); + + if (queue_conf == NULL) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_def_conf, + -ENOTSUP); + (*dev->dev_ops->queue_def_conf)(dev, queue_id, &def_conf); + queue_conf = &def_conf; + } + + dev->data->queues_prio[queue_id] = queue_conf->priority; + return (*dev->dev_ops->queue_setup)(dev, queue_id, queue_conf); +} + +uint8_t +rte_event_queue_count(uint8_t dev_id) +{ + struct rte_eventdev *dev; + + dev = &rte_eventdevs[dev_id]; + return dev->data->nb_queues; +} + +uint8_t +rte_event_queue_priority(uint8_t dev_id, uint8_t queue_id) +{ + struct rte_eventdev *dev; + + dev = &rte_eventdevs[dev_id]; + if (dev->data->event_dev_cap & RTE_EVENT_DEV_CAP_QUEUE_QOS) + return dev->data->queues_prio[queue_id]; + else + return RTE_EVENT_DEV_PRIORITY_NORMAL; +} + +static inline int +is_valid_port(struct rte_eventdev *dev, uint8_t port_id) +{ + if (port_id < dev->data->nb_ports) + return 1; + else + return 0; +} + +int +rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id, + struct rte_event_port_conf *port_conf) +{ + struct rte_eventdev *dev; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + + if (port_conf == NULL) + return -EINVAL; + + if (!is_valid_port(dev, port_id)) { + RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_def_conf, -ENOTSUP); + memset(port_conf, 0, sizeof(struct rte_event_port_conf)); + (*dev->dev_ops->port_def_conf)(dev, port_id, port_conf); + return 0; +} + +int +rte_event_port_setup(uint8_t dev_id, uint8_t port_id, + const struct rte_event_port_conf *port_conf) +{ + struct rte_eventdev *dev; + struct rte_event_port_conf def_conf; + int diag; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + + if (!is_valid_port(dev, port_id)) { + RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id); + return -EINVAL; + } + + /* Check new_event_threshold limit */ + if ((port_conf && !port_conf->new_event_threshold) || + (port_conf && port_conf->new_event_threshold > + dev->data->dev_conf.nb_events_limit)) { + RTE_EDEV_LOG_ERR( + "dev%d port%d Invalid event_threshold=%d nb_events_limit=%d", + dev_id, port_id, port_conf->new_event_threshold, + dev->data->dev_conf.nb_events_limit); + return -EINVAL; + } + + /* Check dequeue_depth limit */ + if ((port_conf && !port_conf->dequeue_depth) || + (port_conf && port_conf->dequeue_depth > + dev->data->dev_conf.nb_event_port_dequeue_depth)) { + RTE_EDEV_LOG_ERR( + "dev%d port%d Invalid dequeue depth=%d max_dequeue_depth=%d", + dev_id, port_id, port_conf->dequeue_depth, + dev->data->dev_conf.nb_event_port_dequeue_depth); + return -EINVAL; + } + + /* Check enqueue_depth limit */ + if ((port_conf && !port_conf->enqueue_depth) || + (port_conf && port_conf->enqueue_depth > + dev->data->dev_conf.nb_event_port_enqueue_depth)) { + RTE_EDEV_LOG_ERR( + "dev%d port%d Invalid enqueue depth=%d max_enqueue_depth=%d", + dev_id, port_id, port_conf->enqueue_depth, + dev->data->dev_conf.nb_event_port_enqueue_depth); + return -EINVAL; + } + + if (dev->data->dev_started) { + RTE_EDEV_LOG_ERR( + "device %d must be stopped to allow port setup", dev_id); + return -EBUSY; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_setup, -ENOTSUP); + + if 
(port_conf == NULL) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_def_conf, + -ENOTSUP); + (*dev->dev_ops->port_def_conf)(dev, port_id, &def_conf); + port_conf = &def_conf; + } + + dev->data->ports_dequeue_depth[port_id] = + port_conf->dequeue_depth; + dev->data->ports_enqueue_depth[port_id] = + port_conf->enqueue_depth; + + diag = (*dev->dev_ops->port_setup)(dev, port_id, port_conf); + + /* Unlink all the queues from this port(default state after setup) */ + if (!diag) + diag = rte_event_port_unlink(dev_id, port_id, NULL, 0); + + if (diag < 0) + return diag; + + return 0; +} + +uint8_t +rte_event_port_dequeue_depth(uint8_t dev_id, uint8_t port_id) +{ + struct rte_eventdev *dev; + + dev = &rte_eventdevs[dev_id]; + return dev->data->ports_dequeue_depth[port_id]; +} + +uint8_t +rte_event_port_enqueue_depth(uint8_t dev_id, uint8_t port_id) +{ + struct rte_eventdev *dev; + + dev = &rte_eventdevs[dev_id]; + return dev->data->ports_enqueue_depth[port_id]; +} + +uint8_t +rte_event_port_count(uint8_t dev_id) +{ + struct rte_eventdev *dev; + + dev = &rte_eventdevs[dev_id]; + return dev->data->nb_ports; +} + +int +rte_event_port_link(uint8_t dev_id, uint8_t port_id, + const uint8_t queues[], const uint8_t priorities[], + uint16_t nb_links) +{ + struct rte_eventdev *dev; + uint8_t queues_list[RTE_EVENT_MAX_QUEUES_PER_DEV]; + uint8_t priorities_list[RTE_EVENT_MAX_QUEUES_PER_DEV]; + uint16_t *links_map; + int i, diag; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_link, -ENOTSUP); + + if (!is_valid_port(dev, port_id)) { + RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id); + return -EINVAL; + } + + if (queues == NULL) { + for (i = 0; i < dev->data->nb_queues; i++) + queues_list[i] = i; + + queues = queues_list; + nb_links = dev->data->nb_queues; + } + + if (priorities == NULL) { + for (i = 0; i < nb_links; i++) + priorities_list[i] = RTE_EVENT_DEV_PRIORITY_NORMAL; + + priorities = priorities_list; + } + + for (i = 0; i < nb_links; i++) + if (queues[i] >= dev->data->nb_queues) + return -EINVAL; + + diag = (*dev->dev_ops->port_link)(dev, dev->data->ports[port_id], + queues, priorities, nb_links); + if (diag < 0) + return diag; + + links_map = dev->data->links_map; + /* Point links_map to this port specific area */ + links_map += (port_id * RTE_EVENT_MAX_QUEUES_PER_DEV); + for (i = 0; i < diag; i++) + links_map[queues[i]] = (uint8_t)priorities[i]; + + return diag; +} + +int +rte_event_port_unlink(uint8_t dev_id, uint8_t port_id, + uint8_t queues[], uint16_t nb_unlinks) +{ + struct rte_eventdev *dev; + uint8_t all_queues[RTE_EVENT_MAX_QUEUES_PER_DEV]; + int i, diag; + uint16_t *links_map; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_unlink, -ENOTSUP); + + if (!is_valid_port(dev, port_id)) { + RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id); + return -EINVAL; + } + + if (queues == NULL) { + for (i = 0; i < dev->data->nb_queues; i++) + all_queues[i] = i; + queues = all_queues; + nb_unlinks = dev->data->nb_queues; + } + + for (i = 0; i < nb_unlinks; i++) + if (queues[i] >= dev->data->nb_queues) + return -EINVAL; + + diag = (*dev->dev_ops->port_unlink)(dev, dev->data->ports[port_id], + queues, nb_unlinks); + + if (diag < 0) + return diag; + + links_map = dev->data->links_map; + /* Point links_map to this port specific area */ + links_map += (port_id * RTE_EVENT_MAX_QUEUES_PER_DEV); + for (i = 0; i < diag; i++) + 
links_map[queues[i]] = EVENT_QUEUE_SERVICE_PRIORITY_INVALID; + + return diag; +} + +int +rte_event_port_links_get(uint8_t dev_id, uint8_t port_id, + uint8_t queues[], uint8_t priorities[]) +{ + struct rte_eventdev *dev; + uint16_t *links_map; + int i, count = 0; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + if (!is_valid_port(dev, port_id)) { + RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id); + return -EINVAL; + } + + links_map = dev->data->links_map; + /* Point links_map to this port specific area */ + links_map += (port_id * RTE_EVENT_MAX_QUEUES_PER_DEV); + for (i = 0; i < dev->data->nb_queues; i++) { + if (links_map[i] != EVENT_QUEUE_SERVICE_PRIORITY_INVALID) { + queues[count] = i; + priorities[count] = (uint8_t)links_map[i]; + ++count; + } + } + return count; +} + +int +rte_event_dequeue_timeout_ticks(uint8_t dev_id, uint64_t ns, + uint64_t *timeout_ticks) +{ + struct rte_eventdev *dev; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timeout_ticks, -ENOTSUP); + + if (timeout_ticks == NULL) + return -EINVAL; + + return (*dev->dev_ops->timeout_ticks)(dev, ns, timeout_ticks); +} + +int +rte_event_dev_dump(uint8_t dev_id, FILE *f) +{ + struct rte_eventdev *dev; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dump, -ENOTSUP); + + (*dev->dev_ops->dump)(dev, f); + return 0; + +} + +static int +xstats_get_count(uint8_t dev_id, enum rte_event_dev_xstats_mode mode, + uint8_t queue_port_id) +{ + struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + if (dev->dev_ops->xstats_get_names != NULL) + return (*dev->dev_ops->xstats_get_names)(dev, mode, + queue_port_id, + NULL, NULL, 0); + return 0; +} + +int +rte_event_dev_xstats_names_get(uint8_t dev_id, + enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id, + struct rte_event_dev_xstats_name *xstats_names, + unsigned int *ids, unsigned int size) +{ + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -ENODEV); + const int cnt_expected_entries = xstats_get_count(dev_id, mode, + queue_port_id); + if (xstats_names == NULL || cnt_expected_entries < 0 || + (int)size < cnt_expected_entries) + return cnt_expected_entries; + + /* dev_id checked above */ + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + + if (dev->dev_ops->xstats_get_names != NULL) + return (*dev->dev_ops->xstats_get_names)(dev, mode, + queue_port_id, xstats_names, ids, size); + + return -ENOTSUP; +} + +/* retrieve eventdev extended statistics */ +int +rte_event_dev_xstats_get(uint8_t dev_id, enum rte_event_dev_xstats_mode mode, + uint8_t queue_port_id, const unsigned int ids[], + uint64_t values[], unsigned int n) +{ + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -ENODEV); + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + + /* implemented by the driver */ + if (dev->dev_ops->xstats_get != NULL) + return (*dev->dev_ops->xstats_get)(dev, mode, queue_port_id, + ids, values, n); + return -ENOTSUP; +} + +uint64_t +rte_event_dev_xstats_by_name_get(uint8_t dev_id, const char *name, + unsigned int *id) +{ + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, 0); + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + unsigned int temp = -1; + + if (id != NULL) + *id = (unsigned int)-1; + else + id = &temp; /* ensure driver never gets a NULL value */ + + /* implemented by driver */ + if (dev->dev_ops->xstats_get_by_name != NULL) + return 
(*dev->dev_ops->xstats_get_by_name)(dev, name, id); + return -ENOTSUP; +} + +int rte_event_dev_xstats_reset(uint8_t dev_id, + enum rte_event_dev_xstats_mode mode, int16_t queue_port_id, + const uint32_t ids[], uint32_t nb_ids) +{ + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + + if (dev->dev_ops->xstats_reset != NULL) + return (*dev->dev_ops->xstats_reset)(dev, mode, queue_port_id, + ids, nb_ids); + return -ENOTSUP; +} + +int +rte_event_dev_start(uint8_t dev_id) +{ + struct rte_eventdev *dev; + int diag; + + RTE_EDEV_LOG_DEBUG("Start dev_id=%" PRIu8, dev_id); + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP); + + if (dev->data->dev_started != 0) { + RTE_EDEV_LOG_ERR("Device with dev_id=%" PRIu8 "already started", + dev_id); + return 0; + } + + diag = (*dev->dev_ops->dev_start)(dev); + if (diag == 0) + dev->data->dev_started = 1; + else + return diag; + + return 0; +} + +void +rte_event_dev_stop(uint8_t dev_id) +{ + struct rte_eventdev *dev; + + RTE_EDEV_LOG_DEBUG("Stop dev_id=%" PRIu8, dev_id); + + RTE_EVENTDEV_VALID_DEVID_OR_RET(dev_id); + dev = &rte_eventdevs[dev_id]; + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop); + + if (dev->data->dev_started == 0) { + RTE_EDEV_LOG_ERR("Device with dev_id=%" PRIu8 "already stopped", + dev_id); + return; + } + + dev->data->dev_started = 0; + (*dev->dev_ops->dev_stop)(dev); +} + +int +rte_event_dev_close(uint8_t dev_id) +{ + struct rte_eventdev *dev; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_close, -ENOTSUP); + + /* Device must be stopped before it can be closed */ + if (dev->data->dev_started == 1) { + RTE_EDEV_LOG_ERR("Device %u must be stopped before closing", + dev_id); + return -EBUSY; + } + + return (*dev->dev_ops->dev_close)(dev); +} + +static inline int +rte_eventdev_data_alloc(uint8_t dev_id, struct rte_eventdev_data **data, + int socket_id) +{ + char mz_name[RTE_EVENTDEV_NAME_MAX_LEN]; + const struct rte_memzone *mz; + int n; + + /* Generate memzone name */ + n = snprintf(mz_name, sizeof(mz_name), "rte_eventdev_data_%u", dev_id); + if (n >= (int)sizeof(mz_name)) + return -EINVAL; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + mz = rte_memzone_reserve(mz_name, + sizeof(struct rte_eventdev_data), + socket_id, 0); + } else + mz = rte_memzone_lookup(mz_name); + + if (mz == NULL) + return -ENOMEM; + + *data = mz->addr; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + memset(*data, 0, sizeof(struct rte_eventdev_data)); + + return 0; +} + +static inline uint8_t +rte_eventdev_find_free_device_index(void) +{ + uint8_t dev_id; + + for (dev_id = 0; dev_id < RTE_EVENT_MAX_DEVS; dev_id++) { + if (rte_eventdevs[dev_id].attached == + RTE_EVENTDEV_DETACHED) + return dev_id; + } + return RTE_EVENT_MAX_DEVS; +} + +struct rte_eventdev * +rte_event_pmd_allocate(const char *name, int socket_id) +{ + struct rte_eventdev *eventdev; + uint8_t dev_id; + + if (rte_event_pmd_get_named_dev(name) != NULL) { + RTE_EDEV_LOG_ERR("Event device with name %s already " + "allocated!", name); + return NULL; + } + + dev_id = rte_eventdev_find_free_device_index(); + if (dev_id == RTE_EVENT_MAX_DEVS) { + RTE_EDEV_LOG_ERR("Reached maximum number of event devices"); + return NULL; + } + + eventdev = &rte_eventdevs[dev_id]; + + if (eventdev->data == NULL) { + struct rte_eventdev_data *eventdev_data = NULL; + + int retval 
= rte_eventdev_data_alloc(dev_id, &eventdev_data, + socket_id); + + if (retval < 0 || eventdev_data == NULL) + return NULL; + + eventdev->data = eventdev_data; + + snprintf(eventdev->data->name, RTE_EVENTDEV_NAME_MAX_LEN, + "%s", name); + + eventdev->data->dev_id = dev_id; + eventdev->data->socket_id = socket_id; + eventdev->data->dev_started = 0; + + eventdev->attached = RTE_EVENTDEV_ATTACHED; + + eventdev_globals.nb_devs++; + } + + return eventdev; +} + +int +rte_event_pmd_release(struct rte_eventdev *eventdev) +{ + int ret; + char mz_name[RTE_EVENTDEV_NAME_MAX_LEN]; + const struct rte_memzone *mz; + + if (eventdev == NULL) + return -EINVAL; + + ret = rte_event_dev_close(eventdev->data->dev_id); + if (ret < 0) + return ret; + + eventdev->attached = RTE_EVENTDEV_DETACHED; + eventdev_globals.nb_devs--; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_free(eventdev->data->dev_private); + + /* Generate memzone name */ + ret = snprintf(mz_name, sizeof(mz_name), "rte_eventdev_data_%u", + eventdev->data->dev_id); + if (ret >= (int)sizeof(mz_name)) + return -EINVAL; + + mz = rte_memzone_lookup(mz_name); + if (mz == NULL) + return -ENOMEM; + + ret = rte_memzone_free(mz); + if (ret) + return ret; + } + + eventdev->data = NULL; + return 0; +} + +struct rte_eventdev * +rte_event_pmd_vdev_init(const char *name, size_t dev_private_size, + int socket_id) +{ + struct rte_eventdev *eventdev; + + /* Allocate device structure */ + eventdev = rte_event_pmd_allocate(name, socket_id); + if (eventdev == NULL) + return NULL; + + /* Allocate private device structure */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eventdev->data->dev_private = + rte_zmalloc_socket("eventdev device private", + dev_private_size, + RTE_CACHE_LINE_SIZE, + socket_id); + + if (eventdev->data->dev_private == NULL) + rte_panic("Cannot allocate memzone for private device" + " data"); + } + + return eventdev; +} + +int +rte_event_pmd_vdev_uninit(const char *name) +{ + struct rte_eventdev *eventdev; + + if (name == NULL) + return -EINVAL; + + eventdev = rte_event_pmd_get_named_dev(name); + if (eventdev == NULL) + return -ENODEV; + + /* Free the event device */ + rte_event_pmd_release(eventdev); + + return 0; +} + +int +rte_event_pmd_pci_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev) +{ + struct rte_eventdev_driver *eventdrv; + struct rte_eventdev *eventdev; + + char eventdev_name[RTE_EVENTDEV_NAME_MAX_LEN]; + + int retval; + + eventdrv = (struct rte_eventdev_driver *)pci_drv; + if (eventdrv == NULL) + return -ENODEV; + + rte_pci_device_name(&pci_dev->addr, eventdev_name, + sizeof(eventdev_name)); + + eventdev = rte_event_pmd_allocate(eventdev_name, + pci_dev->device.numa_node); + if (eventdev == NULL) + return -ENOMEM; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eventdev->data->dev_private = + rte_zmalloc_socket( + "eventdev private structure", + eventdrv->dev_private_size, + RTE_CACHE_LINE_SIZE, + rte_socket_id()); + + if (eventdev->data->dev_private == NULL) + rte_panic("Cannot allocate memzone for private " + "device data"); + } + + eventdev->dev = &pci_dev->device; + eventdev->driver = eventdrv; + + /* Invoke PMD device initialization function */ + retval = (*eventdrv->eventdev_init)(eventdev); + if (retval == 0) + return 0; + + RTE_EDEV_LOG_ERR("driver %s: (vendor_id=0x%x device_id=0x%x)" + " failed", pci_drv->driver.name, + (unsigned int) pci_dev->id.vendor_id, + (unsigned int) pci_dev->id.device_id); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + 
rte_free(eventdev->data->dev_private); + + eventdev->attached = RTE_EVENTDEV_DETACHED; + eventdev_globals.nb_devs--; + + return -ENXIO; +} + +int +rte_event_pmd_pci_remove(struct rte_pci_device *pci_dev) +{ + const struct rte_eventdev_driver *eventdrv; + struct rte_eventdev *eventdev; + char eventdev_name[RTE_EVENTDEV_NAME_MAX_LEN]; + int ret; + + if (pci_dev == NULL) + return -EINVAL; + + rte_pci_device_name(&pci_dev->addr, eventdev_name, + sizeof(eventdev_name)); + + eventdev = rte_event_pmd_get_named_dev(eventdev_name); + if (eventdev == NULL) + return -ENODEV; + + eventdrv = (const struct rte_eventdev_driver *)pci_dev->driver; + if (eventdrv == NULL) + return -ENODEV; + + /* Invoke PMD device un-init function */ + if (*eventdrv->eventdev_uninit) { + ret = (*eventdrv->eventdev_uninit)(eventdev); + if (ret) + return ret; + } + + /* Free event device */ + rte_event_pmd_release(eventdev); + + eventdev->dev = NULL; + eventdev->driver = NULL; + + return 0; +} diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h new file mode 100644 index 00000000..20e7293e --- /dev/null +++ b/lib/librte_eventdev/rte_eventdev.h @@ -0,0 +1,1588 @@ +/* + * BSD LICENSE + * + * Copyright 2016 Cavium. + * Copyright 2016 Intel Corporation. + * Copyright 2016 NXP. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_EVENTDEV_H_ +#define _RTE_EVENTDEV_H_ + +/** + * @file + * + * RTE Event Device API + * + * In a polling model, lcores poll ethdev ports and associated rx queues + * directly to look for packet. In an event driven model, by contrast, lcores + * call the scheduler that selects packets for them based on programmer + * specified criteria. Eventdev library adds support for event driven + * programming model, which offer applications automatic multicore scaling, + * dynamic load balancing, pipelining, packet ingress order maintenance and + * synchronization services to simplify application packet processing. 
+ * + * The Event Device API is composed of two parts: + * + * - The application-oriented Event API that includes functions to setup + * an event device (configure it, setup its queues, ports and start it), to + * establish the link between queues to port and to receive events, and so on. + * + * - The driver-oriented Event API that exports a function allowing + * an event poll Mode Driver (PMD) to simultaneously register itself as + * an event device driver. + * + * Event device components: + * + * +-----------------+ + * | +-------------+ | + * +-------+ | | flow 0 | | + * |Packet | | +-------------+ | + * |event | | +-------------+ | + * | | | | flow 1 | |port_link(port0, queue0) + * +-------+ | +-------------+ | | +--------+ + * +-------+ | +-------------+ o-----v-----o |dequeue +------+ + * |Crypto | | | flow n | | | event +------->|Core 0| + * |work | | +-------------+ o----+ | port 0 | | | + * |done ev| | event queue 0 | | +--------+ +------+ + * +-------+ +-----------------+ | + * +-------+ | + * |Timer | +-----------------+ | +--------+ + * |expiry | | +-------------+ | +------o |dequeue +------+ + * |event | | | flow 0 | o-----------o event +------->|Core 1| + * +-------+ | +-------------+ | +----o port 1 | | | + * Event enqueue | +-------------+ | | +--------+ +------+ + * o-------------> | | flow 1 | | | + * enqueue( | +-------------+ | | + * queue_id, | | | +--------+ +------+ + * flow_id, | +-------------+ | | | |dequeue |Core 2| + * sched_type, | | flow n | o-----------o event +------->| | + * event_type, | +-------------+ | | | port 2 | +------+ + * subev_type, | event queue 1 | | +--------+ + * event) +-----------------+ | +--------+ + * | | |dequeue +------+ + * +-------+ +-----------------+ | | event +------->|Core n| + * |Core | | +-------------+ o-----------o port n | | | + * |(SW) | | | flow 0 | | | +--------+ +--+---+ + * |event | | +-------------+ | | | + * +-------+ | +-------------+ | | | + * ^ | | flow 1 | | | | + * | | +-------------+ o------+ | + * | | +-------------+ | | + * | | | flow n | | | + * | | +-------------+ | | + * | | event queue n | | + * | +-----------------+ | + * | | + * +-----------------------------------------------------------+ + * + * Event device: A hardware or software-based event scheduler. + * + * Event: A unit of scheduling that encapsulates a packet or other datatype + * like SW generated event from the CPU, Crypto work completion notification, + * Timer expiry event notification etc as well as metadata. + * The metadata includes flow ID, scheduling type, event priority, event_type, + * sub_event_type etc. + * + * Event queue: A queue containing events that are scheduled by the event dev. + * An event queue contains events of different flows associated with scheduling + * types, such as atomic, ordered, or parallel. + * + * Event port: An application's interface into the event dev for enqueue and + * dequeue operations. Each event port can be linked with one or more + * event queues for dequeue operations. + * + * By default, all the functions of the Event Device API exported by a PMD + * are lock-free functions which assume to not be invoked in parallel on + * different logical cores to work on the same target object. For instance, + * the dequeue function of a PMD cannot be invoked in parallel on two logical + * cores to operates on same event port. Of course, this function + * can be invoked in parallel by different logical cores on different ports. + * It is the responsibility of the upper level application to enforce this rule. 
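+ * + * A common arrangement that satisfies this rule is to give each worker + * lcore its own event port; a minimal sketch, assuming hypothetical + * dev_id, done and process() symbols: + * + * \code{.c} + * // launched once per worker lcore; ports are never shared + * static int + * worker(void *arg) + * { + *     uint8_t port_id = *(uint8_t *)arg; // unique port per lcore + *     struct rte_event ev; + * + *     while (!done) + *         if (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) + *             process(&ev); + *     return 0; + * } + * \endcode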
+ * + * In all functions of the Event API, the Event device is + * designated by an integer >= 0 named the device identifier *dev_id*. + * + * At the Event driver level, Event devices are represented by a generic + * data structure of type *rte_event_dev*. + * + * Event devices are dynamically registered during the PCI/SoC device probing + * phase performed at EAL initialization time. + * When an Event device is being probed, a *rte_event_dev* structure and + * a new device identifier are allocated for that device. Then, the + * event_dev_init() function supplied by the Event driver matching the probed + * device is invoked to properly initialize the device. + * + * The role of the device init function is to reset the hardware or + * software event driver implementation. + * + * If the device init operation is successful, the correspondence between + * the device identifier assigned to the new device and its associated + * *rte_event_dev* structure is effectively registered. + * Otherwise, both the *rte_event_dev* structure and the device identifier are + * freed. + * + * The functions exported by the application Event API to setup a device + * designated by its device identifier must be invoked in the following order + * (a minimal sketch of this sequence is shown at the end of this overview): + * - rte_event_dev_configure() + * - rte_event_queue_setup() + * - rte_event_port_setup() + * - rte_event_port_link() + * - rte_event_dev_start() + * + * Then, the application can invoke, in any order, the functions + * exported by the Event API to schedule events, dequeue events, enqueue + * events, link and unlink event queues to and from event ports, and so on. + * + * An application may use rte_event_[queue/port]_default_conf_get() to get + * the default configuration of an event queue or event port, overriding + * only the few values that need to change. + * + * If the application wants to change the configuration (i.e. call + * rte_event_dev_configure(), rte_event_queue_setup(), or + * rte_event_port_setup()), it must call rte_event_dev_stop() first to stop the + * device and then do the reconfiguration before calling rte_event_dev_start() + * again. The schedule, enqueue and dequeue functions should not be invoked + * when the device is stopped. + * + * Finally, an application can close an Event device by invoking the + * rte_event_dev_close() function. + * + * Each function of the application Event API invokes a specific function + * of the PMD that controls the target device designated by its device + * identifier. + * + * For this purpose, all device-specific functions of an Event driver are + * supplied through a set of pointers contained in a generic structure of type + * *event_dev_ops*. + * The address of the *event_dev_ops* structure is stored in the *rte_event_dev* + * structure by the device init function of the Event driver, which is + * invoked during the PCI/SoC device probing phase, as explained earlier. + * + * In other words, each function of the Event API simply retrieves the + * *rte_event_dev* structure associated with the device identifier and + * performs an indirect invocation of the corresponding driver function + * supplied in the *event_dev_ops* structure of the *rte_event_dev* structure. + * + * For performance reasons, the addresses of the fast-path functions of the + * Event driver are not contained in the *event_dev_ops* structure. + * Instead, they are directly stored at the beginning of the *rte_event_dev* + * structure to avoid an extra indirect memory access during their invocation.
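+ * + * As a minimal sketch of the setup sequence above (a single queue and + * port, default configurations, and no error checking; a real application + * must check every return value): + * + * \code{.c} + * struct rte_event_dev_config cfg = { ... }; // filled from rte_event_dev_info_get() limits + * + * rte_event_dev_configure(dev_id, &cfg); + * rte_event_queue_setup(dev_id, 0, NULL); // NULL: default queue config + * rte_event_port_setup(dev_id, 0, NULL); // NULL: default port config + * rte_event_port_link(dev_id, 0, NULL, NULL, 0); // link port 0 to all queues + * rte_event_dev_start(dev_id); + * \endcode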
+ *
+ * RTE event device drivers do not use interrupts for enqueue or dequeue
+ * operations. Instead, Event drivers export Poll-Mode enqueue and dequeue
+ * functions to applications.
+ *
+ * An event-driven application has the following typical workflow on the
+ * fast path:
+ * \code{.c}
+ *	while (1) {
+ *
+ *		rte_event_schedule(dev_id);
+ *
+ *		rte_event_dequeue_burst(...);
+ *
+ *		(event processing)
+ *
+ *		rte_event_enqueue_burst(...);
+ *	}
+ * \endcode
+ *
+ * Events are injected into the event device through the *enqueue* operation
+ * by event producers in the system. The typical event producers are the
+ * ethdev subsystem for generating packet events, the CPU (SW) for generating
+ * events based on different stages of application processing, the cryptodev
+ * subsystem for generating crypto work completion notifications, etc.
+ *
+ * The *dequeue* operation gets one or more events from the event ports.
+ * The application processes the events and, at an intermediate stage of event
+ * processing, sends them to a downstream event queue through
+ * rte_event_enqueue_burst(). At the final stage, the application may hand
+ * them over to a different subsystem, for example transmitting the
+ * packet/event on the wire using the ethdev rte_eth_tx_burst() API.
+ *
+ * The point at which events are scheduled to ports depends on the device.
+ * For hardware devices, scheduling occurs asynchronously without any software
+ * intervention. Software schedulers can either be distributed
+ * (each worker thread schedules events to its own port) or centralized
+ * (a dedicated thread schedules to all ports). Distributed software schedulers
+ * perform the scheduling in rte_event_dequeue_burst(), whereas centralized
+ * scheduler logic is located in rte_event_schedule().
+ * If the RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED capability flag is not set,
+ * the device is centralized and thus needs a dedicated scheduling
+ * thread that repeatedly calls rte_event_schedule().
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_errno.h>
+
+struct rte_mbuf; /* we just use mbuf pointers; no need to include rte_mbuf.h */
+
+/* Event device capability bitmap flags */
+#define RTE_EVENT_DEV_CAP_QUEUE_QOS           (1ULL << 0)
+/**< Event scheduling prioritization is based on the priority associated with
+ * each event queue.
+ *
+ * @see rte_event_queue_setup()
+ */
+#define RTE_EVENT_DEV_CAP_EVENT_QOS           (1ULL << 1)
+/**< Event scheduling prioritization is based on the priority associated with
+ * each event. The priority of each event is supplied in the *rte_event*
+ * structure on each enqueue operation.
+ *
+ * @see rte_event_enqueue_burst()
+ */
+#define RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED   (1ULL << 2)
+/**< Event device operates in distributed scheduling mode.
+ * In distributed scheduling mode, event scheduling happens in HW, in
+ * rte_event_dequeue_burst(), or in a combination of the two.
+ * If the flag is not set, the eventdev is centralized and thus needs a
+ * dedicated scheduling thread that repeatedly calls rte_event_schedule().
+ *
+ * @see rte_event_schedule(), rte_event_dequeue_burst()
+ */
+#define RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES     (1ULL << 3)
+/**< Event device is capable of enqueuing events of any type to any queue.
+ * If this capability is not set, the queue only supports events of the
+ * *RTE_EVENT_QUEUE_CFG_* type that it was created with.
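+ *
+ * For illustration only, a hedged sketch of how an application might test
+ * this and other capability flags reported by rte_event_dev_info_get()
+ * (the device id is an assumption of the example):
+ *
+ * \code{.c}
+ *	struct rte_event_dev_info info;
+ *
+ *	rte_event_dev_info_get(0, &info);
+ *	if (info.event_dev_cap & RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES)
+ *		; /* any sched_type may be enqueued to any queue */
+ *	if (!(info.event_dev_cap & RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED))
+ *		; /* launch a dedicated rte_event_schedule() thread */
+ * \endcode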
+ *
+ * @see RTE_EVENT_QUEUE_CFG_* values
+ */
+
+/* Event device priority levels */
+#define RTE_EVENT_DEV_PRIORITY_HIGHEST   0
+/**< Highest priority expressed across the eventdev subsystem
+ * @see rte_event_queue_setup(), rte_event_enqueue_burst()
+ * @see rte_event_port_link()
+ */
+#define RTE_EVENT_DEV_PRIORITY_NORMAL    128
+/**< Normal priority expressed across the eventdev subsystem
+ * @see rte_event_queue_setup(), rte_event_enqueue_burst()
+ * @see rte_event_port_link()
+ */
+#define RTE_EVENT_DEV_PRIORITY_LOWEST    255
+/**< Lowest priority expressed across the eventdev subsystem
+ * @see rte_event_queue_setup(), rte_event_enqueue_burst()
+ * @see rte_event_port_link()
+ */
+
+/**
+ * Get the total number of event devices that have been successfully
+ * initialised.
+ *
+ * @return
+ *   The total number of usable event devices.
+ */
+uint8_t
+rte_event_dev_count(void);
+
+/**
+ * Get the device identifier for the named event device.
+ *
+ * @param name
+ *   Event device name to select the event device identifier.
+ *
+ * @return
+ *   Returns the event device identifier on success.
+ *   - <0: Failure to find the named event device.
+ */
+int
+rte_event_dev_get_dev_id(const char *name);
+
+/**
+ * Return the NUMA socket to which a device is connected.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @return
+ *   The NUMA socket id to which the device is connected or
+ *   a default of zero if the socket could not be determined.
+ *   - -EINVAL: the *dev_id* value is out of range.
+ */
+int
+rte_event_dev_socket_id(uint8_t dev_id);
+
+/**
+ * Event device information
+ */
+struct rte_event_dev_info {
+	const char *driver_name;	/**< Event driver name */
+	struct rte_device *dev;	/**< Device information */
+	uint32_t min_dequeue_timeout_ns;
+	/**< Minimum supported global dequeue timeout(ns) by this device */
+	uint32_t max_dequeue_timeout_ns;
+	/**< Maximum supported global dequeue timeout(ns) by this device */
+	uint32_t dequeue_timeout_ns;
+	/**< Configured global dequeue timeout(ns) for this device */
+	uint8_t max_event_queues;
+	/**< Maximum event queues supported by this device */
+	uint32_t max_event_queue_flows;
+	/**< Maximum number of flows supported in an event queue by this
+	 * device
+	 */
+	uint8_t max_event_queue_priority_levels;
+	/**< Maximum number of event queue priority levels supported by this
+	 * device. Valid when the device has the RTE_EVENT_DEV_CAP_QUEUE_QOS
+	 * capability.
+	 */
+	uint8_t max_event_priority_levels;
+	/**< Maximum number of event priority levels supported by this device.
+	 * Valid when the device has the RTE_EVENT_DEV_CAP_EVENT_QOS
+	 * capability.
+	 */
+	uint8_t max_event_ports;
+	/**< Maximum number of event ports supported by this device */
+	uint8_t max_event_port_dequeue_depth;
+	/**< Maximum number of events that can be dequeued at a time from an
+	 * event port on this device.
+	 * A device that does not support bulk dequeue will set this as 1.
+	 */
+	uint32_t max_event_port_enqueue_depth;
+	/**< Maximum number of events that can be enqueued at a time to an
+	 * event port on this device.
+	 * A device that does not support bulk enqueue will set this as 1.
+	 */
+	int32_t max_num_events;
+	/**< A *closed system* event dev has a limit on the number of events it
+	 * can manage at a time. An *open system* event dev does not have a
+	 * limit and will specify this as -1.
+	 */
+	uint32_t event_dev_cap;
+	/**< Event device capabilities (RTE_EVENT_DEV_CAP_*) */
+};
+
+/**
+ * Retrieve the contextual information of an event device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @param[out] dev_info
+ *   A pointer to a structure of type *rte_event_dev_info* to be filled with
+ *   the contextual information of the device.
+ *
+ * @return
+ *   - 0: Success, driver updates the contextual information of the event
+ *     device
+ *   - <0: Error code returned by the driver info get function.
+ *
+ */
+int
+rte_event_dev_info_get(uint8_t dev_id, struct rte_event_dev_info *dev_info);
+
+/* Event device configuration bitmap flags */
+#define RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT (1ULL << 0)
+/**< Override the global *dequeue_timeout_ns* and use a per-dequeue timeout
+ * in ns.
+ * @see rte_event_dequeue_timeout_ticks(), rte_event_dequeue_burst()
+ */
+
+/** Event device configuration structure */
+struct rte_event_dev_config {
+	uint32_t dequeue_timeout_ns;
+	/**< rte_event_dequeue_burst() timeout on this device.
+	 * This value should be in the range
+	 * [*min_dequeue_timeout_ns*, *max_dequeue_timeout_ns*]
+	 * previously provided by rte_event_dev_info_get().
+	 * @see RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT
+	 */
+	int32_t nb_events_limit;
+	/**< In a *closed system* this field is the limit on the maximum number
+	 * of events that can be in flight in the eventdev at a given time. The
+	 * limit is required to ensure that the finite space in a closed system
+	 * is not overwhelmed. The value cannot exceed the *max_num_events*
+	 * provided by rte_event_dev_info_get().
+	 * This value should be set to -1 for an *open system*.
+	 */
+	uint8_t nb_event_queues;
+	/**< Number of event queues to configure on this device.
+	 * This value cannot exceed the *max_event_queues* previously provided
+	 * by rte_event_dev_info_get().
+	 */
+	uint8_t nb_event_ports;
+	/**< Number of event ports to configure on this device.
+	 * This value cannot exceed the *max_event_ports* previously provided
+	 * by rte_event_dev_info_get().
+	 */
+	uint32_t nb_event_queue_flows;
+	/**< Number of flows for any event queue on this device.
+	 * This value cannot exceed the *max_event_queue_flows* previously
+	 * provided by rte_event_dev_info_get().
+	 */
+	uint32_t nb_event_port_dequeue_depth;
+	/**< Maximum number of events that can be dequeued at a time from an
+	 * event port on this device.
+	 * This value cannot exceed the *max_event_port_dequeue_depth*
+	 * previously provided by rte_event_dev_info_get().
+	 * @see rte_event_port_setup()
+	 */
+	uint32_t nb_event_port_enqueue_depth;
+	/**< Maximum number of events that can be enqueued at a time to an
+	 * event port on this device.
+	 * This value cannot exceed the *max_event_port_enqueue_depth*
+	 * previously provided by rte_event_dev_info_get().
+	 * @see rte_event_port_setup()
+	 */
+	uint32_t event_dev_cfg;
+	/**< Event device config flags (RTE_EVENT_DEV_CFG_*) */
+};
+
+/**
+ * Configure an event device.
+ *
+ * This function must be invoked first before any other function in the
+ * API. This function can also be re-invoked when a device is in the
+ * stopped state.
+ *
+ * The caller may use rte_event_dev_info_get() to get the capabilities and
+ * resource limits available for this event device.
+ *
+ * @param dev_id
+ *   The identifier of the device to configure.
+ * @param dev_conf
+ *   The event device configuration structure.
+ *
+ * @return
+ *   - 0: Success, device configured.
+ *   - <0: Error code returned by the driver configuration function.
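+ *
+ * As a non-normative illustration, a device could be configured directly from
+ * the limits advertised in *rte_event_dev_info*; each assignment below is
+ * only one possible choice:
+ *
+ * \code{.c}
+ *	struct rte_event_dev_info info;
+ *	struct rte_event_dev_config cfg;
+ *
+ *	rte_event_dev_info_get(dev_id, &info);
+ *	memset(&cfg, 0, sizeof(cfg));
+ *	cfg.dequeue_timeout_ns = info.min_dequeue_timeout_ns;
+ *	cfg.nb_events_limit = info.max_num_events; /* -1 on open systems */
+ *	cfg.nb_event_queues = info.max_event_queues;
+ *	cfg.nb_event_ports = info.max_event_ports;
+ *	cfg.nb_event_queue_flows = info.max_event_queue_flows;
+ *	cfg.nb_event_port_dequeue_depth = info.max_event_port_dequeue_depth;
+ *	cfg.nb_event_port_enqueue_depth = info.max_event_port_enqueue_depth;
+ *	rte_event_dev_configure(dev_id, &cfg);
+ * \endcode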
+ */
+int
+rte_event_dev_configure(uint8_t dev_id,
+			const struct rte_event_dev_config *dev_conf);
+
+
+/* Event queue specific APIs */
+
+/* Event queue configuration bitmap flags */
+#define RTE_EVENT_QUEUE_CFG_TYPE_MASK      (3ULL << 0)
+/**< Mask for event queue schedule type configuration request */
+#define RTE_EVENT_QUEUE_CFG_ALL_TYPES      (0ULL << 0)
+/**< Allow ATOMIC, ORDERED, and PARALLEL schedule type enqueue
+ *
+ * @see RTE_SCHED_TYPE_ORDERED, RTE_SCHED_TYPE_ATOMIC, RTE_SCHED_TYPE_PARALLEL
+ * @see rte_event_enqueue_burst()
+ */
+#define RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY    (1ULL << 0)
+/**< Allow only ATOMIC schedule type enqueue
+ *
+ * The rte_event_enqueue_burst() result is undefined if the queue is configured
+ * as ATOMIC only and sched_type != RTE_SCHED_TYPE_ATOMIC
+ *
+ * @see RTE_SCHED_TYPE_ATOMIC, rte_event_enqueue_burst()
+ */
+#define RTE_EVENT_QUEUE_CFG_ORDERED_ONLY   (2ULL << 0)
+/**< Allow only ORDERED schedule type enqueue
+ *
+ * The rte_event_enqueue_burst() result is undefined if the queue is configured
+ * as ORDERED only and sched_type != RTE_SCHED_TYPE_ORDERED
+ *
+ * @see RTE_SCHED_TYPE_ORDERED, rte_event_enqueue_burst()
+ */
+#define RTE_EVENT_QUEUE_CFG_PARALLEL_ONLY  (3ULL << 0)
+/**< Allow only PARALLEL schedule type enqueue
+ *
+ * The rte_event_enqueue_burst() result is undefined if the queue is configured
+ * as PARALLEL only and sched_type != RTE_SCHED_TYPE_PARALLEL
+ *
+ * @see RTE_SCHED_TYPE_PARALLEL, rte_event_enqueue_burst()
+ */
+#define RTE_EVENT_QUEUE_CFG_SINGLE_LINK    (1ULL << 2)
+/**< This event queue links only to a single event port.
+ *
+ * @see rte_event_port_setup(), rte_event_port_link()
+ */
+
+/** Event queue configuration structure */
+struct rte_event_queue_conf {
+	uint32_t nb_atomic_flows;
+	/**< The maximum number of active flows this queue can track at any
+	 * given time. The value must be in the range
+	 * [1, nb_event_queue_flows] previously supplied to
+	 * rte_event_dev_configure().
+	 */
+	uint32_t nb_atomic_order_sequences;
+	/**< The maximum number of outstanding events waiting to be
+	 * reordered by this queue. In other words, the number of entries in
+	 * this queue's reorder buffer. When the number of events in the
+	 * reorder buffer reaches *nb_atomic_order_sequences*, the
+	 * scheduler cannot schedule further events from this queue and an
+	 * invalid event will be returned from dequeue until one or more
+	 * entries are freed up/released.
+	 * The value must be in the range [1, nb_event_queue_flows]
+	 * previously supplied to rte_event_dev_configure().
+	 */
+	uint32_t event_queue_cfg;
+	/**< Queue cfg flags (RTE_EVENT_QUEUE_CFG_*) */
+	uint8_t priority;
+	/**< Priority for this event queue relative to other event queues.
+	 * The requested priority should be in the range
+	 * [RTE_EVENT_DEV_PRIORITY_HIGHEST, RTE_EVENT_DEV_PRIORITY_LOWEST].
+	 * The implementation shall normalize the requested priority to an
+	 * event device supported priority value.
+	 * Valid when the device has the RTE_EVENT_DEV_CAP_QUEUE_QOS
+	 * capability.
+	 */
+};
+
+/**
+ * Retrieve the default configuration information of an event queue designated
+ * by its *queue_id* from the event driver for an event device.
+ *
+ * This function is intended to be used in conjunction with
+ * rte_event_queue_setup(), where the caller needs to set up the queue by
+ * overriding a few default values.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param queue_id
+ *   The index of the event queue to get the configuration information.
+ *   The value must be in the range [0, nb_event_queues - 1]
+ *   previously supplied to rte_event_dev_configure().
+ * @param[out] queue_conf
+ *   The pointer to the default event queue configuration data.
+ * @return
+ *   - 0: Success, driver updates the default event queue configuration data.
+ *   - <0: Error code returned by the driver info get function.
+ *
+ * @see rte_event_queue_setup()
+ *
+ */
+int
+rte_event_queue_default_conf_get(uint8_t dev_id, uint8_t queue_id,
+				 struct rte_event_queue_conf *queue_conf);
+
+/**
+ * Allocate and set up an event queue for an event device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param queue_id
+ *   The index of the event queue to setup. The value must be in the range
+ *   [0, nb_event_queues - 1] previously supplied to rte_event_dev_configure().
+ * @param queue_conf
+ *   The pointer to the configuration data to be used for the event queue.
+ *   A NULL value is allowed, in which case the default configuration is used.
+ *
+ * @see rte_event_queue_default_conf_get()
+ *
+ * @return
+ *   - 0: Success, event queue correctly set up.
+ *   - <0: event queue configuration failed
+ */
+int
+rte_event_queue_setup(uint8_t dev_id, uint8_t queue_id,
+		      const struct rte_event_queue_conf *queue_conf);
+
+/**
+ * Get the number of event queues on a specific event device
+ *
+ * @param dev_id
+ *   Event device identifier.
+ * @return
+ *   - The number of configured event queues
+ */
+uint8_t
+rte_event_queue_count(uint8_t dev_id);
+
+/**
+ * Get the priority of the event queue on a specific event device
+ *
+ * @param dev_id
+ *   Event device identifier.
+ * @param queue_id
+ *   Event queue identifier.
+ * @return
+ *   - If the device has the RTE_EVENT_DEV_CAP_QUEUE_QOS capability, the
+ *     configured priority of the event queue in the range
+ *     [RTE_EVENT_DEV_PRIORITY_HIGHEST, RTE_EVENT_DEV_PRIORITY_LOWEST];
+ *     otherwise the value RTE_EVENT_DEV_PRIORITY_NORMAL
+ */
+uint8_t
+rte_event_queue_priority(uint8_t dev_id, uint8_t queue_id);
+
+/* Event port specific APIs */
+
+/** Event port configuration structure */
+struct rte_event_port_conf {
+	int32_t new_event_threshold;
+	/**< A backpressure threshold for new event enqueues on this port.
+	 * Used for a *closed system* event dev, where the event capacity is
+	 * limited and cannot exceed the capacity of the event dev.
+	 * Configuring ports with different thresholds can make higher priority
+	 * traffic less likely to be backpressured.
+	 * For example, a port used to inject NIC Rx packets into the event dev
+	 * can have a lower threshold so as not to overwhelm the device,
+	 * while ports used for worker pools can have a higher threshold.
+	 * This value cannot exceed the *nb_events_limit*
+	 * previously supplied to rte_event_dev_configure().
+	 * This should be set to '-1' for an *open system*.
+	 */
+	uint16_t dequeue_depth;
+	/**< Configure the number of bulk dequeues for this event port.
+	 * This value cannot exceed the *nb_event_port_dequeue_depth*
+	 * previously supplied to rte_event_dev_configure().
+	 */
+	uint16_t enqueue_depth;
+	/**< Configure the number of bulk enqueues for this event port.
+	 * This value cannot exceed the *nb_event_port_enqueue_depth*
+	 * previously supplied to rte_event_dev_configure().
+	 */
+};
+
+/**
+ * Retrieve the default configuration information of an event port designated
+ * by its *port_id* from the event driver for an event device.
+ *
+ * This function is intended to be used in conjunction with
+ * rte_event_port_setup(), where the caller needs to set up the port by
+ * overriding a few default values, as sketched below.
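+ *
+ * A hedged sketch of the intended usage pattern; the overridden depth value
+ * is an arbitrary example:
+ *
+ * \code{.c}
+ *	struct rte_event_port_conf pconf;
+ *
+ *	rte_event_port_default_conf_get(dev_id, port_id, &pconf);
+ *	pconf.dequeue_depth = 4;	/* override one default value */
+ *	rte_event_port_setup(dev_id, port_id, &pconf);
+ * \endcode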
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param port_id
+ *   The index of the event port to get the configuration information.
+ *   The value must be in the range [0, nb_event_ports - 1]
+ *   previously supplied to rte_event_dev_configure().
+ * @param[out] port_conf
+ *   The pointer to the default event port configuration data
+ * @return
+ *   - 0: Success, driver updates the default event port configuration data.
+ *   - <0: Error code returned by the driver info get function.
+ *
+ * @see rte_event_port_setup()
+ *
+ */
+int
+rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
+				struct rte_event_port_conf *port_conf);
+
+/**
+ * Allocate and set up an event port for an event device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param port_id
+ *   The index of the event port to setup. The value must be in the range
+ *   [0, nb_event_ports - 1] previously supplied to rte_event_dev_configure().
+ * @param port_conf
+ *   The pointer to the configuration data to be used for the port.
+ *   A NULL value is allowed, in which case the default configuration is used.
+ *
+ * @see rte_event_port_default_conf_get()
+ *
+ * @return
+ *   - 0: Success, event port correctly set up.
+ *   - <0: Port configuration failed
+ *   - (-EDQUOT) Quota exceeded (the application tried to link a queue
+ *     configured with RTE_EVENT_QUEUE_CFG_SINGLE_LINK to more than one
+ *     event port)
+ */
+int
+rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
+		     const struct rte_event_port_conf *port_conf);
+
+/**
+ * Get the dequeue depth configured for the event port designated by its
+ * *port_id* on a specific event device
+ *
+ * @param dev_id
+ *   Event device identifier.
+ * @param port_id
+ *   Event port identifier.
+ * @return
+ *   - The configured dequeue depth
+ *
+ * @see rte_event_dequeue_burst()
+ */
+uint8_t
+rte_event_port_dequeue_depth(uint8_t dev_id, uint8_t port_id);
+
+/**
+ * Get the enqueue depth configured for the event port designated by its
+ * *port_id* on a specific event device
+ *
+ * @param dev_id
+ *   Event device identifier.
+ * @param port_id
+ *   Event port identifier.
+ * @return
+ *   - The configured enqueue depth
+ *
+ * @see rte_event_enqueue_burst()
+ */
+uint8_t
+rte_event_port_enqueue_depth(uint8_t dev_id, uint8_t port_id);
+
+/**
+ * Get the number of ports on a specific event device
+ *
+ * @param dev_id
+ *   Event device identifier.
+ * @return
+ *   - The number of configured ports
+ */
+uint8_t
+rte_event_port_count(uint8_t dev_id);
+
+/**
+ * Start an event device.
+ *
+ * The device start step is the last one and consists of setting the event
+ * queues to start accepting events and scheduling them to event ports.
+ *
+ * On success, all basic functions exported by the API (event enqueue,
+ * event dequeue and so on) can be invoked.
+ *
+ * @param dev_id
+ *   Event device identifier
+ * @return
+ *   - 0: Success, device started.
+ *   - -ESTALE: Not all ports of the device are configured
+ *   - -ENOLINK: Not all queues are linked, which could lead to deadlock.
+ */
+int
+rte_event_dev_start(uint8_t dev_id);
+
+/**
+ * Stop an event device. The device can be restarted with a call to
+ * rte_event_dev_start()
+ *
+ * @param dev_id
+ *   Event device identifier.
+ */
+void
+rte_event_dev_stop(uint8_t dev_id);
+
+/**
+ * Close an event device. The device cannot be restarted!
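+ *
+ * As a hedged illustration of the stop/reconfigure/start/close life cycle
+ * (queue 0 and its replacement configuration are assumptions of the example):
+ *
+ * \code{.c}
+ *	rte_event_dev_stop(dev_id);
+ *	/* e.g. change one queue while the device is stopped */
+ *	rte_event_queue_setup(dev_id, 0, &new_qconf);
+ *	rte_event_dev_start(dev_id);
+ *	/* ... fast path runs again ... */
+ *	rte_event_dev_stop(dev_id);
+ *	rte_event_dev_close(dev_id);	/* no restart after this */
+ * \endcode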
+ *
+ * @param dev_id
+ *   Event device identifier
+ *
+ * @return
+ *  - 0 on successfully closing device
+ *  - <0 on failure to close device
+ *  - (-EAGAIN) if device is busy
+ */
+int
+rte_event_dev_close(uint8_t dev_id);
+
+/* Scheduler type definitions */
+#define RTE_SCHED_TYPE_ORDERED          0
+/**< Ordered scheduling
+ *
+ * Events from an ordered flow of an event queue can be scheduled to multiple
+ * ports for concurrent processing while maintaining the original event order.
+ * This scheme enables the user to achieve high single-flow throughput by
+ * avoiding SW synchronization for ordering between ports which are bound to
+ * cores.
+ *
+ * The source flow ordering from an event queue is maintained when events are
+ * enqueued to their destination queue within the same ordered flow context.
+ * An event port holds the context until the application calls
+ * rte_event_dequeue_burst() from the same port, which implicitly releases
+ * the context.
+ * The user may allow the scheduler to release the context earlier than that
+ * by invoking rte_event_enqueue_burst() with the RTE_EVENT_OP_RELEASE
+ * operation.
+ *
+ * Events from the source queue appear in their original order when dequeued
+ * from a destination queue.
+ * Event ordering is based on the received event(s), but also other
+ * (newly allocated or stored) events are ordered when enqueued within the same
+ * ordered context. Events not enqueued (e.g. released or stored) within the
+ * context are considered missing from reordering and are skipped at this time
+ * (but can be ordered again within another context).
+ *
+ * @see rte_event_queue_setup(), rte_event_dequeue_burst(), RTE_EVENT_OP_RELEASE
+ */
+
+#define RTE_SCHED_TYPE_ATOMIC           1
+/**< Atomic scheduling
+ *
+ * Events from an atomic flow of an event queue can be scheduled only to a
+ * single port at a time. The port is guaranteed to have exclusive (atomic)
+ * access to the associated flow context, which enables the user to avoid SW
+ * synchronization. Atomic flows also help to maintain event ordering
+ * since only one port at a time can process events from a flow of an
+ * event queue.
+ *
+ * The atomic queue synchronization context is dedicated to the port until the
+ * application calls rte_event_dequeue_burst() from the same port,
+ * which implicitly releases the context. The user may allow the scheduler to
+ * release the context earlier than that by invoking rte_event_enqueue_burst()
+ * with the RTE_EVENT_OP_RELEASE operation.
+ *
+ * @see rte_event_queue_setup(), rte_event_dequeue_burst(), RTE_EVENT_OP_RELEASE
+ */
+
+#define RTE_SCHED_TYPE_PARALLEL         2
+/**< Parallel scheduling
+ *
+ * The scheduler performs priority scheduling, load balancing, etc. functions
+ * but does not provide additional event synchronization or ordering.
+ * It is free to schedule events from a single parallel flow of an event queue
+ * to multiple event ports for concurrent processing.
+ * The application is responsible for flow context synchronization and
+ * event ordering (SW synchronization).
+ *
+ * @see rte_event_queue_setup(), rte_event_dequeue_burst()
+ */
+
+/* Event types to classify the event source */
+#define RTE_EVENT_TYPE_ETHDEV           0x0
+/**< The event generated from the ethdev subsystem */
+#define RTE_EVENT_TYPE_CRYPTODEV        0x1
+/**< The event generated from the cryptodev subsystem */
+#define RTE_EVENT_TYPE_TIMERDEV         0x2
+/**< The event generated from the timerdev subsystem */
+#define RTE_EVENT_TYPE_CPU              0x3
+/**< The event generated from the CPU for pipelining.
+ * The application may use *sub_event_type* to further classify the event
+ */
+#define RTE_EVENT_TYPE_MAX              0x10
+/**< Maximum number of event types */
+
+/* Event enqueue operations */
+#define RTE_EVENT_OP_NEW                0
+/**< Event producers use this operation to inject a new event into the
+ * event device.
+ */
+#define RTE_EVENT_OP_FORWARD            1
+/**< The CPU uses this operation to forward the event to a different event
+ * queue, or to change to a new application-specific flow or schedule type,
+ * to enable pipelining
+ */
+#define RTE_EVENT_OP_RELEASE            2
+/**< Release the flow context associated with the schedule type.
+ *
+ * If the current flow's schedule type is *RTE_SCHED_TYPE_ATOMIC*,
+ * this operation hints the scheduler that the user has completed critical
+ * section processing in the current atomic context.
+ * The scheduler is now allowed to schedule events from the same flow from
+ * an event queue to another port. However, the context may still be held
+ * until the next rte_event_dequeue_burst() call; this operation allows, but
+ * does not force, the scheduler to release the context early.
+ *
+ * Early atomic context release may increase parallelism and thus system
+ * performance, but the user needs to carefully design the split into
+ * critical vs non-critical sections.
+ *
+ * If the current flow's schedule type is *RTE_SCHED_TYPE_ORDERED*,
+ * this operation hints the scheduler that the user has done everything that
+ * is needed to maintain event order in the current ordered context.
+ * The scheduler is allowed to release the ordered context of this port and
+ * avoid reordering any following enqueues.
+ *
+ * Early ordered context release may increase parallelism and thus system
+ * performance.
+ *
+ * If the current flow's schedule type is *RTE_SCHED_TYPE_PARALLEL*,
+ * or no scheduling context is held, then this operation may be a NOOP,
+ * depending on the implementation.
+ *
+ */
+
+/**
+ * The generic *rte_event* structure to hold the event attributes
+ * for dequeue and enqueue operation
+ */
+RTE_STD_C11
+struct rte_event {
+	/** WORD0 */
+	union {
+		uint64_t event;
+		/** Event attributes for dequeue or enqueue operation */
+		struct {
+			uint32_t flow_id:20;
+			/**< Targeted flow identifier for the enqueue and
+			 * dequeue operation.
+			 * The value must be in the range
+			 * [0, nb_event_queue_flows - 1]
+			 * previously supplied to rte_event_dev_configure().
+			 */
+			uint32_t sub_event_type:8;
+			/**< Sub-event types based on the event source.
+			 * @see RTE_EVENT_TYPE_CPU
+			 */
+			uint32_t event_type:4;
+			/**< Event type to classify the event source.
+			 * @see RTE_EVENT_TYPE_ETHDEV, (RTE_EVENT_TYPE_*)
+			 */
+			uint8_t op:2;
+			/**< The type of event enqueue operation - new/forward/
+			 * etc. This field is not preserved across an instance
+			 * and is undefined on dequeue.
+			 * @see RTE_EVENT_OP_NEW, (RTE_EVENT_OP_*)
+			 */
+			uint8_t rsvd:4;
+			/**< Reserved for future use */
+			uint8_t sched_type:2;
+			/**< Scheduler synchronization type (RTE_SCHED_TYPE_*)
+			 * associated with the flow id on a given event queue
+			 * for the enqueue and dequeue operation.
+			 */
+			uint8_t queue_id;
+			/**< Targeted event queue identifier for the enqueue or
+			 * dequeue operation.
+			 * The value must be in the range
+			 * [0, nb_event_queues - 1]
+			 * previously supplied to rte_event_dev_configure().
+			 */
+			uint8_t priority;
+			/**< Event priority relative to other events in the
+			 * event queue. The requested priority should be in the
+			 * range [RTE_EVENT_DEV_PRIORITY_HIGHEST,
+			 * RTE_EVENT_DEV_PRIORITY_LOWEST].
+			 * The implementation shall normalize the requested
+			 * priority to a supported priority value.
+			 * Valid when the device has the
+			 * RTE_EVENT_DEV_CAP_EVENT_QOS capability.
+			 */
+			uint8_t impl_opaque;
+			/**< Implementation specific opaque value.
+			 * An implementation may use this field to hold an
+			 * implementation-specific value shared between the
+			 * dequeue and enqueue operations.
+			 * The application should not modify this field.
+			 */
+		};
+	};
+	/** WORD1 */
+	union {
+		uint64_t u64;
+		/**< Opaque 64-bit value */
+		void *event_ptr;
+		/**< Opaque event pointer */
+		struct rte_mbuf *mbuf;
+		/**< mbuf pointer if dequeued event is associated with mbuf */
+	};
+};
+
+
+struct rte_eventdev_driver;
+struct rte_eventdev_ops;
+struct rte_eventdev;
+
+typedef void (*event_schedule_t)(struct rte_eventdev *dev);
+/**< @internal Schedule one or more events in the event dev. */
+
+typedef uint16_t (*event_enqueue_t)(void *port, const struct rte_event *ev);
+/**< @internal Enqueue event on port of a device */
+
+typedef uint16_t (*event_enqueue_burst_t)(void *port,
+		const struct rte_event ev[], uint16_t nb_events);
+/**< @internal Enqueue burst of events on port of a device */
+
+typedef uint16_t (*event_dequeue_t)(void *port, struct rte_event *ev,
+		uint64_t timeout_ticks);
+/**< @internal Dequeue event from port of a device */
+
+typedef uint16_t (*event_dequeue_burst_t)(void *port, struct rte_event ev[],
+		uint16_t nb_events, uint64_t timeout_ticks);
+/**< @internal Dequeue burst of events from port of a device */
+
+#define RTE_EVENTDEV_NAME_MAX_LEN	(64)
+/**< @internal Max length of name of event PMD */
+
+/**
+ * @internal
+ * The data part, with no function pointers, associated with each device.
+ *
+ * This structure is safe to place in shared memory to be common among
+ * different processes in a multi-process configuration.
+ */
+struct rte_eventdev_data {
+	int socket_id;
+	/**< Socket ID where memory is allocated */
+	uint8_t dev_id;
+	/**< Device ID for this instance */
+	uint8_t nb_queues;
+	/**< Number of event queues. */
+	uint8_t nb_ports;
+	/**< Number of event ports. */
+	void **ports;
+	/**< Array of pointers to ports. */
+	uint8_t *ports_dequeue_depth;
+	/**< Array of port dequeue depths. */
+	uint8_t *ports_enqueue_depth;
+	/**< Array of port enqueue depths. */
+	uint8_t *queues_prio;
+	/**< Array of queue priorities. */
+	uint16_t *links_map;
+	/**< Memory to store queue-to-port connections. */
+	void *dev_private;
+	/**< PMD-specific private data */
+	uint32_t event_dev_cap;
+	/**< Event device capabilities (RTE_EVENT_DEV_CAP_*) */
+	struct rte_event_dev_config dev_conf;
+	/**< Configuration applied to device. */
+
+	RTE_STD_C11
+	uint8_t dev_started : 1;
+	/**< Device state: STARTED(1)/STOPPED(0) */
+
+	char name[RTE_EVENTDEV_NAME_MAX_LEN];
+	/**< Unique identifier name */
+} __rte_cache_aligned;
+
+/** @internal The data structure associated with each event device. */
+struct rte_eventdev {
+	event_schedule_t schedule;
+	/**< Pointer to PMD schedule function. */
+	event_enqueue_t enqueue;
+	/**< Pointer to PMD enqueue function. */
+	event_enqueue_burst_t enqueue_burst;
+	/**< Pointer to PMD enqueue burst function. */
+	event_dequeue_t dequeue;
+	/**< Pointer to PMD dequeue function. */
+	event_dequeue_burst_t dequeue_burst;
+	/**< Pointer to PMD dequeue burst function. */
+
+	struct rte_eventdev_data *data;
+	/**< Pointer to device data */
+	const struct rte_eventdev_ops *dev_ops;
+	/**< Functions exported by PMD */
+	struct rte_device *dev;
+	/**< Device info supplied by probing. */
+	const struct rte_eventdev_driver *driver;
+	/**< Driver for this device */
+
+	RTE_STD_C11
+	uint8_t attached : 1;
+	/**< Flag indicating the device is attached */
+} __rte_cache_aligned;
+
+extern struct rte_eventdev *rte_eventdevs;
+/** @internal The pool of rte_eventdev structures. */
+
+
+/**
+ * Schedule one or more events in the event dev.
+ *
+ * An event dev implementation may define this as a NOOP, for instance if
+ * the event dev performs its scheduling in hardware.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ */
+static inline void
+rte_event_schedule(uint8_t dev_id)
+{
+	struct rte_eventdev *dev = &rte_eventdevs[dev_id];
+	if (*dev->schedule)
+		(*dev->schedule)(dev);
+}
+
+/**
+ * Enqueue a burst of event objects or a single event object supplied in
+ * *rte_event* structures on an event device designated by its *dev_id*
+ * through the event port specified by *port_id*. Each event object specifies
+ * the event queue on which it will be enqueued.
+ *
+ * The *nb_events* parameter is the number of event objects to enqueue which
+ * are supplied in the *ev* array of *rte_event* structures.
+ *
+ * The rte_event_enqueue_burst() function returns the number of
+ * event objects it actually enqueued. A return value equal to *nb_events*
+ * means that all event objects have been enqueued.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param port_id
+ *   The identifier of the event port.
+ * @param ev
+ *   Points to an array of *nb_events* objects of type *rte_event* structure
+ *   which contain the event object enqueue operations to be processed.
+ * @param nb_events
+ *   The number of event objects to enqueue, typically up to the value
+ *   returned by rte_event_port_enqueue_depth() for this port.
+ *
+ * @return
+ *   The number of event objects actually enqueued on the event device. The
+ *   return value can be less than the value of the *nb_events* parameter when
+ *   the event device's queue is full or if invalid parameters are specified
+ *   in a *rte_event*. If the return value is less than *nb_events*, the
+ *   remaining events at the end of ev[] are not consumed and the caller has
+ *   to take care of them, and rte_errno is set accordingly. Possible errno
+ *   values include:
+ *   - -EINVAL   The port ID is invalid, device ID is invalid, an event's
+ *               queue ID is invalid, or an event's sched type doesn't match
+ *               the capabilities of the destination queue.
+ *   - -ENOSPC   The event port was backpressured and unable to enqueue
+ *               one or more events. This error code is only applicable to
+ *               closed systems.
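+ *
+ * As a hedged illustration of injecting one new packet event (the queue id,
+ * flow id, sched type and the mbuf *m* are assumptions of the example):
+ *
+ * \code{.c}
+ *	struct rte_event new_ev = {0};
+ *
+ *	new_ev.op = RTE_EVENT_OP_NEW;
+ *	new_ev.queue_id = 0;
+ *	new_ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
+ *	new_ev.event_type = RTE_EVENT_TYPE_ETHDEV;
+ *	new_ev.flow_id = flow;
+ *	new_ev.mbuf = m;
+ *	/* retry while the port is backpressured */
+ *	while (rte_event_enqueue_burst(dev_id, port_id, &new_ev, 1) == 0)
+ *		;
+ * \endcode
+ *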
+ * @see rte_event_port_enqueue_depth()
+ */
+static inline uint16_t
+rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id,
+			const struct rte_event ev[], uint16_t nb_events)
+{
+	struct rte_eventdev *dev = &rte_eventdevs[dev_id];
+
+#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
+	if (dev_id >= RTE_EVENT_MAX_DEVS || !rte_eventdevs[dev_id].attached) {
+		rte_errno = -EINVAL;
+		return 0;
+	}
+
+	if (port_id >= dev->data->nb_ports) {
+		rte_errno = -EINVAL;
+		return 0;
+	}
+#endif
+
+	/*
+	 * Allow zero-cost non-burst mode routine invocation if the
+	 * application requests nb_events as a const value of one.
+	 */
+	if (nb_events == 1)
+		return (*dev->enqueue)(
+			dev->data->ports[port_id], ev);
+	else
+		return (*dev->enqueue_burst)(
+			dev->data->ports[port_id], ev, nb_events);
+}
+
+/**
+ * Converts nanoseconds to a *timeout_ticks* value for
+ * rte_event_dequeue_burst()
+ *
+ * If the device is configured with the RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT
+ * flag, the application can use this function to convert a timeout value in
+ * nanoseconds to the implementation-specific timeout value supplied in
+ * rte_event_dequeue_burst()
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param ns
+ *   Wait time in nanoseconds
+ * @param[out] timeout_ticks
+ *   Value for the *timeout_ticks* parameter in rte_event_dequeue_burst()
+ *
+ * @return
+ *  - 0 on success.
+ *  - -ENOTSUP if the device doesn't support timeouts
+ *  - -EINVAL if *dev_id* is invalid or *timeout_ticks* is NULL
+ *  - other values < 0 on failure.
+ *
+ * @see rte_event_dequeue_burst(), RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT
+ * @see rte_event_dev_configure()
+ *
+ */
+int
+rte_event_dequeue_timeout_ticks(uint8_t dev_id, uint64_t ns,
+				uint64_t *timeout_ticks);
+
+/**
+ * Dequeue a burst of event objects or a single event object from the event
+ * port designated by its *event_port_id*, on an event device designated
+ * by its *dev_id*.
+ *
+ * rte_event_dequeue_burst() does not dictate the specifics of the scheduling
+ * algorithm, as each eventdev driver may have different criteria for
+ * scheduling an event. However, in general, from an application perspective
+ * the scheduler may use the following scheme to dispatch an event to the
+ * port:
+ *
+ * 1) Selection of the event queue, based on
+ *   a) The list of event queues linked to the event port.
+ *   b) If the device has the RTE_EVENT_DEV_CAP_QUEUE_QOS capability, the
+ *   event queue selection from the list is based on the event queue priority
+ *   relative to other event queues, supplied as *priority* in
+ *   rte_event_queue_setup()
+ *   c) If the device has the RTE_EVENT_DEV_CAP_EVENT_QOS capability, the
+ *   event queue selection from the list is based on the event priority
+ *   supplied as *priority* in rte_event_enqueue_burst()
+ * 2) Selection of the event, based on
+ *   a) The number of flows available in the selected event queue.
+ *   b) The schedule type method associated with the event
+ *
+ * The *nb_events* parameter is the maximum number of event objects to dequeue
+ * which are returned in the *ev* array of *rte_event* structures.
+ *
+ * The rte_event_dequeue_burst() function returns the number of event objects
+ * it actually dequeued. A return value equal to *nb_events* means that all
+ * event objects have been dequeued.
+ *
+ * The number of events dequeued is the number of scheduler contexts held by
+ * this port. These contexts are automatically released in the next
+ * rte_event_dequeue_burst() invocation; alternatively, invoking
+ * rte_event_enqueue_burst() with the RTE_EVENT_OP_RELEASE operation can be
+ * used to release the contexts early.
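+ *
+ * A hedged, non-normative usage sketch; the burst size, the processing step
+ * and *next_queue* are placeholders of this example:
+ *
+ * \code{.c}
+ *	struct rte_event evs[16];
+ *	uint16_t i, n;
+ *
+ *	n = rte_event_dequeue_burst(dev_id, port_id, evs, 16, timeout_ticks);
+ *	for (i = 0; i < n; i++) {
+ *		/* process evs[i], then forward it to the next stage */
+ *		evs[i].op = RTE_EVENT_OP_FORWARD;
+ *		evs[i].queue_id = next_queue;
+ *	}
+ *	rte_event_enqueue_burst(dev_id, port_id, evs, n);
+ * \endcode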
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param port_id
+ *   The identifier of the event port.
+ * @param[out] ev
+ *   Points to an array of *nb_events* objects of type *rte_event* structure
+ *   for output to be populated with the dequeued event objects.
+ * @param nb_events
+ *   The maximum number of event objects to dequeue, typically up to the
+ *   value returned by rte_event_port_dequeue_depth() for this port.
+ *
+ * @param timeout_ticks
+ *   - 0 no-wait, returns immediately if there is no event.
+ *   - >0 wait for the event. If the device is configured with
+ *   RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT, this function will wait until
+ *   at least one event is available or *timeout_ticks* time has elapsed.
+ *   If the device is not configured with
+ *   RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT, this function will wait until
+ *   an event is available or the *dequeue_timeout_ns* previously supplied
+ *   to rte_event_dev_configure() has elapsed.
+ *
+ * @return
+ *   The number of event objects actually dequeued from the port. The return
+ *   value can be less than the value of the *nb_events* parameter when
+ *   fewer events are available on the event port.
+ *
+ * @see rte_event_port_dequeue_depth()
+ */
+static inline uint16_t
+rte_event_dequeue_burst(uint8_t dev_id, uint8_t port_id, struct rte_event ev[],
+			uint16_t nb_events, uint64_t timeout_ticks)
+{
+	struct rte_eventdev *dev = &rte_eventdevs[dev_id];
+
+#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
+	if (dev_id >= RTE_EVENT_MAX_DEVS || !rte_eventdevs[dev_id].attached) {
+		rte_errno = -EINVAL;
+		return 0;
+	}
+
+	if (port_id >= dev->data->nb_ports) {
+		rte_errno = -EINVAL;
+		return 0;
+	}
+#endif
+
+	/*
+	 * Allow zero-cost non-burst mode routine invocation if the
+	 * application requests nb_events as a const value of one.
+	 */
+	if (nb_events == 1)
+		return (*dev->dequeue)(
+			dev->data->ports[port_id], ev, timeout_ticks);
+	else
+		return (*dev->dequeue_burst)(
+			dev->data->ports[port_id], ev, nb_events,
+			timeout_ticks);
+}
+
+/**
+ * Link multiple source event queues supplied in *queues* to the destination
+ * event port designated by its *port_id* with the associated service
+ * priorities supplied in *priorities* on the event device designated by its
+ * *dev_id*.
+ *
+ * The link establishment shall enable the event port *port_id* to receive
+ * events from the specified event queue(s) supplied in *queues*
+ *
+ * An event queue may link to one or more event ports.
+ * The number of links that can be established from an event queue to event
+ * ports is implementation-defined.
+ *
+ * Event queue to event port links can be changed at runtime
+ * without re-configuring the device, to support scaling and to reduce the
+ * latency of critical work by establishing links to more event ports
+ * at runtime.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @param port_id
+ *   Event port identifier to select the destination port to link.
+ *
+ * @param queues
+ *   Points to an array of *nb_links* event queues to be linked
+ *   to the event port.
+ *   A NULL value is allowed, in which case this function links all the
+ *   configured event queues (*nb_event_queues* previously supplied to
+ *   rte_event_dev_configure()) to the event port *port_id*
+ *
+ * @param priorities
+ *   Points to an array of *nb_links* service priorities associated with each
+ *   event queue link to the event port.
+ *   The priority defines the event port's servicing priority for the
+ *   event queue, which may be ignored by an implementation.
+ *   The requested priority should be in the range
+ *   [RTE_EVENT_DEV_PRIORITY_HIGHEST, RTE_EVENT_DEV_PRIORITY_LOWEST].
+ *   The implementation shall normalize the requested priority to an
+ *   implementation-supported priority value.
+ *   A NULL value is allowed, in which case this function links the event
+ *   queues with RTE_EVENT_DEV_PRIORITY_NORMAL servicing priority
+ *
+ * @param nb_links
+ *   The number of links to establish. This parameter is ignored if queues is
+ *   NULL.
+ *
+ * @return
+ *   The number of links actually established. The return value can be less
+ *   than the value of the *nb_links* parameter when the implementation has a
+ *   limitation on a specific queue to port link establishment or if invalid
+ *   parameters are specified in *queues*.
+ *   If the return value is less than *nb_links*, the remaining links at the
+ *   end of queues[] are not established, and the caller has to take care of
+ *   them.
+ *   If the return value is less than *nb_links*, the implementation shall
+ *   update rte_errno accordingly. Possible rte_errno values are:
+ *   (-EDQUOT) Quota exceeded (the application tried to link a queue
+ *   configured with RTE_EVENT_QUEUE_CFG_SINGLE_LINK to more than one event
+ *   port)
+ *   (-EINVAL) Invalid parameter
+ *
+ */
+int
+rte_event_port_link(uint8_t dev_id, uint8_t port_id,
+		    const uint8_t queues[], const uint8_t priorities[],
+		    uint16_t nb_links);
+
+/**
+ * Unlink multiple source event queues supplied in *queues* from the
+ * destination event port designated by its *port_id* on the event device
+ * designated by its *dev_id*.
+ *
+ * The unlink call shall disable the event port *port_id* from receiving
+ * events from the specified event queue(s) supplied in *queues*
+ *
+ * Event queue to event port links can be removed at runtime
+ * without re-configuring the device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @param port_id
+ *   Event port identifier to select the destination port to unlink.
+ *
+ * @param queues
+ *   Points to an array of *nb_unlinks* event queues to be unlinked
+ *   from the event port.
+ *   A NULL value is allowed, in which case this function unlinks all the
+ *   event queue(s) from the event port *port_id*.
+ *
+ * @param nb_unlinks
+ *   The number of unlinks to perform. This parameter is ignored if queues is
+ *   NULL.
+ *
+ * @return
+ *   The number of unlinks actually performed. The return value can be less
+ *   than the value of the *nb_unlinks* parameter when the implementation has
+ *   a limitation on a specific queue to port unlink establishment or
+ *   if invalid parameters are specified.
+ *   If the return value is less than *nb_unlinks*, the remaining queues at
+ *   the end of queues[] are not unlinked, and the caller has to take care of
+ *   them.
+ *   If the return value is less than *nb_unlinks*, the implementation shall
+ *   update rte_errno accordingly. Possible rte_errno values are:
+ *   (-EINVAL) Invalid parameter
+ *
+ */
+int
+rte_event_port_unlink(uint8_t dev_id, uint8_t port_id,
+		      uint8_t queues[], uint16_t nb_unlinks);
+
+/**
+ * Retrieve the list of source event queues and their associated service
+ * priorities linked to the destination event port designated by its *port_id*
+ * on the event device designated by its *dev_id*.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @param port_id
+ *   Event port identifier.
+ *
+ * @param[out] queues
+ *   Points to an array of *queues* for output.
+ *   The caller has to allocate *RTE_EVENT_MAX_QUEUES_PER_DEV* bytes to
+ *   store the event queue(s) linked with the event port *port_id*
+ *
+ * @param[out] priorities
+ *   Points to an array of *priorities* for output.
+ *   The caller has to allocate *RTE_EVENT_MAX_QUEUES_PER_DEV* bytes to
+ *   store the service priority associated with each event queue linked
+ *
+ * @return
+ *   The number of links established on the event port designated by its
+ *   *port_id*.
+ *   - <0 on failure.
+ *
+ */
+int
+rte_event_port_links_get(uint8_t dev_id, uint8_t port_id,
+			 uint8_t queues[], uint8_t priorities[]);
+
+/**
+ * Dump internal information about *dev_id* to the FILE* provided in *f*.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @param f
+ *   A pointer to a file for output
+ *
+ * @return
+ *   - 0: on success
+ *   - <0: on failure.
+ */
+int
+rte_event_dev_dump(uint8_t dev_id, FILE *f);
+
+/** Maximum name length for extended statistics counters */
+#define RTE_EVENT_DEV_XSTATS_NAME_SIZE 64
+
+/**
+ * Selects the component of the eventdev to retrieve statistics from.
+ */
+enum rte_event_dev_xstats_mode {
+	RTE_EVENT_DEV_XSTATS_DEVICE,
+	RTE_EVENT_DEV_XSTATS_PORT,
+	RTE_EVENT_DEV_XSTATS_QUEUE,
+};
+
+/**
+ * A name-key lookup element for extended statistics.
+ *
+ * This structure is used to map between names and ID numbers
+ * for extended eventdev statistics.
+ */
+struct rte_event_dev_xstats_name {
+	char name[RTE_EVENT_DEV_XSTATS_NAME_SIZE];
+};
+
+/**
+ * Retrieve the names of extended statistics of an event device.
+ *
+ * @param dev_id
+ *   The identifier of the event device.
+ * @param mode
+ *   The mode of statistics to retrieve. Choices include the device
+ *   statistics, port statistics or queue statistics.
+ * @param queue_port_id
+ *   Used to specify the port or queue number in queue or port mode, and is
+ *   ignored in device mode.
+ * @param[out] xstats_names
+ *   Block of memory to insert names into. Must have a capacity of at least
+ *   *size*. If set to NULL, the function returns the required capacity.
+ * @param[out] ids
+ *   Block of memory to insert ids into. Must have a capacity of at least
+ *   *size*. If set to NULL, the function returns the required capacity. The
+ *   id values returned can be passed to *rte_event_dev_xstats_get* to select
+ *   statistics.
+ * @param size
+ *   Capacity of xstats_names (number of names).
+ * @return
+ *   - positive value lower than or equal to size: success. The return value
+ *     is the number of entries filled in the stats table.
+ *   - positive value higher than size: error, the given statistics table
+ *     is too small. The return value corresponds to the size that should
+ *     be given to succeed. The entries in the table are not valid and
+ *     shall not be used by the caller.
+ *   - negative value on error:
+ *        -ENODEV for invalid *dev_id*
+ *        -EINVAL for invalid mode, queue port or id parameters
+ *        -ENOTSUP if the device doesn't support this function.
+ */
+int
+rte_event_dev_xstats_names_get(uint8_t dev_id,
+			       enum rte_event_dev_xstats_mode mode,
+			       uint8_t queue_port_id,
+			       struct rte_event_dev_xstats_name *xstats_names,
+			       unsigned int *ids,
+			       unsigned int size);
+
+/**
+ * Retrieve extended statistics of an event device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param mode
+ *   The mode of statistics to retrieve. Choices include the device
+ *   statistics, port statistics or queue statistics.
+ * @param queue_port_id
+ *   Used to specify the port or queue number in queue or port mode, and is
+ *   ignored in device mode.
+ * @param ids
+ *   The id numbers of the stats to get. The ids can be taken from the stat
+ *   position in the stat list returned by rte_event_dev_xstats_names_get(),
+ *   or by using rte_event_dev_xstats_by_name_get()
+ * @param[out] values
+ *   The values for each stat requested by ID.
+ * @param n
+ *   The number of stats requested
+ * @return
+ *   - positive value: number of stat entries filled into the values array
+ *   - negative value on error:
+ *        -ENODEV for invalid *dev_id*
+ *        -EINVAL for invalid mode, queue port or id parameters
+ *        -ENOTSUP if the device doesn't support this function.
+ */
+int
+rte_event_dev_xstats_get(uint8_t dev_id,
+			 enum rte_event_dev_xstats_mode mode,
+			 uint8_t queue_port_id,
+			 const unsigned int ids[],
+			 uint64_t values[], unsigned int n);
+
+/**
+ * Retrieve the value of a single stat by requesting it by name.
+ *
+ * @param dev_id
+ *   The identifier of the device
+ * @param name
+ *   The stat name to retrieve
+ * @param[out] id
+ *   If non-NULL, the numerical id of the stat will be returned, so that
+ *   further requests for the stat can be made via rte_event_dev_xstats_get(),
+ *   which will be faster as it doesn't need to scan a list of names for the
+ *   stat. If the stat cannot be found, the id returned will be (unsigned)-1.
+ * @return
+ *   - positive value or zero: the stat value
+ *   - negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
+ */
+uint64_t
+rte_event_dev_xstats_by_name_get(uint8_t dev_id, const char *name,
+				 unsigned int *id);
+
+/**
+ * Reset the values of the xstats of the selected component in the device.
+ *
+ * @param dev_id
+ *   The identifier of the device
+ * @param mode
+ *   The mode of the statistics to reset. Choose from device, queue or port.
+ * @param queue_port_id
+ *   The queue or port to reset. 0 and positive values select ports and
+ *   queues, while -1 indicates all ports or queues.
+ * @param ids
+ *   Selects specific statistics to be reset. When NULL, all statistics
+ *   selected by *mode* will be reset. If non-NULL, must point to an array of
+ *   at least *nb_ids* size.
+ * @param nb_ids
+ *   The number of ids available from the *ids* array. Ignored when ids is
+ *   NULL.
+ * @return
+ *   - zero: successfully reset the statistics to zero
+ *   - negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
+ */
+int
+rte_event_dev_xstats_reset(uint8_t dev_id,
+			   enum rte_event_dev_xstats_mode mode,
+			   int16_t queue_port_id,
+			   const uint32_t ids[],
+			   uint32_t nb_ids);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_EVENTDEV_H_ */
diff --git a/lib/librte_eventdev/rte_eventdev_pmd.h b/lib/librte_eventdev/rte_eventdev_pmd.h
new file mode 100644
index 00000000..4005b3c9
--- /dev/null
+++ b/lib/librte_eventdev/rte_eventdev_pmd.h
@@ -0,0 +1,599 @@
+/*
+ *
+ *   Copyright(c) 2016 Cavium networks. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_EVENTDEV_PMD_H_
+#define _RTE_EVENTDEV_PMD_H_
+
+/** @file
+ * RTE Event PMD APIs
+ *
+ * @note
+ * These APIs are for event PMDs only and user applications should not call
+ * them directly.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+
+#include <rte_dev.h>
+#include <rte_pci.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+#include <rte_common.h>
+
+#include "rte_eventdev.h"
+
+/* Logging Macros */
+#define RTE_EDEV_LOG_ERR(...) \
+	RTE_LOG(ERR, EVENTDEV, \
+		RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+			__func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,)))
+
+#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
+#define RTE_EDEV_LOG_DEBUG(...) \
+	RTE_LOG(DEBUG, EVENTDEV, \
+		RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+			__func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,)))
+#else
+#define RTE_EDEV_LOG_DEBUG(...) (void)0
+#endif
+
+/* Macros to check for valid device */
+#define RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, retval) do { \
+	if (!rte_event_pmd_is_valid_dev((dev_id))) { \
+		RTE_EDEV_LOG_ERR("Invalid dev_id=%d\n", dev_id); \
+		return retval; \
+	} \
+} while (0)
+
+#define RTE_EVENTDEV_VALID_DEVID_OR_RET(dev_id) do { \
+	if (!rte_event_pmd_is_valid_dev((dev_id))) { \
+		RTE_EDEV_LOG_ERR("Invalid dev_id=%d\n", dev_id); \
+		return; \
+	} \
+} while (0)
+
+#define RTE_EVENTDEV_DETACHED  (0)
+#define RTE_EVENTDEV_ATTACHED  (1)
+
+/**
+ * Initialisation function of an event driver invoked for each matching
+ * event PCI device detected during the PCI probing phase.
+ *
+ * @param dev
+ *   The dev pointer is the address of the *rte_eventdev* structure associated
+ *   with the matching device and which has been [automatically] allocated in
+ *   the *rte_event_devices* array.
+ *
+ * @return
+ *   - 0: Success, the device is properly initialised by the driver.
+ *        In particular, the driver MUST have set up the *dev_ops* pointer
+ *        of the *dev* structure.
+ *   - <0: Error code of the device initialisation failure.
+ */
+typedef int (*eventdev_init_t)(struct rte_eventdev *dev);
+
+/**
+ * Finalisation function of a driver invoked for each matching
+ * PCI device detected during the PCI closing phase.
+ *
+ * @param dev
+ *   The dev pointer is the address of the *rte_eventdev* structure associated
+ *   with the matching device and which has been [automatically] allocated in
+ *   the *rte_event_devices* array.
+ *
+ * @return
+ *   - 0: Success, the device is properly finalised by the driver.
+ *        In particular, the driver MUST free the *dev_ops* pointer
+ *        of the *dev* structure.
+ *   - <0: Error code of the device finalisation failure.
+ */
+typedef int (*eventdev_uninit_t)(struct rte_eventdev *dev);
+
+/**
+ * The structure associated with a PMD driver.
+ *
+ * Each driver acts as a PCI driver and is represented by a generic
+ * *event_driver* structure that holds:
+ *
+ * - An *rte_pci_driver* structure (which must be the first field).
+ *
+ * - The *eventdev_init* function invoked for each matching PCI device.
+ *
+ * - The size of the private data to allocate for each matching device.
+ */
+struct rte_eventdev_driver {
+	struct rte_pci_driver pci_drv;	/**< The PMD is also a PCI driver. */
+	unsigned int dev_private_size;	/**< Size of device private data. */
+
+	eventdev_init_t eventdev_init;	/**< Device init function. */
+	eventdev_uninit_t eventdev_uninit; /**< Device uninit function. */
+};
+
+/** Global structure used for maintaining state of allocated event devices */
+struct rte_eventdev_global {
+	uint8_t nb_devs;	/**< Number of devices found */
+};
+
+extern struct rte_eventdev_global *rte_eventdev_globals;
+/** Pointer to global event devices data structure. */
+extern struct rte_eventdev *rte_eventdevs;
+/** The pool of rte_eventdev structures. */
+
+/**
+ * Get the rte_eventdev structure device pointer for the named device.
+ *
+ * @param name
+ *   Device name to select the device structure.
+ *
+ * @return
+ *   - The rte_eventdev structure pointer for the named device, or NULL if it
+ *     is not found.
+ */
+static inline struct rte_eventdev *
+rte_event_pmd_get_named_dev(const char *name)
+{
+	struct rte_eventdev *dev;
+	unsigned int i;
+
+	if (name == NULL)
+		return NULL;
+
+	for (i = 0; i < RTE_EVENT_MAX_DEVS; i++) {
+		dev = &rte_eventdevs[i];
+		if ((dev->attached == RTE_EVENTDEV_ATTACHED) &&
+		    (strcmp(dev->data->name, name) == 0))
+			return dev;
+	}
+
+	return NULL;
+}
+
+/**
+ * Check whether the event device index corresponds to a valid, attached
+ * event device.
+ *
+ * @param dev_id
+ *   Event device index.
+ *
+ * @return
+ *   - 1 if the device index is valid, 0 otherwise.
+ */
+static inline unsigned
+rte_event_pmd_is_valid_dev(uint8_t dev_id)
+{
+	struct rte_eventdev *dev;
+
+	if (dev_id >= RTE_EVENT_MAX_DEVS)
+		return 0;
+
+	dev = &rte_eventdevs[dev_id];
+	if (dev->attached != RTE_EVENTDEV_ATTACHED)
+		return 0;
+	else
+		return 1;
+}
+
+/**
+ * Definitions of all functions exported by a driver through the
+ * generic structure of type *event_dev_ops* supplied in the
+ * *rte_eventdev* structure associated with a device.
+ */
+
+/**
+ * Get device information of a device.
+ *
+ * @param dev
+ *   Event device pointer
+ * @param dev_info
+ *   Event device information structure
+ */
+typedef void (*eventdev_info_get_t)(struct rte_eventdev *dev,
+		struct rte_event_dev_info *dev_info);
+
+/**
+ * Configure a device.
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @return
+ *   Returns 0 on success
+ */
+typedef int (*eventdev_configure_t)(const struct rte_eventdev *dev);
+
+/**
+ * Start a configured device.
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @return
+ *   Returns 0 on success
+ */
+typedef int (*eventdev_start_t)(struct rte_eventdev *dev);
+
+/**
+ * Stop a configured device.
+ *
+ * @param dev
+ *   Event device pointer
+ */
+typedef void (*eventdev_stop_t)(struct rte_eventdev *dev);
+
+/**
+ * Close a configured device.
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @return
+ *  - 0 on success
+ *  - (-EAGAIN) if can't close as device is busy
+ */
+typedef int (*eventdev_close_t)(struct rte_eventdev *dev);
+
+/**
+ * Retrieve the default event queue configuration.
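+ *
+ * As a purely illustrative sketch, a software PMD might fill the defaults
+ * like this; the function name, the concrete values and the unused-parameter
+ * handling are all invented for the example:
+ *
+ * \code{.c}
+ *	static void
+ *	my_pmd_queue_def_conf(struct rte_eventdev *dev, uint8_t queue_id,
+ *			struct rte_event_queue_conf *queue_conf)
+ *	{
+ *		RTE_SET_USED(dev);
+ *		RTE_SET_USED(queue_id);
+ *		queue_conf->nb_atomic_flows = 1024;
+ *		queue_conf->nb_atomic_order_sequences = 1024;
+ *		queue_conf->event_queue_cfg = RTE_EVENT_QUEUE_CFG_ALL_TYPES;
+ *		queue_conf->priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
+ *	}
+ * \endcode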
+ * + * @param dev + * Event device pointer + * @param queue_id + * Event queue index + * @param[out] queue_conf + * Event queue configuration structure + * + */ +typedef void (*eventdev_queue_default_conf_get_t)(struct rte_eventdev *dev, + uint8_t queue_id, struct rte_event_queue_conf *queue_conf); + +/** + * Setup an event queue. + * + * @param dev + * Event device pointer + * @param queue_id + * Event queue index + * @param queue_conf + * Event queue configuration structure + * + * @return + * Returns 0 on success. + */ +typedef int (*eventdev_queue_setup_t)(struct rte_eventdev *dev, + uint8_t queue_id, + const struct rte_event_queue_conf *queue_conf); + +/** + * Release resources allocated by given event queue. + * + * @param dev + * Event device pointer + * @param queue_id + * Event queue index + * + */ +typedef void (*eventdev_queue_release_t)(struct rte_eventdev *dev, + uint8_t queue_id); + +/** + * Retrieve the default event port configuration. + * + * @param dev + * Event device pointer + * @param port_id + * Event port index + * @param[out] port_conf + * Event port configuration structure + * + */ +typedef void (*eventdev_port_default_conf_get_t)(struct rte_eventdev *dev, + uint8_t port_id, struct rte_event_port_conf *port_conf); + +/** + * Setup an event port. + * + * @param dev + * Event device pointer + * @param port_id + * Event port index + * @param port_conf + * Event port configuration structure + * + * @return + * Returns 0 on success. + */ +typedef int (*eventdev_port_setup_t)(struct rte_eventdev *dev, + uint8_t port_id, + const struct rte_event_port_conf *port_conf); + +/** + * Release memory resources allocated by given event port. + * + * @param port + * Event port pointer + * + */ +typedef void (*eventdev_port_release_t)(void *port); + +/** + * Link multiple source event queues to destination event port. + * + * @param dev + * Event device pointer + * @param port + * Event port pointer + * @param link + * Points to an array of *nb_links* event queues to be linked + * to the event port. + * @param priorities + * Points to an array of *nb_links* service priorities associated with each + * event queue link to event port. + * @param nb_links + * The number of links to establish + * + * @return + * Returns 0 on success. + * + */ +typedef int (*eventdev_port_link_t)(struct rte_eventdev *dev, void *port, + const uint8_t queues[], const uint8_t priorities[], + uint16_t nb_links); + +/** + * Unlink multiple source event queues from destination event port. + * + * @param dev + * Event device pointer + * @param port + * Event port pointer + * @param queues + * An array of *nb_unlinks* event queues to be unlinked from the event port. + * @param nb_unlinks + * The number of unlinks to establish + * + * @return + * Returns 0 on success. + * + */ +typedef int (*eventdev_port_unlink_t)(struct rte_eventdev *dev, void *port, + uint8_t queues[], uint16_t nb_unlinks); + +/** + * Converts nanoseconds to *timeout_ticks* value for rte_event_dequeue() + * + * @param dev + * Event device pointer + * @param ns + * Wait time in nanosecond + * @param[out] timeout_ticks + * Value for the *timeout_ticks* parameter in rte_event_dequeue() function + * + * @return + * Returns 0 on success. 
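On the PMD side, the link/unlink hooks typically just record the queue-to-port mapping in the driver's private port structure. A hedged sketch; struct sketch_port and its queue bound are illustrative, not from this patch:

```c
#include <errno.h>
#include <stdint.h>
#include <rte_common.h>
#include <rte_eventdev.h>

#define SKETCH_MAX_QUEUES 64	/* illustrative bound */

/* Hypothetical PMD-private port state. */
struct sketch_port {
	uint8_t linked[SKETCH_MAX_QUEUES];
	uint8_t priority[SKETCH_MAX_QUEUES];
};

/* One possible shape of an eventdev_port_link_t implementation:
 * record each queue->port link with its service priority. */
static int
sketch_port_link(struct rte_eventdev *dev __rte_unused, void *port,
		const uint8_t queues[], const uint8_t priorities[],
		uint16_t nb_links)
{
	struct sketch_port *p = port;
	uint16_t i;

	for (i = 0; i < nb_links; i++) {
		if (queues[i] >= SKETCH_MAX_QUEUES)
			return -EINVAL;
		p->linked[queues[i]] = 1;
		p->priority[queues[i]] = priorities[i];
	}
	return 0;
}
```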
+ * + */ +typedef int (*eventdev_dequeue_timeout_ticks_t)(struct rte_eventdev *dev, + uint64_t ns, uint64_t *timeout_ticks); + +/** + * Dump internal information + * + * @param dev + * Event device pointer + * @param f + * A pointer to a file for output + * + */ +typedef void (*eventdev_dump_t)(struct rte_eventdev *dev, FILE *f); + +/** + * Retrieve a set of statistics from device + * + * @param dev + * Event device pointer + * @param ids + * The stat ids to retrieve + * @param values + * The returned stat values + * @param n + * The number of id values and entries in the values array + * @return + * The number of stat values successfully filled into the values array + */ +typedef int (*eventdev_xstats_get_t)(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id, + const unsigned int ids[], uint64_t values[], unsigned int n); + +/** + * Resets the statistic values in xstats for the device, based on mode. + */ +typedef int (*eventdev_xstats_reset_t)(struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, + int16_t queue_port_id, + const uint32_t ids[], + uint32_t nb_ids); + +/** + * Get names of extended stats of an event device + * + * @param dev + * Event device pointer + * @param xstats_names + * Array of name values to be filled in + * @param size + * Number of values in the xstats_names array + * @return + * When size >= the number of stats, return the number of stat values filled + * into the array. + * When size < the number of available stats, return the number of stats + * values, and do not fill in any data into xstats_names. + */ +typedef int (*eventdev_xstats_get_names_t)(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id, + struct rte_event_dev_xstats_name *xstats_names, + unsigned int *ids, unsigned int size); + +/** + * Get value of one stats and optionally return its id + * + * @param dev + * Event device pointer + * @param name + * The name of the stat to retrieve + * @param id + * Pointer to an unsigned int where we store the stat-id for future reference. + * This pointer may be null if the id is not required. + * @return + * The value of the stat, or (uint64_t)-1 if the stat is not found. + * If the stat is not found, the id value will be returned as (unsigned)-1, + * if id pointer is non-NULL + */ +typedef uint64_t (*eventdev_xstats_get_by_name)(const struct rte_eventdev *dev, + const char *name, unsigned int *id); + +/** Event device operations function pointer table */ +struct rte_eventdev_ops { + eventdev_info_get_t dev_infos_get; /**< Get device info. */ + eventdev_configure_t dev_configure; /**< Configure device. */ + eventdev_start_t dev_start; /**< Start device. */ + eventdev_stop_t dev_stop; /**< Stop device. */ + eventdev_close_t dev_close; /**< Close device. */ + + eventdev_queue_default_conf_get_t queue_def_conf; + /**< Get default queue configuration. */ + eventdev_queue_setup_t queue_setup; + /**< Set up an event queue. */ + eventdev_queue_release_t queue_release; + /**< Release an event queue. */ + + eventdev_port_default_conf_get_t port_def_conf; + /**< Get default port configuration. */ + eventdev_port_setup_t port_setup; + /**< Set up an event port. */ + eventdev_port_release_t port_release; + /**< Release an event port. */ + + eventdev_port_link_t port_link; + /**< Link event queues to an event port. */ + eventdev_port_unlink_t port_unlink; + /**< Unlink event queues from an event port. 
 */
+	eventdev_dequeue_timeout_ticks_t timeout_ticks;
+	/**< Converts ns to *timeout_ticks* value for rte_event_dequeue() */
+	eventdev_dump_t dump;
+	/**< Dump internal information. */
+
+	eventdev_xstats_get_t xstats_get;
+	/**< Get extended device statistics. */
+	eventdev_xstats_get_names_t xstats_get_names;
+	/**< Get names of extended stats. */
+	eventdev_xstats_get_by_name xstats_get_by_name;
+	/**< Get one value by name. */
+	eventdev_xstats_reset_t xstats_reset;
+	/**< Reset the statistics values in xstats. */
+};
+
+/**
+ * Allocates a new eventdev slot for an event device and returns the pointer
+ * to that slot for the driver to use.
+ *
+ * @param name
+ *   Unique identifier name for each device
+ * @param socket_id
+ *   Socket to allocate resources on.
+ * @return
+ *   - Slot in the rte_event_devices array for a new device;
+ */
+struct rte_eventdev *
+rte_event_pmd_allocate(const char *name, int socket_id);
+
+/**
+ * Release the specified eventdev device.
+ *
+ * @param eventdev
+ *   The *eventdev* pointer is the address of the *rte_eventdev* structure.
+ * @return
+ *   - 0 on success, negative on error
+ */
+int
+rte_event_pmd_release(struct rte_eventdev *eventdev);
+
+/**
+ * Creates a new virtual event device and returns the pointer to that device.
+ *
+ * @param name
+ *   PMD type name
+ * @param dev_private_size
+ *   Size of the event PMD's private data
+ * @param socket_id
+ *   Socket to allocate resources on.
+ *
+ * @return
+ *   - Eventdev pointer if device is successfully created.
+ *   - NULL if device cannot be created.
+ */
+struct rte_eventdev *
+rte_event_pmd_vdev_init(const char *name, size_t dev_private_size,
+		int socket_id);
+
+/**
+ * Destroy the given virtual event device.
+ *
+ * @param name
+ *   PMD type name
+ * @return
+ *   - 0 on success, negative on error
+ */
+int
+rte_event_pmd_vdev_uninit(const char *name);
+
+/**
+ * Wrapper for use by PCI drivers as a .probe function to attach to an event
+ * interface.
+ */
+int rte_event_pmd_pci_probe(struct rte_pci_driver *pci_drv,
+			struct rte_pci_device *pci_dev);
+
+/**
+ * Wrapper for use by PCI drivers as a .remove function to detach an event
+ * interface.
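Taken together, a driver fills one static rte_eventdev_ops table and installs it from its eventdev_init_t callback. A minimal sketch with hypothetical callback names:

```c
#include <string.h>
#include <rte_common.h>
#include <rte_eventdev.h>
#include "rte_eventdev_pmd.h"

/* Hypothetical driver callbacks. */
static void
sketch_info_get(struct rte_eventdev *dev __rte_unused,
		struct rte_event_dev_info *info)
{
	memset(info, 0, sizeof(*info));
}

static int
sketch_configure(const struct rte_eventdev *dev __rte_unused)
{
	return 0;
}

/* A driver exposes one static ops table; callbacks it does not
 * implement stay NULL... */
static const struct rte_eventdev_ops sketch_ops = {
	.dev_infos_get = sketch_info_get,
	.dev_configure = sketch_configure,
};

/* ...and installs it from its eventdev_init_t hook, satisfying the
 * "MUST have set up the dev_ops pointer" requirement documented above. */
static int
sketch_eventdev_init(struct rte_eventdev *dev)
{
	dev->dev_ops = &sketch_ops;
	return 0;
}
```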
+ */ +int rte_event_pmd_pci_remove(struct rte_pci_device *pci_dev); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_EVENTDEV_PMD_H_ */ diff --git a/lib/librte_eventdev/rte_eventdev_version.map b/lib/librte_eventdev/rte_eventdev_version.map new file mode 100644 index 00000000..1fa6b333 --- /dev/null +++ b/lib/librte_eventdev/rte_eventdev_version.map @@ -0,0 +1,44 @@ +DPDK_17.05 { + global: + + rte_eventdevs; + + rte_event_dev_count; + rte_event_dev_get_dev_id; + rte_event_dev_socket_id; + rte_event_dev_info_get; + rte_event_dev_configure; + rte_event_dev_start; + rte_event_dev_stop; + rte_event_dev_close; + rte_event_dev_dump; + rte_event_dev_xstats_by_name_get; + rte_event_dev_xstats_get; + rte_event_dev_xstats_names_get; + rte_event_dev_xstats_reset; + + rte_event_port_default_conf_get; + rte_event_port_setup; + rte_event_port_dequeue_depth; + rte_event_port_enqueue_depth; + rte_event_port_count; + rte_event_port_link; + rte_event_port_unlink; + rte_event_port_links_get; + + rte_event_queue_default_conf_get; + rte_event_queue_setup; + rte_event_queue_count; + rte_event_queue_priority; + + rte_event_dequeue_timeout_ticks; + + rte_event_pmd_allocate; + rte_event_pmd_release; + rte_event_pmd_vdev_init; + rte_event_pmd_vdev_uninit; + rte_event_pmd_pci_probe; + rte_event_pmd_pci_remove; + + local: *; +}; diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile index bb1ea990..d856aa26 100644 --- a/lib/librte_hash/Makefile +++ b/lib/librte_hash/Makefile @@ -55,7 +55,4 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h -# this lib needs eal and ring -DEPDIRS-$(CONFIG_RTE_LIBRTE_HASH) += lib/librte_eal lib/librte_ring - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h index 7dd6334e..2abe42ab 100644 --- a/lib/librte_hash/rte_crc_arm64.h +++ b/lib/librte_hash/rte_crc_arm64.h @@ -110,8 +110,10 @@ rte_hash_crc_set_alg(uint8_t alg) case CRC32_ARM64: if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32)) alg = CRC32_SW; + /* fall-through */ case CRC32_SW: crc32_alg = alg; + /* fall-through */ default: break; } diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 51db006a..645c0cfa 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -536,7 +536,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, if (cached_free_slots->len == 0) { /* Need to get another burst of free slots from global ring */ n_slots = rte_ring_mc_dequeue_burst(h->free_slots, - cached_free_slots->objs, LCORE_CACHE_SIZE); + cached_free_slots->objs, + LCORE_CACHE_SIZE, NULL); if (n_slots == 0) return -ENOSPC; @@ -808,7 +809,7 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) /* Need to enqueue the free slots in global ring. */ n_slots = rte_ring_mp_enqueue_burst(h->free_slots, cached_free_slots->objs, - LCORE_CACHE_SIZE); + LCORE_CACHE_SIZE, NULL); cached_free_slots->len -= n_slots; } /* Put index of new free slot in cache. */ diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h index 63e74aa4..0f485b85 100644 --- a/lib/librte_hash/rte_hash_crc.h +++ b/lib/librte_hash/rte_hash_crc.h @@ -476,9 +476,15 @@ rte_hash_crc_set_alg(uint8_t alg) case CRC32_SSE42_x64: if (! 
rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T)) alg = CRC32_SSE42; +#if __GNUC__ >= 7 + __attribute__ ((fallthrough)); +#endif case CRC32_SSE42: if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2)) alg = CRC32_SW; +#if __GNUC__ >= 7 + __attribute__ ((fallthrough)); +#endif #endif case CRC32_SW: crc32_alg = alg; diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile index 43f8b1e3..4e693bf8 100644 --- a/lib/librte_ip_frag/Makefile +++ b/lib/librte_ip_frag/Makefile @@ -52,10 +52,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += ip_frag_internal.c # install this header file SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h -DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_ether -DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_hash -DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mbuf -DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_jobstats/Makefile b/lib/librte_jobstats/Makefile index 136a448e..561a0678 100644 --- a/lib/librte_jobstats/Makefile +++ b/lib/librte_jobstats/Makefile @@ -47,7 +47,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_JOBSTATS) := rte_jobstats.c # install this header file SYMLINK-$(CONFIG_RTE_LIBRTE_JOBSTATS)-include := rte_jobstats.h -# this lib needs eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += lib/librte_eal - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_kni/Makefile b/lib/librte_kni/Makefile index 09474461..70f1ca8f 100644 --- a/lib/librte_kni/Makefile +++ b/lib/librte_kni/Makefile @@ -46,9 +46,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_KNI) := rte_kni.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_KNI)-include := rte_kni.h -DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_mbuf -DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_mempool -DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_ether - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c index a80cefd2..c3f9208c 100644 --- a/lib/librte_kni/rte_kni.c +++ b/lib/librte_kni/rte_kni.c @@ -451,17 +451,35 @@ kni_free_fifo(struct rte_kni_fifo *fifo) } while (ret); } +static void * +va2pa(struct rte_mbuf *m) +{ + return (void *)((unsigned long)m - + ((unsigned long)m->buf_addr - + (unsigned long)m->buf_physaddr)); +} + +static void +obj_free(struct rte_mempool *mp __rte_unused, void *opaque, void *obj, + unsigned obj_idx __rte_unused) +{ + struct rte_mbuf *m = obj; + void *mbuf_phys = opaque; + + if (va2pa(m) == mbuf_phys) + rte_pktmbuf_free(m); +} + static void -kni_free_fifo_phy(struct rte_kni_fifo *fifo) +kni_free_fifo_phy(struct rte_mempool *mp, struct rte_kni_fifo *fifo) { void *mbuf_phys; int ret; do { ret = kni_fifo_get(fifo, &mbuf_phys, 1); - /* - * TODO: free mbufs - */ + if (ret) + rte_mempool_obj_iter(mp, obj_free, mbuf_phys); } while (ret); } @@ -470,6 +488,7 @@ rte_kni_release(struct rte_kni *kni) { struct rte_kni_device_info dev_info; uint32_t slot_id; + uint32_t retry = 5; if (!kni || !kni->in_use) return -1; @@ -481,9 +500,16 @@ rte_kni_release(struct rte_kni *kni) } /* mbufs in all fifo should be released, except request/response */ + + /* wait until all rxq packets processed by kernel */ + while (kni_fifo_count(kni->rx_q) && retry--) + usleep(1000); + + if (kni_fifo_count(kni->rx_q)) + RTE_LOG(ERR, KNI, "Fail to free all Rx-q items\n"); + + kni_free_fifo_phy(kni->pktmbuf_pool, kni->alloc_q); kni_free_fifo(kni->tx_q); - kni_free_fifo_phy(kni->rx_q); - 
kni_free_fifo_phy(kni->alloc_q); kni_free_fifo(kni->free_q); slot_id = kni->slot_id; @@ -549,14 +575,6 @@ rte_kni_handle_request(struct rte_kni *kni) return 0; } -static void * -va2pa(struct rte_mbuf *m) -{ - return (void *)((unsigned long)m - - ((unsigned long)m->buf_addr - - (unsigned long)m->buf_physaddr)); -} - unsigned rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num) { diff --git a/lib/librte_kni/rte_kni_fifo.h b/lib/librte_kni/rte_kni_fifo.h index 8cb85873..c7cd5c26 100644 --- a/lib/librte_kni/rte_kni_fifo.h +++ b/lib/librte_kni/rte_kni_fifo.h @@ -91,3 +91,12 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) fifo->read = new_read; return i; } + +/** + * Get the num of elements in the fifo + */ +static inline uint32_t +kni_fifo_count(struct rte_kni_fifo *fifo) +{ + return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1); +} diff --git a/lib/librte_kvargs/Makefile b/lib/librte_kvargs/Makefile index 87b09f20..564dd310 100644 --- a/lib/librte_kvargs/Makefile +++ b/lib/librte_kvargs/Makefile @@ -49,7 +49,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_KVARGS) := rte_kvargs.c INCS := rte_kvargs.h SYMLINK-$(CONFIG_RTE_LIBRTE_KVARGS)-include := $(INCS) -# this lib needs eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += lib/librte_eal - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_kvargs/rte_kvargs.c b/lib/librte_kvargs/rte_kvargs.c index 8d56abd4..854ac83f 100644 --- a/lib/librte_kvargs/rte_kvargs.c +++ b/lib/librte_kvargs/rte_kvargs.c @@ -92,9 +92,9 @@ rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params) * into a list of valid keys. */ static int -is_valid_key(const char *valid[], const char *key_match) +is_valid_key(const char * const valid[], const char *key_match) { - const char **valid_ptr; + const char * const *valid_ptr; for (valid_ptr = valid; *valid_ptr != NULL; valid_ptr++) { if (strcmp(key_match, *valid_ptr) == 0) @@ -109,7 +109,7 @@ is_valid_key(const char *valid[], const char *key_match) */ static int check_for_valid_keys(struct rte_kvargs *kvlist, - const char *valid[]) + const char * const valid[]) { unsigned i, ret; struct rte_kvargs_pair *pair; @@ -187,7 +187,7 @@ rte_kvargs_free(struct rte_kvargs *kvlist) * check if only valid keys were used. */ struct rte_kvargs * -rte_kvargs_parse(const char *args, const char *valid_keys[]) +rte_kvargs_parse(const char *args, const char * const valid_keys[]) { struct rte_kvargs *kvlist; diff --git a/lib/librte_kvargs/rte_kvargs.h b/lib/librte_kvargs/rte_kvargs.h index ae9ae79f..5821c726 100644 --- a/lib/librte_kvargs/rte_kvargs.h +++ b/lib/librte_kvargs/rte_kvargs.h @@ -97,7 +97,8 @@ struct rte_kvargs { * - A pointer to an allocated rte_kvargs structure on success * - NULL on error */ -struct rte_kvargs *rte_kvargs_parse(const char *args, const char *valid_keys[]); +struct rte_kvargs *rte_kvargs_parse(const char *args, + const char *const valid_keys[]); /** * Free a rte_kvargs structure diff --git a/lib/librte_latencystats/Makefile b/lib/librte_latencystats/Makefile new file mode 100644 index 00000000..eaacbb73 --- /dev/null +++ b/lib/librte_latencystats/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2017 Intel Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
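With the const-qualified rte_kvargs prototypes above, a caller's valid-key table can itself be fully const. A usage sketch; the key names and the parse helper are illustrative, not from this patch:

```c
#include <stdlib.h>
#include <rte_common.h>
#include <rte_kvargs.h>

/* Illustrative device-argument keys; no cast needed now that
 * rte_kvargs_parse() takes const char * const []. */
static const char * const valid_keys[] = { "ring_size", "mode", NULL };

static int
handle_ring_size(const char *key __rte_unused, const char *value,
		void *opaque)
{
	*(unsigned long *)opaque = strtoul(value, NULL, 0);
	return 0;
}

/* Hypothetical parse helper for a vdev's argument string. */
static int
sketch_parse_devargs(const char *args, unsigned long *ring_size)
{
	struct rte_kvargs *kvlist = rte_kvargs_parse(args, valid_keys);

	if (kvlist == NULL)
		return -1;	/* unknown key or malformed string */
	rte_kvargs_process(kvlist, "ring_size", handle_ring_size, ring_size);
	rte_kvargs_free(kvlist);
	return 0;
}
```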
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_latencystats.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 +LDLIBS += -lm +LDLIBS += -lpthread + +EXPORT_MAP := rte_latencystats_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_LATENCY_STATS) := rte_latencystats.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_LATENCY_STATS)-include := rte_latencystats.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_latencystats/rte_latencystats.c b/lib/librte_latencystats/rte_latencystats.c new file mode 100644 index 00000000..ce029a12 --- /dev/null +++ b/lib/librte_latencystats/rte_latencystats.c @@ -0,0 +1,360 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <unistd.h> +#include <sys/types.h> +#include <stdbool.h> +#include <math.h> + +#include <rte_mbuf.h> +#include <rte_log.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_metrics.h> +#include <rte_memzone.h> +#include <rte_lcore.h> + +#include "rte_latencystats.h" + +/** Nano seconds per second */ +#define NS_PER_SEC 1E9 + +/** Clock cycles per nano second */ +static uint64_t +latencystat_cycles_per_ns(void) +{ + return rte_get_timer_hz() / NS_PER_SEC; +} + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_LATENCY_STATS RTE_LOGTYPE_USER1 + +static const char *MZ_RTE_LATENCY_STATS = "rte_latencystats"; +static int latency_stats_index; +static uint64_t samp_intvl; +static uint64_t timer_tsc; +static uint64_t prev_tsc; + +struct rte_latency_stats { + float min_latency; /**< Minimum latency in nano seconds */ + float avg_latency; /**< Average latency in nano seconds */ + float max_latency; /**< Maximum latency in nano seconds */ + float jitter; /** Latency variation */ +}; + +static struct rte_latency_stats *glob_stats; + +struct rxtx_cbs { + struct rte_eth_rxtx_callback *cb; +}; + +static struct rxtx_cbs rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT]; +static struct rxtx_cbs tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT]; + +struct latency_stats_nameoff { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned int offset; +}; + +static const struct latency_stats_nameoff lat_stats_strings[] = { + {"min_latency_ns", offsetof(struct rte_latency_stats, min_latency)}, + {"avg_latency_ns", offsetof(struct rte_latency_stats, avg_latency)}, + {"max_latency_ns", offsetof(struct rte_latency_stats, max_latency)}, + {"jitter_ns", offsetof(struct rte_latency_stats, jitter)}, +}; + +#define NUM_LATENCY_STATS (sizeof(lat_stats_strings) / \ + sizeof(lat_stats_strings[0])) + +int32_t +rte_latencystats_update(void) +{ + unsigned int i; + float *stats_ptr = NULL; + uint64_t values[NUM_LATENCY_STATS] = {0}; + int ret; + + for (i = 0; i < NUM_LATENCY_STATS; i++) { + stats_ptr = RTE_PTR_ADD(glob_stats, + lat_stats_strings[i].offset); + values[i] = (uint64_t)floor((*stats_ptr)/ + latencystat_cycles_per_ns()); + } + + ret = rte_metrics_update_values(RTE_METRICS_GLOBAL, + latency_stats_index, + values, NUM_LATENCY_STATS); + if (ret < 0) + RTE_LOG(INFO, LATENCY_STATS, "Failed to push the stats\n"); + + return ret; +} + +static void +rte_latencystats_fill_values(struct rte_metric_value *values) +{ + unsigned int i; + float *stats_ptr = NULL; + + for (i = 0; i < NUM_LATENCY_STATS; i++) { + stats_ptr = RTE_PTR_ADD(glob_stats, + lat_stats_strings[i].offset); + values[i].key = i; + values[i].value = (uint64_t)floor((*stats_ptr)/ + latencystat_cycles_per_ns()); + } +} + +static uint16_t +add_time_stamps(uint8_t pid __rte_unused, + uint16_t qid __rte_unused, + struct rte_mbuf **pkts, + uint16_t nb_pkts, + uint16_t max_pkts __rte_unused, + void *user_cb __rte_unused) +{ + unsigned int i; + uint64_t diff_tsc, now; + + /* + * For every sample interval, + * time stamp is marked on one received packet. 
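The sampling interval above is pre-converted from nanoseconds to TSC cycles via latencystat_cycles_per_ns(). A small sketch of that conversion; note the integer division truncates, e.g. a hypothetical 2.4 GHz timer yields 2 cycles/ns, so a 1 ms (1000000 ns) interval becomes 2000000 cycles:

```c
#include <stdint.h>
#include <rte_cycles.h>

/* ns -> TSC-cycle conversion as used by the sampling logic above. */
static uint64_t
ns_to_cycles(uint64_t ns)
{
	uint64_t cycles_per_ns = rte_get_timer_hz() / 1000000000ULL;

	return ns * cycles_per_ns;
}
```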
+ */
+	now = rte_rdtsc();
+	for (i = 0; i < nb_pkts; i++) {
+		diff_tsc = now - prev_tsc;
+		timer_tsc += diff_tsc;
+		if (timer_tsc >= samp_intvl) {
+			pkts[i]->timestamp = now;
+			timer_tsc = 0;
+		}
+		prev_tsc = now;
+		now = rte_rdtsc();
+	}
+
+	return nb_pkts;
+}
+
+static uint16_t
+calc_latency(uint8_t pid __rte_unused,
+		uint16_t qid __rte_unused,
+		struct rte_mbuf **pkts,
+		uint16_t nb_pkts,
+		void *_ __rte_unused)
+{
+	unsigned int i, cnt = 0;
+	uint64_t now;
+	float latency[nb_pkts];
+	static float prev_latency;
+	/*
+	 * Alpha represents the degree of weighting decrease in EWMA,
+	 * a constant smoothing factor between 0 and 1. The value
+	 * is used below for measuring average latency.
+	 */
+	const float alpha = 0.2;
+
+	now = rte_rdtsc();
+	for (i = 0; i < nb_pkts; i++) {
+		if (pkts[i]->timestamp)
+			latency[cnt++] = now - pkts[i]->timestamp;
+	}
+
+	for (i = 0; i < cnt; i++) {
+		/*
+		 * The jitter is calculated as the statistical mean of
+		 * interpacket delay variation. The "jitter estimate" is
+		 * computed by taking the absolute values of the ipdv
+		 * sequence and applying an exponential filter with
+		 * parameter 1/16 to generate the estimate, i.e.
+		 * J = J + (|D(i-1,i)| - J)/16, where J is the jitter and
+		 * D(i-1,i) is the difference in latency of the two
+		 * consecutive packets i-1 and i.
+		 * Reference: Calculated as per RFC 5481, sec 4.1,
+		 * RFC 3393 sec 4.5, RFC 1889 sec.
+		 */
+		glob_stats->jitter += (fabsf(prev_latency - latency[i])
+					- glob_stats->jitter)/16;
+		if (glob_stats->min_latency == 0)
+			glob_stats->min_latency = latency[i];
+		else if (latency[i] < glob_stats->min_latency)
+			glob_stats->min_latency = latency[i];
+		/* Check max independently of min, so the very first
+		 * sample can initialise both bounds. */
+		if (latency[i] > glob_stats->max_latency)
+			glob_stats->max_latency = latency[i];
+		/*
+		 * The average latency is measured using exponential moving
+		 * average, i.e.
using EWMA + * https://en.wikipedia.org/wiki/Moving_average + */ + glob_stats->avg_latency += + alpha * (latency[i] - glob_stats->avg_latency); + prev_latency = latency[i]; + } + + return nb_pkts; +} + +int +rte_latencystats_init(uint64_t app_samp_intvl, + rte_latency_stats_flow_type_fn user_cb) +{ + unsigned int i; + uint8_t pid; + uint16_t qid; + struct rxtx_cbs *cbs = NULL; + const uint8_t nb_ports = rte_eth_dev_count(); + const char *ptr_strings[NUM_LATENCY_STATS] = {0}; + const struct rte_memzone *mz = NULL; + const unsigned int flags = 0; + + if (rte_memzone_lookup(MZ_RTE_LATENCY_STATS)) + return -EEXIST; + + /** Allocate stats in shared memory fo multi process support */ + mz = rte_memzone_reserve(MZ_RTE_LATENCY_STATS, sizeof(*glob_stats), + rte_socket_id(), flags); + if (mz == NULL) { + RTE_LOG(ERR, LATENCY_STATS, "Cannot reserve memory: %s:%d\n", + __func__, __LINE__); + return -ENOMEM; + } + + glob_stats = mz->addr; + samp_intvl = app_samp_intvl * latencystat_cycles_per_ns(); + + /** Register latency stats with stats library */ + for (i = 0; i < NUM_LATENCY_STATS; i++) + ptr_strings[i] = lat_stats_strings[i].name; + + latency_stats_index = rte_metrics_reg_names(ptr_strings, + NUM_LATENCY_STATS); + if (latency_stats_index < 0) { + RTE_LOG(DEBUG, LATENCY_STATS, + "Failed to register latency stats names\n"); + return -1; + } + + /** Register Rx/Tx callbacks */ + for (pid = 0; pid < nb_ports; pid++) { + struct rte_eth_dev_info dev_info; + rte_eth_dev_info_get(pid, &dev_info); + for (qid = 0; qid < dev_info.nb_rx_queues; qid++) { + cbs = &rx_cbs[pid][qid]; + cbs->cb = rte_eth_add_first_rx_callback(pid, qid, + add_time_stamps, user_cb); + if (!cbs->cb) + RTE_LOG(INFO, LATENCY_STATS, "Failed to " + "register Rx callback for pid=%d, " + "qid=%d\n", pid, qid); + } + for (qid = 0; qid < dev_info.nb_tx_queues; qid++) { + cbs = &tx_cbs[pid][qid]; + cbs->cb = rte_eth_add_tx_callback(pid, qid, + calc_latency, user_cb); + if (!cbs->cb) + RTE_LOG(INFO, LATENCY_STATS, "Failed to " + "register Tx callback for pid=%d, " + "qid=%d\n", pid, qid); + } + } + return 0; +} + +int +rte_latencystats_uninit(void) +{ + uint8_t pid; + uint16_t qid; + int ret = 0; + struct rxtx_cbs *cbs = NULL; + const uint8_t nb_ports = rte_eth_dev_count(); + + /** De register Rx/Tx callbacks */ + for (pid = 0; pid < nb_ports; pid++) { + struct rte_eth_dev_info dev_info; + rte_eth_dev_info_get(pid, &dev_info); + for (qid = 0; qid < dev_info.nb_rx_queues; qid++) { + cbs = &rx_cbs[pid][qid]; + ret = rte_eth_remove_rx_callback(pid, qid, cbs->cb); + if (ret) + RTE_LOG(INFO, LATENCY_STATS, "failed to " + "remove Rx callback for pid=%d, " + "qid=%d\n", pid, qid); + } + for (qid = 0; qid < dev_info.nb_tx_queues; qid++) { + cbs = &tx_cbs[pid][qid]; + ret = rte_eth_remove_tx_callback(pid, qid, cbs->cb); + if (ret) + RTE_LOG(INFO, LATENCY_STATS, "failed to " + "remove Tx callback for pid=%d, " + "qid=%d\n", pid, qid); + } + } + + return 0; +} + +int +rte_latencystats_get_names(struct rte_metric_name *names, uint16_t size) +{ + unsigned int i; + + if (names == NULL || size < NUM_LATENCY_STATS) + return NUM_LATENCY_STATS; + + for (i = 0; i < NUM_LATENCY_STATS; i++) + snprintf(names[i].name, sizeof(names[i].name), + "%s", lat_stats_strings[i].name); + + return NUM_LATENCY_STATS; +} + +int +rte_latencystats_get(struct rte_metric_value *values, uint16_t size) +{ + if (size < NUM_LATENCY_STATS || values == NULL) + return NUM_LATENCY_STATS; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + const struct rte_memzone *mz; + mz = 
rte_memzone_lookup(MZ_RTE_LATENCY_STATS); + if (mz == NULL) { + RTE_LOG(ERR, LATENCY_STATS, + "Latency stats memzone not found\n"); + return -ENOMEM; + } + glob_stats = mz->addr; + } + + /* Retrieve latency stats */ + rte_latencystats_fill_values(values); + + return NUM_LATENCY_STATS; +} diff --git a/lib/librte_latencystats/rte_latencystats.h b/lib/librte_latencystats/rte_latencystats.h new file mode 100644 index 00000000..d85cf3a5 --- /dev/null +++ b/lib/librte_latencystats/rte_latencystats.h @@ -0,0 +1,156 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LATENCYSTATS_H_ +#define _RTE_LATENCYSTATS_H_ + +/** + * @file + * RTE latency stats + * + * library to provide application and flow based latency stats. + */ + +#include <stdint.h> +#include <rte_metrics.h> +#include <rte_mbuf.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Note: This function pointer is for future flow based latency stats + * implementation. + * + * Function type used for identifting flow types of a Rx packet. + * + * The callback function is called on Rx for each packet. + * This function is used for flow based latency calculations. + * + * @param pkt + * Packet that has to be identified with its flow types. + * @param user_param + * The arbitrary user parameter passed in by the application when + * the callback was originally configured. + * @return + * The flow_mask, representing the multiple flow types of a packet. + */ +typedef uint16_t (*rte_latency_stats_flow_type_fn)(struct rte_mbuf *pkt, + void *user_param); + +/** + * Registers Rx/Tx callbacks for each active port, queue. + * + * @param samp_intvl + * Sampling time period in nano seconds, at which packet + * should be marked with time stamp. + * @param user_cb + * Note: This param is for future flow based latency stats + * implementation. + * User callback to be called to get flow types of a packet. + * Used for flow based latency calculation. 
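A minimal application wiring of this library might look as follows; the interval constant is illustrative, and the sketch assumes rte_metrics_init() has already been called elsewhere by the application:

```c
#include <stdio.h>
#include <rte_latencystats.h>

#define LATENCY_SAMPLE_NS (100 * 1000)	/* illustrative 100 us interval */

/* Global-stats-only setup: the flow-type callback is documented as a
 * future hook, so NULL is passed for now. */
static int
setup_latency_stats(void)
{
	return rte_latencystats_init(LATENCY_SAMPLE_NS, NULL);
}

/* Called periodically from a control thread to push the current
 * values into the metrics library. */
static void
poll_latency_stats(void)
{
	if (rte_latencystats_update() < 0)
		printf("latency stats update failed\n");
}
```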
+ * If the value is NULL, global stats will be calculated, + * else flow based latency stats will be calculated. + * For now just pass on the NULL value to this param. + * @return + * -1 : On error + * -ENOMEM: On error + * 0 : On success + */ +int rte_latencystats_init(uint64_t samp_intvl, + rte_latency_stats_flow_type_fn user_cb); + +/** + * Calculates the latency and jitter values internally, exposing the updated + * values via *rte_latencystats_get* or the rte_metrics API. + * @return: + * 0 : on Success + * < 0 : Error in updating values. + */ +int32_t rte_latencystats_update(void); + +/** + * Removes registered Rx/Tx callbacks for each active port, queue. + * + * @return + * -1: On error + * 0: On success + */ +int rte_latencystats_uninit(void); + +/** + * Retrieve names of latency statistics + * + * @param names + * Block of memory to insert names into. Must be at least size in capacity. + * If set to NULL, function returns required capacity. + * @param size + * Capacity of latency stats names (number of names). + * @return + * - positive value lower or equal to size: success. The return value + * is the number of entries filled in the stats table. + * - positive value higher than size: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + */ +int rte_latencystats_get_names(struct rte_metric_name *names, + uint16_t size); + +/** + * Retrieve latency statistics. + * + * @param values + * A pointer to a table of structure of type *rte_metric_value* + * to be filled with latency statistics ids and values. + * This parameter can be set to NULL if size is 0. + * @param size + * The size of the stats table, which should be large enough to store + * all the latency stats. + * @return + * - positive value lower or equal to size: success. The return value + * is the number of entries filled in the stats table. + * - positive value higher than size: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * -ENOMEM: On failure. + */ +int rte_latencystats_get(struct rte_metric_value *values, + uint16_t size); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LATENCYSTATS_H_ */ diff --git a/lib/librte_latencystats/rte_latencystats_version.map b/lib/librte_latencystats/rte_latencystats_version.map new file mode 100644 index 00000000..ac8403e8 --- /dev/null +++ b/lib/librte_latencystats/rte_latencystats_version.map @@ -0,0 +1,11 @@ +DPDK_17.05 { + global: + + rte_latencystats_get; + rte_latencystats_get_names; + rte_latencystats_init; + rte_latencystats_uninit; + rte_latencystats_update; + + local: *; +}; diff --git a/lib/librte_lpm/Makefile b/lib/librte_lpm/Makefile index 3dc549dc..32be46b3 100644 --- a/lib/librte_lpm/Makefile +++ b/lib/librte_lpm/Makefile @@ -55,7 +55,4 @@ else ifeq ($(CONFIG_RTE_ARCH_PPC_64),y) SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_altivec.h endif -# this lib needs eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_LPM) += lib/librte_eal - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c index 32fdba01..9cc7be77 100644 --- a/lib/librte_lpm/rte_lpm6.c +++ b/lib/librte_lpm/rte_lpm6.c @@ -97,7 +97,7 @@ struct rte_lpm6_tbl_entry { /** Rules tbl entry structure. 
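The get/get_names contract above (return the required count when the supplied table is too small) allows the usual two-call size probe. A sketch:

```c
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <rte_latencystats.h>

/* Probe for the required count, then fetch names and values. */
static int
dump_latency_stats(void)
{
	struct rte_metric_name *names = NULL;
	struct rte_metric_value *values = NULL;
	int i, n;

	n = rte_latencystats_get_names(NULL, 0);
	if (n <= 0)
		return -1;

	names = calloc(n, sizeof(*names));
	values = calloc(n, sizeof(*values));
	if (names == NULL || values == NULL)
		goto fail;
	if (rte_latencystats_get_names(names, n) != n ||
			rte_latencystats_get(values, n) != n)
		goto fail;

	for (i = 0; i < n; i++)
		printf("%s: %" PRIu64 "\n",
			names[values[i].key].name, values[i].value);
	free(names);
	free(values);
	return 0;
fail:
	free(names);
	free(values);
	return -1;
}
```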
*/ struct rte_lpm6_rule { uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */ - uint8_t next_hop; /**< Rule next hop. */ + uint32_t next_hop; /**< Rule next hop. */ uint8_t depth; /**< Rule depth. */ }; @@ -297,7 +297,7 @@ rte_lpm6_free(struct rte_lpm6 *lpm) * the nexthop if so. Otherwise it adds a new rule if enough space is available. */ static inline int32_t -rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t next_hop, uint8_t depth) +rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint32_t next_hop, uint8_t depth) { uint32_t rule_index; @@ -340,7 +340,7 @@ rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t next_hop, uint8_t depth) */ static void expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t depth, - uint8_t next_hop) + uint32_t next_hop) { uint32_t tbl8_group_end, tbl8_gindex_next, j; @@ -377,7 +377,7 @@ expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t depth, static inline int add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, struct rte_lpm6_tbl_entry **tbl_next, uint8_t *ip, uint8_t bytes, - uint8_t first_byte, uint8_t depth, uint8_t next_hop) + uint8_t first_byte, uint8_t depth, uint32_t next_hop) { uint32_t tbl_index, tbl_range, tbl8_group_start, tbl8_group_end, i; int32_t tbl8_gindex; @@ -507,9 +507,17 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, * Add a route */ int -rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, +rte_lpm6_add_v20(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, uint8_t next_hop) { + return rte_lpm6_add_v1705(lpm, ip, depth, next_hop); +} +VERSION_SYMBOL(rte_lpm6_add, _v20, 2.0); + +int +rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + uint32_t next_hop) +{ struct rte_lpm6_tbl_entry *tbl; struct rte_lpm6_tbl_entry *tbl_next; int32_t rule_index; @@ -560,6 +568,10 @@ rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, return status; } +BIND_DEFAULT_SYMBOL(rte_lpm6_add, _v1705, 17.05); +MAP_STATIC_SYMBOL(int rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, + uint8_t depth, uint32_t next_hop), + rte_lpm6_add_v1705); /* * Takes a pointer to a table entry and inspect one level. @@ -569,7 +581,7 @@ rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, static inline int lookup_step(const struct rte_lpm6 *lpm, const struct rte_lpm6_tbl_entry *tbl, const struct rte_lpm6_tbl_entry **tbl_next, uint8_t *ip, - uint8_t first_byte, uint8_t *next_hop) + uint8_t first_byte, uint32_t *next_hop) { uint32_t tbl8_index, tbl_entry; @@ -589,7 +601,7 @@ lookup_step(const struct rte_lpm6 *lpm, const struct rte_lpm6_tbl_entry *tbl, return 1; } else { /* If not extended then we can have a match. */ - *next_hop = (uint8_t)tbl_entry; + *next_hop = ((uint32_t)tbl_entry & RTE_LPM6_TBL8_BITMASK); return (tbl_entry & RTE_LPM6_LOOKUP_SUCCESS) ? 0 : -ENOENT; } } @@ -598,7 +610,26 @@ lookup_step(const struct rte_lpm6 *lpm, const struct rte_lpm6_tbl_entry *tbl, * Looks up an IP */ int -rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop) +rte_lpm6_lookup_v20(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop) +{ + uint32_t next_hop32 = 0; + int32_t status; + + /* DEBUG: Check user input arguments. 
*/ + if (next_hop == NULL) + return -EINVAL; + + status = rte_lpm6_lookup_v1705(lpm, ip, &next_hop32); + if (status == 0) + *next_hop = (uint8_t)next_hop32; + + return status; +} +VERSION_SYMBOL(rte_lpm6_lookup, _v20, 2.0); + +int +rte_lpm6_lookup_v1705(const struct rte_lpm6 *lpm, uint8_t *ip, + uint32_t *next_hop) { const struct rte_lpm6_tbl_entry *tbl; const struct rte_lpm6_tbl_entry *tbl_next = NULL; @@ -625,20 +656,23 @@ rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop) return status; } +BIND_DEFAULT_SYMBOL(rte_lpm6_lookup, _v1705, 17.05); +MAP_STATIC_SYMBOL(int rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, + uint32_t *next_hop), rte_lpm6_lookup_v1705); /* * Looks up a group of IP addresses */ int -rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm, +rte_lpm6_lookup_bulk_func_v20(const struct rte_lpm6 *lpm, uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], int16_t * next_hops, unsigned n) { unsigned i; const struct rte_lpm6_tbl_entry *tbl; const struct rte_lpm6_tbl_entry *tbl_next = NULL; - uint32_t tbl24_index; - uint8_t first_byte, next_hop; + uint32_t tbl24_index, next_hop; + uint8_t first_byte; int status; /* DEBUG: Check user input arguments. */ @@ -664,11 +698,59 @@ rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm, if (status < 0) next_hops[i] = -1; else - next_hops[i] = next_hop; + next_hops[i] = (int16_t)next_hop; + } + + return 0; +} +VERSION_SYMBOL(rte_lpm6_lookup_bulk_func, _v20, 2.0); + +int +rte_lpm6_lookup_bulk_func_v1705(const struct rte_lpm6 *lpm, + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], + int32_t *next_hops, unsigned int n) +{ + unsigned int i; + const struct rte_lpm6_tbl_entry *tbl; + const struct rte_lpm6_tbl_entry *tbl_next = NULL; + uint32_t tbl24_index, next_hop; + uint8_t first_byte; + int status; + + /* DEBUG: Check user input arguments. */ + if ((lpm == NULL) || (ips == NULL) || (next_hops == NULL)) + return -EINVAL; + + for (i = 0; i < n; i++) { + first_byte = LOOKUP_FIRST_BYTE; + tbl24_index = (ips[i][0] << BYTES2_SIZE) | + (ips[i][1] << BYTE_SIZE) | ips[i][2]; + + /* Calculate pointer to the first entry to be inspected */ + tbl = &lpm->tbl24[tbl24_index]; + + do { + /* Continue inspecting following levels + * until success or failure + */ + status = lookup_step(lpm, tbl, &tbl_next, ips[i], + first_byte++, &next_hop); + tbl = tbl_next; + } while (status == 1); + + if (status < 0) + next_hops[i] = -1; + else + next_hops[i] = (int32_t)next_hop; } return 0; } +BIND_DEFAULT_SYMBOL(rte_lpm6_lookup_bulk_func, _v1705, 17.05); +MAP_STATIC_SYMBOL(int rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm, + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], + int32_t *next_hops, unsigned int n), + rte_lpm6_lookup_bulk_func_v1705); /* * Finds a rule in rule table. @@ -698,8 +780,28 @@ rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth) * Look for a rule in the high-level rules table */ int -rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, -uint8_t *next_hop) +rte_lpm6_is_rule_present_v20(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + uint8_t *next_hop) +{ + uint32_t next_hop32 = 0; + int32_t status; + + /* DEBUG: Check user input arguments. 
*/ + if (next_hop == NULL) + return -EINVAL; + + status = rte_lpm6_is_rule_present_v1705(lpm, ip, depth, &next_hop32); + if (status > 0) + *next_hop = (uint8_t)next_hop32; + + return status; + +} +VERSION_SYMBOL(rte_lpm6_is_rule_present, _v20, 2.0); + +int +rte_lpm6_is_rule_present_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + uint32_t *next_hop) { uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE]; int32_t rule_index; @@ -724,6 +826,10 @@ uint8_t *next_hop) /* If rule is not found return 0. */ return 0; } +BIND_DEFAULT_SYMBOL(rte_lpm6_is_rule_present, _v1705, 17.05); +MAP_STATIC_SYMBOL(int rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, + uint8_t *ip, uint8_t depth, uint32_t *next_hop), + rte_lpm6_is_rule_present_v1705); /* * Delete a rule from the rule table. diff --git a/lib/librte_lpm/rte_lpm6.h b/lib/librte_lpm/rte_lpm6.h index 13d027f9..3a3342da 100644 --- a/lib/librte_lpm/rte_lpm6.h +++ b/lib/librte_lpm/rte_lpm6.h @@ -39,6 +39,7 @@ */ #include <stdint.h> +#include <rte_compat.h> #ifdef __cplusplus extern "C" { @@ -123,7 +124,13 @@ rte_lpm6_free(struct rte_lpm6 *lpm); */ int rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + uint32_t next_hop); +int +rte_lpm6_add_v20(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, uint8_t next_hop); +int +rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + uint32_t next_hop); /** * Check if a rule is present in the LPM table, @@ -142,7 +149,13 @@ rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, */ int rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, -uint8_t *next_hop); + uint32_t *next_hop); +int +rte_lpm6_is_rule_present_v20(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + uint8_t *next_hop); +int +rte_lpm6_is_rule_present_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + uint32_t *next_hop); /** * Delete a rule from the LPM table. @@ -199,7 +212,12 @@ rte_lpm6_delete_all(struct rte_lpm6 *lpm); * -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit */ int -rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop); +rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint32_t *next_hop); +int +rte_lpm6_lookup_v20(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop); +int +rte_lpm6_lookup_v1705(const struct rte_lpm6 *lpm, uint8_t *ip, + uint32_t *next_hop); /** * Lookup multiple IP addresses in an LPM table. 
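The _v20/_v1705 pairs above follow the standard rte_compat.h recipe: keep a thin wrapper at the old ABI, bind the new symbol as the default, and map the plain name for static builds. Distilled onto a hypothetical function foo() whose next-hop width grew in 17.05:

```c
#include <stdint.h>
#include <rte_common.h>
#include <rte_compat.h>

int foo_v20(void *ctx, uint8_t *out);
int foo_v1705(void *ctx, uint32_t *out);

/* New ABI: becomes the default binding for newly linked binaries. */
int
foo_v1705(void *ctx __rte_unused, uint32_t *out)
{
	*out = 42;	/* illustrative result */
	return 0;
}
BIND_DEFAULT_SYMBOL(foo, _v1705, 17.05);
MAP_STATIC_SYMBOL(int foo(void *ctx, uint32_t *out), foo_v1705);

/* Old ABI: a thin wrapper narrowing the result, the same shape as
 * rte_lpm6_lookup_v20() above. */
int
foo_v20(void *ctx, uint8_t *out)
{
	uint32_t out32 = 0;
	int ret = foo_v1705(ctx, &out32);

	if (ret == 0)
		*out = (uint8_t)out32;
	return ret;
}
VERSION_SYMBOL(foo, _v20, 2.0);
```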
@@ -220,7 +238,15 @@ rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop); int rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm, uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], - int16_t * next_hops, unsigned n); + int32_t *next_hops, unsigned int n); +int +rte_lpm6_lookup_bulk_func_v20(const struct rte_lpm6 *lpm, + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], + int16_t *next_hops, unsigned int n); +int +rte_lpm6_lookup_bulk_func_v1705(const struct rte_lpm6 *lpm, + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], + int32_t *next_hops, unsigned int n); #ifdef __cplusplus } diff --git a/lib/librte_lpm/rte_lpm_version.map b/lib/librte_lpm/rte_lpm_version.map index 239b371e..90beac85 100644 --- a/lib/librte_lpm/rte_lpm_version.map +++ b/lib/librte_lpm/rte_lpm_version.map @@ -34,3 +34,13 @@ DPDK_16.04 { rte_lpm_delete_all; } DPDK_2.0; + +DPDK_17.05 { + global: + + rte_lpm6_add; + rte_lpm6_is_rule_present; + rte_lpm6_lookup; + rte_lpm6_lookup_bulk_func; + +} DPDK_16.04; diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile index 4ae2e8c8..54827305 100644 --- a/lib/librte_mbuf/Makefile +++ b/lib/librte_mbuf/Makefile @@ -38,7 +38,7 @@ CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 EXPORT_MAP := rte_mbuf_version.map -LIBABIVER := 2 +LIBABIVER := 3 # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c @@ -46,7 +46,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h rte_mbuf_ptype.h -# this lib needs eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_MBUF) += lib/librte_eal lib/librte_mempool - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c index 63f43c89..0e3e36a5 100644 --- a/lib/librte_mbuf/rte_mbuf.c +++ b/lib/librte_mbuf/rte_mbuf.c @@ -62,7 +62,7 @@ /* * ctrlmbuf constructor, given as a callback function to - * rte_mempool_create() + * rte_mempool_obj_iter() or rte_mempool_create() */ void rte_ctrlmbuf_init(struct rte_mempool *mp, @@ -77,7 +77,8 @@ rte_ctrlmbuf_init(struct rte_mempool *mp, /* * pktmbuf pool constructor, given as a callback function to - * rte_mempool_create() + * rte_mempool_create(), or called directly if using + * rte_mempool_create_empty()/rte_mempool_populate() */ void rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg) @@ -110,7 +111,7 @@ rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg) /* * pktmbuf constructor, given as a callback function to - * rte_mempool_create(). + * rte_mempool_obj_iter() or rte_mempool_create(). * Set the fields of a packet mbuf to their default values. 
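The comment changes above reflect that a pktmbuf pool can now be assembled from the mempool primitives directly. A hedged sketch of that path, roughly what rte_pktmbuf_pool_create() does internally; the cache size and use of the "ring_mp_mc" ops are illustrative defaults:

```c
#include <rte_mbuf.h>
#include <rte_mempool.h>

static struct rte_mempool *
sketch_pktmbuf_pool(const char *name, unsigned int n, int socket_id)
{
	unsigned int elt_size = sizeof(struct rte_mbuf) +
		RTE_MBUF_DEFAULT_BUF_SIZE;
	struct rte_mempool *mp;

	mp = rte_mempool_create_empty(name, n, elt_size, 256,
			sizeof(struct rte_pktmbuf_pool_private),
			socket_id, 0);
	if (mp == NULL)
		return NULL;
	if (rte_mempool_set_ops_byname(mp, "ring_mp_mc", NULL) != 0)
		goto fail;
	rte_pktmbuf_pool_init(mp, NULL);	/* default room/priv sizes */
	if (rte_mempool_populate_default(mp) < 0)
		goto fail;
	/* Per-object init that rte_mempool_create() would have run. */
	rte_mempool_obj_iter(mp, rte_pktmbuf_init, NULL);
	return mp;
fail:
	rte_mempool_free(mp);
	return NULL;
}
```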
*/ void @@ -145,6 +146,8 @@ rte_pktmbuf_init(struct rte_mempool *mp, m->pool = mp; m->nb_segs = 1; m->port = 0xff; + rte_mbuf_refcnt_set(m, 1); + m->next = NULL; } /* helper to create a mbuf pool */ @@ -320,6 +323,7 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask) case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST"; case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED"; case PKT_RX_LRO: return "PKT_RX_LRO"; + case PKT_RX_TIMESTAMP: return "PKT_RX_TIMESTAMP"; default: return NULL; } } @@ -354,6 +358,7 @@ rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) { PKT_RX_IEEE1588_TMST, PKT_RX_IEEE1588_TMST, NULL }, { PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL }, { PKT_RX_LRO, PKT_RX_LRO, NULL }, + { PKT_RX_TIMESTAMP, PKT_RX_TIMESTAMP, NULL }, }; const char *name; unsigned int i; @@ -404,6 +409,7 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask) case PKT_TX_TUNNEL_GRE: return "PKT_TX_TUNNEL_GRE"; case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP"; case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE"; + case PKT_TX_MACSEC: return "PKT_TX_MACSEC"; default: return NULL; } } @@ -434,6 +440,7 @@ rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) "PKT_TX_TUNNEL_NONE" }, { PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK, "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_MACSEC, PKT_TX_MACSEC, NULL }, }; const char *name; unsigned int i; diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index ead7c6ea..1cb03109 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -44,6 +44,13 @@ * buffers. The message buffers are stored in a mempool, using the * RTE mempool library. * + * The preferred way to create a mbuf pool is to use + * rte_pktmbuf_pool_create(). However, in some situations, an + * application may want to have more control (ex: populate the pool with + * specific memory), in this case it is possible to use functions from + * rte_mempool. See how rte_pktmbuf_pool_create() is implemented for + * details. + * * This library provides an API to allocate/free packet mbufs, which are * used to carry network packets. * @@ -177,11 +184,22 @@ extern "C" { */ #define PKT_RX_LRO (1ULL << 16) +/** + * Indicate that the timestamp field in the mbuf is valid. + */ +#define PKT_RX_TIMESTAMP (1ULL << 17) + /* add new RX flags here */ /* add new TX flags here */ /** + * Offload the MACsec. This flag must be set by the application to enable + * this offload feature for a packet to be transmitted. + */ +#define PKT_TX_MACSEC (1ULL << 44) + +/** * Bits 45:48 used for the tunnel type. * When doing Tx offload like TSO or checksum, the HW needs to configure the * tunnel type into the HW descriptors. @@ -283,6 +301,21 @@ extern "C" { */ #define PKT_TX_OUTER_IPV6 (1ULL << 60) +/** + * Bitmask of all supported packet Tx offload features flags, + * which can be set for packet. + */ +#define PKT_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK | \ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG | \ + PKT_TX_IEEE1588_TMST | \ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT | \ + PKT_TX_TUNNEL_MASK | \ + PKT_TX_MACSEC) + #define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */ #define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */ @@ -370,16 +403,21 @@ struct rte_mbuf { MARKER cacheline0; void *buf_addr; /**< Virtual address of segment buffer. */ - phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */ - - uint16_t buf_len; /**< Length of segment buffer. */ + /** + * Physical address of segment buffer. 
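Consumers of the new timestamp field must gate on PKT_RX_TIMESTAMP, since the field is undefined otherwise. For example:

```c
#include <rte_mbuf.h>

/* The timestamp is only meaningful when the PMD set the flag. */
static uint64_t
mbuf_timestamp_or_zero(const struct rte_mbuf *m)
{
	if (m->ol_flags & PKT_RX_TIMESTAMP)
		return m->timestamp;
	return 0;	/* no valid timestamp on this packet */
}
```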
+ * Force alignment to 8-bytes, so as to ensure we have the exact + * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes + * working on vector drivers easier. + */ + phys_addr_t buf_physaddr __rte_aligned(sizeof(phys_addr_t)); - /* next 6 bytes are initialised on RX descriptor rearm */ - MARKER8 rearm_data; + /* next 8 bytes are initialised on RX descriptor rearm */ + MARKER64 rearm_data; uint16_t data_off; /** - * 16-bit Reference counter. + * Reference counter. Its size should at least equal to the size + * of port field (16 bits), to support zero-copy broadcast. * It should only be accessed using the following functions: * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and * rte_mbuf_refcnt_set(). The functionality of these functions (atomic, @@ -391,8 +429,10 @@ struct rte_mbuf { rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */ uint16_t refcnt; /**< Non-atomically accessed refcnt */ }; - uint8_t nb_segs; /**< Number of segments. */ - uint8_t port; /**< Input port. */ + uint16_t nb_segs; /**< Number of segments. */ + + /** Input port (16 bits to support more than 256 virtual ports). */ + uint16_t port; uint64_t ol_flags; /**< Offload features. */ @@ -448,11 +488,16 @@ struct rte_mbuf { uint32_t usr; /**< User defined tags. See rte_distributor_process() */ } hash; /**< hash information */ - uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */ - /** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */ uint16_t vlan_tci_outer; + uint16_t buf_len; /**< Length of segment buffer. */ + + /** Valid if PKT_RX_TIMESTAMP is set. The unit and time reference + * are not normalized but are always the same for a given port. + */ + uint64_t timestamp; + /* second cache line - fields only used in slow path or on TX */ MARKER cacheline1 __rte_cache_min_aligned; @@ -493,6 +538,10 @@ struct rte_mbuf { /** Timesync flags for use with IEEE1588. */ uint16_t timesync; + + /** Sequence number. See also rte_reorder_insert(). */ + uint32_t seqn; + } __rte_cache_aligned; /** @@ -739,6 +788,13 @@ rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value) void rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header); +#define MBUF_RAW_ALLOC_CHECK(m) do { \ + RTE_ASSERT(rte_mbuf_refcnt_read(m) == 1); \ + RTE_ASSERT((m)->next == NULL); \ + RTE_ASSERT((m)->nb_segs == 1); \ + __rte_mbuf_sanity_check(m, 0); \ +} while (0) + /** * Allocate an unitialized mbuf from mempool *mp*. * @@ -747,6 +803,11 @@ rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header); * initializing all the required fields. See rte_pktmbuf_reset(). * For standard needs, prefer rte_pktmbuf_alloc(). * + * The caller can expect that the following fields of the mbuf structure + * are initialized: buf_addr, buf_physaddr, buf_len, refcnt=1, nb_segs=1, + * next=NULL, pool, priv_size. The other fields must be initialized + * by the caller. + * * @param mp * The mempool from which mbuf is allocated. * @return @@ -761,28 +822,43 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp) if (rte_mempool_get(mp, &mb) < 0) return NULL; m = (struct rte_mbuf *)mb; - RTE_ASSERT(rte_mbuf_refcnt_read(m) == 0); - rte_mbuf_refcnt_set(m, 1); - __rte_mbuf_sanity_check(m, 0); - + MBUF_RAW_ALLOC_CHECK(m); return m; } /** - * @internal Put mbuf back into its original mempool. - * The use of that function is reserved for RTE internal needs. - * Please use rte_pktmbuf_free(). + * Put mbuf back into its original mempool. 
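rte_mbuf_raw_alloc() now guarantees the listed fields are pre-initialised in the pool, which lets PMD rearm paths skip per-mbuf resets. A sketch under that contract; desc_addr[] stands in for a hypothetical hardware descriptor ring:

```c
#include <stdint.h>
#include <rte_mbuf.h>

/* Rx-ring rearm relying on the new contract: mbufs leave the pool
 * with refcnt=1, next=NULL and nb_segs=1, so only the data offset
 * needs setting before programming the descriptor. */
static int
rearm_rx_ring(struct rte_mempool *mp, uint64_t desc_addr[], unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		struct rte_mbuf *m = rte_mbuf_raw_alloc(mp);

		if (m == NULL)
			return -1;	/* pool exhausted */
		m->data_off = RTE_PKTMBUF_HEADROOM;
		desc_addr[i] = m->buf_physaddr + m->data_off;
	}
	return 0;
}
```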
+ * + * The caller must ensure that the mbuf is direct and properly + * reinitialized (refcnt=1, next=NULL, nb_segs=1), as done by + * rte_pktmbuf_prefree_seg(). + * + * This function should be used with care, when optimization is + * required. For standard needs, prefer rte_pktmbuf_free() or + * rte_pktmbuf_free_seg(). * * @param m * The mbuf to be freed. */ static inline void __attribute__((always_inline)) -__rte_mbuf_raw_free(struct rte_mbuf *m) +rte_mbuf_raw_free(struct rte_mbuf *m) { - RTE_ASSERT(rte_mbuf_refcnt_read(m) == 0); + RTE_ASSERT(RTE_MBUF_DIRECT(m)); + RTE_ASSERT(rte_mbuf_refcnt_read(m) == 1); + RTE_ASSERT(m->next == NULL); + RTE_ASSERT(m->nb_segs == 1); + __rte_mbuf_sanity_check(m, 0); rte_mempool_put(m->pool, m); } +/* compat with older versions */ +__rte_deprecated +static inline void +__rte_mbuf_raw_free(struct rte_mbuf *m) +{ + rte_mbuf_raw_free(m); +} + /* Operations on ctrl mbuf */ /** @@ -791,14 +867,14 @@ __rte_mbuf_raw_free(struct rte_mbuf *m) * This function initializes some fields in an mbuf structure that are * not modified by the user once created (mbuf type, origin pool, buffer * start address, and so on). This function is given as a callback function - * to rte_mempool_create() at pool creation time. + * to rte_mempool_obj_iter() or rte_mempool_create() at pool creation time. * * @param mp * The mempool from which the mbuf is allocated. * @param opaque_arg * A pointer that can be used by the user to retrieve useful information - * for mbuf initialization. This pointer comes from the ``init_arg`` - * parameter of rte_mempool_create(). + * for mbuf initialization. This pointer is the opaque argument passed to + * rte_mempool_obj_iter() or rte_mempool_create(). * @param m * The mbuf to initialize. * @param i @@ -872,14 +948,14 @@ rte_is_ctrlmbuf(struct rte_mbuf *m) * This function initializes some fields in the mbuf structure that are * not modified by the user once created (origin pool, buffer start * address, and so on). This function is given as a callback function to - * rte_mempool_create() at pool creation time. + * rte_mempool_obj_iter() or rte_mempool_create() at pool creation time. * * @param mp * The mempool from which mbufs originate. * @param opaque_arg * A pointer that can be used by the user to retrieve useful information - * for mbuf initialization. This pointer comes from the ``init_arg`` - * parameter of rte_mempool_create(). + * for mbuf initialization. This pointer is the opaque argument passed to + * rte_mempool_obj_iter() or rte_mempool_create(). * @param m * The mbuf to initialize. * @param i @@ -894,7 +970,8 @@ void rte_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, * * This function initializes the mempool private data in the case of a * pktmbuf pool. This private data is needed by the driver. The - * function is given as a callback function to rte_mempool_create() at + * function must be called on the mempool before it is used, or it + * can be given as a callback function to rte_mempool_create() at * pool creation. It can be extended by the user, for example, to * provide another packet size. * @@ -902,8 +979,8 @@ void rte_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, * The mempool from which mbufs originate. * @param opaque_arg * A pointer that can be used by the user to retrieve useful information - * for mbuf initialization. This pointer comes from the ``init_arg`` - * parameter of rte_mempool_create(). + * for mbuf initialization. This pointer is the opaque argument passed to + * rte_mempool_create(). 
*/ void rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg); @@ -911,8 +988,7 @@ void rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg); * Create a mbuf pool. * * This function creates and initializes a packet mbuf pool. It is - * a wrapper to rte_mempool_create() with the proper packet constructor - * and mempool constructor. + * a wrapper around the rte_mempool functions. * * @param name * The name of the mbuf pool. @@ -1079,25 +1155,25 @@ static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool, switch (count % 4) { case 0: while (idx != count) { - RTE_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0); - rte_mbuf_refcnt_set(mbufs[idx], 1); + MBUF_RAW_ALLOC_CHECK(mbufs[idx]); rte_pktmbuf_reset(mbufs[idx]); idx++; + /* fall-through */ case 3: - RTE_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0); - rte_mbuf_refcnt_set(mbufs[idx], 1); + MBUF_RAW_ALLOC_CHECK(mbufs[idx]); rte_pktmbuf_reset(mbufs[idx]); idx++; + /* fall-through */ case 2: - RTE_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0); - rte_mbuf_refcnt_set(mbufs[idx], 1); + MBUF_RAW_ALLOC_CHECK(mbufs[idx]); rte_pktmbuf_reset(mbufs[idx]); idx++; + /* fall-through */ case 1: - RTE_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0); - rte_mbuf_refcnt_set(mbufs[idx], 1); + MBUF_RAW_ALLOC_CHECK(mbufs[idx]); rte_pktmbuf_reset(mbufs[idx]); idx++; + /* fall-through */ } } return 0; @@ -1139,7 +1215,6 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m) mi->buf_addr = m->buf_addr; mi->buf_len = m->buf_len; - mi->next = m->next; mi->data_off = m->data_off; mi->data_len = m->data_len; mi->port = m->port; @@ -1153,6 +1228,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m) mi->nb_segs = 1; mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF; mi->packet_type = m->packet_type; + mi->timestamp = m->timestamp; __rte_mbuf_sanity_check(mi, 1); __rte_mbuf_sanity_check(m, 0); @@ -1189,24 +1265,71 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m) m->data_len = 0; m->ol_flags = 0; - if (rte_mbuf_refcnt_update(md, -1) == 0) - __rte_mbuf_raw_free(md); + if (rte_mbuf_refcnt_update(md, -1) == 0) { + md->next = NULL; + md->nb_segs = 1; + rte_mbuf_refcnt_set(md, 1); + rte_mbuf_raw_free(md); + } } -static inline struct rte_mbuf* __attribute__((always_inline)) -__rte_pktmbuf_prefree_seg(struct rte_mbuf *m) +/** + * Decrease reference counter and unlink a mbuf segment + * + * This function does the same as a free, except that it does not + * return the segment to its pool. + * It decreases the reference counter and, when the counter reaches 0, + * an indirect mbuf is also detached from its parent. + * + * @param m + * The mbuf to be unlinked + * @return + * - (m) if it is the last reference. It can be recycled or freed. + * - (NULL) if the mbuf still has remaining references on it. + */ +__attribute__((always_inline)) +static inline struct rte_mbuf * +rte_pktmbuf_prefree_seg(struct rte_mbuf *m) { __rte_mbuf_sanity_check(m, 0); - if (likely(rte_mbuf_refcnt_update(m, -1) == 0)) { - /* if this is an indirect mbuf, it is detached.
*/ + if (likely(rte_mbuf_refcnt_read(m) == 1)) { + if (RTE_MBUF_INDIRECT(m)) rte_pktmbuf_detach(m); + + if (m->next != NULL) { + m->next = NULL; + m->nb_segs = 1; + } + + return m; + + } else if (rte_atomic16_add_return(&m->refcnt_atomic, -1) == 0) { + + if (RTE_MBUF_INDIRECT(m)) + rte_pktmbuf_detach(m); + + if (m->next != NULL) { + m->next = NULL; + m->nb_segs = 1; + } + rte_mbuf_refcnt_set(m, 1); + return m; } return NULL; } +/* deprecated, replaced by rte_pktmbuf_prefree_seg() */ +__rte_deprecated +static inline struct rte_mbuf * +__rte_pktmbuf_prefree_seg(struct rte_mbuf *m) +{ + return rte_pktmbuf_prefree_seg(m); +} + /** * Free a segment of a packet mbuf into its original mempool. * * @@ -1219,10 +1342,9 @@ __rte_pktmbuf_prefree_seg(struct rte_mbuf *m) static inline void __attribute__((always_inline)) rte_pktmbuf_free_seg(struct rte_mbuf *m) { - if (likely(NULL != (m = __rte_pktmbuf_prefree_seg(m)))) { - m->next = NULL; - __rte_mbuf_raw_free(m); - } + m = rte_pktmbuf_prefree_seg(m); + if (likely(m != NULL)) + rte_mbuf_raw_free(m); } /** @@ -1647,6 +1769,108 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail } /** + * Validate general requirements for Tx offload in mbuf. + * + * This function checks correctness and completeness of Tx offload settings. + * + * @param m + * The packet mbuf to be validated. + * @return + * 0 if packet is valid, a negative value (-EINVAL or -ENOTSUP) otherwise + */ +static inline int +rte_validate_tx_offload(const struct rte_mbuf *m) +{ + uint64_t ol_flags = m->ol_flags; + uint64_t inner_l3_offset = m->l2_len; + + /* Does packet set any of available offloads? */ + if (!(ol_flags & PKT_TX_OFFLOAD_MASK)) + return 0; + + if (ol_flags & PKT_TX_OUTER_IP_CKSUM) + inner_l3_offset += m->outer_l2_len + m->outer_l3_len; + + /* Headers are fragmented */ + if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len) + return -ENOTSUP; + + /* IP checksum can be counted only for IPv4 packet */ + if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6)) + return -EINVAL; + + /* IP type not set when required */ + if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG)) + if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6))) + return -EINVAL; + + /* Check requirements for TSO packet */ + if (ol_flags & PKT_TX_TCP_SEG) + if ((m->tso_segsz == 0) || + ((ol_flags & PKT_TX_IPV4) && + !(ol_flags & PKT_TX_IP_CKSUM))) + return -EINVAL; + + /* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */ + if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) && + !(ol_flags & PKT_TX_OUTER_IPV4)) + return -EINVAL; + + return 0; +} + +/** + * Linearize data in mbuf. + * + * This function moves the mbuf data into the first segment if there is enough + * tailroom. The subsequent segments are unchained and freed.
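+ *
+ * A hedged usage sketch (the drop-on-failure policy is an assumption,
+ * not part of the API):
+ *
+ * @code
+ * if (rte_pktmbuf_linearize(m) < 0)
+ *     rte_pktmbuf_free(m);
+ * @endcode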
+ * + * @param mbuf + * mbuf to linearize + * @return + * - 0, on success + * - -1, on error + */ +static inline int +rte_pktmbuf_linearize(struct rte_mbuf *mbuf) +{ + int seg_len, copy_len; + struct rte_mbuf *m; + struct rte_mbuf *m_next; + char *buffer; + + if (rte_pktmbuf_is_contiguous(mbuf)) + return 0; + + /* Extend first segment to the total packet length */ + copy_len = rte_pktmbuf_pkt_len(mbuf) - rte_pktmbuf_data_len(mbuf); + + if (unlikely(copy_len > rte_pktmbuf_tailroom(mbuf))) + return -1; + + buffer = rte_pktmbuf_mtod_offset(mbuf, char *, mbuf->data_len); + mbuf->data_len = (uint16_t)(mbuf->pkt_len); + + /* Append data from next segments to the first one */ + m = mbuf->next; + while (m != NULL) { + m_next = m->next; + + seg_len = rte_pktmbuf_data_len(m); + rte_memcpy(buffer, rte_pktmbuf_mtod(m, char *), seg_len); + buffer += seg_len; + + rte_pktmbuf_free_seg(m); + m = m_next; + } + + mbuf->next = NULL; + mbuf->nb_segs = 1; + + return 0; +} + +/** * Dump an mbuf structure to a file. * * Dump all fields for the given packet mbuf and all its associated diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h index ff6de9d1..a3269c4c 100644 --- a/lib/librte_mbuf/rte_mbuf_ptype.h +++ b/lib/librte_mbuf/rte_mbuf_ptype.h @@ -91,6 +91,9 @@ * RTE_PTYPE_INNER_L4_UDP. */ +#include <stddef.h> +#include <stdint.h> + #ifdef __cplusplus extern "C" { #endif diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile index 057a6ab4..7b5bdfee 100644 --- a/lib/librte_mempool/Makefile +++ b/lib/librte_mempool/Makefile @@ -43,11 +43,7 @@ LIBABIVER := 2 # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops.c -SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ring.c -SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_stack.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h -DEPDIRS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += lib/librte_eal lib/librte_ring - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index aa513b97..f65310f6 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -818,7 +818,6 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size, goto exit_unlock; } mp->mz = mz; - mp->socket_id = socket_id; mp->size = n; mp->flags = flags; mp->socket_id = socket_id; @@ -869,6 +868,7 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, rte_mempool_obj_cb_t *obj_init, void *obj_init_arg, int socket_id, unsigned flags) { + int ret; struct rte_mempool *mp; mp = rte_mempool_create_empty(name, n, elt_size, cache_size, @@ -881,13 +881,16 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, * set the correct index into the table of ops structs. 
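 * (For reference, the mapping implemented below is: SP_PUT and SC_GET
 * together select "ring_sp_sc"; SP_PUT alone selects "ring_sp_mc";
 * SC_GET alone selects "ring_mp_sc"; otherwise the default is
 * "ring_mp_mc".)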
*/ if ((flags & MEMPOOL_F_SP_PUT) && (flags & MEMPOOL_F_SC_GET)) - rte_mempool_set_ops_byname(mp, "ring_sp_sc", NULL); + ret = rte_mempool_set_ops_byname(mp, "ring_sp_sc", NULL); else if (flags & MEMPOOL_F_SP_PUT) - rte_mempool_set_ops_byname(mp, "ring_sp_mc", NULL); + ret = rte_mempool_set_ops_byname(mp, "ring_sp_mc", NULL); else if (flags & MEMPOOL_F_SC_GET) - rte_mempool_set_ops_byname(mp, "ring_mp_sc", NULL); + ret = rte_mempool_set_ops_byname(mp, "ring_mp_sc", NULL); else - rte_mempool_set_ops_byname(mp, "ring_mp_mc", NULL); + ret = rte_mempool_set_ops_byname(mp, "ring_mp_mc", NULL); + + if (ret) + goto fail; /* call the mempool priv initializer */ if (mp_init) @@ -998,12 +1001,6 @@ rte_mempool_in_use_count(const struct rte_mempool *mp) return mp->size - rte_mempool_avail_count(mp); } -unsigned int -rte_mempool_count(const struct rte_mempool *mp) -{ - return rte_mempool_avail_count(mp); -} - /* dump the cache status */ static unsigned rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp) @@ -1047,7 +1044,7 @@ void rte_mempool_check_cookies(const struct rte_mempool *mp, /* Force to drop the "const" attribute. This is done only when * DEBUG is enabled */ tmp = (void *) obj_table_const; - obj_table = (void **) tmp; + obj_table = tmp; while (n--) { obj = obj_table[n]; diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index 956ce04b..48bc8ea3 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -51,13 +51,15 @@ * meta-data in the object data and retrieve them when allocating a * new object. * - * Note: the mempool implementation is not preemptable. A lcore must - * not be interrupted by another task that uses the same mempool - * (because it uses a ring which is not preemptable). Also, mempool - * functions must not be used outside the DPDK environment: for - * example, in linuxapp environment, a thread that is not created by - * the EAL must not use mempools. This is due to the per-lcore cache - * that won't work as rte_lcore_id() will not return a correct value. + * Note: the mempool implementation is not preemptible. An lcore must not be + * interrupted by another task that uses the same mempool (because it uses a + * ring which is not preemptible). Also, usual mempool functions like + * rte_mempool_get() or rte_mempool_put() are designed to be called from an EAL + * thread due to the internal per-lcore cache. Due to the lack of caching, + * rte_mempool_get() or rte_mempool_put() performance will suffer when called + * by non-EAL threads. Instead, non-EAL threads should call + * rte_mempool_generic_get() or rte_mempool_generic_put() with a user cache + * created with rte_mempool_cache_create(). */ #include <stdio.h> @@ -357,7 +359,7 @@ void rte_mempool_check_cookies(const struct rte_mempool *mp, * Prototype for implementation specific data provisioning function. * * The function should provide the implementation specific memory for - * for use by the other mempool ops functions in a given mempool ops struct. + * use by the other mempool ops functions in a given mempool ops struct. * E.g. the default ops provides an instance of the rte_ring for this purpose. * it will most likely point to a different type of data structure, and * will be transparent to the application programmer. @@ -551,7 +553,7 @@ int rte_mempool_register_ops(const struct rte_mempool_ops *ops); /** * Macro to statically register the ops of a mempool handler. 
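 *
 * A registration sketch (the handler name and callbacks are
 * illustrative assumptions; compare the ring-based ops removed
 * further below):
 *
 * @code
 * static const struct rte_mempool_ops my_ops = {
 *     .name = "my_handler",
 *     .alloc = my_alloc,
 *     .free = my_free,
 *     .enqueue = my_enqueue,
 *     .dequeue = my_dequeue,
 *     .get_count = my_get_count,
 * };
 * MEMPOOL_REGISTER_OPS(my_ops);
 * @endcode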
* Note that the rte_mempool_register_ops fails silently here when - * more then RTE_MEMPOOL_MAX_OPS_IDX is registered. + * more than RTE_MEMPOOL_MAX_OPS_IDX is registered. */ #define MEMPOOL_REGISTER_OPS(ops) \ void mp_hdlr_init_##ops(void); \ @@ -654,7 +656,7 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *); * when using rte_mempool_get() or rte_mempool_get_bulk() is * "single-consumer". Otherwise, it is "multi-consumers". * - MEMPOOL_F_NO_PHYS_CONTIG: If set, allocated objects won't - * necessarilly be contiguous in physical memory. + * necessarily be contiguous in physical memory. * @return * The pointer to the new allocated mempool, on success. NULL on error * with rte_errno set appropriately. Possible rte_errno values include: @@ -794,7 +796,7 @@ rte_mempool_free(struct rte_mempool *mp); * Add physically contiguous memory for objects in the pool at init * * Add a virtually and physically contiguous memory chunk in the pool - * where objects can be instanciated. + * where objects can be instantiated. * * If the given physical address is unknown (paddr = RTE_BAD_PHYS_ADDR), * the chunk doesn't need to be physically contiguous (only virtually), @@ -825,7 +827,7 @@ int rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr, * Add physical memory for objects in the pool at init * * Add a virtually contiguous memory chunk in the pool where objects can - * be instanciated. The physical addresses corresponding to the virtual + * be instantiated. The physical addresses corresponding to the virtual * area are described in paddr[], pg_num, pg_shift. * * @param mp @@ -856,7 +858,7 @@ int rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr, * Add virtually contiguous memory for objects in the pool at init * * Add a virtually contiguous memory chunk in the pool where objects can - * be instanciated. + * be instantiated. * * @param mp * A pointer to the mempool structure. @@ -1038,19 +1040,15 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id) */ static inline void __attribute__((always_inline)) __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, - unsigned n, struct rte_mempool_cache *cache, int flags) + unsigned n, struct rte_mempool_cache *cache) { void **cache_objs; /* increment stat now, adding in mempool always success */ __MEMPOOL_STAT_ADD(mp, put, n); - /* No cache provided or single producer */ - if (unlikely(cache == NULL || flags & MEMPOOL_F_SP_PUT)) - goto ring_enqueue; - - /* Go straight to ring if put would overflow mem allocated for cache */ - if (unlikely(n > RTE_MEMPOOL_CACHE_MAX_SIZE)) + /* No cache provided or if put would overflow mem allocated for cache */ + if (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE)) goto ring_enqueue; cache_objs = &cache->objs[cache->len]; @@ -1104,50 +1102,11 @@ ring_enqueue: */ static inline void __attribute__((always_inline)) rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, - unsigned n, struct rte_mempool_cache *cache, int flags) + unsigned n, struct rte_mempool_cache *cache, + __rte_unused int flags) { __mempool_check_cookies(mp, obj_table, n, 0); - __mempool_generic_put(mp, obj_table, n, cache, flags); -} - -/** - * @deprecated - * Put several objects back in the mempool (multi-producers safe). - * - * @param mp - * A pointer to the mempool structure. - * @param obj_table - * A pointer to a table of void * pointers (objects). - * @param n - * The number of objects to add in the mempool from the obj_table. 
- */ -__rte_deprecated -static inline void __attribute__((always_inline)) -rte_mempool_mp_put_bulk(struct rte_mempool *mp, void * const *obj_table, - unsigned n) -{ - struct rte_mempool_cache *cache; - cache = rte_mempool_default_cache(mp, rte_lcore_id()); - rte_mempool_generic_put(mp, obj_table, n, cache, 0); -} - -/** - * @deprecated - * Put several objects back in the mempool (NOT multi-producers safe). - * - * @param mp - * A pointer to the mempool structure. - * @param obj_table - * A pointer to a table of void * pointers (objects). - * @param n - * The number of objects to add in the mempool from obj_table. - */ -__rte_deprecated -static inline void __attribute__((always_inline)) -rte_mempool_sp_put_bulk(struct rte_mempool *mp, void * const *obj_table, - unsigned n) -{ - rte_mempool_generic_put(mp, obj_table, n, NULL, MEMPOOL_F_SP_PUT); + __mempool_generic_put(mp, obj_table, n, cache); } /** @@ -1174,40 +1133,6 @@ rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table, } /** - * @deprecated - * Put one object in the mempool (multi-producers safe). - * - * @param mp - * A pointer to the mempool structure. - * @param obj - * A pointer to the object to be added. - */ -__rte_deprecated -static inline void __attribute__((always_inline)) -rte_mempool_mp_put(struct rte_mempool *mp, void *obj) -{ - struct rte_mempool_cache *cache; - cache = rte_mempool_default_cache(mp, rte_lcore_id()); - rte_mempool_generic_put(mp, &obj, 1, cache, 0); -} - -/** - * @deprecated - * Put one object back in the mempool (NOT multi-producers safe). - * - * @param mp - * A pointer to the mempool structure. - * @param obj - * A pointer to the object to be added. - */ -__rte_deprecated -static inline void __attribute__((always_inline)) -rte_mempool_sp_put(struct rte_mempool *mp, void *obj) -{ - rte_mempool_generic_put(mp, &obj, 1, NULL, MEMPOOL_F_SP_PUT); -} - -/** * Put one object back in the mempool. * * This function calls the multi-producer or the single-producer @@ -1244,15 +1169,14 @@ rte_mempool_put(struct rte_mempool *mp, void *obj) */ static inline int __attribute__((always_inline)) __mempool_generic_get(struct rte_mempool *mp, void **obj_table, - unsigned n, struct rte_mempool_cache *cache, int flags) + unsigned n, struct rte_mempool_cache *cache) { int ret; uint32_t index, len; void **cache_objs; - /* No cache provided or single consumer */ - if (unlikely(cache == NULL || flags & MEMPOOL_F_SC_GET || - n >= cache->size)) + /* No cache provided or cannot be satisfied from cache */ + if (unlikely(cache == NULL || n >= cache->size)) goto ring_dequeue; cache_objs = cache->objs; @@ -1326,72 +1250,16 @@ ring_dequeue: */ static inline int __attribute__((always_inline)) rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n, - struct rte_mempool_cache *cache, int flags) + struct rte_mempool_cache *cache, __rte_unused int flags) { int ret; - ret = __mempool_generic_get(mp, obj_table, n, cache, flags); + ret = __mempool_generic_get(mp, obj_table, n, cache); if (ret == 0) __mempool_check_cookies(mp, obj_table, n, 1); return ret; } /** - * @deprecated - * Get several objects from the mempool (multi-consumers safe). - * - * If cache is enabled, objects will be retrieved first from cache, - * subsequently from the common pool. Note that it can return -ENOENT when - * the local cache and common pool are empty, even if cache from other - * lcores are full. - * - * @param mp - * A pointer to the mempool structure. 
- * @param obj_table - * A pointer to a table of void * pointers (objects) that will be filled. - * @param n - * The number of objects to get from mempool to obj_table. - * @return - * - 0: Success; objects taken. - * - -ENOENT: Not enough entries in the mempool; no object is retrieved. - */ -__rte_deprecated -static inline int __attribute__((always_inline)) -rte_mempool_mc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) -{ - struct rte_mempool_cache *cache; - cache = rte_mempool_default_cache(mp, rte_lcore_id()); - return rte_mempool_generic_get(mp, obj_table, n, cache, 0); -} - -/** - * @deprecated - * Get several objects from the mempool (NOT multi-consumers safe). - * - * If cache is enabled, objects will be retrieved first from cache, - * subsequently from the common pool. Note that it can return -ENOENT when - * the local cache and common pool are empty, even if cache from other - * lcores are full. - * - * @param mp - * A pointer to the mempool structure. - * @param obj_table - * A pointer to a table of void * pointers (objects) that will be filled. - * @param n - * The number of objects to get from the mempool to obj_table. - * @return - * - 0: Success; objects taken. - * - -ENOENT: Not enough entries in the mempool; no object is - * retrieved. - */ -__rte_deprecated -static inline int __attribute__((always_inline)) -rte_mempool_sc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) -{ - return rte_mempool_generic_get(mp, obj_table, n, NULL, - MEMPOOL_F_SC_GET); -} - -/** * Get several objects from the mempool. * * This function calls the multi-consumers or the single-consumer @@ -1422,56 +1290,6 @@ rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) } /** - * @deprecated - * Get one object from the mempool (multi-consumers safe). - * - * If cache is enabled, objects will be retrieved first from cache, - * subsequently from the common pool. Note that it can return -ENOENT when - * the local cache and common pool are empty, even if cache from other - * lcores are full. - * - * @param mp - * A pointer to the mempool structure. - * @param obj_p - * A pointer to a void * pointer (object) that will be filled. - * @return - * - 0: Success; objects taken. - * - -ENOENT: Not enough entries in the mempool; no object is retrieved. - */ -__rte_deprecated -static inline int __attribute__((always_inline)) -rte_mempool_mc_get(struct rte_mempool *mp, void **obj_p) -{ - struct rte_mempool_cache *cache; - cache = rte_mempool_default_cache(mp, rte_lcore_id()); - return rte_mempool_generic_get(mp, obj_p, 1, cache, 0); -} - -/** - * @deprecated - * Get one object from the mempool (NOT multi-consumers safe). - * - * If cache is enabled, objects will be retrieved first from cache, - * subsequently from the common pool. Note that it can return -ENOENT when - * the local cache and common pool are empty, even if cache from other - * lcores are full. - * - * @param mp - * A pointer to the mempool structure. - * @param obj_p - * A pointer to a void * pointer (object) that will be filled. - * @return - * - 0: Success; objects taken. - * - -ENOENT: Not enough entries in the mempool; no object is retrieved. - */ -__rte_deprecated -static inline int __attribute__((always_inline)) -rte_mempool_sc_get(struct rte_mempool *mp, void **obj_p) -{ - return rte_mempool_generic_get(mp, obj_p, 1, NULL, MEMPOOL_F_SC_GET); -} - -/** * Get one object from the mempool. 
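 *
 * A round-trip sketch (do_work() is an illustrative placeholder and
 * error handling is elided):
 *
 * @code
 * void *obj;
 *
 * if (rte_mempool_get(mp, &obj) == 0) {
 *     do_work(obj);
 *     rte_mempool_put(mp, obj);
 * }
 * @endcode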
* * This function calls the multi-consumers or the single-consumer @@ -1512,22 +1330,6 @@ rte_mempool_get(struct rte_mempool *mp, void **obj_p) unsigned int rte_mempool_avail_count(const struct rte_mempool *mp); /** - * @deprecated - * Return the number of entries in the mempool. - * - * When cache is enabled, this function has to browse the length of - * all lcores, so it should not be used in a data path, but only for - * debug purposes. - * - * @param mp - * A pointer to the mempool structure. - * @return - * The number of entries in the mempool. - */ -__rte_deprecated -unsigned rte_mempool_count(const struct rte_mempool *mp); - -/** * Return the number of elements which have been allocated from the mempool * * When cache is enabled, this function has to browse the length of @@ -1543,31 +1345,6 @@ unsigned int rte_mempool_in_use_count(const struct rte_mempool *mp); /** - * @deprecated - * Return the number of free entries in the mempool ring. - * i.e. how many entries can be freed back to the mempool. - * - * NOTE: This corresponds to the number of elements *allocated* from the - * memory pool, not the number of elements in the pool itself. To count - * the number elements currently available in the pool, use "rte_mempool_count" - * - * When cache is enabled, this function has to browse the length of - * all lcores, so it should not be used in a data path, but only for - * debug purposes. User-owned mempool caches are not accounted for. - * - * @param mp - * A pointer to the mempool structure. - * @return - * The number of free entries in the mempool. - */ -__rte_deprecated -static inline unsigned -rte_mempool_free_count(const struct rte_mempool *mp) -{ - return rte_mempool_in_use_count(mp); -} - -/** * Test if the mempool is full. * * When cache is enabled, this function has to browse the length of all diff --git a/lib/librte_mempool/rte_mempool_ring.c b/lib/librte_mempool/rte_mempool_ring.c deleted file mode 100644 index b9aa64dd..00000000 --- a/lib/librte_mempool/rte_mempool_ring.c +++ /dev/null @@ -1,161 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <stdio.h> -#include <string.h> - -#include <rte_errno.h> -#include <rte_ring.h> -#include <rte_mempool.h> - -static int -common_ring_mp_enqueue(struct rte_mempool *mp, void * const *obj_table, - unsigned n) -{ - return rte_ring_mp_enqueue_bulk(mp->pool_data, obj_table, n); -} - -static int -common_ring_sp_enqueue(struct rte_mempool *mp, void * const *obj_table, - unsigned n) -{ - return rte_ring_sp_enqueue_bulk(mp->pool_data, obj_table, n); -} - -static int -common_ring_mc_dequeue(struct rte_mempool *mp, void **obj_table, unsigned n) -{ - return rte_ring_mc_dequeue_bulk(mp->pool_data, obj_table, n); -} - -static int -common_ring_sc_dequeue(struct rte_mempool *mp, void **obj_table, unsigned n) -{ - return rte_ring_sc_dequeue_bulk(mp->pool_data, obj_table, n); -} - -static unsigned -common_ring_get_count(const struct rte_mempool *mp) -{ - return rte_ring_count(mp->pool_data); -} - - -static int -common_ring_alloc(struct rte_mempool *mp) -{ - int rg_flags = 0, ret; - char rg_name[RTE_RING_NAMESIZE]; - struct rte_ring *r; - - ret = snprintf(rg_name, sizeof(rg_name), - RTE_MEMPOOL_MZ_FORMAT, mp->name); - if (ret < 0 || ret >= (int)sizeof(rg_name)) { - rte_errno = ENAMETOOLONG; - return -rte_errno; - } - - /* ring flags */ - if (mp->flags & MEMPOOL_F_SP_PUT) - rg_flags |= RING_F_SP_ENQ; - if (mp->flags & MEMPOOL_F_SC_GET) - rg_flags |= RING_F_SC_DEQ; - - /* - * Allocate the ring that will be used to store objects. - * Ring functions will return appropriate errors if we are - * running as a secondary process etc., so no checks made - * in this function for that condition. 
- */ - r = rte_ring_create(rg_name, rte_align32pow2(mp->size + 1), - mp->socket_id, rg_flags); - if (r == NULL) - return -rte_errno; - - mp->pool_data = r; - - return 0; -} - -static void -common_ring_free(struct rte_mempool *mp) -{ - rte_ring_free(mp->pool_data); -} - -/* - * The following 4 declarations of mempool ops structs address - * the need for the backward compatible mempool handlers for - * single/multi producers and single/multi consumers as dictated by the - * flags provided to the rte_mempool_create function - */ -static const struct rte_mempool_ops ops_mp_mc = { - .name = "ring_mp_mc", - .alloc = common_ring_alloc, - .free = common_ring_free, - .enqueue = common_ring_mp_enqueue, - .dequeue = common_ring_mc_dequeue, - .get_count = common_ring_get_count, -}; - -static const struct rte_mempool_ops ops_sp_sc = { - .name = "ring_sp_sc", - .alloc = common_ring_alloc, - .free = common_ring_free, - .enqueue = common_ring_sp_enqueue, - .dequeue = common_ring_sc_dequeue, - .get_count = common_ring_get_count, -}; - -static const struct rte_mempool_ops ops_mp_sc = { - .name = "ring_mp_sc", - .alloc = common_ring_alloc, - .free = common_ring_free, - .enqueue = common_ring_mp_enqueue, - .dequeue = common_ring_sc_dequeue, - .get_count = common_ring_get_count, -}; - -static const struct rte_mempool_ops ops_sp_mc = { - .name = "ring_sp_mc", - .alloc = common_ring_alloc, - .free = common_ring_free, - .enqueue = common_ring_sp_enqueue, - .dequeue = common_ring_mc_dequeue, - .get_count = common_ring_get_count, -}; - -MEMPOOL_REGISTER_OPS(ops_mp_mc); -MEMPOOL_REGISTER_OPS(ops_sp_sc); -MEMPOOL_REGISTER_OPS(ops_mp_sc); -MEMPOOL_REGISTER_OPS(ops_sp_mc); diff --git a/lib/librte_mempool/rte_mempool_stack.c b/lib/librte_mempool/rte_mempool_stack.c deleted file mode 100644 index 817f77e6..00000000 --- a/lib/librte_mempool/rte_mempool_stack.c +++ /dev/null @@ -1,147 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <stdio.h> -#include <rte_mempool.h> -#include <rte_malloc.h> - -struct rte_mempool_stack { - rte_spinlock_t sl; - - uint32_t size; - uint32_t len; - void *objs[]; -}; - -static int -stack_alloc(struct rte_mempool *mp) -{ - struct rte_mempool_stack *s; - unsigned n = mp->size; - int size = sizeof(*s) + (n+16)*sizeof(void *); - - /* Allocate our local memory structure */ - s = rte_zmalloc_socket("mempool-stack", - size, - RTE_CACHE_LINE_SIZE, - mp->socket_id); - if (s == NULL) { - RTE_LOG(ERR, MEMPOOL, "Cannot allocate stack!\n"); - return -ENOMEM; - } - - rte_spinlock_init(&s->sl); - - s->size = n; - mp->pool_data = s; - - return 0; -} - -static int -stack_enqueue(struct rte_mempool *mp, void * const *obj_table, - unsigned n) -{ - struct rte_mempool_stack *s = mp->pool_data; - void **cache_objs; - unsigned index; - - rte_spinlock_lock(&s->sl); - cache_objs = &s->objs[s->len]; - - /* Is there sufficient space in the stack ? */ - if ((s->len + n) > s->size) { - rte_spinlock_unlock(&s->sl); - return -ENOBUFS; - } - - /* Add elements back into the cache */ - for (index = 0; index < n; ++index, obj_table++) - cache_objs[index] = *obj_table; - - s->len += n; - - rte_spinlock_unlock(&s->sl); - return 0; -} - -static int -stack_dequeue(struct rte_mempool *mp, void **obj_table, - unsigned n) -{ - struct rte_mempool_stack *s = mp->pool_data; - void **cache_objs; - unsigned index, len; - - rte_spinlock_lock(&s->sl); - - if (unlikely(n > s->len)) { - rte_spinlock_unlock(&s->sl); - return -ENOENT; - } - - cache_objs = s->objs; - - for (index = 0, len = s->len - 1; index < n; - ++index, len--, obj_table++) - *obj_table = cache_objs[len]; - - s->len -= n; - rte_spinlock_unlock(&s->sl); - return 0; -} - -static unsigned -stack_get_count(const struct rte_mempool *mp) -{ - struct rte_mempool_stack *s = mp->pool_data; - - return s->len; -} - -static void -stack_free(struct rte_mempool *mp) -{ - rte_free((void *)(mp->pool_data)); -} - -static struct rte_mempool_ops ops_stack = { - .name = "stack", - .alloc = stack_alloc, - .free = stack_free, - .enqueue = stack_enqueue, - .dequeue = stack_dequeue, - .get_count = stack_get_count -}; - -MEMPOOL_REGISTER_OPS(ops_stack); diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map index dee1c990..f9c07944 100644 --- a/lib/librte_mempool/rte_mempool_version.map +++ b/lib/librte_mempool/rte_mempool_version.map @@ -3,7 +3,6 @@ DPDK_2.0 { rte_mempool_audit; rte_mempool_calc_obj_size; - rte_mempool_count; rte_mempool_create; rte_mempool_dump; rte_mempool_list_dump; diff --git a/lib/librte_meter/Makefile b/lib/librte_meter/Makefile index f07fced7..539bfddd 100644 --- a/lib/librte_meter/Makefile +++ b/lib/librte_meter/Makefile @@ -53,7 +53,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_METER) := rte_meter.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_METER)-include := rte_meter.h -# this lib depends upon: -DEPDIRS-$(CONFIG_RTE_LIBRTE_METER) += lib/librte_eal - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_metrics/Makefile b/lib/librte_metrics/Makefile new file mode 100644 index 00000000..d4990e83 --- /dev/null +++ b/lib/librte_metrics/Makefile @@ -0,0 +1,49 @@ +# BSD LICENSE +# +# Copyright(c) 2017 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_metrics.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 + +EXPORT_MAP := rte_metrics_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_METRICS) := rte_metrics.c + +# Install header file +SYMLINK-$(CONFIG_RTE_LIBRTE_METRICS)-include += rte_metrics.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_metrics/rte_metrics.c b/lib/librte_metrics/rte_metrics.c new file mode 100644 index 00000000..e9a122c1 --- /dev/null +++ b/lib/librte_metrics/rte_metrics.c @@ -0,0 +1,302 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string.h> +#include <sys/queue.h> + +#include <rte_common.h> +#include <rte_malloc.h> +#include <rte_metrics.h> +#include <rte_lcore.h> +#include <rte_memzone.h> +#include <rte_spinlock.h> + +#define RTE_METRICS_MAX_METRICS 256 +#define RTE_METRICS_MEMZONE_NAME "RTE_METRICS" + +/** + * Internal stats metadata and value entry. + * + * @internal + */ +struct rte_metrics_meta_s { + /** Name of metric */ + char name[RTE_METRICS_MAX_NAME_LEN]; + /** Current value for metric */ + uint64_t value[RTE_MAX_ETHPORTS]; + /** Used for global metrics */ + uint64_t global_value; + /** Index of next root element (zero for none) */ + uint16_t idx_next_set; + /** Index of next metric in set (zero for none) */ + uint16_t idx_next_stat; +}; + +/** + * Internal stats info structure. + * + * @internal + * Offsets into metadata are used instead of pointers because ASLR + * means that having the same physical addresses in different + * processes is not guaranteed. + */ +struct rte_metrics_data_s { + /** Index of last metadata entry with valid data. + * This value is not valid if cnt_stats is zero. + */ + uint16_t idx_last_set; + /** Number of metrics. */ + uint16_t cnt_stats; + /** Metric data memory block. */ + struct rte_metrics_meta_s metadata[RTE_METRICS_MAX_METRICS]; + /** Metric data access lock */ + rte_spinlock_t lock; +}; + +void +rte_metrics_init(int socket_id) +{ + struct rte_metrics_data_s *stats; + const struct rte_memzone *memzone; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return; + + memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME); + if (memzone != NULL) + return; + memzone = rte_memzone_reserve(RTE_METRICS_MEMZONE_NAME, + sizeof(struct rte_metrics_data_s), socket_id, 0); + if (memzone == NULL) + rte_exit(EXIT_FAILURE, "Unable to allocate stats memzone\n"); + stats = memzone->addr; + memset(stats, 0, sizeof(struct rte_metrics_data_s)); + rte_spinlock_init(&stats->lock); +} + +int +rte_metrics_reg_name(const char *name) +{ + const char * const list_names[] = {name}; + + return rte_metrics_reg_names(list_names, 1); +} + +int +rte_metrics_reg_names(const char * const *names, uint16_t cnt_names) +{ + struct rte_metrics_meta_s *entry; + struct rte_metrics_data_s *stats; + const struct rte_memzone *memzone; + uint16_t idx_name; + uint16_t idx_base; + + /* Some sanity checks */ + if (cnt_names < 1 || names == NULL) + return -EINVAL; + + memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME); + if (memzone == NULL) + return -EIO; + stats = memzone->addr; + + if (stats->cnt_stats + cnt_names >= RTE_METRICS_MAX_METRICS) + return -ENOMEM; + + rte_spinlock_lock(&stats->lock); + + /* Overwritten later if this is actually first set.. 
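+	 * (Sets are chained: idx_next_set links the first metric of each
+	 * registered set and idx_next_stat links metrics within a set; see
+	 * struct rte_metrics_meta_s above.)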
*/ + stats->metadata[stats->idx_last_set].idx_next_set = stats->cnt_stats; + + stats->idx_last_set = idx_base = stats->cnt_stats; + + for (idx_name = 0; idx_name < cnt_names; idx_name++) { + entry = &stats->metadata[idx_name + stats->cnt_stats]; + strncpy(entry->name, names[idx_name], + RTE_METRICS_MAX_NAME_LEN); + /* strncpy() does not guarantee termination; enforce it */ + entry->name[RTE_METRICS_MAX_NAME_LEN - 1] = '\0'; + memset(entry->value, 0, sizeof(entry->value)); + entry->idx_next_stat = idx_name + stats->cnt_stats + 1; + } + entry->idx_next_stat = 0; + entry->idx_next_set = 0; + stats->cnt_stats += cnt_names; + + rte_spinlock_unlock(&stats->lock); + + return idx_base; +} + +int +rte_metrics_update_value(int port_id, uint16_t key, const uint64_t value) +{ + return rte_metrics_update_values(port_id, key, &value, 1); +} + +int +rte_metrics_update_values(int port_id, + uint16_t key, + const uint64_t *values, + uint32_t count) +{ + struct rte_metrics_meta_s *entry; + struct rte_metrics_data_s *stats; + const struct rte_memzone *memzone; + uint16_t idx_metric; + uint16_t idx_value; + uint16_t cnt_setsize; + + if (port_id != RTE_METRICS_GLOBAL && + (port_id < 0 || port_id >= RTE_MAX_ETHPORTS)) + return -EINVAL; + + if (values == NULL) + return -EINVAL; + + memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME); + if (memzone == NULL) + return -EIO; + stats = memzone->addr; + + rte_spinlock_lock(&stats->lock); + idx_metric = key; + cnt_setsize = 1; + while (idx_metric < stats->cnt_stats) { + entry = &stats->metadata[idx_metric]; + if (entry->idx_next_stat == 0) + break; + cnt_setsize++; + idx_metric++; + } + /* Check update does not cross set border */ + if (count > cnt_setsize) { + rte_spinlock_unlock(&stats->lock); + return -ERANGE; + } + + if (port_id == RTE_METRICS_GLOBAL) + for (idx_value = 0; idx_value < count; idx_value++) { + idx_metric = key + idx_value; + stats->metadata[idx_metric].global_value = + values[idx_value]; + } + else + for (idx_value = 0; idx_value < count; idx_value++) { + idx_metric = key + idx_value; + stats->metadata[idx_metric].value[port_id] = + values[idx_value]; + } + rte_spinlock_unlock(&stats->lock); + return 0; +} + +int +rte_metrics_get_names(struct rte_metric_name *names, + uint16_t capacity) +{ + struct rte_metrics_data_s *stats; + const struct rte_memzone *memzone; + uint16_t idx_name; + int return_value; + + memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME); + /* If not allocated, fail silently */ + if (memzone == NULL) + return 0; + + stats = memzone->addr; + rte_spinlock_lock(&stats->lock); + if (names != NULL) { + if (capacity < stats->cnt_stats) { + return_value = stats->cnt_stats; + rte_spinlock_unlock(&stats->lock); + return return_value; + } + for (idx_name = 0; idx_name < stats->cnt_stats; idx_name++) + strncpy(names[idx_name].name, + stats->metadata[idx_name].name, + RTE_METRICS_MAX_NAME_LEN); + } + return_value = stats->cnt_stats; + rte_spinlock_unlock(&stats->lock); + return return_value; +} + +int +rte_metrics_get_values(int port_id, + struct rte_metric_value *values, + uint16_t capacity) +{ + struct rte_metrics_meta_s *entry; + struct rte_metrics_data_s *stats; + const struct rte_memzone *memzone; + uint16_t idx_name; + int return_value; + + if (port_id != RTE_METRICS_GLOBAL && + (port_id < 0 || port_id >= RTE_MAX_ETHPORTS)) + return -EINVAL; + + memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME); + /* If not allocated, fail silently */ + if (memzone == NULL) + return 0; + stats = memzone->addr; + rte_spinlock_lock(&stats->lock); + + if (values != NULL) { + if (capacity < stats->cnt_stats) { + return_value = stats->cnt_stats;
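+			/* Capacity too small: hand back the required array
+			 * size rather than an error, mirroring
+			 * rte_metrics_get_names().
+			 */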
rte_spinlock_unlock(&stats->lock); + return return_value; + } + if (port_id == RTE_METRICS_GLOBAL) + for (idx_name = 0; + idx_name < stats->cnt_stats; + idx_name++) { + entry = &stats->metadata[idx_name]; + values[idx_name].key = idx_name; + values[idx_name].value = entry->global_value; + } + else + for (idx_name = 0; + idx_name < stats->cnt_stats; + idx_name++) { + entry = &stats->metadata[idx_name]; + values[idx_name].key = idx_name; + values[idx_name].value = entry->value[port_id]; + } + } + return_value = stats->cnt_stats; + rte_spinlock_unlock(&stats->lock); + return return_value; +} diff --git a/lib/librte_metrics/rte_metrics.h b/lib/librte_metrics/rte_metrics.h new file mode 100644 index 00000000..0fa3104e --- /dev/null +++ b/lib/librte_metrics/rte_metrics.h @@ -0,0 +1,250 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * + * DPDK Metrics module + * + * Metrics are statistics that are not generated by PMDs, and hence + * are better reported through a mechanism that is independent from + * the ethdev-based extended statistics. Providers will typically + * be other libraries and consumers will typically be applications. + * + * Metric information is populated using a push model, where producers + * update the values contained within the metric library by calling + * an update function on the relevant metrics. Consumers receive + * metric information by querying the central metric data, which is + * held in shared memory. Currently only bulk querying of metrics + * by consumers is supported. + */ + +#ifndef _RTE_METRICS_H_ +#define _RTE_METRICS_H_ + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** Maximum length of metric name (including null-terminator) */ +#define RTE_METRICS_MAX_NAME_LEN 64 + +/** + * Global metric special id. 
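+ *
+ * A producer-side sketch (the metric name and value shown are
+ * illustrative assumptions):
+ *
+ * @code
+ * int key = rte_metrics_reg_name("example_count");
+ *
+ * if (key >= 0)
+ *     rte_metrics_update_value(RTE_METRICS_GLOBAL, key, 42);
+ * @endcode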
+ * + * When used for the port_id parameter when calling + * rte_metrics_update_value() or rte_metrics_update_values(), + * the global metrics, which are not associated with any specific + * port (i.e. device), are updated. + */ +#define RTE_METRICS_GLOBAL -1 + + +/** + * A name-key lookup for metrics. + * + * An array of this structure is returned by rte_metrics_get_names(). + * The struct rte_metric_value references these names via their array index. + */ +struct rte_metric_name { + /** String describing metric */ + char name[RTE_METRICS_MAX_NAME_LEN]; +}; + + +/** + * Metric value structure. + * + * This structure is used by rte_metrics_get_values() to return metrics, + * which are statistics that are not generated by PMDs. It maps a name key, + * which corresponds to an index in the array returned by + * rte_metrics_get_names(). + */ +struct rte_metric_value { + /** Numeric identifier of metric. */ + uint16_t key; + /** Value for metric */ + uint64_t value; +}; + + +/** + * Initializes metric module. This function must be called from + * a primary process before metrics are used. + * + * @param socket_id + * Socket to use for shared memory allocation. + */ +void rte_metrics_init(int socket_id); + +/** + * Register a metric, making it available as a reporting parameter. + * + * Registering a metric is the way producers declare a parameter + * that they wish to be reported. Once registered, the associated + * numeric key can be obtained via rte_metrics_get_names(), which + * is required for updating said metric's value. + * + * @param name + * Metric name + * + * @return + * - Zero or positive: Success (index key of new metric) + * - -EIO: Error, unable to access metrics shared memory + * (rte_metrics_init() not called) + * - -EINVAL: Error, invalid parameters + * - -ENOMEM: Error, maximum metrics reached + */ +int rte_metrics_reg_name(const char *name); + +/** + * Register a set of metrics. + * + * This is a bulk version of rte_metrics_reg_name() and aside from + * handling multiple keys at once is functionally identical. + * + * @param names + * List of metric names + * + * @param cnt_names + * Number of metrics in set + * + * @return + * - Zero or positive: Success (index key of start of set) + * - -EIO: Error, unable to access metrics shared memory + * (rte_metrics_init() not called) + * - -EINVAL: Error, invalid parameters + * - -ENOMEM: Error, maximum metrics reached + */ +int rte_metrics_reg_names(const char * const *names, uint16_t cnt_names); + +/** + * Get metric name-key lookup table. + * + * @param names + * A struct rte_metric_name array of at least *capacity* in size to + * receive key names. If this is NULL, function returns the required + * number of elements for this array. + * + * @param capacity + * Size (number of elements) of struct rte_metric_name array. + * Disregarded if names is NULL. + * + * @return + * - Positive value above capacity: error, *names* is too small. + * Return value is required size. + * - Positive value equal or less than capacity: Success. Return + * value is number of elements filled in. + * - Negative value: error. + */ +int rte_metrics_get_names( + struct rte_metric_name *names, + uint16_t capacity); + +/** + * Get metric value table. + * + * @param port_id + * Port id to query + * + * @param values + * A struct rte_metric_value array of at least *capacity* in size to + * receive metric ids and values. If this is NULL, function returns + * the required number of elements for this array.
+ * + * @param capacity + * Size (number of elements) of struct rte_metric_value array. + * Disregarded if values is NULL. + * + * @return + * - Positive value above capacity: error, *values* is too small. + * Return value is required size. + * - Positive value equal or less than capacity: Success. Return + * value is number of elements filled in. + * - Negative value: error. + */ +int rte_metrics_get_values( + int port_id, + struct rte_metric_value *values, + uint16_t capacity); + +/** + * Updates a metric. + * + * @param port_id + * Port to update metrics for + * @param key + * Id of metric to update + * @param value + * New value + * + * @return + * - -EINVAL if port_id is invalid + * - -EIO if unable to access shared metrics memory + * - Zero on success + */ +int rte_metrics_update_value( + int port_id, + uint16_t key, + const uint64_t value); + +/** + * Updates a metric set. Note that it is an error to try to + * update across a set boundary. + * + * @param port_id + * Port to update metrics for + * @param key + * Base id of metrics set to update + * @param values + * Set of new values + * @param count + * Number of new values + * + * @return + * - -EINVAL if port_id is invalid + * - -ERANGE if count exceeds metric set size + * - -EIO if unable to access shared metrics memory + * - Zero on success + */ +int rte_metrics_update_values( + int port_id, + uint16_t key, + const uint64_t *values, + uint32_t count); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_metrics/rte_metrics_version.map b/lib/librte_metrics/rte_metrics_version.map new file mode 100644 index 00000000..4c5234cd --- /dev/null +++ b/lib/librte_metrics/rte_metrics_version.map @@ -0,0 +1,13 @@ +DPDK_17.05 { + global: + + rte_metrics_get_names; + rte_metrics_get_values; + rte_metrics_init; + rte_metrics_reg_name; + rte_metrics_reg_names; + rte_metrics_update_value; + rte_metrics_update_values; + + local: *; +}; diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile index 20cf6644..56727c4d 100644 --- a/lib/librte_net/Makefile +++ b/lib/librte_net/Makefile @@ -39,12 +39,12 @@ EXPORT_MAP := rte_net_version.map LIBABIVER := 1 SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c +SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_sctp.h rte_icmp.h rte_arp.h SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_ether.h rte_gre.h rte_net.h - -DEPDIRS-$(CONFIG_RTE_LIBRTE_NET) += lib/librte_eal lib/librte_mbuf +SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_net/net_crc_sse.h b/lib/librte_net/net_crc_sse.h new file mode 100644 index 00000000..8bce522a --- /dev/null +++ b/lib/librte_net/net_crc_sse.h @@ -0,0 +1,363 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_NET_CRC_SSE_H_ +#define _RTE_NET_CRC_SSE_H_ + +#include <rte_branch_prediction.h> + +#include <x86intrin.h> +#include <cpuid.h> + +#ifdef __cplusplus extern "C" { +#endif + +/** PCLMULQDQ CRC computation context structure */ +struct crc_pclmulqdq_ctx { + __m128i rk1_rk2; + __m128i rk5_rk6; + __m128i rk7_rk8; +}; + +struct crc_pclmulqdq_ctx crc32_eth_pclmulqdq __rte_aligned(16); +struct crc_pclmulqdq_ctx crc16_ccitt_pclmulqdq __rte_aligned(16); +/** + * @brief Performs one folding round + * + * Logically function operates as follows: + * DATA = READ_NEXT_16BYTES(); + * F1 = LSB8(FOLD) + * F2 = MSB8(FOLD) + * T1 = CLMUL(F1, RK1) + * T2 = CLMUL(F2, RK2) + * FOLD = XOR(T1, T2, DATA) + * + * @param data_block + * 16 byte data block + * @param precomp + * Precomputed rk1 constant + * @param fold + * Current 16 byte folded data + * + * @return + * New 16 byte folded data + */ +static inline __attribute__((always_inline)) __m128i +crcr32_folding_round(__m128i data_block, + __m128i precomp, + __m128i fold) +{ + __m128i tmp0 = _mm_clmulepi64_si128(fold, precomp, 0x01); + __m128i tmp1 = _mm_clmulepi64_si128(fold, precomp, 0x10); + + return _mm_xor_si128(tmp1, _mm_xor_si128(data_block, tmp0)); +} + +/** + * Performs reduction from 128 bits to 64 bits + * + * @param data128 + * 128 bits data to be reduced + * @param precomp + * precomputed constants rk5, rk6 + * + * @return + * 64 bits reduced data + */ + +static inline __attribute__((always_inline)) __m128i +crcr32_reduce_128_to_64(__m128i data128, __m128i precomp) +{ + __m128i tmp0, tmp1, tmp2; + + /* 64b fold */ + tmp0 = _mm_clmulepi64_si128(data128, precomp, 0x00); + tmp1 = _mm_srli_si128(data128, 8); + tmp0 = _mm_xor_si128(tmp0, tmp1); + + /* 32b fold */ + tmp2 = _mm_slli_si128(tmp0, 4); + tmp1 = _mm_clmulepi64_si128(tmp2, precomp, 0x10); + + return _mm_xor_si128(tmp1, tmp0); +} + +/** + * Performs Barrett's reduction from 64 bits to 32 bits + * + * @param data64 + * 64 bits data to be reduced + * @param precomp + * rk7 precomputed constant + * + * @return + * reduced 32 bits data + */ + +static inline __attribute__((always_inline)) uint32_t +crcr32_reduce_64_to_32(__m128i data64, __m128i precomp) +{ + static const uint32_t mask1[4] __rte_aligned(16) = { + 0xffffffff, 0xffffffff, 0x00000000, 0x00000000 + }; + + static const uint32_t mask2[4] __rte_aligned(16) = { + 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff + }; + __m128i tmp0, tmp1, tmp2; + + tmp0 = _mm_and_si128(data64, _mm_load_si128((const __m128i *)mask2)); + + tmp1 =
_mm_clmulepi64_si128(tmp0, precomp, 0x00); + tmp1 = _mm_xor_si128(tmp1, tmp0); + tmp1 = _mm_and_si128(tmp1, _mm_load_si128((const __m128i *)mask1)); + + tmp2 = _mm_clmulepi64_si128(tmp1, precomp, 0x10); + tmp2 = _mm_xor_si128(tmp2, tmp1); + tmp2 = _mm_xor_si128(tmp2, tmp0); + + return _mm_extract_epi32(tmp2, 2); +} + +static const uint8_t crc_xmm_shift_tab[48] __rte_aligned(16) = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + +/** + * Shifts left 128 bit register by specified number of bytes + * + * @param reg + * 128 bit value + * @param num + * number of bytes to shift left reg by (0-16) + * + * @return + * reg << (num * 8) + */ + +static inline __attribute__((always_inline)) __m128i +xmm_shift_left(__m128i reg, const unsigned int num) +{ + const __m128i *p = (const __m128i *)(crc_xmm_shift_tab + 16 - num); + + return _mm_shuffle_epi8(reg, _mm_loadu_si128(p)); +} + +static inline __attribute__((always_inline)) uint32_t +crc32_eth_calc_pclmulqdq( + const uint8_t *data, + uint32_t data_len, + uint32_t crc, + const struct crc_pclmulqdq_ctx *params) +{ + __m128i temp, fold, k; + uint32_t n; + + /* Get CRC init value */ + temp = _mm_insert_epi32(_mm_setzero_si128(), crc, 0); + + /** + * Folding all data into single 16 byte data block + * Assumes: fold holds first 16 bytes of data + */ + + if (unlikely(data_len < 32)) { + if (unlikely(data_len == 16)) { + /* 16 bytes */ + fold = _mm_loadu_si128((const __m128i *)data); + fold = _mm_xor_si128(fold, temp); + goto reduction_128_64; + } + + if (unlikely(data_len < 16)) { + /* 0 to 15 bytes */ + uint8_t buffer[16] __rte_aligned(16); + + memset(buffer, 0, sizeof(buffer)); + memcpy(buffer, data, data_len); + + fold = _mm_load_si128((const __m128i *)buffer); + fold = _mm_xor_si128(fold, temp); + if (unlikely(data_len < 4)) { + fold = xmm_shift_left(fold, 8 - data_len); + goto barret_reduction; + } + fold = xmm_shift_left(fold, 16 - data_len); + goto reduction_128_64; + } + /* 17 to 31 bytes */ + fold = _mm_loadu_si128((const __m128i *)data); + fold = _mm_xor_si128(fold, temp); + n = 16; + k = params->rk1_rk2; + goto partial_bytes; + } + + /** At least 32 bytes in the buffer */ + /** Apply CRC initial value */ + fold = _mm_loadu_si128((const __m128i *)data); + fold = _mm_xor_si128(fold, temp); + + /** Main folding loop - the last 16 bytes is processed separately */ + k = params->rk1_rk2; + for (n = 16; (n + 16) <= data_len; n += 16) { + temp = _mm_loadu_si128((const __m128i *)&data[n]); + fold = crcr32_folding_round(temp, k, fold); + } + +partial_bytes: + if (likely(n < data_len)) { + + const uint32_t mask3[4] __rte_aligned(16) = { + 0x80808080, 0x80808080, 0x80808080, 0x80808080 + }; + + const uint8_t shf_table[32] __rte_aligned(16) = { + 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + }; + + __m128i last16, a, b; + + last16 = _mm_loadu_si128((const __m128i *)&data[data_len - 16]); + + temp = _mm_loadu_si128((const __m128i *) + &shf_table[data_len & 15]); + a = _mm_shuffle_epi8(fold, temp); + + temp = _mm_xor_si128(temp, + _mm_load_si128((const __m128i *)mask3)); + b = _mm_shuffle_epi8(fold, temp); + b = _mm_blendv_epi8(b, last16, temp); + + /* k = 
rk1 & rk2 */ + temp = _mm_clmulepi64_si128(a, k, 0x01); + fold = _mm_clmulepi64_si128(a, k, 0x10); + + fold = _mm_xor_si128(fold, temp); + fold = _mm_xor_si128(fold, b); + } + + /** Reduction 128 -> 32 Assumes: fold holds 128bit folded data */ +reduction_128_64: + k = params->rk5_rk6; + fold = crcr32_reduce_128_to_64(fold, k); + +barret_reduction: + k = params->rk7_rk8; + n = crcr32_reduce_64_to_32(fold, k); + + return n; +} + + +static inline void +rte_net_crc_sse42_init(void) +{ + uint64_t k1, k2, k5, k6; + uint64_t p = 0, q = 0; + + /** Initialize CRC16 data */ + k1 = 0x189aeLLU; + k2 = 0x8e10LLU; + k5 = 0x189aeLLU; + k6 = 0x114aaLLU; + q = 0x11c581910LLU; + p = 0x10811LLU; + + /** Save the params in context structure */ + crc16_ccitt_pclmulqdq.rk1_rk2 = + _mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2)); + crc16_ccitt_pclmulqdq.rk5_rk6 = + _mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6)); + crc16_ccitt_pclmulqdq.rk7_rk8 = + _mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p)); + + /** Initialize CRC32 data */ + k1 = 0xccaa009eLLU; + k2 = 0x1751997d0LLU; + k5 = 0xccaa009eLLU; + k6 = 0x163cd6124LLU; + q = 0x1f7011640LLU; + p = 0x1db710641LLU; + + /** Save the params in context structure */ + crc32_eth_pclmulqdq.rk1_rk2 = + _mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2)); + crc32_eth_pclmulqdq.rk5_rk6 = + _mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6)); + crc32_eth_pclmulqdq.rk7_rk8 = + _mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p)); + + /** + * Reset the register as following calculation may + * use other data types such as float, double, etc. + */ + _mm_empty(); + +} + +static inline uint32_t +rte_crc16_ccitt_sse42_handler(const uint8_t *data, + uint32_t data_len) +{ + /** return 16-bit CRC value */ + return (uint16_t)~crc32_eth_calc_pclmulqdq(data, + data_len, + 0xffff, + &crc16_ccitt_pclmulqdq); +} + +static inline uint32_t +rte_crc32_eth_sse42_handler(const uint8_t *data, + uint32_t data_len) +{ + return ~crc32_eth_calc_pclmulqdq(data, + data_len, + 0xffffffffUL, + &crc32_eth_pclmulqdq); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_NET_CRC_SSE_H_ */ diff --git a/lib/librte_net/rte_ether.h b/lib/librte_net/rte_ether.h index ff3d0654..917d42a1 100644 --- a/lib/librte_net/rte_ether.h +++ b/lib/librte_net/rte_ether.h @@ -333,6 +333,7 @@ struct vxlan_hdr { #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ #define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ #define ETHER_TYPE_TEB 0x6558 /**< Transparent Ethernet Bridging. */ +#define ETHER_TYPE_LLDP 0x88CC /**< LLDP Protocol. */ #define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr)) /**< VXLAN tunnel header length. 
 */
@@ -357,7 +358,7 @@ static inline int rte_vlan_strip(struct rte_mbuf *m)
 return -1;
 struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1);
- m->ol_flags |= PKT_RX_VLAN_PKT;
+ m->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
 m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci);
 
 /* Copy ether header over rather than moving whole packet */
@@ -407,6 +408,8 @@ static inline int rte_vlan_insert(struct rte_mbuf **m)
 vh = (struct vlan_hdr *) (nh + 1);
 vh->vlan_tci = rte_cpu_to_be_16((*m)->vlan_tci);
 
+ (*m)->ol_flags &= ~PKT_RX_VLAN_STRIPPED;
+
 return 0;
 }
diff --git a/lib/librte_net/rte_net.h b/lib/librte_net/rte_net.h
index d4156aea..79c764ad 100644
--- a/lib/librte_net/rte_net.h
+++ b/lib/librte_net/rte_net.h
@@ -38,6 +38,11 @@
 extern "C" {
 #endif
 
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
+#include <rte_sctp.h>
+
 /**
 * Structure containing header lengths associated to a packet, filled
 * by rte_net_get_ptype().
@@ -86,6 +91,112 @@ struct rte_net_hdr_lens {
 uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
 struct rte_net_hdr_lens *hdr_lens, uint32_t layers);
 
+/**
+ * Prepare pseudo header checksum
+ *
+ * This function prepares the pseudo-header checksum for TSO and non-TSO
+ * tcp/udp packets in the provided mbuf's packet data, based on the
+ * requested offload flags.
+ *
+ * - for non-TSO tcp/udp packets the full pseudo-header checksum is
+ * computed and set in the packet data,
+ * - for TSO the IP payload length is not included in the pseudo header.
+ *
+ * This function expects that the headers used are in the first data
+ * segment of the mbuf, are not fragmented and can be safely modified.
+ *
+ * @param m
+ * The packet mbuf to be fixed.
+ * @param ol_flags
+ * TX offload flags to use with this packet.
+ * @return
+ * 0 if the checksum is initialized properly
+ */
+static inline int
+rte_net_intel_cksum_flags_prepare(struct rte_mbuf *m, uint64_t ol_flags)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ struct ipv6_hdr *ipv6_hdr;
+ struct tcp_hdr *tcp_hdr;
+ struct udp_hdr *udp_hdr;
+ uint64_t inner_l3_offset = m->l2_len;
+
+ if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
+ (ol_flags & PKT_TX_OUTER_IPV6))
+ inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
+
+ if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
+ if (ol_flags & PKT_TX_IPV4) {
+ ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ inner_l3_offset);
+
+ if (ol_flags & PKT_TX_IP_CKSUM)
+ ipv4_hdr->hdr_checksum = 0;
+
+ udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
+ m->l3_len);
+ udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr,
+ ol_flags);
+ } else {
+ ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+ inner_l3_offset);
+ /* non-TSO udp */
+ udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *,
+ inner_l3_offset + m->l3_len);
+ udp_hdr->dgram_cksum = rte_ipv6_phdr_cksum(ipv6_hdr,
+ ol_flags);
+ }
+ } else if ((ol_flags & PKT_TX_TCP_CKSUM) ||
+ (ol_flags & PKT_TX_TCP_SEG)) {
+ if (ol_flags & PKT_TX_IPV4) {
+ ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ inner_l3_offset);
+
+ if (ol_flags & PKT_TX_IP_CKSUM)
+ ipv4_hdr->hdr_checksum = 0;
+
+ /* non-TSO tcp or TSO */
+ tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
+ m->l3_len);
+ tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr,
+ ol_flags);
+ } else {
+ ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+ inner_l3_offset);
+ /* non-TSO tcp or TSO */
+ tcp_hdr = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *,
+ inner_l3_offset + m->l3_len);
+ tcp_hdr->cksum = rte_ipv6_phdr_cksum(ipv6_hdr,
+ ol_flags);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Prepare pseudo header checksum
+ *
+ * This function prepares the pseudo-header checksum for TSO and non-TSO
+ * tcp/udp packets in the provided mbuf's packet data.
+ *
+ * - for non-TSO tcp/udp packets the full pseudo-header checksum is
+ * computed and set in the packet data,
+ * - for TSO the IP payload length is not included in the pseudo header.
+ *
+ * This function expects that the headers used are in the first data
+ * segment of the mbuf, are not fragmented and can be safely modified.
+ *
+ * @param m
+ * The packet mbuf to be fixed.
+ * @return
+ * 0 if the checksum is initialized properly
+ */
+static inline int
+rte_net_intel_cksum_prepare(struct rte_mbuf *m)
+{
+ return rte_net_intel_cksum_flags_prepare(m, m->ol_flags);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
new file mode 100644
index 00000000..9d1ee63f
--- /dev/null
+++ b/lib/librte_net/rte_net_crc.c
@@ -0,0 +1,207 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
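As a usage illustration for the two checksum helpers above: a driver's tx_prepare path could fix up an outgoing IPv4/TCP packet as sketched below. The wrapper name and flag choices are hypothetical, and the sketch assumes the application already filled in m->l2_len and m->l3_len.

#include <rte_mbuf.h>
#include <rte_net.h>

static int
tx_prepare_sketch(struct rte_mbuf *m)
{
	/* Request IP and TCP checksum offload for an IPv4/TCP packet
	 * (illustrative; real callers set these per packet). */
	m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM;

	/* Writes the pseudo-header checksum into the packet data so the
	 * hardware can fill in the rest. */
	return rte_net_intel_cksum_prepare(m);
}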
+ */ + +#include <stddef.h> +#include <string.h> +#include <stdint.h> + +#include <rte_cpuflags.h> +#include <rte_common.h> +#include <rte_net_crc.h> + +#if defined(RTE_ARCH_X86_64) \ + && defined(RTE_MACHINE_CPUFLAG_SSE4_2) \ + && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ) +#define X86_64_SSE42_PCLMULQDQ 1 +#endif + +#ifdef X86_64_SSE42_PCLMULQDQ +#include <net_crc_sse.h> +#endif + +/* crc tables */ +static uint32_t crc32_eth_lut[CRC_LUT_SIZE]; +static uint32_t crc16_ccitt_lut[CRC_LUT_SIZE]; + +static uint32_t +rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len); + +static uint32_t +rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len); + +typedef uint32_t +(*rte_net_crc_handler)(const uint8_t *data, uint32_t data_len); + +static rte_net_crc_handler *handlers; + +static rte_net_crc_handler handlers_scalar[] = { + [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_handler, + [RTE_NET_CRC32_ETH] = rte_crc32_eth_handler, +}; + +#ifdef X86_64_SSE42_PCLMULQDQ +static rte_net_crc_handler handlers_sse42[] = { + [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_sse42_handler, + [RTE_NET_CRC32_ETH] = rte_crc32_eth_sse42_handler, +}; +#endif + +/** + * Reflect the bits about the middle + * + * @param val + * value to be reflected + * + * @return + * reflected value + */ +static uint32_t +reflect_32bits(uint32_t val) +{ + uint32_t i, res = 0; + + for (i = 0; i < 32; i++) + if ((val & (1 << i)) != 0) + res |= (uint32_t)(1 << (31 - i)); + + return res; +} + +static void +crc32_eth_init_lut(uint32_t poly, + uint32_t *lut) +{ + uint32_t i, j; + + for (i = 0; i < CRC_LUT_SIZE; i++) { + uint32_t crc = reflect_32bits(i); + + for (j = 0; j < 8; j++) { + if (crc & 0x80000000L) + crc = (crc << 1) ^ poly; + else + crc <<= 1; + } + lut[i] = reflect_32bits(crc); + } +} + +static inline __attribute__((always_inline)) uint32_t +crc32_eth_calc_lut(const uint8_t *data, + uint32_t data_len, + uint32_t crc, + const uint32_t *lut) +{ + while (data_len--) + crc = lut[(crc ^ *data++) & 0xffL] ^ (crc >> 8); + + return crc; +} + +static void +rte_net_crc_scalar_init(void) +{ + /* 32-bit crc init */ + crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut); + + /* 16-bit CRC init */ + crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut); +} + +static inline uint32_t +rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len) +{ + /* return 16-bit CRC value */ + return (uint16_t)~crc32_eth_calc_lut(data, + data_len, + 0xffff, + crc16_ccitt_lut); +} + +static inline uint32_t +rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len) +{ + /* return 32-bit CRC value */ + return ~crc32_eth_calc_lut(data, + data_len, + 0xffffffffUL, + crc32_eth_lut); +} + +void +rte_net_crc_set_alg(enum rte_net_crc_alg alg) +{ + switch (alg) { + case RTE_NET_CRC_SSE42: +#ifdef X86_64_SSE42_PCLMULQDQ + handlers = handlers_sse42; +#else + alg = RTE_NET_CRC_SCALAR; +#endif + break; + case RTE_NET_CRC_SCALAR: + default: + handlers = handlers_scalar; + break; + } +} + +uint32_t +rte_net_crc_calc(const void *data, + uint32_t data_len, + enum rte_net_crc_type type) +{ + uint32_t ret; + rte_net_crc_handler f_handle; + + f_handle = handlers[type]; + ret = f_handle(data, data_len); + + return ret; +} + +/* Select highest available crc algorithm as default one */ +static inline void __attribute__((constructor)) +rte_net_crc_init(void) +{ + enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR; + + rte_net_crc_scalar_init(); + +#ifdef X86_64_SSE42_PCLMULQDQ + alg = RTE_NET_CRC_SSE42; + rte_net_crc_sse42_init(); +#endif + + rte_net_crc_set_alg(alg); 
+}
diff --git a/lib/librte_eal/common/include/arch/tile/rte_atomic.h b/lib/librte_net/rte_net_crc.h
index 28825ff6..d22286c6 100644
--- a/lib/librte_eal/common/include/arch/tile/rte_atomic.h
+++ b/lib/librte_net/rte_net_crc.h
@@ -1,7 +1,8 @@
-/*
+/*-
 * BSD LICENSE
 *
- * Copyright (C) EZchip Semiconductor Ltd. 2015.
+ * Copyright(c) 2017 Intel Corporation.
+ * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
@@ -13,7 +14,7 @@
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
- * * Neither the name of EZchip Semiconductor nor the names of its
+ * * Neither the name of Intel Corporation nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
@@ -28,65 +29,70 @@
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+ */
 
-#ifndef _RTE_ATOMIC_TILE_H_
-#define _RTE_ATOMIC_TILE_H_
+#ifndef _RTE_NET_CRC_H_
+#define _RTE_NET_CRC_H_
 
-#ifndef RTE_FORCE_INTRINSICS
-# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
-#endif
+#include <stdint.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_atomic.h"
+/** CRC polynomials */
+#define CRC32_ETH_POLYNOMIAL 0x04c11db7UL
+#define CRC16_CCITT_POLYNOMIAL 0x1021U
 
-/**
- * General memory barrier.
- *
- * Guarantees that the LOAD and STORE operations generated before the
- * barrier occur before the LOAD and STORE operations generated after.
- * This function is architecture dependent.
- */
-static inline void rte_mb(void)
-{
- __sync_synchronize();
-}
+#define CRC_LUT_SIZE 256
+
+/** CRC types */
+enum rte_net_crc_type {
+ RTE_NET_CRC16_CCITT = 0,
+ RTE_NET_CRC32_ETH,
+ RTE_NET_CRC_REQS
+};
+
+/** CRC compute algorithm */
+enum rte_net_crc_alg {
+ RTE_NET_CRC_SCALAR = 0,
+ RTE_NET_CRC_SSE42,
+};
 
 /**
- * Write memory barrier.
+ * This API sets the CRC computation algorithm (i.e. scalar version,
+ * x86 64-bit sse4.2 intrinsic version, etc.) and the internal data
+ * structures.
 *
- * Guarantees that the STORE operations generated before the barrier
- * occur before the STORE operations generated after.
- * This function is architecture dependent.
+ * @param alg
+ * This parameter is used to select the CRC implementation version.
+ * - RTE_NET_CRC_SCALAR
+ * - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)
 */
-static inline void rte_wmb(void)
-{
- __sync_synchronize();
-}
+void
+rte_net_crc_set_alg(enum rte_net_crc_alg alg);
 
 /**
- * Read memory barrier.
+ * CRC compute API
+ *
+ * @param data
+ * Pointer to the packet data for CRC computation
+ * @param data_len
+ * Data length for CRC computation
+ * @param type
+ * CRC type (enum rte_net_crc_type)
 *
- * Guarantees that the LOAD operations generated before the barrier
- * occur before the LOAD operations generated after.
- * This function is architecture dependent.
+ * @return + * CRC value */ -static inline void rte_rmb(void) -{ - __sync_synchronize(); -} - -#define rte_smp_mb() rte_mb() - -#define rte_smp_wmb() rte_compiler_barrier() - -#define rte_smp_rmb() rte_compiler_barrier() +uint32_t +rte_net_crc_calc(const void *data, + uint32_t data_len, + enum rte_net_crc_type type); #ifdef __cplusplus } #endif -#endif /* _RTE_ATOMIC_TILE_H_ */ + +#endif /* _RTE_NET_CRC_H_ */ diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map index 3b15e651..687c40ea 100644 --- a/lib/librte_net/rte_net_version.map +++ b/lib/librte_net/rte_net_version.map @@ -4,3 +4,11 @@ DPDK_16.11 { local: *; }; + +DPDK_17.05 { + global: + + rte_net_crc_calc; + rte_net_crc_set_alg; + +} DPDK_16.11; diff --git a/lib/librte_pdump/Makefile b/lib/librte_pdump/Makefile index 166441a2..1c03bcbb 100644 --- a/lib/librte_pdump/Makefile +++ b/lib/librte_pdump/Makefile @@ -48,10 +48,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_PDUMP) := rte_pdump.c # install this header file SYMLINK-$(CONFIG_RTE_LIBRTE_PDUMP)-include := rte_pdump.h -# this lib depends upon: -DEPDIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += lib/librte_mbuf -DEPDIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += lib/librte_mempool -DEPDIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += lib/librte_eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += lib/librte_ether - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c index 59686837..b599d65d 100644 --- a/lib/librte_pdump/rte_pdump.c +++ b/lib/librte_pdump/rte_pdump.c @@ -197,7 +197,7 @@ pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params) dup_bufs[d_pkts++] = p; } - ring_enq = rte_ring_enqueue_burst(ring, (void *)dup_bufs, d_pkts); + ring_enq = rte_ring_enqueue_burst(ring, (void *)dup_bufs, d_pkts, NULL); if (unlikely(ring_enq < d_pkts)) { RTE_LOG(DEBUG, PDUMP, "only %d of packets enqueued to ring\n", ring_enq); @@ -337,7 +337,7 @@ pdump_regitser_tx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue, static int set_pdump_rxtx_cbs(struct pdump_request *p) { - uint16_t nb_rx_q, nb_tx_q = 0, end_q, queue; + uint16_t nb_rx_q = 0, nb_tx_q = 0, end_q, queue; uint8_t port; int ret = 0; uint32_t flags; @@ -740,7 +740,7 @@ pdump_validate_ring_mp(struct rte_ring *ring, struct rte_mempool *mp) rte_errno = EINVAL; return -1; } - if (ring->prod.sp_enqueue || ring->cons.sc_dequeue) { + if (ring->prod.single || ring->cons.single) { RTE_LOG(ERR, PDUMP, "ring with either SP or SC settings" " is not valid for pdump, should have MP and MC settings\n"); rte_errno = EINVAL; diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h index 924b8043..ba6e39b0 100644 --- a/lib/librte_pdump/rte_pdump.h +++ b/lib/librte_pdump/rte_pdump.h @@ -201,7 +201,7 @@ rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue, * * @param path * directory path for server or client socket. - * @type + * @param type * specifies RTE_PDUMP_SOCKET_SERVER if socket path is for server. * (or) * specifies RTE_PDUMP_SOCKET_CLIENT if socket path is for client. 
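Stepping back to the librte_net CRC API introduced above: it is a self-contained two-call interface. A minimal sketch of computing an Ethernet CRC-32 over a payload follows; the wrapper name is illustrative, and the fallback behaviour relies on rte_net_crc_set_alg() degrading to the scalar look-up-table handlers when SSE4.2/PCLMULQDQ support is not compiled in, as shown in rte_net_crc.c above.

#include <stdint.h>
#include <rte_net_crc.h>

static uint32_t
payload_crc32_sketch(const uint8_t *payload, uint32_t len)
{
	/* Ask for the SSE4.2 handlers; this silently falls back to the
	 * scalar implementation when they are unavailable. */
	rte_net_crc_set_alg(RTE_NET_CRC_SSE42);

	/* 32-bit Ethernet CRC over the payload */
	return rte_net_crc_calc(payload, len, RTE_NET_CRC32_ETH);
}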
diff --git a/lib/librte_pipeline/Makefile b/lib/librte_pipeline/Makefile index 05d64ff8..7a835fd5 100644 --- a/lib/librte_pipeline/Makefile +++ b/lib/librte_pipeline/Makefile @@ -51,11 +51,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) := rte_pipeline.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_PIPELINE)-include += rte_pipeline.h -# this lib depends upon: -DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += lib/librte_eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += lib/librte_mbuf -DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += lib/librte_mempool -DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += lib/librte_table -DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += lib/librte_port - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_port/Makefile b/lib/librte_port/Makefile index 44fa7352..76629a13 100644 --- a/lib/librte_port/Makefile +++ b/lib/librte_port/Makefile @@ -77,15 +77,4 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_kni.h endif SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_source_sink.h -# this lib depends upon: -DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) := lib/librte_eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_mbuf -DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_mempool -DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_ether -DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_ip_frag -DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_sched -ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) -DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_kni -endif - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_port/rte_port_ethdev.c b/lib/librte_port/rte_port_ethdev.c index 73e5f185..d5c5fba5 100644 --- a/lib/librte_port/rte_port_ethdev.c +++ b/lib/librte_port/rte_port_ethdev.c @@ -67,7 +67,7 @@ static void * rte_port_ethdev_reader_create(void *params, int socket_id) { struct rte_port_ethdev_reader_params *conf = - (struct rte_port_ethdev_reader_params *) params; + params; struct rte_port_ethdev_reader *port; /* Check input parameters */ @@ -95,7 +95,7 @@ static int rte_port_ethdev_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) { struct rte_port_ethdev_reader *p = - (struct rte_port_ethdev_reader *) port; + port; uint16_t rx_pkt_cnt; rx_pkt_cnt = rte_eth_rx_burst(p->port_id, p->queue_id, pkts, n_pkts); @@ -120,7 +120,7 @@ static int rte_port_ethdev_reader_stats_read(void *port, struct rte_port_in_stats *stats, int clear) { struct rte_port_ethdev_reader *p = - (struct rte_port_ethdev_reader *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); @@ -163,7 +163,7 @@ static void * rte_port_ethdev_writer_create(void *params, int socket_id) { struct rte_port_ethdev_writer_params *conf = - (struct rte_port_ethdev_writer_params *) params; + params; struct rte_port_ethdev_writer *port; /* Check input parameters */ @@ -212,7 +212,7 @@ static int rte_port_ethdev_writer_tx(void *port, struct rte_mbuf *pkt) { struct rte_port_ethdev_writer *p = - (struct rte_port_ethdev_writer *) port; + port; p->tx_buf[p->tx_buf_count++] = pkt; RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(p, 1); @@ -228,7 +228,7 @@ rte_port_ethdev_writer_tx_bulk(void *port, uint64_t pkts_mask) { struct rte_port_ethdev_writer *p = - (struct rte_port_ethdev_writer *) port; + port; uint64_t bsz_mask = p->bsz_mask; uint32_t tx_buf_count = p->tx_buf_count; uint64_t expr = (pkts_mask & (pkts_mask + 1)) | @@ -274,7 +274,7 @@ static int rte_port_ethdev_writer_flush(void *port) { struct rte_port_ethdev_writer *p = - (struct rte_port_ethdev_writer *) port; + port; if (p->tx_buf_count > 0) send_burst(p); @@ -300,7 +300,7 @@ static int 
rte_port_ethdev_writer_stats_read(void *port, struct rte_port_out_stats *stats, int clear) { struct rte_port_ethdev_writer *p = - (struct rte_port_ethdev_writer *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); @@ -344,7 +344,7 @@ static void * rte_port_ethdev_writer_nodrop_create(void *params, int socket_id) { struct rte_port_ethdev_writer_nodrop_params *conf = - (struct rte_port_ethdev_writer_nodrop_params *) params; + params; struct rte_port_ethdev_writer_nodrop *port; /* Check input parameters */ @@ -418,7 +418,7 @@ static int rte_port_ethdev_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) { struct rte_port_ethdev_writer_nodrop *p = - (struct rte_port_ethdev_writer_nodrop *) port; + port; p->tx_buf[p->tx_buf_count++] = pkt; RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); @@ -434,7 +434,7 @@ rte_port_ethdev_writer_nodrop_tx_bulk(void *port, uint64_t pkts_mask) { struct rte_port_ethdev_writer_nodrop *p = - (struct rte_port_ethdev_writer_nodrop *) port; + port; uint64_t bsz_mask = p->bsz_mask; uint32_t tx_buf_count = p->tx_buf_count; @@ -456,8 +456,8 @@ rte_port_ethdev_writer_nodrop_tx_bulk(void *port, return 0; /* - * If we didnt manage to send all packets in single burst, move - * remaining packets to the buffer and call send burst. + * If we did not manage to send all packets in single burst, + * move remaining packets to the buffer and call send burst. */ for (; n_pkts_ok < n_pkts; n_pkts_ok++) { struct rte_mbuf *pkt = pkts[n_pkts_ok]; @@ -487,7 +487,7 @@ static int rte_port_ethdev_writer_nodrop_flush(void *port) { struct rte_port_ethdev_writer_nodrop *p = - (struct rte_port_ethdev_writer_nodrop *) port; + port; if (p->tx_buf_count > 0) send_burst_nodrop(p); @@ -513,7 +513,7 @@ static int rte_port_ethdev_writer_nodrop_stats_read(void *port, struct rte_port_out_stats *stats, int clear) { struct rte_port_ethdev_writer_nodrop *p = - (struct rte_port_ethdev_writer_nodrop *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); diff --git a/lib/librte_port/rte_port_fd.c b/lib/librte_port/rte_port_fd.c index 0d640f34..b5b37291 100644 --- a/lib/librte_port/rte_port_fd.c +++ b/lib/librte_port/rte_port_fd.c @@ -67,7 +67,7 @@ static void * rte_port_fd_reader_create(void *params, int socket_id) { struct rte_port_fd_reader_params *conf = - (struct rte_port_fd_reader_params *) params; + params; struct rte_port_fd_reader *port; /* Check input parameters */ @@ -107,18 +107,13 @@ rte_port_fd_reader_create(void *params, int socket_id) static int rte_port_fd_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) { - struct rte_port_fd_reader *p = (struct rte_port_fd_reader *) port; - uint32_t i; + struct rte_port_fd_reader *p = port; + uint32_t i, j; - if (rte_mempool_get_bulk(p->mempool, (void **) pkts, n_pkts) != 0) + if (rte_pktmbuf_alloc_bulk(p->mempool, pkts, n_pkts) != 0) return 0; for (i = 0; i < n_pkts; i++) { - rte_mbuf_refcnt_set(pkts[i], 1); - rte_pktmbuf_reset(pkts[i]); - } - - for (i = 0; i < n_pkts; i++) { struct rte_mbuf *pkt = pkts[i]; void *pkt_data = rte_pktmbuf_mtod(pkt, void *); ssize_t n_bytes; @@ -131,12 +126,12 @@ rte_port_fd_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) pkt->pkt_len = n_bytes; } - for ( ; i < n_pkts; i++) - rte_pktmbuf_free(pkts[i]); + for (j = i; j < n_pkts; j++) + rte_pktmbuf_free(pkts[j]); RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(p, i); - return n_pkts; + return i; } static int @@ -156,7 +151,7 @@ static int rte_port_fd_reader_stats_read(void *port, struct rte_port_in_stats 
*stats, int clear) { struct rte_port_fd_reader *p = - (struct rte_port_fd_reader *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); @@ -197,7 +192,7 @@ static void * rte_port_fd_writer_create(void *params, int socket_id) { struct rte_port_fd_writer_params *conf = - (struct rte_port_fd_writer_params *) params; + params; struct rte_port_fd_writer *port; /* Check input parameters */ @@ -253,7 +248,7 @@ static int rte_port_fd_writer_tx(void *port, struct rte_mbuf *pkt) { struct rte_port_fd_writer *p = - (struct rte_port_fd_writer *) port; + port; p->tx_buf[p->tx_buf_count++] = pkt; RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1); @@ -269,7 +264,7 @@ rte_port_fd_writer_tx_bulk(void *port, uint64_t pkts_mask) { struct rte_port_fd_writer *p = - (struct rte_port_fd_writer *) port; + port; uint32_t tx_buf_count = p->tx_buf_count; if ((pkts_mask & (pkts_mask + 1)) == 0) { @@ -301,7 +296,7 @@ static int rte_port_fd_writer_flush(void *port) { struct rte_port_fd_writer *p = - (struct rte_port_fd_writer *) port; + port; if (p->tx_buf_count > 0) send_burst(p); @@ -327,7 +322,7 @@ static int rte_port_fd_writer_stats_read(void *port, struct rte_port_out_stats *stats, int clear) { struct rte_port_fd_writer *p = - (struct rte_port_fd_writer *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); @@ -369,7 +364,7 @@ static void * rte_port_fd_writer_nodrop_create(void *params, int socket_id) { struct rte_port_fd_writer_nodrop_params *conf = - (struct rte_port_fd_writer_nodrop_params *) params; + params; struct rte_port_fd_writer_nodrop *port; /* Check input parameters */ @@ -438,7 +433,7 @@ static int rte_port_fd_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) { struct rte_port_fd_writer_nodrop *p = - (struct rte_port_fd_writer_nodrop *) port; + port; p->tx_buf[p->tx_buf_count++] = pkt; RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); @@ -454,7 +449,7 @@ rte_port_fd_writer_nodrop_tx_bulk(void *port, uint64_t pkts_mask) { struct rte_port_fd_writer_nodrop *p = - (struct rte_port_fd_writer_nodrop *) port; + port; uint32_t tx_buf_count = p->tx_buf_count; if ((pkts_mask & (pkts_mask + 1)) == 0) { @@ -486,7 +481,7 @@ static int rte_port_fd_writer_nodrop_flush(void *port) { struct rte_port_fd_writer_nodrop *p = - (struct rte_port_fd_writer_nodrop *) port; + port; if (p->tx_buf_count > 0) send_burst_nodrop(p); @@ -512,7 +507,7 @@ static int rte_port_fd_writer_nodrop_stats_read(void *port, struct rte_port_out_stats *stats, int clear) { struct rte_port_fd_writer_nodrop *p = - (struct rte_port_fd_writer_nodrop *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); diff --git a/lib/librte_port/rte_port_frag.c b/lib/librte_port/rte_port_frag.c index 0fcace99..a00c9ae1 100644 --- a/lib/librte_port/rte_port_frag.c +++ b/lib/librte_port/rte_port_frag.c @@ -88,7 +88,7 @@ static void * rte_port_ring_reader_frag_create(void *params, int socket_id, int is_ipv4) { struct rte_port_ring_reader_frag_params *conf = - (struct rte_port_ring_reader_frag_params *) params; + params; struct rte_port_ring_reader_frag *port; /* Check input parameters */ @@ -159,7 +159,7 @@ rte_port_ring_reader_frag_rx(void *port, uint32_t n_pkts) { struct rte_port_ring_reader_frag *p = - (struct rte_port_ring_reader_frag *) port; + port; uint32_t n_pkts_out; n_pkts_out = 0; @@ -186,7 +186,8 @@ rte_port_ring_reader_frag_rx(void *port, /* If "pkts" buffer is empty, read packet burst from ring */ if (p->n_pkts == 0) { p->n_pkts = rte_ring_sc_dequeue_burst(p->ring, - (void **) p->pkts, 
RTE_PORT_IN_BURST_SIZE_MAX); + (void **) p->pkts, RTE_PORT_IN_BURST_SIZE_MAX, + NULL); RTE_PORT_RING_READER_FRAG_STATS_PKTS_IN_ADD(p, p->n_pkts); if (p->n_pkts == 0) return n_pkts_out; @@ -276,7 +277,7 @@ rte_port_frag_reader_stats_read(void *port, struct rte_port_in_stats *stats, int clear) { struct rte_port_ring_reader_frag *p = - (struct rte_port_ring_reader_frag *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); diff --git a/lib/librte_port/rte_port_kni.c b/lib/librte_port/rte_port_kni.c index 08f4ac2a..2515fb2a 100644 --- a/lib/librte_port/rte_port_kni.c +++ b/lib/librte_port/rte_port_kni.c @@ -66,7 +66,7 @@ static void * rte_port_kni_reader_create(void *params, int socket_id) { struct rte_port_kni_reader_params *conf = - (struct rte_port_kni_reader_params *) params; + params; struct rte_port_kni_reader *port; /* Check input parameters */ @@ -93,7 +93,7 @@ static int rte_port_kni_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) { struct rte_port_kni_reader *p = - (struct rte_port_kni_reader *) port; + port; uint16_t rx_pkt_cnt; rx_pkt_cnt = rte_kni_rx_burst(p->kni, pkts, n_pkts); @@ -118,7 +118,7 @@ static int rte_port_kni_reader_stats_read(void *port, struct rte_port_in_stats *stats, int clear) { struct rte_port_kni_reader *p = - (struct rte_port_kni_reader *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); @@ -160,7 +160,7 @@ static void * rte_port_kni_writer_create(void *params, int socket_id) { struct rte_port_kni_writer_params *conf = - (struct rte_port_kni_writer_params *) params; + params; struct rte_port_kni_writer *port; /* Check input parameters */ @@ -207,7 +207,7 @@ static int rte_port_kni_writer_tx(void *port, struct rte_mbuf *pkt) { struct rte_port_kni_writer *p = - (struct rte_port_kni_writer *) port; + port; p->tx_buf[p->tx_buf_count++] = pkt; RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(p, 1); @@ -223,7 +223,7 @@ rte_port_kni_writer_tx_bulk(void *port, uint64_t pkts_mask) { struct rte_port_kni_writer *p = - (struct rte_port_kni_writer *) port; + port; uint64_t bsz_mask = p->bsz_mask; uint32_t tx_buf_count = p->tx_buf_count; uint64_t expr = (pkts_mask & (pkts_mask + 1)) | @@ -268,7 +268,7 @@ static int rte_port_kni_writer_flush(void *port) { struct rte_port_kni_writer *p = - (struct rte_port_kni_writer *) port; + port; if (p->tx_buf_count > 0) send_burst(p); @@ -294,7 +294,7 @@ static int rte_port_kni_writer_stats_read(void *port, struct rte_port_out_stats *stats, int clear) { struct rte_port_kni_writer *p = - (struct rte_port_kni_writer *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); @@ -337,7 +337,7 @@ static void * rte_port_kni_writer_nodrop_create(void *params, int socket_id) { struct rte_port_kni_writer_nodrop_params *conf = - (struct rte_port_kni_writer_nodrop_params *) params; + params; struct rte_port_kni_writer_nodrop *port; /* Check input parameters */ @@ -410,7 +410,7 @@ static int rte_port_kni_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) { struct rte_port_kni_writer_nodrop *p = - (struct rte_port_kni_writer_nodrop *) port; + port; p->tx_buf[p->tx_buf_count++] = pkt; RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(p, 1); @@ -426,7 +426,7 @@ rte_port_kni_writer_nodrop_tx_bulk(void *port, uint64_t pkts_mask) { struct rte_port_kni_writer_nodrop *p = - (struct rte_port_kni_writer_nodrop *) port; + port; uint64_t bsz_mask = p->bsz_mask; uint32_t tx_buf_count = p->tx_buf_count; @@ -478,7 +478,7 @@ static int rte_port_kni_writer_nodrop_flush(void *port) { struct 
rte_port_kni_writer_nodrop *p = - (struct rte_port_kni_writer_nodrop *) port; + port; if (p->tx_buf_count > 0) send_burst_nodrop(p); @@ -504,7 +504,7 @@ static int rte_port_kni_writer_nodrop_stats_read(void *port, struct rte_port_out_stats *stats, int clear) { struct rte_port_kni_writer_nodrop *p = - (struct rte_port_kni_writer_nodrop *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); diff --git a/lib/librte_port/rte_port_ras.c b/lib/librte_port/rte_port_ras.c index c4bb5081..415fadd5 100644 --- a/lib/librte_port/rte_port_ras.c +++ b/lib/librte_port/rte_port_ras.c @@ -93,7 +93,7 @@ static void * rte_port_ring_writer_ras_create(void *params, int socket_id, int is_ipv4) { struct rte_port_ring_writer_ras_params *conf = - (struct rte_port_ring_writer_ras_params *) params; + params; struct rte_port_ring_writer_ras *port; uint64_t frag_cycles; @@ -167,7 +167,7 @@ send_burst(struct rte_port_ring_writer_ras *p) uint32_t nb_tx; nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf, - p->tx_buf_count); + p->tx_buf_count, NULL); RTE_PORT_RING_WRITER_RAS_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); for ( ; nb_tx < p->tx_buf_count; nb_tx++) @@ -243,7 +243,7 @@ static int rte_port_ring_writer_ras_tx(void *port, struct rte_mbuf *pkt) { struct rte_port_ring_writer_ras *p = - (struct rte_port_ring_writer_ras *) port; + port; RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(p, 1); p->f_ras(p, pkt); @@ -259,7 +259,7 @@ rte_port_ring_writer_ras_tx_bulk(void *port, uint64_t pkts_mask) { struct rte_port_ring_writer_ras *p = - (struct rte_port_ring_writer_ras *) port; + port; if ((pkts_mask & (pkts_mask + 1)) == 0) { uint64_t n_pkts = __builtin_popcountll(pkts_mask); @@ -295,7 +295,7 @@ static int rte_port_ring_writer_ras_flush(void *port) { struct rte_port_ring_writer_ras *p = - (struct rte_port_ring_writer_ras *) port; + port; if (p->tx_buf_count > 0) send_burst(p); @@ -307,7 +307,7 @@ static int rte_port_ring_writer_ras_free(void *port) { struct rte_port_ring_writer_ras *p = - (struct rte_port_ring_writer_ras *) port; + port; if (port == NULL) { RTE_LOG(ERR, PORT, "%s: Parameter port is NULL\n", __func__); @@ -326,7 +326,7 @@ rte_port_ras_writer_stats_read(void *port, struct rte_port_out_stats *stats, int clear) { struct rte_port_ring_writer_ras *p = - (struct rte_port_ring_writer_ras *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); diff --git a/lib/librte_port/rte_port_ring.c b/lib/librte_port/rte_port_ring.c index 3b9d3d08..64bd965f 100644 --- a/lib/librte_port/rte_port_ring.c +++ b/lib/librte_port/rte_port_ring.c @@ -67,14 +67,14 @@ rte_port_ring_reader_create_internal(void *params, int socket_id, uint32_t is_multi) { struct rte_port_ring_reader_params *conf = - (struct rte_port_ring_reader_params *) params; + params; struct rte_port_ring_reader *port; /* Check input parameters */ if ((conf == NULL) || (conf->ring == NULL) || - (conf->ring->cons.sc_dequeue && is_multi) || - (!(conf->ring->cons.sc_dequeue) && !is_multi)) { + (conf->ring->cons.single && is_multi) || + (!(conf->ring->cons.single) && !is_multi)) { RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__); return NULL; } @@ -108,10 +108,11 @@ rte_port_ring_multi_reader_create(void *params, int socket_id) static int rte_port_ring_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) { - struct rte_port_ring_reader *p = (struct rte_port_ring_reader *) port; + struct rte_port_ring_reader *p = port; uint32_t nb_rx; - nb_rx = rte_ring_sc_dequeue_burst(p->ring, (void **) pkts, 
n_pkts); + nb_rx = rte_ring_sc_dequeue_burst(p->ring, (void **) pkts, + n_pkts, NULL); RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(p, nb_rx); return nb_rx; @@ -121,10 +122,11 @@ static int rte_port_ring_multi_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) { - struct rte_port_ring_reader *p = (struct rte_port_ring_reader *) port; + struct rte_port_ring_reader *p = port; uint32_t nb_rx; - nb_rx = rte_ring_mc_dequeue_burst(p->ring, (void **) pkts, n_pkts); + nb_rx = rte_ring_mc_dequeue_burst(p->ring, (void **) pkts, + n_pkts, NULL); RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(p, nb_rx); return nb_rx; @@ -148,7 +150,7 @@ rte_port_ring_reader_stats_read(void *port, struct rte_port_in_stats *stats, int clear) { struct rte_port_ring_reader *p = - (struct rte_port_ring_reader *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); @@ -192,14 +194,14 @@ rte_port_ring_writer_create_internal(void *params, int socket_id, uint32_t is_multi) { struct rte_port_ring_writer_params *conf = - (struct rte_port_ring_writer_params *) params; + params; struct rte_port_ring_writer *port; /* Check input parameters */ if ((conf == NULL) || (conf->ring == NULL) || - (conf->ring->prod.sp_enqueue && is_multi) || - (!(conf->ring->prod.sp_enqueue) && !is_multi) || + (conf->ring->prod.single && is_multi) || + (!(conf->ring->prod.single) && !is_multi) || (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) { RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__); return NULL; @@ -241,7 +243,7 @@ send_burst(struct rte_port_ring_writer *p) uint32_t nb_tx; nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf, - p->tx_buf_count); + p->tx_buf_count, NULL); RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); for ( ; nb_tx < p->tx_buf_count; nb_tx++) @@ -256,7 +258,7 @@ send_burst_mp(struct rte_port_ring_writer *p) uint32_t nb_tx; nb_tx = rte_ring_mp_enqueue_burst(p->ring, (void **)p->tx_buf, - p->tx_buf_count); + p->tx_buf_count, NULL); RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); for ( ; nb_tx < p->tx_buf_count; nb_tx++) @@ -268,7 +270,7 @@ send_burst_mp(struct rte_port_ring_writer *p) static int rte_port_ring_writer_tx(void *port, struct rte_mbuf *pkt) { - struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port; + struct rte_port_ring_writer *p = port; p->tx_buf[p->tx_buf_count++] = pkt; RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, 1); @@ -281,7 +283,7 @@ rte_port_ring_writer_tx(void *port, struct rte_mbuf *pkt) static int rte_port_ring_multi_writer_tx(void *port, struct rte_mbuf *pkt) { - struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port; + struct rte_port_ring_writer *p = port; p->tx_buf[p->tx_buf_count++] = pkt; RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, 1); @@ -298,7 +300,7 @@ rte_port_ring_writer_tx_bulk_internal(void *port, uint32_t is_multi) { struct rte_port_ring_writer *p = - (struct rte_port_ring_writer *) port; + port; uint64_t bsz_mask = p->bsz_mask; uint32_t tx_buf_count = p->tx_buf_count; @@ -318,11 +320,11 @@ rte_port_ring_writer_tx_bulk_internal(void *port, RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, n_pkts); if (is_multi) - n_pkts_ok = rte_ring_mp_enqueue_burst(p->ring, (void **)pkts, - n_pkts); + n_pkts_ok = rte_ring_mp_enqueue_burst(p->ring, + (void **)pkts, n_pkts, NULL); else - n_pkts_ok = rte_ring_sp_enqueue_burst(p->ring, (void **)pkts, - n_pkts); + n_pkts_ok = rte_ring_sp_enqueue_burst(p->ring, + (void **)pkts, n_pkts, NULL); RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - n_pkts_ok); for ( 
; n_pkts_ok < n_pkts; n_pkts_ok++) { @@ -372,7 +374,7 @@ rte_port_ring_multi_writer_tx_bulk(void *port, static int rte_port_ring_writer_flush(void *port) { - struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port; + struct rte_port_ring_writer *p = port; if (p->tx_buf_count > 0) send_burst(p); @@ -383,7 +385,7 @@ rte_port_ring_writer_flush(void *port) static int rte_port_ring_multi_writer_flush(void *port) { - struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port; + struct rte_port_ring_writer *p = port; if (p->tx_buf_count > 0) send_burst_mp(p); @@ -394,7 +396,7 @@ rte_port_ring_multi_writer_flush(void *port) static int rte_port_ring_writer_free(void *port) { - struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port; + struct rte_port_ring_writer *p = port; if (port == NULL) { RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); @@ -416,7 +418,7 @@ rte_port_ring_writer_stats_read(void *port, struct rte_port_out_stats *stats, int clear) { struct rte_port_ring_writer *p = - (struct rte_port_ring_writer *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); @@ -461,14 +463,14 @@ rte_port_ring_writer_nodrop_create_internal(void *params, int socket_id, uint32_t is_multi) { struct rte_port_ring_writer_nodrop_params *conf = - (struct rte_port_ring_writer_nodrop_params *) params; + params; struct rte_port_ring_writer_nodrop *port; /* Check input parameters */ if ((conf == NULL) || (conf->ring == NULL) || - (conf->ring->prod.sp_enqueue && is_multi) || - (!(conf->ring->prod.sp_enqueue) && !is_multi) || + (conf->ring->prod.single && is_multi) || + (!(conf->ring->prod.single) && !is_multi) || (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) { RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__); return NULL; @@ -517,7 +519,7 @@ send_burst_nodrop(struct rte_port_ring_writer_nodrop *p) uint32_t nb_tx = 0, i; nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf, - p->tx_buf_count); + p->tx_buf_count, NULL); /* We sent all the packets in a first try */ if (nb_tx >= p->tx_buf_count) { @@ -527,7 +529,8 @@ send_burst_nodrop(struct rte_port_ring_writer_nodrop *p) for (i = 0; i < p->n_retries; i++) { nb_tx += rte_ring_sp_enqueue_burst(p->ring, - (void **) (p->tx_buf + nb_tx), p->tx_buf_count - nb_tx); + (void **) (p->tx_buf + nb_tx), + p->tx_buf_count - nb_tx, NULL); /* We sent all the packets in more than one try */ if (nb_tx >= p->tx_buf_count) { @@ -550,7 +553,7 @@ send_burst_mp_nodrop(struct rte_port_ring_writer_nodrop *p) uint32_t nb_tx = 0, i; nb_tx = rte_ring_mp_enqueue_burst(p->ring, (void **)p->tx_buf, - p->tx_buf_count); + p->tx_buf_count, NULL); /* We sent all the packets in a first try */ if (nb_tx >= p->tx_buf_count) { @@ -560,7 +563,8 @@ send_burst_mp_nodrop(struct rte_port_ring_writer_nodrop *p) for (i = 0; i < p->n_retries; i++) { nb_tx += rte_ring_mp_enqueue_burst(p->ring, - (void **) (p->tx_buf + nb_tx), p->tx_buf_count - nb_tx); + (void **) (p->tx_buf + nb_tx), + p->tx_buf_count - nb_tx, NULL); /* We sent all the packets in more than one try */ if (nb_tx >= p->tx_buf_count) { @@ -581,7 +585,7 @@ static int rte_port_ring_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) { struct rte_port_ring_writer_nodrop *p = - (struct rte_port_ring_writer_nodrop *) port; + port; p->tx_buf[p->tx_buf_count++] = pkt; RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); @@ -595,7 +599,7 @@ static int rte_port_ring_multi_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) { struct rte_port_ring_writer_nodrop *p = - (struct 
rte_port_ring_writer_nodrop *) port; + port; p->tx_buf[p->tx_buf_count++] = pkt; RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); @@ -612,7 +616,7 @@ rte_port_ring_writer_nodrop_tx_bulk_internal(void *port, uint32_t is_multi) { struct rte_port_ring_writer_nodrop *p = - (struct rte_port_ring_writer_nodrop *) port; + port; uint64_t bsz_mask = p->bsz_mask; uint32_t tx_buf_count = p->tx_buf_count; @@ -633,10 +637,12 @@ rte_port_ring_writer_nodrop_tx_bulk_internal(void *port, RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts); if (is_multi) n_pkts_ok = - rte_ring_mp_enqueue_burst(p->ring, (void **)pkts, n_pkts); + rte_ring_mp_enqueue_burst(p->ring, + (void **)pkts, n_pkts, NULL); else n_pkts_ok = - rte_ring_sp_enqueue_burst(p->ring, (void **)pkts, n_pkts); + rte_ring_sp_enqueue_burst(p->ring, + (void **)pkts, n_pkts, NULL); if (n_pkts_ok >= n_pkts) return 0; @@ -699,7 +705,7 @@ static int rte_port_ring_writer_nodrop_flush(void *port) { struct rte_port_ring_writer_nodrop *p = - (struct rte_port_ring_writer_nodrop *) port; + port; if (p->tx_buf_count > 0) send_burst_nodrop(p); @@ -711,7 +717,7 @@ static int rte_port_ring_multi_writer_nodrop_flush(void *port) { struct rte_port_ring_writer_nodrop *p = - (struct rte_port_ring_writer_nodrop *) port; + port; if (p->tx_buf_count > 0) send_burst_mp_nodrop(p); @@ -723,7 +729,7 @@ static int rte_port_ring_writer_nodrop_free(void *port) { struct rte_port_ring_writer_nodrop *p = - (struct rte_port_ring_writer_nodrop *) port; + port; if (port == NULL) { RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); @@ -745,7 +751,7 @@ rte_port_ring_writer_nodrop_stats_read(void *port, struct rte_port_out_stats *stats, int clear) { struct rte_port_ring_writer_nodrop *p = - (struct rte_port_ring_writer_nodrop *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); diff --git a/lib/librte_port/rte_port_sched.c b/lib/librte_port/rte_port_sched.c index 25217d62..9100a197 100644 --- a/lib/librte_port/rte_port_sched.c +++ b/lib/librte_port/rte_port_sched.c @@ -64,7 +64,7 @@ static void * rte_port_sched_reader_create(void *params, int socket_id) { struct rte_port_sched_reader_params *conf = - (struct rte_port_sched_reader_params *) params; + params; struct rte_port_sched_reader *port; /* Check input parameters */ @@ -91,7 +91,7 @@ rte_port_sched_reader_create(void *params, int socket_id) static int rte_port_sched_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) { - struct rte_port_sched_reader *p = (struct rte_port_sched_reader *) port; + struct rte_port_sched_reader *p = port; uint32_t nb_rx; nb_rx = rte_sched_port_dequeue(p->sched, pkts, n_pkts); @@ -118,7 +118,7 @@ rte_port_sched_reader_stats_read(void *port, struct rte_port_in_stats *stats, int clear) { struct rte_port_sched_reader *p = - (struct rte_port_sched_reader *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); @@ -160,7 +160,7 @@ static void * rte_port_sched_writer_create(void *params, int socket_id) { struct rte_port_sched_writer_params *conf = - (struct rte_port_sched_writer_params *) params; + params; struct rte_port_sched_writer *port; /* Check input parameters */ @@ -292,7 +292,7 @@ rte_port_sched_writer_stats_read(void *port, struct rte_port_out_stats *stats, int clear) { struct rte_port_sched_writer *p = - (struct rte_port_sched_writer *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); diff --git a/lib/librte_port/rte_port_source_sink.c b/lib/librte_port/rte_port_source_sink.c index 4cad7109..a79f2f64 
100644 --- a/lib/librte_port/rte_port_source_sink.c +++ b/lib/librte_port/rte_port_source_sink.c @@ -228,7 +228,7 @@ static void * rte_port_source_create(void *params, int socket_id) { struct rte_port_source_params *p = - (struct rte_port_source_params *) params; + params; struct rte_port_source *port; /* Check input arguments*/ @@ -265,7 +265,7 @@ static int rte_port_source_free(void *port) { struct rte_port_source *p = - (struct rte_port_source *)port; + port; /* Check input parameters */ if (p == NULL) @@ -286,17 +286,12 @@ rte_port_source_free(void *port) static int rte_port_source_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) { - struct rte_port_source *p = (struct rte_port_source *) port; + struct rte_port_source *p = port; uint32_t i; - if (rte_mempool_get_bulk(p->mempool, (void **) pkts, n_pkts) != 0) + if (rte_pktmbuf_alloc_bulk(p->mempool, pkts, n_pkts) != 0) return 0; - for (i = 0; i < n_pkts; i++) { - rte_mbuf_refcnt_set(pkts[i], 1); - rte_pktmbuf_reset(pkts[i]); - } - if (p->pkt_buff != NULL) { for (i = 0; i < n_pkts; i++) { uint8_t *pkt_data = rte_pktmbuf_mtod(pkts[i], @@ -323,7 +318,7 @@ rte_port_source_stats_read(void *port, struct rte_port_in_stats *stats, int clear) { struct rte_port_source *p = - (struct rte_port_source *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); @@ -400,7 +395,7 @@ pcap_sink_open(struct rte_port_sink *port, static void pcap_sink_write_pkt(struct rte_port_sink *port, struct rte_mbuf *mbuf) { - uint8_t *pcap_dumper = (uint8_t *)(port->dumper); + uint8_t *pcap_dumper = (port->dumper); struct pcap_pkthdr pcap_hdr; uint8_t jumbo_pkt_buf[ETHER_MAX_JUMBO_FRAME_LEN]; uint8_t *pkt; @@ -524,7 +519,7 @@ rte_port_sink_create(void *params, int socket_id) static int rte_port_sink_tx(void *port, struct rte_mbuf *pkt) { - struct rte_port_sink *p = (struct rte_port_sink *) port; + struct rte_port_sink *p = port; RTE_PORT_SINK_STATS_PKTS_IN_ADD(p, 1); if (p->dumper != NULL) @@ -539,7 +534,7 @@ static int rte_port_sink_tx_bulk(void *port, struct rte_mbuf **pkts, uint64_t pkts_mask) { - struct rte_port_sink *p = (struct rte_port_sink *) port; + struct rte_port_sink *p = port; if ((pkts_mask & (pkts_mask + 1)) == 0) { uint64_t n_pkts = __builtin_popcountll(pkts_mask); @@ -591,7 +586,7 @@ static int rte_port_sink_flush(void *port) { struct rte_port_sink *p = - (struct rte_port_sink *)port; + port; if (p == NULL) return 0; @@ -605,7 +600,7 @@ static int rte_port_sink_free(void *port) { struct rte_port_sink *p = - (struct rte_port_sink *)port; + port; if (p == NULL) return 0; @@ -622,7 +617,7 @@ rte_port_sink_stats_read(void *port, struct rte_port_out_stats *stats, int clear) { struct rte_port_sink *p = - (struct rte_port_sink *) port; + port; if (stats != NULL) memcpy(stats, &p->stats, sizeof(p->stats)); diff --git a/lib/librte_power/Makefile b/lib/librte_power/Makefile index cee95cd8..06cd10e8 100644 --- a/lib/librte_power/Makefile +++ b/lib/librte_power/Makefile @@ -47,7 +47,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_POWER) += rte_power_kvm_vm.c guest_channel.c # install this header file SYMLINK-$(CONFIG_RTE_LIBRTE_POWER)-include := rte_power.h -# this lib needs eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_POWER) += lib/librte_eal - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_reorder/Makefile b/lib/librte_reorder/Makefile index 0d111aad..4e44e72f 100644 --- a/lib/librte_reorder/Makefile +++ b/lib/librte_reorder/Makefile @@ -47,9 +47,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_REORDER) := rte_reorder.c # install this header file 
SYMLINK-$(CONFIG_RTE_LIBRTE_REORDER)-include := rte_reorder.h -# this lib depends upon: -DEPDIRS-$(CONFIG_RTE_LIBRTE_REORDER) += lib/librte_mbuf -DEPDIRS-$(CONFIG_RTE_LIBRTE_REORDER) += lib/librte_mempool -DEPDIRS-$(CONFIG_RTE_LIBRTE_REORDER) += lib/librte_eal - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ring/Makefile b/lib/librte_ring/Makefile index 4b1112e4..3e2f4b87 100644 --- a/lib/librte_ring/Makefile +++ b/lib/librte_ring/Makefile @@ -46,6 +46,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_RING) := rte_ring.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_RING)-include := rte_ring.h -DEPDIRS-$(CONFIG_RTE_LIBRTE_RING) += lib/librte_eal - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ring/rte_ring.c b/lib/librte_ring/rte_ring.c index ca0a1082..5f98c33f 100644 --- a/lib/librte_ring/rte_ring.c +++ b/lib/librte_ring/rte_ring.c @@ -127,18 +127,10 @@ rte_ring_init(struct rte_ring *r, const char *name, unsigned count, /* compilation-time checks */ RTE_BUILD_BUG_ON((sizeof(struct rte_ring) & RTE_CACHE_LINE_MASK) != 0); -#ifdef RTE_RING_SPLIT_PROD_CONS RTE_BUILD_BUG_ON((offsetof(struct rte_ring, cons) & RTE_CACHE_LINE_MASK) != 0); -#endif RTE_BUILD_BUG_ON((offsetof(struct rte_ring, prod) & RTE_CACHE_LINE_MASK) != 0); -#ifdef RTE_LIBRTE_RING_DEBUG - RTE_BUILD_BUG_ON((sizeof(struct rte_ring_debug_stats) & - RTE_CACHE_LINE_MASK) != 0); - RTE_BUILD_BUG_ON((offsetof(struct rte_ring, stats) & - RTE_CACHE_LINE_MASK) != 0); -#endif /* init the ring structure */ memset(r, 0, sizeof(*r)); @@ -146,11 +138,10 @@ rte_ring_init(struct rte_ring *r, const char *name, unsigned count, if (ret < 0 || ret >= (int)sizeof(r->name)) return -ENAMETOOLONG; r->flags = flags; - r->prod.watermark = count; - r->prod.sp_enqueue = !!(flags & RING_F_SP_ENQ); - r->cons.sc_dequeue = !!(flags & RING_F_SC_DEQ); - r->prod.size = r->cons.size = count; - r->prod.mask = r->cons.mask = count-1; + r->prod.single = (flags & RING_F_SP_ENQ) ? __IS_SP : __IS_MP; + r->cons.single = (flags & RING_F_SC_DEQ) ? __IS_SC : __IS_MC; + r->size = count; + r->mask = count - 1; r->prod.head = r->cons.head = 0; r->prod.tail = r->cons.tail = 0; @@ -264,76 +255,19 @@ rte_ring_free(struct rte_ring *r) rte_free(te); } -/* - * change the high water mark. 
If *count* is 0, water marking is - * disabled - */ -int -rte_ring_set_water_mark(struct rte_ring *r, unsigned count) -{ - if (count >= r->prod.size) - return -EINVAL; - - /* if count is 0, disable the watermarking */ - if (count == 0) - count = r->prod.size; - - r->prod.watermark = count; - return 0; -} - /* dump the status of the ring on the console */ void rte_ring_dump(FILE *f, const struct rte_ring *r) { -#ifdef RTE_LIBRTE_RING_DEBUG - struct rte_ring_debug_stats sum; - unsigned lcore_id; -#endif - fprintf(f, "ring <%s>@%p\n", r->name, r); fprintf(f, " flags=%x\n", r->flags); - fprintf(f, " size=%"PRIu32"\n", r->prod.size); + fprintf(f, " size=%"PRIu32"\n", r->size); fprintf(f, " ct=%"PRIu32"\n", r->cons.tail); fprintf(f, " ch=%"PRIu32"\n", r->cons.head); fprintf(f, " pt=%"PRIu32"\n", r->prod.tail); fprintf(f, " ph=%"PRIu32"\n", r->prod.head); fprintf(f, " used=%u\n", rte_ring_count(r)); fprintf(f, " avail=%u\n", rte_ring_free_count(r)); - if (r->prod.watermark == r->prod.size) - fprintf(f, " watermark=0\n"); - else - fprintf(f, " watermark=%"PRIu32"\n", r->prod.watermark); - - /* sum and dump statistics */ -#ifdef RTE_LIBRTE_RING_DEBUG - memset(&sum, 0, sizeof(sum)); - for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { - sum.enq_success_bulk += r->stats[lcore_id].enq_success_bulk; - sum.enq_success_objs += r->stats[lcore_id].enq_success_objs; - sum.enq_quota_bulk += r->stats[lcore_id].enq_quota_bulk; - sum.enq_quota_objs += r->stats[lcore_id].enq_quota_objs; - sum.enq_fail_bulk += r->stats[lcore_id].enq_fail_bulk; - sum.enq_fail_objs += r->stats[lcore_id].enq_fail_objs; - sum.deq_success_bulk += r->stats[lcore_id].deq_success_bulk; - sum.deq_success_objs += r->stats[lcore_id].deq_success_objs; - sum.deq_fail_bulk += r->stats[lcore_id].deq_fail_bulk; - sum.deq_fail_objs += r->stats[lcore_id].deq_fail_objs; - } - fprintf(f, " size=%"PRIu32"\n", r->prod.size); - fprintf(f, " enq_success_bulk=%"PRIu64"\n", sum.enq_success_bulk); - fprintf(f, " enq_success_objs=%"PRIu64"\n", sum.enq_success_objs); - fprintf(f, " enq_quota_bulk=%"PRIu64"\n", sum.enq_quota_bulk); - fprintf(f, " enq_quota_objs=%"PRIu64"\n", sum.enq_quota_objs); - fprintf(f, " enq_fail_bulk=%"PRIu64"\n", sum.enq_fail_bulk); - fprintf(f, " enq_fail_objs=%"PRIu64"\n", sum.enq_fail_objs); - fprintf(f, " deq_success_bulk=%"PRIu64"\n", sum.deq_success_bulk); - fprintf(f, " deq_success_objs=%"PRIu64"\n", sum.deq_success_objs); - fprintf(f, " deq_fail_bulk=%"PRIu64"\n", sum.deq_fail_bulk); - fprintf(f, " deq_fail_objs=%"PRIu64"\n", sum.deq_fail_objs); -#else - fprintf(f, " no statistics available\n"); -#endif } /* dump the status of all rings on the console */ diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h index 32b8c8d2..97f025a1 100644 --- a/lib/librte_ring/rte_ring.h +++ b/lib/librte_ring/rte_ring.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -106,38 +106,30 @@ extern "C" { enum rte_ring_queue_behavior { RTE_RING_QUEUE_FIXED = 0, /* Enq/Deq a fixed number of items from a ring */ - RTE_RING_QUEUE_VARIABLE /* Enq/Deq as many items a possible from ring */ + RTE_RING_QUEUE_VARIABLE /* Enq/Deq as many items as possible from ring */ }; -#ifdef RTE_LIBRTE_RING_DEBUG -/** - * A structure that stores the ring statistics (per-lcore). 
- */ -struct rte_ring_debug_stats { - uint64_t enq_success_bulk; /**< Successful enqueues number. */ - uint64_t enq_success_objs; /**< Objects successfully enqueued. */ - uint64_t enq_quota_bulk; /**< Successful enqueues above watermark. */ - uint64_t enq_quota_objs; /**< Objects enqueued above watermark. */ - uint64_t enq_fail_bulk; /**< Failed enqueues number. */ - uint64_t enq_fail_objs; /**< Objects that failed to be enqueued. */ - uint64_t deq_success_bulk; /**< Successful dequeues number. */ - uint64_t deq_success_objs; /**< Objects successfully dequeued. */ - uint64_t deq_fail_bulk; /**< Failed dequeues number. */ - uint64_t deq_fail_objs; /**< Objects that failed to be dequeued. */ -} __rte_cache_aligned; -#endif - #define RTE_RING_MZ_PREFIX "RG_" /**< The maximum length of a ring name. */ #define RTE_RING_NAMESIZE (RTE_MEMZONE_NAMESIZE - \ sizeof(RTE_RING_MZ_PREFIX) + 1) -#ifndef RTE_RING_PAUSE_REP_COUNT -#define RTE_RING_PAUSE_REP_COUNT 0 /**< Yield after pause num of times, no yield - * if RTE_RING_PAUSE_REP not defined. */ +struct rte_memzone; /* forward declaration, so as not to require memzone.h */ + +#if RTE_CACHE_LINE_SIZE < 128 +#define PROD_ALIGN (RTE_CACHE_LINE_SIZE * 2) +#define CONS_ALIGN (RTE_CACHE_LINE_SIZE * 2) +#else +#define PROD_ALIGN RTE_CACHE_LINE_SIZE +#define CONS_ALIGN RTE_CACHE_LINE_SIZE #endif -struct rte_memzone; /* forward declaration, so as not to require memzone.h */ +/* structure to hold a pair of head/tail values and other metadata */ +struct rte_ring_headtail { + volatile uint32_t head; /**< Prod/consumer head. */ + volatile uint32_t tail; /**< Prod/consumer tail. */ + uint32_t single; /**< True if single prod/cons */ +}; /** * An RTE ring structure. @@ -155,68 +147,29 @@ struct rte_ring { * compatibility requirements, it could be changed to RTE_RING_NAMESIZE * next time the ABI changes */ - char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the ring. */ - int flags; /**< Flags supplied at creation. */ + char name[RTE_MEMZONE_NAMESIZE] __rte_cache_aligned; /**< Name of the ring. */ + int flags; /**< Flags supplied at creation. */ const struct rte_memzone *memzone; /**< Memzone, if any, containing the rte_ring */ + uint32_t size; /**< Size of ring. */ + uint32_t mask; /**< Mask (size-1) of ring. */ /** Ring producer status. */ - struct prod { - uint32_t watermark; /**< Maximum items before EDQUOT. */ - uint32_t sp_enqueue; /**< True, if single producer. */ - uint32_t size; /**< Size of ring. */ - uint32_t mask; /**< Mask (size-1) of ring. */ - volatile uint32_t head; /**< Producer head. */ - volatile uint32_t tail; /**< Producer tail. */ - } prod __rte_cache_aligned; + struct rte_ring_headtail prod __rte_aligned(PROD_ALIGN); /** Ring consumer status. */ - struct cons { - uint32_t sc_dequeue; /**< True, if single consumer. */ - uint32_t size; /**< Size of the ring. */ - uint32_t mask; /**< Mask (size-1) of ring. */ - volatile uint32_t head; /**< Consumer head. */ - volatile uint32_t tail; /**< Consumer tail. */ -#ifdef RTE_RING_SPLIT_PROD_CONS - } cons __rte_cache_aligned; -#else - } cons; -#endif - -#ifdef RTE_LIBRTE_RING_DEBUG - struct rte_ring_debug_stats stats[RTE_MAX_LCORE]; -#endif - - void *ring[] __rte_cache_aligned; /**< Memory space of ring starts here. - * not volatile so need to be careful - * about compiler re-ordering */ + struct rte_ring_headtail cons __rte_aligned(CONS_ALIGN); }; #define RING_F_SP_ENQ 0x0001 /**< The default enqueue is "single-producer". */ #define RING_F_SC_DEQ 0x0002 /**< The default dequeue is "single-consumer". 
*/ -#define RTE_RING_QUOT_EXCEED (1 << 31) /**< Quota exceed for burst ops */ #define RTE_RING_SZ_MASK (unsigned)(0x0fffffff) /**< Ring size mask */ -/** - * @internal When debug is enabled, store ring statistics. - * @param r - * A pointer to the ring. - * @param name - * The name of the statistics field to increment in the ring. - * @param n - * The number to add to the object-oriented statistics. - */ -#ifdef RTE_LIBRTE_RING_DEBUG -#define __RING_STAT_ADD(r, name, n) do { \ - unsigned __lcore_id = rte_lcore_id(); \ - if (__lcore_id < RTE_MAX_LCORE) { \ - r->stats[__lcore_id].name##_objs += n; \ - r->stats[__lcore_id].name##_bulk += 1; \ - } \ - } while(0) -#else -#define __RING_STAT_ADD(r, name, n) do {} while(0) -#endif +/* @internal defines for passing to the enqueue dequeue worker functions */ +#define __IS_SP 1 +#define __IS_MP 0 +#define __IS_SC 1 +#define __IS_MC 0 /** * Calculate the memory size needed for a ring @@ -321,26 +274,6 @@ struct rte_ring *rte_ring_create(const char *name, unsigned count, void rte_ring_free(struct rte_ring *r); /** - * Change the high water mark. - * - * If *count* is 0, water marking is disabled. Otherwise, it is set to the - * *count* value. The *count* value must be greater than 0 and less - * than the ring size. - * - * This function can be called at any time (not necessarily at - * initialization). - * - * @param r - * A pointer to the ring structure. - * @param count - * The new water mark value. - * @return - * - 0: Success; water mark changed. - * - -EINVAL: Invalid water mark value. - */ -int rte_ring_set_water_mark(struct rte_ring *r, unsigned count); - -/** * Dump the status of the ring to a file. * * @param f @@ -353,171 +286,147 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r); /* the actual enqueue of pointers on the ring. * Placed here since identical code needed in both * single and multi producer enqueue functions */ -#define ENQUEUE_PTRS() do { \ - const uint32_t size = r->prod.size; \ - uint32_t idx = prod_head & mask; \ +#define ENQUEUE_PTRS(r, ring_start, prod_head, obj_table, n, obj_type) do { \ + unsigned int i; \ + const uint32_t size = (r)->size; \ + uint32_t idx = prod_head & (r)->mask; \ + obj_type *ring = (obj_type *)ring_start; \ if (likely(idx + n < size)) { \ for (i = 0; i < (n & ((~(unsigned)0x3))); i+=4, idx+=4) { \ - r->ring[idx] = obj_table[i]; \ - r->ring[idx+1] = obj_table[i+1]; \ - r->ring[idx+2] = obj_table[i+2]; \ - r->ring[idx+3] = obj_table[i+3]; \ + ring[idx] = obj_table[i]; \ + ring[idx+1] = obj_table[i+1]; \ + ring[idx+2] = obj_table[i+2]; \ + ring[idx+3] = obj_table[i+3]; \ } \ switch (n & 0x3) { \ - case 3: r->ring[idx++] = obj_table[i++]; \ - case 2: r->ring[idx++] = obj_table[i++]; \ - case 1: r->ring[idx++] = obj_table[i++]; \ + case 3: \ + ring[idx++] = obj_table[i++]; /* fallthrough */ \ + case 2: \ + ring[idx++] = obj_table[i++]; /* fallthrough */ \ + case 1: \ + ring[idx++] = obj_table[i++]; \ } \ } else { \ for (i = 0; idx < size; i++, idx++)\ - r->ring[idx] = obj_table[i]; \ + ring[idx] = obj_table[i]; \ for (idx = 0; i < n; i++, idx++) \ - r->ring[idx] = obj_table[i]; \ + ring[idx] = obj_table[i]; \ } \ -} while(0) +} while (0) /* the actual copy of pointers on the ring to obj_table. 
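Note that ENQUEUE_PTRS (and DEQUEUE_PTRS below) now receives the array start and an element type explicitly, because the flexible r->ring[] member no longer exists; callers pass &r[1], the memory immediately after the ring header. Stripped of the 4-way unrolling and the fallthrough switch, the copy it performs reduces to the following sketch (ring_copy_in is an illustrative name, not a patch symbol):

    #include <stdint.h>

    /* Sketch of ENQUEUE_PTRS for obj_type == void *: store n pointers
     * starting at prod_head, wrapping at the power-of-two size. */
    static void
    ring_copy_in(void **ring, uint32_t size, uint32_t mask,
            uint32_t prod_head, void * const *obj_table, unsigned int n)
    {
        uint32_t idx = prod_head & mask;
        unsigned int i;

        if (idx + n < size) {   /* no wrap: one contiguous run */
            for (i = 0; i < n; i++, idx++)
                ring[idx] = obj_table[i];
        } else {                /* wrap: fill to the end, restart at 0 */
            for (i = 0; idx < size; i++, idx++)
                ring[idx] = obj_table[i];
            for (idx = 0; i < n; i++, idx++)
                ring[idx] = obj_table[i];
        }
    }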
* Placed here since identical code needed in both * single and multi consumer dequeue functions */ -#define DEQUEUE_PTRS() do { \ - uint32_t idx = cons_head & mask; \ - const uint32_t size = r->cons.size; \ +#define DEQUEUE_PTRS(r, ring_start, cons_head, obj_table, n, obj_type) do { \ + unsigned int i; \ + uint32_t idx = cons_head & (r)->mask; \ + const uint32_t size = (r)->size; \ + obj_type *ring = (obj_type *)ring_start; \ if (likely(idx + n < size)) { \ for (i = 0; i < (n & (~(unsigned)0x3)); i+=4, idx+=4) {\ - obj_table[i] = r->ring[idx]; \ - obj_table[i+1] = r->ring[idx+1]; \ - obj_table[i+2] = r->ring[idx+2]; \ - obj_table[i+3] = r->ring[idx+3]; \ + obj_table[i] = ring[idx]; \ + obj_table[i+1] = ring[idx+1]; \ + obj_table[i+2] = ring[idx+2]; \ + obj_table[i+3] = ring[idx+3]; \ } \ switch (n & 0x3) { \ - case 3: obj_table[i++] = r->ring[idx++]; \ - case 2: obj_table[i++] = r->ring[idx++]; \ - case 1: obj_table[i++] = r->ring[idx++]; \ + case 3: \ + obj_table[i++] = ring[idx++]; /* fallthrough */ \ + case 2: \ + obj_table[i++] = ring[idx++]; /* fallthrough */ \ + case 1: \ + obj_table[i++] = ring[idx++]; \ } \ } else { \ for (i = 0; idx < size; i++, idx++) \ - obj_table[i] = r->ring[idx]; \ + obj_table[i] = ring[idx]; \ for (idx = 0; i < n; i++, idx++) \ - obj_table[i] = r->ring[idx]; \ + obj_table[i] = ring[idx]; \ } \ } while (0) +static inline __attribute__((always_inline)) void +update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val, + uint32_t single) +{ + /* + * If there are other enqueues/dequeues in progress that preceded us, + * we need to wait for them to complete + */ + if (!single) + while (unlikely(ht->tail != old_val)) + rte_pause(); + + ht->tail = new_val; +} + /** - * @internal Enqueue several objects on the ring (multi-producers safe). - * - * This function uses a "compare and set" instruction to move the - * producer index atomically. + * @internal This function updates the producer head for enqueue * * @param r - * A pointer to the ring structure. - * @param obj_table - * A pointer to a table of void * pointers (objects). + * A pointer to the ring structure + * @param is_sp + * Indicates whether multi-producer path is needed or not * @param n - * The number of objects to add in the ring from the obj_table. + * The number of elements we will want to enqueue, i.e. how far should the + * head be moved * @param behavior * RTE_RING_QUEUE_FIXED: Enqueue a fixed number of items from a ring - * RTE_RING_QUEUE_VARIABLE: Enqueue as many items a possible from ring + * RTE_RING_QUEUE_VARIABLE: Enqueue as many items as possible from ring + * @param old_head + * Returns head value as it was before the move, i.e. where enqueue starts + * @param new_head + * Returns the current/new head value i.e. where enqueue finishes + * @param free_entries + * Returns the amount of free space in the ring BEFORE head was moved * @return - * Depend on the behavior value - * if behavior = RTE_RING_QUEUE_FIXED - * - 0: Success; objects enqueue. - * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the - * high water mark is exceeded. - * - -ENOBUFS: Not enough room in the ring to enqueue, no object is enqueued. - * if behavior = RTE_RING_QUEUE_VARIABLE - * - n: Actual number of objects enqueued. + * Actual number of objects enqueued. + * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. 
*/ -static inline int __attribute__((always_inline)) -__rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table, - unsigned n, enum rte_ring_queue_behavior behavior) +static inline __attribute__((always_inline)) unsigned int +__rte_ring_move_prod_head(struct rte_ring *r, int is_sp, + unsigned int n, enum rte_ring_queue_behavior behavior, + uint32_t *old_head, uint32_t *new_head, + uint32_t *free_entries) { - uint32_t prod_head, prod_next; - uint32_t cons_tail, free_entries; - const unsigned max = n; + const uint32_t mask = r->mask; + unsigned int max = n; int success; - unsigned i, rep = 0; - uint32_t mask = r->prod.mask; - int ret; - - /* Avoid the unnecessary cmpset operation below, which is also - * potentially harmful when n equals 0. */ - if (n == 0) - return 0; - /* move prod.head atomically */ do { /* Reset n to the initial burst count */ n = max; - prod_head = r->prod.head; - cons_tail = r->cons.tail; + *old_head = r->prod.head; + const uint32_t cons_tail = r->cons.tail; /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have - * prod_head > cons_tail). So 'free_entries' is always between 0 + * *old_head > cons_tail). So 'free_entries' is always between 0 * and size(ring)-1. */ - free_entries = (mask + cons_tail - prod_head); + *free_entries = (mask + cons_tail - *old_head); /* check that we have enough room in ring */ - if (unlikely(n > free_entries)) { - if (behavior == RTE_RING_QUEUE_FIXED) { - __RING_STAT_ADD(r, enq_fail, n); - return -ENOBUFS; - } - else { - /* No free entry available */ - if (unlikely(free_entries == 0)) { - __RING_STAT_ADD(r, enq_fail, n); - return 0; - } - - n = free_entries; - } - } - - prod_next = prod_head + n; - success = rte_atomic32_cmpset(&r->prod.head, prod_head, - prod_next); + if (unlikely(n > *free_entries)) + n = (behavior == RTE_RING_QUEUE_FIXED) ? + 0 : *free_entries; + + if (n == 0) + return 0; + + *new_head = *old_head + n; + if (is_sp) + r->prod.head = *new_head, success = 1; + else + success = rte_atomic32_cmpset(&r->prod.head, + *old_head, *new_head); } while (unlikely(success == 0)); - - /* write entries in ring */ - ENQUEUE_PTRS(); - rte_smp_wmb(); - - /* if we exceed the watermark */ - if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) { - ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT : - (int)(n | RTE_RING_QUOT_EXCEED); - __RING_STAT_ADD(r, enq_quota, n); - } - else { - ret = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : n; - __RING_STAT_ADD(r, enq_success, n); - } - - /* - * If there are other enqueues in progress that preceded us, - * we need to wait for them to complete - */ - while (unlikely(r->prod.tail != prod_head)) { - rte_pause(); - - /* Set RTE_RING_PAUSE_REP_COUNT to avoid spin too long waiting - * for other thread finish. It gives pre-empted thread a chance - * to proceed and finish with ring dequeue operation. */ - if (RTE_RING_PAUSE_REP_COUNT && - ++rep == RTE_RING_PAUSE_REP_COUNT) { - rep = 0; - sched_yield(); - } - } - r->prod.tail = prod_next; - return ret; + return n; } /** - * @internal Enqueue several objects on a ring (NOT multi-producers safe). + * @internal Enqueue several objects on the ring * - * @param r + * @param r * A pointer to the ring structure. * @param obj_table * A pointer to a table of void * pointers (objects). @@ -525,242 +434,142 @@ __rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table, * The number of objects to add in the ring from the obj_table. 
* @param behavior * RTE_RING_QUEUE_FIXED: Enqueue a fixed number of items from a ring - * RTE_RING_QUEUE_VARIABLE: Enqueue as many items a possible from ring + * RTE_RING_QUEUE_VARIABLE: Enqueue as many items as possible from ring + * @param is_sp + * Indicates whether to use single producer or multi-producer head update + * @param free_space + * returns the amount of space after the enqueue operation has finished * @return - * Depend on the behavior value - * if behavior = RTE_RING_QUEUE_FIXED - * - 0: Success; objects enqueue. - * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the - * high water mark is exceeded. - * - -ENOBUFS: Not enough room in the ring to enqueue, no object is enqueued. - * if behavior = RTE_RING_QUEUE_VARIABLE - * - n: Actual number of objects enqueued. + * Actual number of objects enqueued. + * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. */ -static inline int __attribute__((always_inline)) -__rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table, - unsigned n, enum rte_ring_queue_behavior behavior) +static inline __attribute__((always_inline)) unsigned int +__rte_ring_do_enqueue(struct rte_ring *r, void * const *obj_table, + unsigned int n, enum rte_ring_queue_behavior behavior, + int is_sp, unsigned int *free_space) { - uint32_t prod_head, cons_tail; - uint32_t prod_next, free_entries; - unsigned i; - uint32_t mask = r->prod.mask; - int ret; - - prod_head = r->prod.head; - cons_tail = r->cons.tail; - /* The subtraction is done between two unsigned 32bits value - * (the result is always modulo 32 bits even if we have - * prod_head > cons_tail). So 'free_entries' is always between 0 - * and size(ring)-1. */ - free_entries = mask + cons_tail - prod_head; - - /* check that we have enough room in ring */ - if (unlikely(n > free_entries)) { - if (behavior == RTE_RING_QUEUE_FIXED) { - __RING_STAT_ADD(r, enq_fail, n); - return -ENOBUFS; - } - else { - /* No free entry available */ - if (unlikely(free_entries == 0)) { - __RING_STAT_ADD(r, enq_fail, n); - return 0; - } - - n = free_entries; - } - } - - prod_next = prod_head + n; - r->prod.head = prod_next; - - /* write entries in ring */ - ENQUEUE_PTRS(); + uint32_t prod_head, prod_next; + uint32_t free_entries; + + n = __rte_ring_move_prod_head(r, is_sp, n, behavior, + &prod_head, &prod_next, &free_entries); + if (n == 0) + goto end; + + ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *); rte_smp_wmb(); - /* if we exceed the watermark */ - if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) { - ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT : - (int)(n | RTE_RING_QUOT_EXCEED); - __RING_STAT_ADD(r, enq_quota, n); - } - else { - ret = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : n; - __RING_STAT_ADD(r, enq_success, n); - } - - r->prod.tail = prod_next; - return ret; + update_tail(&r->prod, prod_head, prod_next, is_sp); +end: + if (free_space != NULL) + *free_space = free_entries - n; + return n; } /** - * @internal Dequeue several objects from a ring (multi-consumers safe). When - * the request objects are more than the available objects, only dequeue the - * actual number of objects - * - * This function uses a "compare and set" instruction to move the - * consumer index atomically. + * @internal This function updates the consumer head for dequeue * * @param r - * A pointer to the ring structure. - * @param obj_table - * A pointer to a table of void * pointers (objects) that will be filled. 
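The "always modulo 32 bits" comment above is the load-bearing part of the head/tail scheme: the indices are free-running uint32_t counters that are only masked when the array is touched, so the free-space and entry counts stay correct even when the counters wrap. A tiny standalone check (not from the patch) makes this concrete:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        const uint32_t mask = 7;                /* ring size 8, 7 usable */
        uint32_t prod_head = UINT32_MAX;        /* about to wrap */
        uint32_t cons_tail = UINT32_MAX - 2;    /* 2 entries in use */

        /* Same expression as in __rte_ring_move_prod_head: correct
         * despite the imminent wrap of both counters. */
        uint32_t free_entries = mask + cons_tail - prod_head;
        assert(free_entries == 5);
        return 0;
    }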
+ * A pointer to the ring structure + * @param is_sc + * Indicates whether multi-consumer path is needed or not * @param n - * The number of objects to dequeue from the ring to the obj_table. + * The number of elements we will want to enqueue, i.e. how far should the + * head be moved * @param behavior * RTE_RING_QUEUE_FIXED: Dequeue a fixed number of items from a ring - * RTE_RING_QUEUE_VARIABLE: Dequeue as many items a possible from ring + * RTE_RING_QUEUE_VARIABLE: Dequeue as many items as possible from ring + * @param old_head + * Returns head value as it was before the move, i.e. where dequeue starts + * @param new_head + * Returns the current/new head value i.e. where dequeue finishes + * @param entries + * Returns the number of entries in the ring BEFORE head was moved * @return - * Depend on the behavior value - * if behavior = RTE_RING_QUEUE_FIXED - * - 0: Success; objects dequeued. - * - -ENOENT: Not enough entries in the ring to dequeue; no object is - * dequeued. - * if behavior = RTE_RING_QUEUE_VARIABLE - * - n: Actual number of objects dequeued. + * - Actual number of objects dequeued. + * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. */ - -static inline int __attribute__((always_inline)) -__rte_ring_mc_do_dequeue(struct rte_ring *r, void **obj_table, - unsigned n, enum rte_ring_queue_behavior behavior) +static inline __attribute__((always_inline)) unsigned int +__rte_ring_move_cons_head(struct rte_ring *r, int is_sc, + unsigned int n, enum rte_ring_queue_behavior behavior, + uint32_t *old_head, uint32_t *new_head, + uint32_t *entries) { - uint32_t cons_head, prod_tail; - uint32_t cons_next, entries; - const unsigned max = n; + unsigned int max = n; int success; - unsigned i, rep = 0; - uint32_t mask = r->prod.mask; - - /* Avoid the unnecessary cmpset operation below, which is also - * potentially harmful when n equals 0. */ - if (n == 0) - return 0; /* move cons.head atomically */ do { /* Restore n as it may change every loop */ n = max; - cons_head = r->cons.head; - prod_tail = r->prod.tail; + *old_head = r->cons.head; + const uint32_t prod_tail = r->prod.tail; /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * cons_head > prod_tail). So 'entries' is always between 0 * and size(ring)-1. */ - entries = (prod_tail - cons_head); + *entries = (prod_tail - *old_head); /* Set the actual entries for dequeue */ - if (n > entries) { - if (behavior == RTE_RING_QUEUE_FIXED) { - __RING_STAT_ADD(r, deq_fail, n); - return -ENOENT; - } - else { - if (unlikely(entries == 0)){ - __RING_STAT_ADD(r, deq_fail, n); - return 0; - } - - n = entries; - } - } - - cons_next = cons_head + n; - success = rte_atomic32_cmpset(&r->cons.head, cons_head, - cons_next); + if (n > *entries) + n = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : *entries; + + if (unlikely(n == 0)) + return 0; + + *new_head = *old_head + n; + if (is_sc) + r->cons.head = *new_head, success = 1; + else + success = rte_atomic32_cmpset(&r->cons.head, *old_head, + *new_head); } while (unlikely(success == 0)); - - /* copy in table */ - DEQUEUE_PTRS(); - rte_smp_rmb(); - - /* - * If there are other dequeues in progress that preceded us, - * we need to wait for them to complete - */ - while (unlikely(r->cons.tail != cons_head)) { - rte_pause(); - - /* Set RTE_RING_PAUSE_REP_COUNT to avoid spin too long waiting - * for other thread finish. It gives pre-empted thread a chance - * to proceed and finish with ring dequeue operation. 
*/ - if (RTE_RING_PAUSE_REP_COUNT && - ++rep == RTE_RING_PAUSE_REP_COUNT) { - rep = 0; - sched_yield(); - } - } - __RING_STAT_ADD(r, deq_success, n); - r->cons.tail = cons_next; - - return behavior == RTE_RING_QUEUE_FIXED ? 0 : n; + return n; } /** - * @internal Dequeue several objects from a ring (NOT multi-consumers safe). - * When the request objects are more than the available objects, only dequeue - * the actual number of objects + * @internal Dequeue several objects from the ring * * @param r * A pointer to the ring structure. * @param obj_table - * A pointer to a table of void * pointers (objects) that will be filled. + * A pointer to a table of void * pointers (objects). * @param n - * The number of objects to dequeue from the ring to the obj_table. + * The number of objects to pull from the ring. * @param behavior * RTE_RING_QUEUE_FIXED: Dequeue a fixed number of items from a ring - * RTE_RING_QUEUE_VARIABLE: Dequeue as many items a possible from ring + * RTE_RING_QUEUE_VARIABLE: Dequeue as many items as possible from ring + * @param is_sc + * Indicates whether to use single consumer or multi-consumer head update + * @param available + * returns the number of remaining ring entries after the dequeue has finished * @return - * Depend on the behavior value - * if behavior = RTE_RING_QUEUE_FIXED - * - 0: Success; objects dequeued. - * - -ENOENT: Not enough entries in the ring to dequeue; no object is - * dequeued. - * if behavior = RTE_RING_QUEUE_VARIABLE - * - n: Actual number of objects dequeued. + * - Actual number of objects dequeued. + * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. */ -static inline int __attribute__((always_inline)) -__rte_ring_sc_do_dequeue(struct rte_ring *r, void **obj_table, - unsigned n, enum rte_ring_queue_behavior behavior) +static inline __attribute__((always_inline)) unsigned int +__rte_ring_do_dequeue(struct rte_ring *r, void **obj_table, + unsigned int n, enum rte_ring_queue_behavior behavior, + int is_sc, unsigned int *available) { - uint32_t cons_head, prod_tail; - uint32_t cons_next, entries; - unsigned i; - uint32_t mask = r->prod.mask; - - cons_head = r->cons.head; - prod_tail = r->prod.tail; - /* The subtraction is done between two unsigned 32bits value - * (the result is always modulo 32 bits even if we have - * cons_head > prod_tail). So 'entries' is always between 0 - * and size(ring)-1. */ - entries = prod_tail - cons_head; - - if (n > entries) { - if (behavior == RTE_RING_QUEUE_FIXED) { - __RING_STAT_ADD(r, deq_fail, n); - return -ENOENT; - } - else { - if (unlikely(entries == 0)){ - __RING_STAT_ADD(r, deq_fail, n); - return 0; - } - - n = entries; - } - } - - cons_next = cons_head + n; - r->cons.head = cons_next; - - /* copy in table */ - DEQUEUE_PTRS(); + uint32_t cons_head, cons_next; + uint32_t entries; + + n = __rte_ring_move_cons_head(r, is_sc, n, behavior, + &cons_head, &cons_next, &entries); + if (n == 0) + goto end; + + DEQUEUE_PTRS(r, &r[1], cons_head, obj_table, n, void *); rte_smp_rmb(); - __RING_STAT_ADD(r, deq_success, n); - r->cons.tail = cons_next; - return behavior == RTE_RING_QUEUE_FIXED ? 0 : n; + update_tail(&r->cons, cons_head, cons_next, is_sc); + +end: + if (available != NULL) + *available = entries - n; + return n; } /** @@ -775,17 +584,18 @@ __rte_ring_sc_do_dequeue(struct rte_ring *r, void **obj_table, * A pointer to a table of void * pointers (objects). * @param n * The number of objects to add in the ring from the obj_table. 
+ * @param free_space + * if non-NULL, returns the amount of space in the ring after the + * enqueue operation has finished. * @return - * - 0: Success; objects enqueue. - * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the - * high water mark is exceeded. - * - -ENOBUFS: Not enough room in the ring to enqueue, no object is enqueued. + * The number of objects enqueued, either 0 or n */ -static inline int __attribute__((always_inline)) +static inline unsigned int __attribute__((always_inline)) rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table, - unsigned n) + unsigned int n, unsigned int *free_space) { - return __rte_ring_mp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED); + return __rte_ring_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED, + __IS_MP, free_space); } /** @@ -797,17 +607,18 @@ rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table, * A pointer to a table of void * pointers (objects). * @param n * The number of objects to add in the ring from the obj_table. + * @param free_space + * if non-NULL, returns the amount of space in the ring after the + * enqueue operation has finished. * @return - * - 0: Success; objects enqueued. - * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the - * high water mark is exceeded. - * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. + * The number of objects enqueued, either 0 or n */ -static inline int __attribute__((always_inline)) +static inline unsigned int __attribute__((always_inline)) rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table, - unsigned n) + unsigned int n, unsigned int *free_space) { - return __rte_ring_sp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED); + return __rte_ring_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED, + __IS_SP, free_space); } /** @@ -823,20 +634,18 @@ rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table, * A pointer to a table of void * pointers (objects). * @param n * The number of objects to add in the ring from the obj_table. + * @param free_space + * if non-NULL, returns the amount of space in the ring after the + * enqueue operation has finished. * @return - * - 0: Success; objects enqueued. - * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the - * high water mark is exceeded. - * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. + * The number of objects enqueued, either 0 or n */ -static inline int __attribute__((always_inline)) +static inline unsigned int __attribute__((always_inline)) rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table, - unsigned n) + unsigned int n, unsigned int *free_space) { - if (r->prod.sp_enqueue) - return rte_ring_sp_enqueue_bulk(r, obj_table, n); - else - return rte_ring_mp_enqueue_bulk(r, obj_table, n); + return __rte_ring_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED, + r->prod.single, free_space); } /** @@ -851,14 +660,12 @@ rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table, * A pointer to the object to be added. * @return * - 0: Success; objects enqueued. - * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the - * high water mark is exceeded. * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. */ static inline int __attribute__((always_inline)) rte_ring_mp_enqueue(struct rte_ring *r, void *obj) { - return rte_ring_mp_enqueue_bulk(r, &obj, 1); + return rte_ring_mp_enqueue_bulk(r, &obj, 1, NULL) ? 
0 : -ENOBUFS; } /** @@ -870,14 +677,12 @@ rte_ring_mp_enqueue(struct rte_ring *r, void *obj) * A pointer to the object to be added. * @return * - 0: Success; objects enqueued. - * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the - * high water mark is exceeded. * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. */ static inline int __attribute__((always_inline)) rte_ring_sp_enqueue(struct rte_ring *r, void *obj) { - return rte_ring_sp_enqueue_bulk(r, &obj, 1); + return rte_ring_sp_enqueue_bulk(r, &obj, 1, NULL) ? 0 : -ENOBUFS; } /** @@ -893,17 +698,12 @@ rte_ring_sp_enqueue(struct rte_ring *r, void *obj) * A pointer to the object to be added. * @return * - 0: Success; objects enqueued. - * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the - * high water mark is exceeded. * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. */ static inline int __attribute__((always_inline)) rte_ring_enqueue(struct rte_ring *r, void *obj) { - if (r->prod.sp_enqueue) - return rte_ring_sp_enqueue(r, obj); - else - return rte_ring_mp_enqueue(r, obj); + return rte_ring_enqueue_bulk(r, &obj, 1, NULL) ? 0 : -ENOBUFS; } /** @@ -918,15 +718,18 @@ rte_ring_enqueue(struct rte_ring *r, void *obj) * A pointer to a table of void * pointers (objects) that will be filled. * @param n * The number of objects to dequeue from the ring to the obj_table. + * @param available + * If non-NULL, returns the number of remaining ring entries after the + * dequeue has finished. * @return - * - 0: Success; objects dequeued. - * - -ENOENT: Not enough entries in the ring to dequeue; no object is - * dequeued. + * The number of objects dequeued, either 0 or n */ -static inline int __attribute__((always_inline)) -rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n) +static inline unsigned int __attribute__((always_inline)) +rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table, + unsigned int n, unsigned int *available) { - return __rte_ring_mc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED); + return __rte_ring_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED, + __IS_MC, available); } /** @@ -939,15 +742,18 @@ rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n) * @param n * The number of objects to dequeue from the ring to the obj_table, * must be strictly positive. + * @param available + * If non-NULL, returns the number of remaining ring entries after the + * dequeue has finished. * @return - * - 0: Success; objects dequeued. - * - -ENOENT: Not enough entries in the ring to dequeue; no object is - * dequeued. + * The number of objects dequeued, either 0 or n */ -static inline int __attribute__((always_inline)) -rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n) +static inline unsigned int __attribute__((always_inline)) +rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table, + unsigned int n, unsigned int *available) { - return __rte_ring_sc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED); + return __rte_ring_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED, + __IS_SC, available); } /** @@ -963,18 +769,18 @@ rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n) * A pointer to a table of void * pointers (objects) that will be filled. * @param n * The number of objects to dequeue from the ring to the obj_table. + * @param available + * If non-NULL, returns the number of remaining ring entries after the + * dequeue has finished. 
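The hunks above change the calling convention of every bulk function: instead of 0 on success and -ENOBUFS/-ENOENT on failure, they now return the number of objects actually processed (always 0 or n for bulk) and take an extra out-parameter. A before/after sketch of a hypothetical caller that wants all-or-nothing behavior:

    /* Old API: int rc = rte_ring_mc_dequeue_bulk(r, objs, 16);
     *          rc == 0 meant success, -ENOENT meant too few entries. */
    void *objs[16];
    unsigned int avail;

    if (rte_ring_mc_dequeue_bulk(r, objs, 16, &avail) == 16) {
        /* all 16 dequeued; 'avail' entries remain in the ring */
    } else {
        /* returned 0: fewer than 16 entries were present */
    }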
* @return - * - 0: Success; objects dequeued. - * - -ENOENT: Not enough entries in the ring to dequeue, no object is - * dequeued. + * The number of objects dequeued, either 0 or n */ -static inline int __attribute__((always_inline)) -rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n) +static inline unsigned int __attribute__((always_inline)) +rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int n, + unsigned int *available) { - if (r->cons.sc_dequeue) - return rte_ring_sc_dequeue_bulk(r, obj_table, n); - else - return rte_ring_mc_dequeue_bulk(r, obj_table, n); + return __rte_ring_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED, + r->cons.single, available); } /** @@ -995,7 +801,7 @@ rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n) static inline int __attribute__((always_inline)) rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p) { - return rte_ring_mc_dequeue_bulk(r, obj_p, 1); + return rte_ring_mc_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOBUFS; } /** @@ -1013,7 +819,7 @@ rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p) static inline int __attribute__((always_inline)) rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p) { - return rte_ring_sc_dequeue_bulk(r, obj_p, 1); + return rte_ring_sc_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOBUFS; } /** @@ -1035,10 +841,7 @@ rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p) static inline int __attribute__((always_inline)) rte_ring_dequeue(struct rte_ring *r, void **obj_p) { - if (r->cons.sc_dequeue) - return rte_ring_sc_dequeue(r, obj_p); - else - return rte_ring_mc_dequeue(r, obj_p); + return rte_ring_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOENT; } /** @@ -1055,7 +858,7 @@ rte_ring_full(const struct rte_ring *r) { uint32_t prod_tail = r->prod.tail; uint32_t cons_tail = r->cons.tail; - return ((cons_tail - prod_tail - 1) & r->prod.mask) == 0; + return ((cons_tail - prod_tail - 1) & r->mask) == 0; } /** @@ -1088,7 +891,7 @@ rte_ring_count(const struct rte_ring *r) { uint32_t prod_tail = r->prod.tail; uint32_t cons_tail = r->cons.tail; - return (prod_tail - cons_tail) & r->prod.mask; + return (prod_tail - cons_tail) & r->mask; } /** @@ -1104,7 +907,21 @@ rte_ring_free_count(const struct rte_ring *r) { uint32_t prod_tail = r->prod.tail; uint32_t cons_tail = r->cons.tail; - return (cons_tail - prod_tail - 1) & r->prod.mask; + return (cons_tail - prod_tail - 1) & r->mask; +} + +/** + * Return the size of the ring. + * + * @param r + * A pointer to the ring structure. + * @return + * The number of elements which can be stored in the ring. + */ +static inline unsigned int +rte_ring_get_size(const struct rte_ring *r) +{ + return r->size; } /** @@ -1139,14 +956,18 @@ struct rte_ring *rte_ring_lookup(const char *name); * A pointer to a table of void * pointers (objects). * @param n * The number of objects to add in the ring from the obj_table. + * @param free_space + * if non-NULL, returns the amount of space in the ring after the + * enqueue operation has finished. * @return * - n: Actual number of objects enqueued. 
*/ static inline unsigned __attribute__((always_inline)) rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table, - unsigned n) + unsigned int n, unsigned int *free_space) { - return __rte_ring_mp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE); + return __rte_ring_do_enqueue(r, obj_table, n, + RTE_RING_QUEUE_VARIABLE, __IS_MP, free_space); } /** @@ -1158,14 +979,18 @@ rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table, * A pointer to a table of void * pointers (objects). * @param n * The number of objects to add in the ring from the obj_table. + * @param free_space + * if non-NULL, returns the amount of space in the ring after the + * enqueue operation has finished. * @return * - n: Actual number of objects enqueued. */ static inline unsigned __attribute__((always_inline)) rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table, - unsigned n) + unsigned int n, unsigned int *free_space) { - return __rte_ring_sp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE); + return __rte_ring_do_enqueue(r, obj_table, n, + RTE_RING_QUEUE_VARIABLE, __IS_SP, free_space); } /** @@ -1181,17 +1006,18 @@ rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table, * A pointer to a table of void * pointers (objects). * @param n * The number of objects to add in the ring from the obj_table. + * @param free_space + * if non-NULL, returns the amount of space in the ring after the + * enqueue operation has finished. * @return * - n: Actual number of objects enqueued. */ static inline unsigned __attribute__((always_inline)) rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table, - unsigned n) + unsigned int n, unsigned int *free_space) { - if (r->prod.sp_enqueue) - return rte_ring_sp_enqueue_burst(r, obj_table, n); - else - return rte_ring_mp_enqueue_burst(r, obj_table, n); + return __rte_ring_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE, + r->prod.single, free_space); } /** @@ -1208,13 +1034,18 @@ rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table, * A pointer to a table of void * pointers (objects) that will be filled. * @param n * The number of objects to dequeue from the ring to the obj_table. + * @param available + * If non-NULL, returns the number of remaining ring entries after the + * dequeue has finished. * @return * - n: Actual number of objects dequeued, 0 if ring is empty */ static inline unsigned __attribute__((always_inline)) -rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n) +rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table, + unsigned int n, unsigned int *available) { - return __rte_ring_mc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE); + return __rte_ring_do_dequeue(r, obj_table, n, + RTE_RING_QUEUE_VARIABLE, __IS_MC, available); } /** @@ -1228,13 +1059,18 @@ rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n) * A pointer to a table of void * pointers (objects) that will be filled. * @param n * The number of objects to dequeue from the ring to the obj_table. + * @param available + * If non-NULL, returns the number of remaining ring entries after the + * dequeue has finished. 
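Unlike the bulk calls, the burst variants may return any count from 0 up to n, so a consumer can drain a ring without first querying its occupancy. A short usage sketch (hypothetical caller, single-consumer ring):

    void *burst[32];
    unsigned int got, left;

    do {
        got = rte_ring_sc_dequeue_burst(r, burst, 32, &left);
        /* ... process 'got' objects; 'left' is what remains ... */
    } while (got > 0);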
* @return * - n: Actual number of objects dequeued, 0 if ring is empty */ static inline unsigned __attribute__((always_inline)) -rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n) +rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table, + unsigned int n, unsigned int *available) { - return __rte_ring_sc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE); + return __rte_ring_do_dequeue(r, obj_table, n, + RTE_RING_QUEUE_VARIABLE, __IS_SC, available); } /** @@ -1250,16 +1086,19 @@ rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n) * A pointer to a table of void * pointers (objects) that will be filled. * @param n * The number of objects to dequeue from the ring to the obj_table. + * @param available + * If non-NULL, returns the number of remaining ring entries after the + * dequeue has finished. * @return * - Number of objects dequeued */ static inline unsigned __attribute__((always_inline)) -rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n) +rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table, + unsigned int n, unsigned int *available) { - if (r->cons.sc_dequeue) - return rte_ring_sc_dequeue_burst(r, obj_table, n); - else - return rte_ring_mc_dequeue_burst(r, obj_table, n); + return __rte_ring_do_dequeue(r, obj_table, n, + RTE_RING_QUEUE_VARIABLE, + r->cons.single, available); } #ifdef __cplusplus diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index 44cb780f..18274e73 100644 --- a/lib/librte_sched/Makefile +++ b/lib/librte_sched/Makefile @@ -58,9 +58,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_reciprocal.c SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_reciprocal.h -# this lib depends upon: -DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_mempool lib/librte_mbuf -DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_net lib/librte_timer - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile index c82c7696..0d06d36a 100644 --- a/lib/librte_table/Makefile +++ b/lib/librte_table/Makefile @@ -72,15 +72,4 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru.h SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_array.h SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_stub.h -# this lib depends upon: -DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) := lib/librte_eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_mbuf -DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_mempool -DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_port -DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_lpm -ifeq ($(CONFIG_RTE_LIBRTE_ACL),y) -DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_acl -endif -DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_hash - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_table/rte_table_acl.c b/lib/librte_table/rte_table_acl.c index 8f1f8ceb..3c05e4a8 100644 --- a/lib/librte_table/rte_table_acl.c +++ b/lib/librte_table/rte_table_acl.c @@ -87,7 +87,7 @@ rte_table_acl_create( int socket_id, uint32_t entry_size) { - struct rte_table_acl_params *p = (struct rte_table_acl_params *) params; + struct rte_table_acl_params *p = params; struct rte_table_acl *acl; uint32_t action_table_size, acl_rule_list_size, acl_rule_memory_size; uint32_t total_size; @@ -168,7 +168,7 @@ rte_table_acl_create( static int rte_table_acl_free(void *table) { - struct rte_table_acl *acl = 
(struct rte_table_acl *) table; + struct rte_table_acl *acl = table; /* Check input parameters */ if (table == NULL) { @@ -248,9 +248,9 @@ rte_table_acl_entry_add( int *key_found, void **entry_ptr) { - struct rte_table_acl *acl = (struct rte_table_acl *) table; + struct rte_table_acl *acl = table; struct rte_table_acl_rule_add_params *rule = - (struct rte_table_acl_rule_add_params *) key; + key; struct rte_pipeline_acl_rule acl_rule; struct rte_acl_rule *rule_location; struct rte_acl_ctx *ctx; @@ -366,9 +366,9 @@ rte_table_acl_entry_delete( int *key_found, void *entry) { - struct rte_table_acl *acl = (struct rte_table_acl *) table; + struct rte_table_acl *acl = table; struct rte_table_acl_rule_delete_params *rule = - (struct rte_table_acl_rule_delete_params *) key; + key; struct rte_acl_rule *deleted_rule = NULL; struct rte_acl_ctx *ctx; uint32_t pos, pos_valid, i; @@ -450,7 +450,7 @@ rte_table_acl_entry_add_bulk( int *key_found, void **entries_ptr) { - struct rte_table_acl *acl = (struct rte_table_acl *) table; + struct rte_table_acl *acl = table; struct rte_acl_ctx *ctx; uint32_t rule_pos[n_keys]; uint32_t i; @@ -507,7 +507,7 @@ rte_table_acl_entry_add_bulk( return -EINVAL; } - rule = (struct rte_table_acl_rule_add_params *) keys[i]; + rule = keys[i]; if (rule->priority > RTE_ACL_MAX_PRIORITY) { RTE_LOG(ERR, TABLE, "%s: Priority is too high\n", __func__); return -EINVAL; @@ -518,7 +518,7 @@ rte_table_acl_entry_add_bulk( memset(key_found, 0, n_keys * sizeof(int)); for (i = 0; i < n_keys; i++) { struct rte_table_acl_rule_add_params *rule = - (struct rte_table_acl_rule_add_params *) keys[i]; + keys[i]; struct rte_pipeline_acl_rule acl_rule; struct rte_acl_rule *rule_location; uint32_t free_pos, free_pos_valid, j; @@ -636,7 +636,7 @@ rte_table_acl_entry_delete_bulk( int *key_found, void **entries) { - struct rte_table_acl *acl = (struct rte_table_acl *) table; + struct rte_table_acl *acl = table; struct rte_acl_rule *deleted_rules[n_keys]; uint32_t rule_pos[n_keys]; struct rte_acl_ctx *ctx; @@ -675,7 +675,7 @@ rte_table_acl_entry_delete_bulk( memset(rule_pos, 0, n_keys * sizeof(uint32_t)); for (i = 0; i < n_keys; i++) { struct rte_table_acl_rule_delete_params *rule = - (struct rte_table_acl_rule_delete_params *) keys[i]; + keys[i]; uint32_t pos_valid, j; /* Look for the rule in the table */ @@ -792,7 +792,7 @@ rte_table_acl_lookup( pkts_mask &= ~pkt_mask; - if (action_table_pos != RTE_ACL_INVALID_USERDATA) { + if (action_table_pos != 0) { pkts_out_mask |= pkt_mask; entries[pkt_pos] = (void *) &acl->memory[action_table_pos * @@ -810,7 +810,7 @@ rte_table_acl_lookup( static int rte_table_acl_stats_read(void *table, struct rte_table_stats *stats, int clear) { - struct rte_table_acl *acl = (struct rte_table_acl *) table; + struct rte_table_acl *acl = table; if (stats != NULL) memcpy(stats, &acl->stats, sizeof(acl->stats)); diff --git a/lib/librte_table/rte_table_array.c b/lib/librte_table/rte_table_array.c index 3bb68d11..cf7be88a 100644 --- a/lib/librte_table/rte_table_array.c +++ b/lib/librte_table/rte_table_array.c @@ -74,8 +74,7 @@ struct rte_table_array { static void * rte_table_array_create(void *params, int socket_id, uint32_t entry_size) { - struct rte_table_array_params *p = - (struct rte_table_array_params *) params; + struct rte_table_array_params *p = params; struct rte_table_array *t; uint32_t total_cl_size, total_size; @@ -111,7 +110,7 @@ rte_table_array_create(void *params, int socket_id, uint32_t entry_size) static int rte_table_array_free(void *table) { - struct rte_table_array 
*t = (struct rte_table_array *) table; + struct rte_table_array *t = table; /* Check input parameters */ if (t == NULL) { @@ -133,8 +132,8 @@ rte_table_array_entry_add( int *key_found, void **entry_ptr) { - struct rte_table_array *t = (struct rte_table_array *) table; - struct rte_table_array_key *k = (struct rte_table_array_key *) key; + struct rte_table_array *t = table; + struct rte_table_array_key *k = key; uint8_t *table_entry; /* Check input parameters */ @@ -214,7 +213,7 @@ rte_table_array_lookup( static int rte_table_array_stats_read(void *table, struct rte_table_stats *stats, int clear) { - struct rte_table_array *array = (struct rte_table_array *) table; + struct rte_table_array *array = table; if (stats != NULL) memcpy(stats, &array->stats, sizeof(array->stats)); diff --git a/lib/librte_table/rte_table_hash_cuckoo.c b/lib/librte_table/rte_table_hash_cuckoo.c index ff7baee3..da1597fa 100644 --- a/lib/librte_table/rte_table_hash_cuckoo.c +++ b/lib/librte_table/rte_table_hash_cuckoo.c @@ -190,7 +190,7 @@ rte_table_hash_cuckoo_free(void *table) { return -EINVAL; } - struct rte_table_hash *t = (struct rte_table_hash *)table; + struct rte_table_hash *t = table; rte_hash_free(t->h_table); rte_free(t); @@ -218,7 +218,7 @@ rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry, return -EINVAL; } - struct rte_table_hash *t = (struct rte_table_hash *)table; + struct rte_table_hash *t = table; /* Find Existing entries */ pos = rte_hash_lookup(t->h_table, key); @@ -268,7 +268,7 @@ rte_table_hash_cuckoo_entry_delete(void *table, void *key, return -EINVAL; } - struct rte_table_hash *t = (struct rte_table_hash *)table; + struct rte_table_hash *t = table; pos = rte_hash_del_key(t->h_table, key); if (pos >= 0) { @@ -359,7 +359,7 @@ static int rte_table_hash_cuckoo_stats_read(void *table, struct rte_table_stats *stats, int clear) { - struct rte_table_hash *t = (struct rte_table_hash *) table; + struct rte_table_hash *t = table; if (stats != NULL) memcpy(stats, &t->stats, sizeof(t->stats)); diff --git a/lib/librte_table/rte_table_hash_ext.c b/lib/librte_table/rte_table_hash_ext.c index e283a3d1..e7181026 100644 --- a/lib/librte_table/rte_table_hash_ext.c +++ b/lib/librte_table/rte_table_hash_ext.c @@ -172,7 +172,7 @@ static void * rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size) { struct rte_table_hash_ext_params *p = - (struct rte_table_hash_ext_params *) params; + params; struct rte_table_hash *t; uint32_t total_size, table_meta_sz; uint32_t bucket_sz, bucket_ext_sz, key_sz; @@ -258,7 +258,7 @@ rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size) static int rte_table_hash_ext_free(void *table) { - struct rte_table_hash *t = (struct rte_table_hash *) table; + struct rte_table_hash *t = table; /* Check input parameters */ if (t == NULL) @@ -272,7 +272,7 @@ static int rte_table_hash_ext_entry_add(void *table, void *key, void *entry, int *key_found, void **entry_ptr) { - struct rte_table_hash *t = (struct rte_table_hash *) table; + struct rte_table_hash *t = table; struct bucket *bkt0, *bkt, *bkt_prev; uint64_t sig; uint32_t bkt_index, i; @@ -373,7 +373,7 @@ static int rte_table_hash_ext_entry_delete(void *table, void *key, int *key_found, void *entry) { - struct rte_table_hash *t = (struct rte_table_hash *) table; + struct rte_table_hash *t = table; struct bucket *bkt0, *bkt, *bkt_prev; uint64_t sig; uint32_t bkt_index, i; @@ -444,7 +444,6 @@ static int rte_table_hash_ext_lookup_unoptimized( uint64_t pkts_mask_out = 0; __rte_unused 
uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); - RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(t, n_pkts_in); for ( ; pkts_mask; ) { struct bucket *bkt0, *bkt; @@ -490,7 +489,6 @@ static int rte_table_hash_ext_lookup_unoptimized( } *lookup_hit_mask = pkts_mask_out; - RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out)); return 0; } @@ -874,9 +872,13 @@ static int rte_table_hash_ext_lookup( RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(t, n_pkts_in); /* Cannot run the pipeline with less than 7 packets */ - if (__builtin_popcountll(pkts_mask) < 7) - return rte_table_hash_ext_lookup_unoptimized(table, pkts, + if (__builtin_popcountll(pkts_mask) < 7) { + status = rte_table_hash_ext_lookup_unoptimized(table, pkts, pkts_mask, lookup_hit_mask, entries, 0); + RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - + __builtin_popcountll(*lookup_hit_mask)); + return status; + } /* Pipeline stage 0 */ lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); @@ -1007,9 +1009,13 @@ static int rte_table_hash_ext_lookup_dosig( RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(t, n_pkts_in); /* Cannot run the pipeline with less than 7 packets */ - if (__builtin_popcountll(pkts_mask) < 7) - return rte_table_hash_ext_lookup_unoptimized(table, pkts, + if (__builtin_popcountll(pkts_mask) < 7) { + status = rte_table_hash_ext_lookup_unoptimized(table, pkts, pkts_mask, lookup_hit_mask, entries, 1); + RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - + __builtin_popcountll(*lookup_hit_mask)); + return status; + } /* Pipeline stage 0 */ lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); @@ -1125,7 +1131,7 @@ static int rte_table_hash_ext_lookup_dosig( static int rte_table_hash_ext_stats_read(void *table, struct rte_table_stats *stats, int clear) { - struct rte_table_hash *t = (struct rte_table_hash *) table; + struct rte_table_hash *t = table; if (stats != NULL) memcpy(stats, &t->stats, sizeof(t->stats)); diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c index 08d4d77e..ce057b78 100644 --- a/lib/librte_table/rte_table_hash_key16.c +++ b/lib/librte_table/rte_table_hash_key16.c @@ -187,7 +187,7 @@ rte_table_hash_create_key16_lru(void *params, static int rte_table_hash_free_key16_lru(void *table) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; /* Check input parameters */ if (f == NULL) { @@ -207,7 +207,7 @@ rte_table_hash_entry_add_key16_lru( int *key_found, void **entry_ptr) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; struct rte_bucket_4_16 *bucket; uint64_t signature, pos; uint32_t bucket_index, i; @@ -273,7 +273,7 @@ rte_table_hash_entry_delete_key16_lru( int *key_found, void *entry) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; struct rte_bucket_4_16 *bucket; uint64_t signature; uint32_t bucket_index, i; @@ -407,7 +407,7 @@ rte_table_hash_create_key16_ext(void *params, static int rte_table_hash_free_key16_ext(void *table) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; /* Check input parameters */ if (f == NULL) { @@ -427,7 +427,7 @@ rte_table_hash_entry_add_key16_ext( int *key_found, void **entry_ptr) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev; uint64_t signature; uint32_t bucket_index, i; @@ -504,7 
+504,7 @@ rte_table_hash_entry_delete_key16_ext( int *key_found, void *entry) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev; uint64_t signature; uint32_t bucket_index, i; @@ -1463,7 +1463,7 @@ grind_next_buckets: static int rte_table_hash_key16_stats_read(void *table, struct rte_table_stats *stats, int clear) { - struct rte_table_hash *t = (struct rte_table_hash *) table; + struct rte_table_hash *t = table; if (stats != NULL) memcpy(stats, &t->stats, sizeof(t->stats)); diff --git a/lib/librte_table/rte_table_hash_key32.c b/lib/librte_table/rte_table_hash_key32.c index 161f6b7a..31fe6fda 100644 --- a/lib/librte_table/rte_table_hash_key32.c +++ b/lib/librte_table/rte_table_hash_key32.c @@ -179,7 +179,7 @@ rte_table_hash_create_key32_lru(void *params, static int rte_table_hash_free_key32_lru(void *table) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; /* Check input parameters */ if (f == NULL) { @@ -199,7 +199,7 @@ rte_table_hash_entry_add_key32_lru( int *key_found, void **entry_ptr) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; struct rte_bucket_4_32 *bucket; uint64_t signature, pos; uint32_t bucket_index, i; @@ -265,7 +265,7 @@ rte_table_hash_entry_delete_key32_lru( int *key_found, void *entry) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; struct rte_bucket_4_32 *bucket; uint64_t signature; uint32_t bucket_index, i; @@ -329,7 +329,7 @@ rte_table_hash_create_key32_ext(void *params, uint32_t entry_size) { struct rte_table_hash_key32_ext_params *p = - (struct rte_table_hash_key32_ext_params *) params; + params; struct rte_table_hash *f; uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket; uint32_t key_size, bucket_size_cl, stack_size_cl, total_size, i; @@ -392,7 +392,7 @@ rte_table_hash_create_key32_ext(void *params, static int rte_table_hash_free_key32_ext(void *table) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; /* Check input parameters */ if (f == NULL) { @@ -412,7 +412,7 @@ rte_table_hash_entry_add_key32_ext( int *key_found, void **entry_ptr) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev; uint64_t signature; uint32_t bucket_index, i; @@ -492,7 +492,7 @@ rte_table_hash_entry_delete_key32_ext( int *key_found, void *entry) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev; uint64_t signature; uint32_t bucket_index, i; @@ -1110,7 +1110,7 @@ grind_next_buckets: static int rte_table_hash_key32_stats_read(void *table, struct rte_table_stats *stats, int clear) { - struct rte_table_hash *t = (struct rte_table_hash *) table; + struct rte_table_hash *t = table; if (stats != NULL) memcpy(stats, &t->stats, sizeof(t->stats)); diff --git a/lib/librte_table/rte_table_hash_key8.c b/lib/librte_table/rte_table_hash_key8.c index b04f60dc..5f0c6566 100644 --- a/lib/librte_table/rte_table_hash_key8.c +++ b/lib/librte_table/rte_table_hash_key8.c @@ -180,7 +180,7 @@ rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size) static int rte_table_hash_free_key8_lru(void *table) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct 
rte_table_hash *f = table; /* Check input parameters */ if (f == NULL) { @@ -200,7 +200,7 @@ rte_table_hash_entry_add_key8_lru( int *key_found, void **entry_ptr) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; struct rte_bucket_4_8 *bucket; uint64_t signature, mask, pos; uint32_t bucket_index, i; @@ -263,7 +263,7 @@ rte_table_hash_entry_delete_key8_lru( int *key_found, void *entry) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; struct rte_bucket_4_8 *bucket; uint64_t signature, mask; uint32_t bucket_index, i; @@ -392,7 +392,7 @@ rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size) static int rte_table_hash_free_key8_ext(void *table) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; /* Check input parameters */ if (f == NULL) { @@ -412,7 +412,7 @@ rte_table_hash_entry_add_key8_ext( int *key_found, void **entry_ptr) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev; uint64_t signature; uint32_t bucket_index, i; @@ -493,7 +493,7 @@ rte_table_hash_entry_delete_key8_ext( int *key_found, void *entry) { - struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_table_hash *f = table; struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev; uint64_t signature; uint32_t bucket_index, i; @@ -1415,7 +1415,7 @@ grind_next_buckets: static int rte_table_hash_key8_stats_read(void *table, struct rte_table_stats *stats, int clear) { - struct rte_table_hash *t = (struct rte_table_hash *) table; + struct rte_table_hash *t = table; if (stats != NULL) memcpy(stats, &t->stats, sizeof(t->stats)); diff --git a/lib/librte_table/rte_table_hash_lru.c b/lib/librte_table/rte_table_hash_lru.c index 407c62ab..5a4864e2 100644 --- a/lib/librte_table/rte_table_hash_lru.c +++ b/lib/librte_table/rte_table_hash_lru.c @@ -149,7 +149,7 @@ static void * rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size) { struct rte_table_hash_lru_params *p = - (struct rte_table_hash_lru_params *) params; + params; struct rte_table_hash *t; uint32_t total_size, table_meta_sz; uint32_t bucket_sz, key_sz, key_stack_sz, data_sz; @@ -227,7 +227,7 @@ rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size) static int rte_table_hash_lru_free(void *table) { - struct rte_table_hash *t = (struct rte_table_hash *) table; + struct rte_table_hash *t = table; /* Check input parameters */ if (t == NULL) @@ -241,7 +241,7 @@ static int rte_table_hash_lru_entry_add(void *table, void *key, void *entry, int *key_found, void **entry_ptr) { - struct rte_table_hash *t = (struct rte_table_hash *) table; + struct rte_table_hash *t = table; struct bucket *bkt; uint64_t sig; uint32_t bkt_index, i; @@ -325,7 +325,7 @@ static int rte_table_hash_lru_entry_delete(void *table, void *key, int *key_found, void *entry) { - struct rte_table_hash *t = (struct rte_table_hash *) table; + struct rte_table_hash *t = table; struct bucket *bkt; uint64_t sig; uint32_t bkt_index, i; @@ -1068,7 +1068,7 @@ static int rte_table_hash_lru_lookup_dosig( static int rte_table_hash_lru_stats_read(void *table, struct rte_table_stats *stats, int clear) { - struct rte_table_hash *t = (struct rte_table_hash *) table; + struct rte_table_hash *t = table; if (stats != NULL) memcpy(stats, &t->stats, sizeof(t->stats)); diff --git a/lib/librte_table/rte_table_lpm.c 
b/lib/librte_table/rte_table_lpm.c index 598b79f5..2f472b97 100644 --- a/lib/librte_table/rte_table_lpm.c +++ b/lib/librte_table/rte_table_lpm.c @@ -82,7 +82,7 @@ struct rte_table_lpm { static void * rte_table_lpm_create(void *params, int socket_id, uint32_t entry_size) { - struct rte_table_lpm_params *p = (struct rte_table_lpm_params *) params; + struct rte_table_lpm_params *p = params; struct rte_table_lpm *lpm; struct rte_lpm_config lpm_config; @@ -154,7 +154,7 @@ rte_table_lpm_create(void *params, int socket_id, uint32_t entry_size) static int rte_table_lpm_free(void *table) { - struct rte_table_lpm *lpm = (struct rte_table_lpm *) table; + struct rte_table_lpm *lpm = table; /* Check input parameters */ if (lpm == NULL) { @@ -210,8 +210,8 @@ rte_table_lpm_entry_add( int *key_found, void **entry_ptr) { - struct rte_table_lpm *lpm = (struct rte_table_lpm *) table; - struct rte_table_lpm_key *ip_prefix = (struct rte_table_lpm_key *) key; + struct rte_table_lpm *lpm = table; + struct rte_table_lpm_key *ip_prefix = key; uint32_t nht_pos, nht_pos0_valid; int status; uint32_t nht_pos0 = 0; @@ -277,8 +277,8 @@ rte_table_lpm_entry_delete( int *key_found, void *entry) { - struct rte_table_lpm *lpm = (struct rte_table_lpm *) table; - struct rte_table_lpm_key *ip_prefix = (struct rte_table_lpm_key *) key; + struct rte_table_lpm *lpm = table; + struct rte_table_lpm_key *ip_prefix = key; uint32_t nht_pos; int status; @@ -372,7 +372,7 @@ rte_table_lpm_lookup( static int rte_table_lpm_stats_read(void *table, struct rte_table_stats *stats, int clear) { - struct rte_table_lpm *t = (struct rte_table_lpm *) table; + struct rte_table_lpm *t = table; if (stats != NULL) memcpy(stats, &t->stats, sizeof(t->stats)); diff --git a/lib/librte_table/rte_table_lpm_ipv6.c b/lib/librte_table/rte_table_lpm_ipv6.c index 836f4cf6..5def2ad9 100644 --- a/lib/librte_table/rte_table_lpm_ipv6.c +++ b/lib/librte_table/rte_table_lpm_ipv6.c @@ -81,7 +81,7 @@ static void * rte_table_lpm_ipv6_create(void *params, int socket_id, uint32_t entry_size) { struct rte_table_lpm_ipv6_params *p = - (struct rte_table_lpm_ipv6_params *) params; + params; struct rte_table_lpm_ipv6 *lpm; struct rte_lpm6_config lpm6_config; uint32_t total_size, nht_size; @@ -152,7 +152,7 @@ rte_table_lpm_ipv6_create(void *params, int socket_id, uint32_t entry_size) static int rte_table_lpm_ipv6_free(void *table) { - struct rte_table_lpm_ipv6 *lpm = (struct rte_table_lpm_ipv6 *) table; + struct rte_table_lpm_ipv6 *lpm = table; /* Check input parameters */ if (lpm == NULL) { @@ -208,12 +208,11 @@ rte_table_lpm_ipv6_entry_add( int *key_found, void **entry_ptr) { - struct rte_table_lpm_ipv6 *lpm = (struct rte_table_lpm_ipv6 *) table; + struct rte_table_lpm_ipv6 *lpm = table; struct rte_table_lpm_ipv6_key *ip_prefix = - (struct rte_table_lpm_ipv6_key *) key; - uint32_t nht_pos, nht_pos0_valid; + key; + uint32_t nht_pos, nht_pos0, nht_pos0_valid; int status; - uint8_t nht_pos0; /* Check input parameters */ if (lpm == NULL) { @@ -256,7 +255,7 @@ rte_table_lpm_ipv6_entry_add( /* Add rule to low level LPM table */ if (rte_lpm6_add(lpm->lpm, ip_prefix->ip, ip_prefix->depth, - (uint8_t) nht_pos) < 0) { + nht_pos) < 0) { RTE_LOG(ERR, TABLE, "%s: LPM IPv6 rule add failed\n", __func__); return -1; } @@ -277,10 +276,10 @@ rte_table_lpm_ipv6_entry_delete( int *key_found, void *entry) { - struct rte_table_lpm_ipv6 *lpm = (struct rte_table_lpm_ipv6 *) table; + struct rte_table_lpm_ipv6 *lpm = table; struct rte_table_lpm_ipv6_key *ip_prefix = - (struct rte_table_lpm_ipv6_key *) 
key; - uint8_t nht_pos; + key; + uint32_t nht_pos; int status; /* Check input parameters */ @@ -356,7 +355,7 @@ rte_table_lpm_ipv6_lookup( uint8_t *ip = RTE_MBUF_METADATA_UINT8_PTR(pkt, lpm->offset); int status; - uint8_t nht_pos; + uint32_t nht_pos; status = rte_lpm6_lookup(lpm->lpm, ip, &nht_pos); if (status == 0) { @@ -375,7 +374,7 @@ rte_table_lpm_ipv6_lookup( static int rte_table_lpm_ipv6_stats_read(void *table, struct rte_table_stats *stats, int clear) { - struct rte_table_lpm_ipv6 *t = (struct rte_table_lpm_ipv6 *) table; + struct rte_table_lpm_ipv6 *t = table; if (stats != NULL) memcpy(stats, &t->stats, sizeof(t->stats)); diff --git a/lib/librte_table/rte_table_stub.c b/lib/librte_table/rte_table_stub.c index 691d681a..1ee26bf2 100644 --- a/lib/librte_table/rte_table_stub.c +++ b/lib/librte_table/rte_table_stub.c @@ -98,7 +98,7 @@ rte_table_stub_lookup( static int rte_table_stub_stats_read(void *table, struct rte_table_stats *stats, int clear) { - struct rte_table_stub *t = (struct rte_table_stub *) table; + struct rte_table_stub *t = table; if (stats != NULL) memcpy(stats, &t->stats, sizeof(t->stats)); diff --git a/lib/librte_timer/Makefile b/lib/librte_timer/Makefile index 2aabef85..03a15390 100644 --- a/lib/librte_timer/Makefile +++ b/lib/librte_timer/Makefile @@ -46,7 +46,4 @@ SRCS-$(CONFIG_RTE_LIBRTE_TIMER) := rte_timer.c # install this header file SYMLINK-$(CONFIG_RTE_LIBRTE_TIMER)-include := rte_timer.h -# this lib needs eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_TIMER) += lib/librte_eal - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile index 415ffc6e..4a116fe3 100644 --- a/lib/librte_vhost/Makefile +++ b/lib/librte_vhost/Makefile @@ -36,7 +36,7 @@ LIB = librte_vhost.a EXPORT_MAP := rte_vhost_version.map -LIBABIVER := 3 +LIBABIVER := 4 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 CFLAGS += -I vhost_user @@ -51,13 +51,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c socket.c vhost.c vhost_user.c \ virtio_net.c # install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h - -# dependencies -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mempool -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_net +SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c index 8a075da2..2ceacc9a 100644 --- a/lib/librte_vhost/fd_man.c +++ b/lib/librte_vhost/fd_man.c @@ -65,17 +65,12 @@ fdset_move(struct fdset *pfdset, int dst, int src) pfdset->rwfds[dst] = pfdset->rwfds[src]; } -/* - * Find deleted fd entries and remove them - */ static void -fdset_shrink(struct fdset *pfdset) +fdset_shrink_nolock(struct fdset *pfdset) { int i; int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1); - pthread_mutex_lock(&pfdset->fd_mutex); - for (i = 0; i < last_valid_idx; i++) { if (pfdset->fd[i].fd != -1) continue; @@ -84,7 +79,16 @@ fdset_shrink(struct fdset *pfdset) last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1); } pfdset->num = last_valid_idx + 1; +} +/* + * Find deleted fd entries and remove them + */ +static void +fdset_shrink(struct fdset *pfdset) +{ + pthread_mutex_lock(&pfdset->fd_mutex); + fdset_shrink_nolock(pfdset); pthread_mutex_unlock(&pfdset->fd_mutex); } @@ -151,8 +155,12 @@ fdset_add(struct fdset *pfdset, int fd, fd_cb 
rcb, fd_cb wcb, void *dat) pthread_mutex_lock(&pfdset->fd_mutex); i = pfdset->num < MAX_FDS ? pfdset->num++ : -1; if (i == -1) { - pthread_mutex_unlock(&pfdset->fd_mutex); - return -2; + fdset_shrink_nolock(pfdset); + i = pfdset->num < MAX_FDS ? pfdset->num++ : -1; + if (i == -1) { + pthread_mutex_unlock(&pfdset->fd_mutex); + return -2; + } } fdset_add_fd(pfdset, i, fd, rcb, wcb, dat); @@ -202,8 +210,8 @@ fdset_del(struct fdset *pfdset, int fd) * will wait until the flag is reset to zero(which indicates the callback is * finished), then it could free the context after fdset_del. */ -void -fdset_event_dispatch(struct fdset *pfdset) +void * +fdset_event_dispatch(void *arg) { int i; struct pollfd *pfd; @@ -213,9 +221,10 @@ fdset_event_dispatch(struct fdset *pfdset) int fd, numfds; int remove1, remove2; int need_shrink; + struct fdset *pfdset = arg; if (pfdset == NULL) - return; + return NULL; while (1) { @@ -286,4 +295,6 @@ fdset_event_dispatch(struct fdset *pfdset) if (need_shrink) fdset_shrink(pfdset); } + + return NULL; } diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h index d319cac6..90d34db1 100644 --- a/lib/librte_vhost/fd_man.h +++ b/lib/librte_vhost/fd_man.h @@ -64,6 +64,6 @@ int fdset_add(struct fdset *pfdset, int fd, void *fdset_del(struct fdset *pfdset, int fd); -void fdset_event_dispatch(struct fdset *pfdset); +void *fdset_event_dispatch(void *arg); #endif diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h new file mode 100644 index 00000000..605e47cb --- /dev/null +++ b/lib/librte_vhost/rte_vhost.h @@ -0,0 +1,439 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_VHOST_H_ +#define _RTE_VHOST_H_ + +/** + * @file + * Interface to vhost-user + */ + +#include <stdint.h> +#include <sys/eventfd.h> + +#include <rte_memory.h> +#include <rte_mempool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* These are not C++-aware. 
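The fd_man.c hunks above change fdset_event_dispatch() from taking a struct fdset * to the void *(void *) signature pthread_create() expects, so the dispatch loop can be spawned directly as a thread. A minimal sketch of that usage, assuming an initialized struct fdset named fds (socket.c below does exactly this in rte_vhost_driver_start()):

	#include <pthread.h>
	#include "fd_man.h"

	static pthread_t dispatch_tid;

	static int
	spawn_event_loop(struct fdset *fds)
	{
		/* No wrapper needed: the dispatch loop itself now matches
		 * the start-routine type pthread_create() expects. */
		return pthread_create(&dispatch_tid, NULL,
				fdset_event_dispatch, fds);
	}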
*/ +#include <linux/vhost.h> +#include <linux/virtio_ring.h> + +#define RTE_VHOST_USER_CLIENT (1ULL << 0) +#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1) +#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) + +/** + * Information relating to memory regions including offsets to + * addresses in QEMU's memory file. + */ +struct rte_vhost_mem_region { + uint64_t guest_phys_addr; + uint64_t guest_user_addr; + uint64_t host_user_addr; + uint64_t size; + void *mmap_addr; + uint64_t mmap_size; + int fd; +}; + +/** + * Memory structure includes region and mapping information. + */ +struct rte_vhost_memory { + uint32_t nregions; + struct rte_vhost_mem_region regions[]; +}; + +struct rte_vhost_vring { + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; + uint64_t log_guest_addr; + + int callfd; + int kickfd; + uint16_t size; +}; + +/** + * Device and vring operations. + */ +struct vhost_device_ops { + int (*new_device)(int vid); /**< Add device. */ + void (*destroy_device)(int vid); /**< Remove device. */ + + int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */ + + /** + * Features could be changed after the feature negotiation. + * For example, VHOST_F_LOG_ALL will be set/cleared at the + * start/end of live migration, respectively. This callback + * is used to inform the application of such changes. + */ + int (*features_changed)(int vid, uint64_t features); + + void *reserved[4]; /**< Reserved for future extension */ +}; + +/** + * Convert guest physical address to host virtual address + * + * @param mem + * the guest memory regions + * @param gpa + * the guest physical address for querying + * @return + * the host virtual address on success, 0 on failure + */ +static inline uint64_t __attribute__((always_inline)) +rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa) +{ + struct rte_vhost_mem_region *reg; + uint32_t i; + + for (i = 0; i < mem->nregions; i++) { + reg = &mem->regions[i]; + if (gpa >= reg->guest_phys_addr && + gpa < reg->guest_phys_addr + reg->size) { + return gpa - reg->guest_phys_addr + + reg->host_user_addr; + } + } + + return 0; +} + +#define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL)) + +/** + * Log a memory write starting at the given address. + * + * This function only needs to be invoked when live migration starts. + * Therefore, it does not need to be called at all most of the time. To + * keep the performance impact minimal, it is suggested to do a + * check before calling it: + * + * if (unlikely(RTE_VHOST_NEED_LOG(features))) + * rte_vhost_log_write(vid, addr, len); + * + * @param vid + * vhost device ID + * @param addr + * the starting address for write + * @param len + * the length to write + */ +void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len); + +/** + * Log a used ring update starting at the given offset. + * + * As with rte_vhost_log_write(), it is suggested to do a check before + * calling it: + * + * if (unlikely(RTE_VHOST_NEED_LOG(features))) + * rte_vhost_log_used_vring(vid, vring_idx, offset, len); + * + * @param vid + * vhost device ID + * @param vring_idx + * the vring index + * @param offset + * the offset inside the used ring + * @param len + * the length to write + */ +void rte_vhost_log_used_vring(int vid, uint16_t vring_idx, + uint64_t offset, uint64_t len); + +int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable); + +/** + * Register vhost driver. 
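rte_vhost_gpa_to_vva() above pairs with rte_vhost_get_mem_table() (declared later in this header); a sketch of translating a guest physical buffer address, where vid, gpa and process_buffer() are assumed application values:

	struct rte_vhost_memory *mem;
	uint64_t vva;

	if (rte_vhost_get_mem_table(vid, &mem) == 0) {
		vva = rte_vhost_gpa_to_vva(mem, gpa);
		if (vva != 0)	/* 0 means gpa is outside every region */
			process_buffer((void *)(uintptr_t)vva);
		free(mem);	/* the snapshot is owned by the caller */
	}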
path could be different for multiple + * instance support. + */ +int rte_vhost_driver_register(const char *path, uint64_t flags); + +/* Unregister vhost driver. This is only meaningful to vhost user. */ +int rte_vhost_driver_unregister(const char *path); + +/** + * Set the feature bits the vhost-user driver supports. + * + * @param path + * The vhost-user socket file path + * @param features + * Supported features + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_driver_set_features(const char *path, uint64_t features); + +/** + * Enable vhost-user driver features. + * + * Note that + * - the param features should be a subset of the feature bits provided + * by rte_vhost_driver_set_features(). + * - it must be invoked before vhost-user negotiation starts. + * + * @param path + * The vhost-user socket file path + * @param features + * Features to enable + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_driver_enable_features(const char *path, uint64_t features); + +/** + * Disable vhost-user driver features. + * + * The two notes at rte_vhost_driver_enable_features() also apply here. + * + * @param path + * The vhost-user socket file path + * @param features + * Features to disable + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_driver_disable_features(const char *path, uint64_t features); + +/** + * Get the feature bits before feature negotiation. + * + * @param path + * The vhost-user socket file path + * @param features + * A pointer to store the queried feature bits + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_driver_get_features(const char *path, uint64_t *features); + +/** + * Get the feature bits after negotiation + * + * @param vid + * Vhost device ID + * @param features + * A pointer to store the queried feature bits + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_get_negotiated_features(int vid, uint64_t *features); + +/* Register callbacks. */ +int rte_vhost_driver_callback_register(const char *path, + struct vhost_device_ops const * const ops); + +/** + * + * Start the vhost-user driver. + * + * This function triggers the vhost-user negotiation. + * + * @param path + * The vhost-user socket file path + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_driver_start(const char *path); + +/** + * Get the MTU value of the device if set in QEMU. + * + * @param vid + * virtio-net device ID + * @param mtu + * The variable to store the MTU value + * + * @return + * 0: success + * -EAGAIN: device not yet started + * -ENOTSUP: device does not support MTU feature + */ +int rte_vhost_get_mtu(int vid, uint16_t *mtu); + +/** + * Get the numa node from which the virtio net device's memory + * is allocated. + * + * @param vid + * vhost device ID + * + * @return + * The numa node, -1 on failure + */ +int rte_vhost_get_numa_node(int vid); + +/** + * @deprecated + * Get the number of queues the device supports. + * + * Note this function is deprecated, as it returns a queue pair number, + * which is vhost specific. Instead, rte_vhost_get_vring_num should + * be used. + * + * @param vid + * vhost device ID + * + * @return + * The number of queues, 0 on failure + */ +__rte_deprecated +uint32_t rte_vhost_get_queue_num(int vid); + +/** + * Get the number of vrings the device supports. 
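Taken together, the declarations above form the new per-socket bring-up sequence; a sketch, where the socket path, the callbacks and the trimmed feature bit are assumed application choices:

	static const struct vhost_device_ops app_ops = {
		.new_device     = app_new_device,	/* assumed callbacks */
		.destroy_device = app_destroy_device,
	};

	static int
	start_vhost_backend(void)
	{
		const char *path = "/tmp/vhost-user.sock";	/* assumed */

		if (rte_vhost_driver_register(path, 0) < 0)
			return -1;
		/* Optionally trim the feature set before negotiation. */
		if (rte_vhost_driver_disable_features(path,
				1ULL << VIRTIO_NET_F_HOST_TSO4) < 0)
			return -1;
		if (rte_vhost_driver_callback_register(path, &app_ops) < 0)
			return -1;
		/* Non-blocking: negotiation then runs on an internal thread. */
		return rte_vhost_driver_start(path);
	}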
+ * + * @param vid + * vhost device ID + * + * @return + * The number of vrings, 0 on failure + */ +uint16_t rte_vhost_get_vring_num(int vid); + +/** + * Get the virtio net device's ifname, which is the vhost-user socket + * file path. + * + * @param vid + * vhost device ID + * @param buf + * The buffer to store the queried ifname + * @param len + * The length of buf + * + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_get_ifname(int vid, char *buf, size_t len); + +/** + * Get how many avail entries are left in the queue + * + * @param vid + * vhost device ID + * @param queue_id + * virtio queue index + * + * @return + * num of avail entries left + */ +uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id); + +struct rte_mbuf; +struct rte_mempool; +/** + * This function adds buffers to the virtio device's RX virtqueue. Buffers can + * be received from the physical port or from another virtual device. A packet + * count is returned to indicate the number of packets that were successfully + * added to the RX queue. + * @param vid + * vhost device ID + * @param queue_id + * virtio queue index in mq case + * @param pkts + * array to contain packets to be enqueued + * @param count + * packets num to be enqueued + * @return + * num of packets enqueued + */ +uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id, + struct rte_mbuf **pkts, uint16_t count); + +/** + * This function gets guest buffers from the virtio device TX virtqueue, + * constructs host mbufs, copies guest buffer content to host mbufs and + * stores them in pkts to be processed. + * @param vid + * vhost device ID + * @param queue_id + * virtio queue index in mq case + * @param mbuf_pool + * mbuf_pool where host mbuf is allocated. + * @param pkts + * array to contain packets to be dequeued + * @param count + * packets num to be dequeued + * @return + * num of packets dequeued + */ +uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count); + +/** + * Get guest mem table: a list of memory regions. + * + * An rte_vhost_memory object will be allocated internally, to hold the + * guest memory regions. The application should free it at the destroy_device() + * callback. + * + * @param vid + * vhost device ID + * @param mem + * To store the returned mem regions + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); + +/** + * Get guest vring info, including the vring address, vring size, etc. 
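A sketch of the enqueue/dequeue pair above in a trivial echo loop; the burst size and mempool are assumed, and queues 1/0 are the first TX/RX rings per the virtio-net queue layout:

	#define BURST 32	/* assumed burst size */

	static void
	echo_poll(int vid, struct rte_mempool *mp)
	{
		struct rte_mbuf *pkts[BURST];
		uint16_t nr, i;

		/* Guest TX ring (queue 1) -> host mbufs. */
		nr = rte_vhost_dequeue_burst(vid, 1, mp, pkts, BURST);

		/* Copy them straight back via the guest RX ring (queue 0). */
		rte_vhost_enqueue_burst(vid, 0, pkts, nr);

		/* The enqueue copies into guest buffers and does not take
		 * mbuf ownership, so the whole burst is freed here. */
		for (i = 0; i < nr; i++)
			rte_pktmbuf_free(pkts[i]);
	}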
+ * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param vring + * the structure to hold the requested vring info + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, + struct rte_vhost_vring *vring); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_VHOST_H_ */ diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index 5ceaa8a5..07858732 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -4,12 +4,8 @@ DPDK_2.0 { rte_vhost_dequeue_burst; rte_vhost_driver_callback_register; rte_vhost_driver_register; - rte_vhost_driver_session_start; rte_vhost_enable_guest_notification; rte_vhost_enqueue_burst; - rte_vhost_feature_disable; - rte_vhost_feature_enable; - rte_vhost_feature_get; local: *; }; @@ -30,3 +26,22 @@ DPDK_16.07 { rte_vhost_get_queue_num; } DPDK_2.1; + +DPDK_17.05 { + global: + + rte_vhost_driver_disable_features; + rte_vhost_driver_enable_features; + rte_vhost_driver_get_features; + rte_vhost_driver_set_features; + rte_vhost_driver_start; + rte_vhost_get_mem_table; + rte_vhost_get_mtu; + rte_vhost_get_negotiated_features; + rte_vhost_get_vhost_vring; + rte_vhost_get_vring_num; + rte_vhost_gpa_to_vva; + rte_vhost_log_used_vring; + rte_vhost_log_write; + +} DPDK_16.07; diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h deleted file mode 100644 index 926039c5..00000000 --- a/lib/librte_vhost/rte_virtio_net.h +++ /dev/null @@ -1,193 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef _VIRTIO_NET_H_ -#define _VIRTIO_NET_H_ - -/** - * @file - * Interface to vhost net - */ - -#include <stdint.h> -#include <linux/vhost.h> -#include <linux/virtio_ring.h> -#include <linux/virtio_net.h> -#include <sys/eventfd.h> -#include <sys/socket.h> -#include <linux/if.h> - -#include <rte_memory.h> -#include <rte_mempool.h> -#include <rte_ether.h> - -#define RTE_VHOST_USER_CLIENT (1ULL << 0) -#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1) -#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) - -/* Enum for virtqueue management. */ -enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; - -/** - * Device and vring operations. - */ -struct virtio_net_device_ops { - int (*new_device)(int vid); /**< Add device. */ - void (*destroy_device)(int vid); /**< Remove device. */ - - int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */ - - void *reserved[5]; /**< Reserved for future extension */ -}; - -/** - * Disable features in feature_mask. Returns 0 on success. - */ -int rte_vhost_feature_disable(uint64_t feature_mask); - -/** - * Enable features in feature_mask. Returns 0 on success. - */ -int rte_vhost_feature_enable(uint64_t feature_mask); - -/* Returns currently supported vhost features */ -uint64_t rte_vhost_feature_get(void); - -int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable); - -/** - * Register vhost driver. path could be different for multiple - * instance support. - */ -int rte_vhost_driver_register(const char *path, uint64_t flags); - -/* Unregister vhost driver. This is only meaningful to vhost user. */ -int rte_vhost_driver_unregister(const char *path); - -/* Register callbacks. */ -int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const); -/* Start vhost driver session blocking loop. */ -int rte_vhost_driver_session_start(void); - -/** - * Get the numa node from which the virtio net device's memory - * is allocated. - * - * @param vid - * virtio-net device ID - * - * @return - * The numa node, -1 on failure - */ -int rte_vhost_get_numa_node(int vid); - -/** - * Get the number of queues the device supports. - * - * @param vid - * virtio-net device ID - * - * @return - * The number of queues, 0 on failure - */ -uint32_t rte_vhost_get_queue_num(int vid); - -/** - * Get the virtio net device's ifname, which is the vhost-user socket - * file path. - * - * @param vid - * virtio-net device ID - * @param buf - * The buffer to stored the queried ifname - * @param len - * The length of buf - * - * @return - * 0 on success, -1 on failure - */ -int rte_vhost_get_ifname(int vid, char *buf, size_t len); - -/** - * Get how many avail entries are left in the queue - * - * @param vid - * virtio-net device ID - * @param queue_id - * virtio queue index - * - * @return - * num of avail entires left - */ -uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id); - -/** - * This function adds buffers to the virtio devices RX virtqueue. Buffers can - * be received from the physical port or from another virtual device. A packet - * count is returned to indicate the number of packets that were succesfully - * added to the RX queue. 
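For applications still on this removed header, the lost symbols map onto the per-socket calls introduced above; a rough mapping, where path is the socket file passed to rte_vhost_driver_register():

	/*
	 * Old global API (removed)            New per-socket API
	 * ------------------------            ------------------
	 * rte_vhost_feature_get()          -> rte_vhost_driver_get_features(path, &f)
	 * rte_vhost_feature_enable(mask)   -> rte_vhost_driver_enable_features(path, mask)
	 * rte_vhost_feature_disable(mask)  -> rte_vhost_driver_disable_features(path, mask)
	 * rte_vhost_driver_session_start() -> rte_vhost_driver_start(path), non-blocking
	 */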
- * @param vid - * virtio-net device ID - * @param queue_id - * virtio queue index in mq case - * @param pkts - * array to contain packets to be enqueued - * @param count - * packets num to be enqueued - * @return - * num of packets enqueued - */ -uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id, - struct rte_mbuf **pkts, uint16_t count); - -/** - * This function gets guest buffers from the virtio device TX virtqueue, - * construct host mbufs, copies guest buffer content to host mbufs and - * store them in pkts to be processed. - * @param vid - * virtio-net device - * @param queue_id - * virtio queue index in mq case - * @param mbuf_pool - * mbuf_pool where host mbuf is allocated. - * @param pkts - * array to contain packets to be dequeued - * @param count - * packets num to be dequeued - * @return - * num of packets dequeued - */ -uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, - struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count); - -#endif /* _VIRTIO_NET_H_ */ diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index aaa9c270..c7f99b08 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -52,22 +52,42 @@ #include "vhost.h" #include "vhost_user.h" + +TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection); + /* * Every time rte_vhost_driver_register() is invoked, an associated * vhost_user_socket struct will be created. */ struct vhost_user_socket { + struct vhost_user_connection_list conn_list; + pthread_mutex_t conn_mutex; char *path; - int listenfd; - int connfd; + int socket_fd; + struct sockaddr_un un; bool is_server; bool reconnect; bool dequeue_zero_copy; + + /* + * The "supported_features" indicates the feature bits the + * vhost driver supports. The "features" indicates the feature + * bits after the rte_vhost_driver_features_disable/enable(). + * It is also the final feature bits used for vhost-user + * features negotiation. 
+ */ + uint64_t supported_features; + uint64_t features; + + struct vhost_device_ops const *notify_ops; }; struct vhost_user_connection { struct vhost_user_socket *vsocket; + int connfd; int vid; + + TAILQ_ENTRY(vhost_user_connection) next; }; #define MAX_VHOST_SOCKET 1024 @@ -82,7 +102,8 @@ struct vhost_user { static void vhost_user_server_new_connection(int fd, void *data, int *remove); static void vhost_user_read_cb(int fd, void *dat, int *remove); -static int vhost_user_create_client(struct vhost_user_socket *vsocket); +static int create_unix_socket(struct vhost_user_socket *vsocket); +static int vhost_user_start_client(struct vhost_user_socket *vsocket); static struct vhost_user vhost_user = { .fdset = { @@ -209,19 +230,24 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid); - vsocket->connfd = fd; + conn->connfd = fd; conn->vsocket = vsocket; conn->vid = vid; ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb, NULL, conn); if (ret < 0) { - vsocket->connfd = -1; + conn->connfd = -1; free(conn); close(fd); RTE_LOG(ERR, VHOST_CONFIG, "failed to add fd %d into vhost server fdset\n", fd); + return; } + + pthread_mutex_lock(&vsocket->conn_mutex); + TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); + pthread_mutex_unlock(&vsocket->conn_mutex); } /* call back when there is new vhost-user connection from client */ @@ -247,29 +273,36 @@ vhost_user_read_cb(int connfd, void *dat, int *remove) ret = vhost_user_msg_handler(conn->vid, connfd); if (ret < 0) { - vsocket->connfd = -1; close(connfd); *remove = 1; vhost_destroy_device(conn->vid); + + pthread_mutex_lock(&vsocket->conn_mutex); + TAILQ_REMOVE(&vsocket->conn_list, conn, next); + pthread_mutex_unlock(&vsocket->conn_mutex); + free(conn); - if (vsocket->reconnect) - vhost_user_create_client(vsocket); + if (vsocket->reconnect) { + create_unix_socket(vsocket); + vhost_user_start_client(vsocket); + } } } static int -create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server) +create_unix_socket(struct vhost_user_socket *vsocket) { int fd; + struct sockaddr_un *un = &vsocket->un; fd = socket(AF_UNIX, SOCK_STREAM, 0); if (fd < 0) return -1; RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n", - is_server ? "server" : "client", fd); + vsocket->is_server ? 
"server" : "client", fd); - if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { + if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { RTE_LOG(ERR, VHOST_CONFIG, "vhost-user: can't set nonblocking mode for socket, fd: " "%d (%s)\n", fd, strerror(errno)); @@ -279,25 +312,21 @@ create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server) memset(un, 0, sizeof(*un)); un->sun_family = AF_UNIX; - strncpy(un->sun_path, path, sizeof(un->sun_path)); + strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path)); un->sun_path[sizeof(un->sun_path) - 1] = '\0'; - return fd; + vsocket->socket_fd = fd; + return 0; } static int -vhost_user_create_server(struct vhost_user_socket *vsocket) +vhost_user_start_server(struct vhost_user_socket *vsocket) { - int fd; int ret; - struct sockaddr_un un; + int fd = vsocket->socket_fd; const char *path = vsocket->path; - fd = create_unix_socket(path, &un, vsocket->is_server); - if (fd < 0) - return -1; - - ret = bind(fd, (struct sockaddr *)&un, sizeof(un)); + ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un)); if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, "failed to bind to %s: %s; remove it and try again\n", @@ -310,7 +339,6 @@ vhost_user_create_server(struct vhost_user_socket *vsocket) if (ret < 0) goto err; - vsocket->listenfd = fd; ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection, NULL, vsocket); if (ret < 0) { @@ -429,26 +457,21 @@ vhost_user_reconnect_init(void) } static int -vhost_user_create_client(struct vhost_user_socket *vsocket) +vhost_user_start_client(struct vhost_user_socket *vsocket) { - int fd; int ret; - struct sockaddr_un un; + int fd = vsocket->socket_fd; const char *path = vsocket->path; struct vhost_user_reconnect *reconn; - fd = create_unix_socket(path, &un, vsocket->is_server); - if (fd < 0) - return -1; - - ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un, - sizeof(un)); + ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un, + sizeof(vsocket->un)); if (ret == 0) { vhost_user_add_connection(fd, vsocket); return 0; } - RTE_LOG(ERR, VHOST_CONFIG, + RTE_LOG(WARNING, VHOST_CONFIG, "failed to connect to %s: %s\n", path, strerror(errno)); @@ -457,7 +480,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket) return -1; } - RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path); + RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path); reconn = malloc(sizeof(*reconn)); if (reconn == NULL) { RTE_LOG(ERR, VHOST_CONFIG, @@ -465,7 +488,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket) close(fd); return -1; } - reconn->un = un; + reconn->un = vsocket->un; reconn->fd = fd; reconn->vsocket = vsocket; pthread_mutex_lock(&reconn_list.mutex); @@ -475,6 +498,94 @@ vhost_user_create_client(struct vhost_user_socket *vsocket) return 0; } +static struct vhost_user_socket * +find_vhost_user_socket(const char *path) +{ + int i; + + for (i = 0; i < vhost_user.vsocket_cnt; i++) { + struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; + + if (!strcmp(vsocket->path, path)) + return vsocket; + } + + return NULL; +} + +int +rte_vhost_driver_disable_features(const char *path, uint64_t features) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + vsocket->features &= ~features; + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 
0 : -1; +} + +int +rte_vhost_driver_enable_features(const char *path, uint64_t features) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) { + if ((vsocket->supported_features & features) != features) { + /* + * trying to enable features the driver doesn't + * support. + */ + pthread_mutex_unlock(&vhost_user.mutex); + return -1; + } + vsocket->features |= features; + } + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 0 : -1; +} + +int +rte_vhost_driver_set_features(const char *path, uint64_t features) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) { + vsocket->supported_features = features; + vsocket->features = features; + } + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 0 : -1; +} + +int +rte_vhost_driver_get_features(const char *path, uint64_t *features) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + *features = vsocket->features; + pthread_mutex_unlock(&vhost_user.mutex); + + if (!vsocket) { + RTE_LOG(ERR, VHOST_CONFIG, + "socket file %s is not registered yet.\n", path); + return -1; + } else { + return 0; + } +} + /* * Register a new vhost-user socket; here we could act as server * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag @@ -502,9 +613,25 @@ rte_vhost_driver_register(const char *path, uint64_t flags) goto out; memset(vsocket, 0, sizeof(struct vhost_user_socket)); vsocket->path = strdup(path); - vsocket->connfd = -1; + TAILQ_INIT(&vsocket->conn_list); + pthread_mutex_init(&vsocket->conn_mutex, NULL); vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY; + /* + * Set the supported features correctly for the builtin vhost-user + * net driver. + * + * Applications know nothing about the features the builtin virtio net + * driver (virtio_net.c) supports, thus it is not possible for them + * to invoke rte_vhost_driver_set_features(). To work around this, here + * we set it unconditionally. If the application wants to implement + * another vhost-user driver (say SCSI), it should call + * rte_vhost_driver_set_features(), which will overwrite the following + * two values. 
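A sketch of that override path for a hypothetical non-net backend; the socket path and feature mask are assumed values:

	static int
	register_scsi_backend(const char *path, uint64_t scsi_features)
	{
		if (rte_vhost_driver_register(path, 0) < 0)
			return -1;
		/* Replaces the builtin VIRTIO_NET_SUPPORTED_FEATURES default,
		 * overwriting both supported_features and features. */
		return rte_vhost_driver_set_features(path, scsi_features);
	}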
+ */ + vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES; + vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES; + if ((flags & RTE_VHOST_USER_CLIENT) != 0) { vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT); if (vsocket->reconnect && reconn_tid == 0) { @@ -514,11 +641,10 @@ rte_vhost_driver_register(const char *path, uint64_t flags) goto out; } } - ret = vhost_user_create_client(vsocket); } else { vsocket->is_server = true; - ret = vhost_user_create_server(vsocket); } + ret = create_unix_socket(vsocket); if (ret < 0) { free(vsocket->path); free(vsocket); @@ -565,7 +691,7 @@ rte_vhost_driver_unregister(const char *path) { int i; int count; - struct vhost_user_connection *conn; + struct vhost_user_connection *conn, *next; pthread_mutex_lock(&vhost_user.mutex); @@ -574,22 +700,29 @@ rte_vhost_driver_unregister(const char *path) if (!strcmp(vsocket->path, path)) { if (vsocket->is_server) { - fdset_del(&vhost_user.fdset, vsocket->listenfd); - close(vsocket->listenfd); + fdset_del(&vhost_user.fdset, vsocket->socket_fd); + close(vsocket->socket_fd); unlink(path); } else if (vsocket->reconnect) { vhost_user_remove_reconnect(vsocket); } - conn = fdset_del(&vhost_user.fdset, vsocket->connfd); - if (conn) { + pthread_mutex_lock(&vsocket->conn_mutex); + for (conn = TAILQ_FIRST(&vsocket->conn_list); + conn != NULL; + conn = next) { + next = TAILQ_NEXT(conn, next); + + fdset_del(&vhost_user.fdset, conn->connfd); RTE_LOG(INFO, VHOST_CONFIG, "free connfd = %d for device '%s'\n", - vsocket->connfd, path); - close(vsocket->connfd); + conn->connfd, path); + close(conn->connfd); vhost_destroy_device(conn->vid); + TAILQ_REMOVE(&vsocket->conn_list, conn, next); free(conn); } + pthread_mutex_unlock(&vsocket->conn_mutex); free(vsocket->path); free(vsocket); @@ -607,9 +740,59 @@ rte_vhost_driver_unregister(const char *path) return -1; } +/* + * Register ops so that we can add/remove device to data core. + */ +int +rte_vhost_driver_callback_register(const char *path, + struct vhost_device_ops const * const ops) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + vsocket->notify_ops = ops; + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 0 : -1; +} + +struct vhost_device_ops const * +vhost_driver_callback_get(const char *path) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 
vsocket->notify_ops : NULL; +} + int -rte_vhost_driver_session_start(void) +rte_vhost_driver_start(const char *path) { - fdset_event_dispatch(&vhost_user.fdset); - return 0; + struct vhost_user_socket *vsocket; + static pthread_t fdset_tid; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + pthread_mutex_unlock(&vhost_user.mutex); + + if (!vsocket) + return -1; + + if (fdset_tid == 0) { + int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch, + &vhost_user.fdset); + if (ret != 0) + RTE_LOG(ERR, VHOST_CONFIG, + "failed to create fdset handling thread\n"); + } + + if (vsocket->is_server) + return vhost_user_start_server(vsocket); + else + return vhost_user_start_client(vsocket); } diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index e4150934..0b19d2eb 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -45,36 +45,12 @@ #include <rte_string_fns.h> #include <rte_memory.h> #include <rte_malloc.h> -#include <rte_virtio_net.h> +#include <rte_vhost.h> #include "vhost.h" -#define VHOST_USER_F_PROTOCOL_FEATURES 30 - -/* Features supported by this lib. */ -#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ - (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ - (1ULL << VIRTIO_NET_F_CTRL_RX) | \ - (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \ - (VHOST_SUPPORTS_MQ) | \ - (1ULL << VIRTIO_F_VERSION_1) | \ - (1ULL << VHOST_F_LOG_ALL) | \ - (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ - (1ULL << VIRTIO_NET_F_HOST_TSO4) | \ - (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ - (1ULL << VIRTIO_NET_F_CSUM) | \ - (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ - (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ - (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ - (1ULL << VIRTIO_RING_F_INDIRECT_DESC)) - -uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES; - struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; -/* device ops to add/remove device to/from data core. */ -struct virtio_net_device_ops const *notify_ops; - struct virtio_net * get_device(int vid) { @@ -108,10 +84,8 @@ cleanup_device(struct virtio_net *dev, int destroy) vhost_backend_cleanup(dev); - for (i = 0; i < dev->virt_qp_nb; i++) { - cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ], destroy); - cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ], destroy); - } + for (i = 0; i < dev->nr_vring; i++) + cleanup_vq(dev->virtqueue[i], destroy); } /* @@ -121,24 +95,21 @@ static void free_device(struct virtio_net *dev) { uint32_t i; - struct vhost_virtqueue *rxq, *txq; + struct vhost_virtqueue *vq; - for (i = 0; i < dev->virt_qp_nb; i++) { - rxq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ]; - txq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ]; + for (i = 0; i < dev->nr_vring; i++) { + vq = dev->virtqueue[i]; - rte_free(rxq->shadow_used_ring); - rte_free(txq->shadow_used_ring); + rte_free(vq->shadow_used_ring); - /* rxq and txq are allocated together as queue-pair */ - rte_free(rxq); + rte_free(vq); } rte_free(dev); } static void -init_vring_queue(struct vhost_virtqueue *vq, int qp_idx) +init_vring_queue(struct vhost_virtqueue *vq) { memset(vq, 0, sizeof(struct vhost_virtqueue)); @@ -148,69 +119,48 @@ init_vring_queue(struct vhost_virtqueue *vq, int qp_idx) /* Backends are set to -1 indicating an inactive device. */ vq->backend = -1; - /* always set the default vq pair to enabled */ - if (qp_idx == 0) - vq->enabled = 1; + /* + * always set the vq to enabled; this is to keep compatibility + * with the old QEMU, which has no SET_VRING_ENABLE message. 
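The control-flow change in rte_vhost_driver_start() above is worth spelling out; a before/after sketch:

	/* Old (removed): blocked the calling thread forever. */
	rte_vhost_driver_session_start();

	/* New: returns immediately; the event loop runs on an internal
	 * thread created on the first call. */
	rte_vhost_driver_start(path);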
+ */ + vq->enabled = 1; TAILQ_INIT(&vq->zmbuf_list); } static void -init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) -{ - uint32_t base_idx = qp_idx * VIRTIO_QNUM; - - init_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx); - init_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx); -} - -static void -reset_vring_queue(struct vhost_virtqueue *vq, int qp_idx) +reset_vring_queue(struct vhost_virtqueue *vq) { int callfd; callfd = vq->callfd; - init_vring_queue(vq, qp_idx); + init_vring_queue(vq); vq->callfd = callfd; } -static void -reset_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) -{ - uint32_t base_idx = qp_idx * VIRTIO_QNUM; - - reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx); - reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx); -} - int -alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) +alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx) { - struct vhost_virtqueue *virtqueue = NULL; - uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ; - uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ; + struct vhost_virtqueue *vq; - virtqueue = rte_malloc(NULL, - sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0); - if (virtqueue == NULL) { + vq = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0); + if (vq == NULL) { RTE_LOG(ERR, VHOST_CONFIG, - "Failed to allocate memory for virt qp:%d.\n", qp_idx); + "Failed to allocate memory for vring:%u.\n", vring_idx); return -1; } - dev->virtqueue[virt_rx_q_idx] = virtqueue; - dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ; - - init_vring_queue_pair(dev, qp_idx); + dev->virtqueue[vring_idx] = vq; + init_vring_queue(vq); - dev->virt_qp_nb += 1; + dev->nr_vring += 1; return 0; } /* * Reset some variables in device structure, while keeping few - * others untouched, such as vid, ifname, virt_qp_nb: they + * others untouched, such as vid, ifname, nr_vring: they * should be same unless the device is removed. 
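The queue-pair to vring refactor above keeps the virtio-net ring layout and only changes the bookkeeping; the index arithmetic, using the VIRTIO_RXQ/VIRTIO_TXQ values from the removed code:

	/* Old: dev->virt_qp_nb queue pairs, rings allocated two at a time.
	 * New: dev->nr_vring rings, allocated individually. For virtio-net:
	 *
	 *   rx_vring_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;  i.e. qp_idx * 2
	 *   tx_vring_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;  i.e. qp_idx * 2 + 1
	 *
	 * hence rte_vhost_get_queue_num() returning nr_vring / 2 below. */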
*/ void @@ -222,8 +172,8 @@ reset_device(struct virtio_net *dev) dev->protocol_features = 0; dev->flags = 0; - for (i = 0; i < dev->virt_qp_nb; i++) - reset_vring_queue_pair(dev, i); + for (i = 0; i < dev->nr_vring; i++) + reset_vring_queue(dev->virtqueue[i]); } /* @@ -274,7 +224,7 @@ vhost_destroy_device(int vid) if (dev->flags & VIRTIO_DEV_RUNNING) { dev->flags &= ~VIRTIO_DEV_RUNNING; - notify_ops->destroy_device(vid); + dev->notify_ops->destroy_device(vid); } cleanup_device(dev, 1); @@ -312,6 +262,25 @@ vhost_enable_dequeue_zero_copy(int vid) } int +rte_vhost_get_mtu(int vid, uint16_t *mtu) +{ + struct virtio_net *dev = get_device(vid); + + if (!dev) + return -ENODEV; + + if (!(dev->flags & VIRTIO_DEV_READY)) + return -EAGAIN; + + if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU))) + return -ENOTSUP; + + *mtu = dev->mtu; + + return 0; +} + +int rte_vhost_get_numa_node(int vid) { #ifdef RTE_LIBRTE_VHOST_NUMA @@ -345,7 +314,18 @@ rte_vhost_get_queue_num(int vid) if (dev == NULL) return 0; - return dev->virt_qp_nb; + return dev->nr_vring / 2; +} + +uint16_t +rte_vhost_get_vring_num(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return 0; + + return dev->nr_vring; } int @@ -364,6 +344,72 @@ rte_vhost_get_ifname(int vid, char *buf, size_t len) return 0; } +int +rte_vhost_get_negotiated_features(int vid, uint64_t *features) +{ + struct virtio_net *dev; + + dev = get_device(vid); + if (!dev) + return -1; + + *features = dev->features; + return 0; +} + +int +rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem) +{ + struct virtio_net *dev; + struct rte_vhost_memory *m; + size_t size; + + dev = get_device(vid); + if (!dev) + return -1; + + size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region); + m = malloc(sizeof(struct rte_vhost_memory) + size); + if (!m) + return -1; + + m->nregions = dev->mem->nregions; + memcpy(m->regions, dev->mem->regions, size); + *mem = m; + + return 0; +} + +int +rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, + struct rte_vhost_vring *vring) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (!dev) + return -1; + + if (vring_idx >= VHOST_MAX_VRING) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (!vq) + return -1; + + vring->desc = vq->desc; + vring->avail = vq->avail; + vring->used = vq->used; + vring->log_guest_addr = vq->log_guest_addr; + + vring->callfd = vq->callfd; + vring->kickfd = vq->kickfd; + vring->size = vq->size; + + return 0; +} + uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id) { @@ -399,33 +445,33 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) return 0; } -uint64_t rte_vhost_feature_get(void) +void +rte_vhost_log_write(int vid, uint64_t addr, uint64_t len) { - return VHOST_FEATURES; -} + struct virtio_net *dev = get_device(vid); -int rte_vhost_feature_disable(uint64_t feature_mask) -{ - VHOST_FEATURES = VHOST_FEATURES & ~feature_mask; - return 0; -} + if (dev == NULL) + return; -int rte_vhost_feature_enable(uint64_t feature_mask) -{ - if ((feature_mask & VHOST_SUPPORTED_FEATURES) == feature_mask) { - VHOST_FEATURES = VHOST_FEATURES | feature_mask; - return 0; - } - return -1; + vhost_log_write(dev, addr, len); } -/* - * Register ops so that we can add/remove device to data core. 
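A sketch of inspecting a ring through the rte_vhost_get_vhost_vring() accessor above; vid and the ring index are assumed:

	struct rte_vhost_vring vr;

	if (rte_vhost_get_vhost_vring(vid, 0, &vr) == 0) {
		uint16_t ring_size = vr.size;
		/* Raw pointers into the guest-shared rings. */
		uint16_t avail_idx = vr.avail->idx;

		/* e.g. notify the guest after updating the used ring. */
		eventfd_write(vr.callfd, (eventfd_t)1);
	}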
- */ -int -rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const ops) +void +rte_vhost_log_used_vring(int vid, uint16_t vring_idx, + uint64_t offset, uint64_t len) { - notify_ops = ops; + struct virtio_net *dev; + struct vhost_virtqueue *vq; - return 0; + dev = get_device(vid); + if (dev == NULL) + return; + + if (vring_idx >= VHOST_MAX_VRING) + return; + vq = dev->virtqueue[vring_idx]; + if (!vq) + return; + + vhost_log_used_vring(dev, vq, offset, len); } diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 22564f1c..ddd8a9c4 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -39,13 +39,19 @@ #include <sys/queue.h> #include <unistd.h> #include <linux/vhost.h> +#include <linux/virtio_net.h> +#include <sys/socket.h> +#include <linux/if.h> #include <rte_log.h> +#include <rte_ether.h> -#include "rte_virtio_net.h" +#include "rte_vhost.h" /* Used to indicate that the device is running on a data core */ #define VIRTIO_DEV_RUNNING 1 +/* Used to indicate that the device is ready to operate */ +#define VIRTIO_DEV_READY 2 /* Backend value set by guest. */ #define VIRTIO_DEV_STOPPED -1 @@ -110,24 +116,20 @@ struct vhost_virtqueue { uint16_t shadow_used_idx; } __rte_cache_aligned; -/* Old kernels have no such macro defined */ +/* Old kernels have no such macros defined */ #ifndef VIRTIO_NET_F_GUEST_ANNOUNCE #define VIRTIO_NET_F_GUEST_ANNOUNCE 21 #endif +#ifndef VIRTIO_NET_F_MQ + #define VIRTIO_NET_F_MQ 22 +#endif -/* - * Make an extra wrapper for VIRTIO_NET_F_MQ and - * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they are - * introduced since kernel v3.8. This makes our - * code buildable for older kernel. - */ -#ifdef VIRTIO_NET_F_MQ - #define VHOST_MAX_QUEUE_PAIRS VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX - #define VHOST_SUPPORTS_MQ (1ULL << VIRTIO_NET_F_MQ) -#else - #define VHOST_MAX_QUEUE_PAIRS 1 - #define VHOST_SUPPORTS_MQ 0 +#define VHOST_MAX_VRING 0x100 +#define VHOST_MAX_QUEUE_PAIRS 0x80 + +#ifndef VIRTIO_NET_F_MTU + #define VIRTIO_NET_F_MTU 3 #endif /* @@ -137,6 +139,27 @@ struct vhost_virtqueue { #define VIRTIO_F_VERSION_1 32 #endif +#define VHOST_USER_F_PROTOCOL_FEATURES 30 + +/* Features supported by this builtin vhost-user net driver. */ +#define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ + (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ + (1ULL << VIRTIO_NET_F_CTRL_RX) | \ + (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \ + (1ULL << VIRTIO_NET_F_MQ) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VHOST_F_LOG_ALL) | \ + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO4) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ + (1ULL << VIRTIO_NET_F_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ + (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \ + (1ULL << VIRTIO_NET_F_MTU)) + + struct guest_page { uint64_t guest_phys_addr; uint64_t host_phys_addr; @@ -149,7 +172,7 @@ struct guest_page { */ struct virtio_net { /* Frontend (QEMU) memory and memory region information */ - struct virtio_memory *mem; + struct rte_vhost_memory *mem; uint64_t features; uint64_t protocol_features; int vid; @@ -157,7 +180,7 @@ struct virtio_net { uint16_t vhost_hlen; /* to tell if we need broadcast rarp packet */ rte_atomic16_t broadcast_rarp; - uint32_t virt_qp_nb; + uint32_t nr_vring; int dequeue_zero_copy; struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? 
PATH_MAX : IFNAMSIZ) @@ -166,35 +189,52 @@ struct virtio_net { uint64_t log_base; uint64_t log_addr; struct ether_addr mac; + uint16_t mtu; + + struct vhost_device_ops const *notify_ops; uint32_t nr_guest_pages; uint32_t max_guest_pages; struct guest_page *guest_pages; } __rte_cache_aligned; -/** - * Information relating to memory regions including offsets to - * addresses in QEMUs memory file. - */ -struct virtio_memory_region { - uint64_t guest_phys_addr; - uint64_t guest_user_addr; - uint64_t host_user_addr; - uint64_t size; - void *mmap_addr; - uint64_t mmap_size; - int fd; -}; +#define VHOST_LOG_PAGE 4096 -/** - * Memory structure includes region and mapping information. - */ -struct virtio_memory { - uint32_t nregions; - struct virtio_memory_region regions[0]; -}; +static inline void __attribute__((always_inline)) +vhost_log_page(uint8_t *log_base, uint64_t page) +{ + log_base[page / 8] |= 1 << (page % 8); +} + +static inline void __attribute__((always_inline)) +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) +{ + uint64_t page; + + if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base || !len)) + return; + + if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) + return; + + /* To make sure guest memory updates are committed before logging */ + rte_smp_wmb(); + + page = addr / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < addr + len) { + vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); + page += 1; + } +} +static inline void __attribute__((always_inline)) +vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t offset, uint64_t len) +{ + vhost_log_write(dev, vq->log_guest_addr + offset, len); +} /* Macros for printing using RTE_LOG */ #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 @@ -231,25 +271,6 @@ extern uint64_t VHOST_FEATURES; #define MAX_VHOST_DEVICE 1024 extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; -/* Convert guest physical Address to host virtual address */ -static inline uint64_t __attribute__((always_inline)) -gpa_to_vva(struct virtio_net *dev, uint64_t gpa) -{ - struct virtio_memory_region *reg; - uint32_t i; - - for (i = 0; i < dev->mem->nregions; i++) { - reg = &dev->mem->regions[i]; - if (gpa >= reg->guest_phys_addr && - gpa < reg->guest_phys_addr + reg->size) { - return gpa - reg->guest_phys_addr + - reg->host_user_addr; - } - } - - return 0; -} - /* Convert guest physical address to host physical address */ static inline phys_addr_t __attribute__((always_inline)) gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) @@ -270,7 +291,6 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) return 0; } -struct virtio_net_device_ops const *notify_ops; struct virtio_net *get_device(int vid); int vhost_new_device(void); @@ -278,11 +298,13 @@ void cleanup_device(struct virtio_net *dev, int destroy); void reset_device(struct virtio_net *dev); void vhost_destroy_device(int); -int alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx); +int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); void vhost_set_ifname(int, const char *if_name, unsigned int if_len); void vhost_enable_dequeue_zero_copy(int vid); +struct vhost_device_ops const *vhost_driver_callback_get(const char *path); + /* * Backend-specific cleanup. 
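A worked example of the vhost_log_page() bitmap arithmetic above, with VHOST_LOG_PAGE = 4096:

	/*
	 * A write of len 0x100 at addr 0x2100 dirties guest page 2 only:
	 *
	 *   page = 0x2100 / 4096 = 2; the loop stops at page 3 because
	 *   3 * 4096 >= 0x2100 + 0x100.
	 *
	 * Page 2 is recorded as bit (2 % 8) of byte (2 / 8) in the log:
	 *
	 *   log_base[0] |= 1 << 2;
	 */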
* diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index 0cb1c677..5c8058b6 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -51,6 +51,9 @@ #include "vhost.h" #include "vhost_user.h" +#define VIRTIO_MIN_MTU 68 +#define VIRTIO_MAX_MTU 65535 + static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_NONE] = "VHOST_USER_NONE", [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", @@ -72,6 +75,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", + [VHOST_USER_NET_SET_MTU] = "VHOST_USER_NET_SET_MTU", }; static uint64_t @@ -88,7 +92,7 @@ static void free_mem_region(struct virtio_net *dev) { uint32_t i; - struct virtio_memory_region *reg; + struct rte_vhost_mem_region *reg; if (!dev || !dev->mem) return; @@ -131,7 +135,7 @@ vhost_user_reset_owner(struct virtio_net *dev) { if (dev->flags & VIRTIO_DEV_RUNNING) { dev->flags &= ~VIRTIO_DEV_RUNNING; - notify_ops->destroy_device(dev->vid); + dev->notify_ops->destroy_device(dev->vid); } cleanup_device(dev, 0); @@ -143,9 +147,12 @@ vhost_user_reset_owner(struct virtio_net *dev) * The features that we support are requested. */ static uint64_t -vhost_user_get_features(void) +vhost_user_get_features(struct virtio_net *dev) { - return VHOST_FEATURES; + uint64_t features = 0; + + rte_vhost_driver_get_features(dev->ifname, &features); + return features; } /* @@ -154,9 +161,17 @@ vhost_user_get_features(void) static int vhost_user_set_features(struct virtio_net *dev, uint64_t features) { - if (features & ~VHOST_FEATURES) + uint64_t vhost_features = 0; + + rte_vhost_driver_get_features(dev->ifname, &vhost_features); + if (features & ~vhost_features) return -1; + if ((dev->flags & VIRTIO_DEV_RUNNING) && dev->features != features) { + if (dev->notify_ops->features_changed) + dev->notify_ops->features_changed(dev->vid, features); + } + dev->features = features; if (dev->features & ((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) { @@ -223,12 +238,7 @@ numa_realloc(struct virtio_net *dev, int index) struct vhost_virtqueue *old_vq, *vq; int ret; - /* - * vq is allocated on pairs, we should try to do realloc - * on first queue of one queue pair only. - */ - if (index % VIRTIO_QNUM != 0) - return dev; + enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; old_dev = dev; vq = old_vq = dev->virtqueue[index]; @@ -247,8 +257,7 @@ numa_realloc(struct virtio_net *dev, int index) if (oldnode != newnode) { RTE_LOG(INFO, VHOST_CONFIG, "reallocate vq from %d to %d node\n", oldnode, newnode); - vq = rte_malloc_socket(NULL, sizeof(*vq) * VIRTIO_QNUM, 0, - newnode); + vq = rte_malloc_socket(NULL, sizeof(*vq), 0, newnode); if (!vq) return dev; @@ -280,7 +289,6 @@ numa_realloc(struct virtio_net *dev, int index) out: dev->virtqueue[index] = vq; - dev->virtqueue[index + 1] = vq + 1; vhost_devices[dev->vid] = dev; return dev; @@ -300,7 +308,7 @@ numa_realloc(struct virtio_net *dev, int index __rte_unused) static uint64_t qva_to_vva(struct virtio_net *dev, uint64_t qva) { - struct virtio_memory_region *reg; + struct rte_vhost_mem_region *reg; uint32_t i; /* Find the region where the address lives. 
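vhost_user_set_features() above is what fires the new features_changed callback when the feature bits change on a running device; a sketch of a handler keyed on the live-migration logging bit (the helper names are assumed):

	static int
	app_features_changed(int vid, uint64_t features)
	{
		/* VHOST_F_LOG_ALL is set when live migration starts and
		 * cleared when it ends. */
		if (RTE_VHOST_NEED_LOG(features))
			app_start_dirty_logging(vid);	/* assumed helper */
		else
			app_stop_dirty_logging(vid);	/* assumed helper */

		return 0;
	}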
*/ @@ -428,7 +436,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, } static void -add_guest_pages(struct virtio_net *dev, struct virtio_memory_region *reg, +add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, uint64_t page_size) { uint64_t reg_size = reg->size; @@ -488,7 +496,7 @@ static int vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) { struct VhostUserMemory memory = pmsg->payload.memory; - struct virtio_memory_region *reg; + struct rte_vhost_mem_region *reg; void *mmap_addr; uint64_t mmap_size; uint64_t mmap_offset; @@ -496,12 +504,6 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) uint32_t i; int fd; - /* Remove from the data plane. */ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - notify_ops->destroy_device(dev->vid); - } - if (dev->mem) { free_mem_region(dev); rte_free(dev->mem); @@ -515,8 +517,8 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) sizeof(struct guest_page)); } - dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct virtio_memory) + - sizeof(struct virtio_memory_region) * memory.nregions, 0); + dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) + + sizeof(struct rte_vhost_mem_region) * memory.nregions, 0); if (dev->mem == NULL) { RTE_LOG(ERR, VHOST_CONFIG, "(%d) failed to allocate memory for dev->mem\n", @@ -611,18 +613,17 @@ vq_is_ready(struct vhost_virtqueue *vq) static int virtio_is_ready(struct virtio_net *dev) { - struct vhost_virtqueue *rvq, *tvq; + struct vhost_virtqueue *vq; uint32_t i; - for (i = 0; i < dev->virt_qp_nb; i++) { - rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ]; - tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ]; + if (dev->nr_vring == 0) + return 0; - if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) { - RTE_LOG(INFO, VHOST_CONFIG, - "virtio is not ready for processing.\n"); + for (i = 0; i < dev->nr_vring; i++) { + vq = dev->virtqueue[i]; + + if (!vq_is_ready(vq)) return 0; - } } RTE_LOG(INFO, VHOST_CONFIG, @@ -635,7 +636,6 @@ vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg) { struct vhost_vring_file file; struct vhost_virtqueue *vq; - uint32_t cur_qp_idx; file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) @@ -645,29 +645,13 @@ vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg) RTE_LOG(INFO, VHOST_CONFIG, "vring call idx:%d file:%d\n", file.index, file.fd); - /* - * FIXME: VHOST_SET_VRING_CALL is the first per-vring message - * we get, so we do vring queue pair allocation here. - */ - cur_qp_idx = file.index / VIRTIO_QNUM; - if (cur_qp_idx + 1 > dev->virt_qp_nb) { - if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0) - return; - } - vq = dev->virtqueue[file.index]; - assert(vq != NULL); - if (vq->callfd >= 0) close(vq->callfd); vq->callfd = file.fd; } -/* - * In vhost-user, when we receive kick message, will test whether virtio - * device is ready for packet processing. 
- */ static void vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg) { @@ -686,16 +670,6 @@ vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg) if (vq->kickfd >= 0) close(vq->kickfd); vq->kickfd = file.fd; - - if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) { - if (dev->dequeue_zero_copy) { - RTE_LOG(INFO, VHOST_CONFIG, - "dequeue zero copy is enabled\n"); - } - - if (notify_ops->new_device(dev->vid) == 0) - dev->flags |= VIRTIO_DEV_RUNNING; - } } static void @@ -726,9 +700,11 @@ vhost_user_get_vring_base(struct virtio_net *dev, /* We have to stop the queue (virtio) if it is running. */ if (dev->flags & VIRTIO_DEV_RUNNING) { dev->flags &= ~VIRTIO_DEV_RUNNING; - notify_ops->destroy_device(dev->vid); + dev->notify_ops->destroy_device(dev->vid); } + dev->flags &= ~VIRTIO_DEV_READY; + /* Here we are safe to get the last used index */ state->num = vq->last_used_idx; @@ -766,8 +742,8 @@ vhost_user_set_vring_enable(struct virtio_net *dev, "set queue enable: %d to qp idx: %d\n", enable, state->index); - if (notify_ops->vring_state_changed) - notify_ops->vring_state_changed(dev->vid, state->index, enable); + if (dev->notify_ops->vring_state_changed) + dev->notify_ops->vring_state_changed(dev->vid, state->index, enable); dev->virtqueue[state->index]->enabled = enable; @@ -865,6 +841,22 @@ vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) return 0; } +static int +vhost_user_net_set_mtu(struct virtio_net *dev, struct VhostUserMsg *msg) +{ + if (msg->payload.u64 < VIRTIO_MIN_MTU || + msg->payload.u64 > VIRTIO_MAX_MTU) { + RTE_LOG(ERR, VHOST_CONFIG, "Invalid MTU size (%"PRIu64")\n", + msg->payload.u64); + + return -1; + } + + dev->mtu = msg->payload.u64; + + return 0; +} + /* return the number of bytes read on success, or a negative value on failure.
*/ static int read_vhost_message(int sockfd, struct VhostUserMsg *msg) @@ -904,6 +896,7 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg) return 0; msg->flags &= ~VHOST_USER_VERSION_MASK; + msg->flags &= ~VHOST_USER_NEED_REPLY; msg->flags |= VHOST_USER_VERSION; msg->flags |= VHOST_USER_REPLY_MASK; @@ -913,6 +906,44 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg) return ret; } +/* + * Allocate a queue pair if it hasn't been allocated yet + */ +static int +vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, VhostUserMsg *msg) +{ + uint16_t vring_idx; + + switch (msg->request) { + case VHOST_USER_SET_VRING_KICK: + case VHOST_USER_SET_VRING_CALL: + case VHOST_USER_SET_VRING_ERR: + vring_idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + break; + case VHOST_USER_SET_VRING_NUM: + case VHOST_USER_SET_VRING_BASE: + case VHOST_USER_SET_VRING_ENABLE: + vring_idx = msg->payload.state.index; + break; + case VHOST_USER_SET_VRING_ADDR: + vring_idx = msg->payload.addr.index; + break; + default: + return 0; + } + + if (vring_idx >= VHOST_MAX_VRING) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid vring index: %u\n", vring_idx); + return -1; + } + + if (dev->virtqueue[vring_idx]) + return 0; + + return alloc_vring_queue(dev, vring_idx); +} + int vhost_user_msg_handler(int vid, int fd) { @@ -924,6 +955,16 @@ vhost_user_msg_handler(int vid, int fd) if (dev == NULL) return -1; + if (!dev->notify_ops) { + dev->notify_ops = vhost_driver_callback_get(dev->ifname); + if (!dev->notify_ops) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to get callback ops for driver %s\n", + dev->ifname); + return -1; + } + } + ret = read_vhost_message(fd, &msg); if (ret <= 0 || msg.request >= VHOST_USER_MAX) { if (ret < 0) @@ -939,11 +980,20 @@ vhost_user_msg_handler(int vid, int fd) return -1; } + ret = 0; RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n", vhost_message_str[msg.request]); + + ret = vhost_user_check_and_alloc_queue_pair(dev, &msg); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to alloc queue\n"); + return -1; + } + switch (msg.request) { case VHOST_USER_GET_FEATURES: - msg.payload.u64 = vhost_user_get_features(); + msg.payload.u64 = vhost_user_get_features(dev); msg.size = sizeof(msg.payload.u64); send_vhost_message(fd, &msg); break; @@ -968,7 +1018,7 @@ vhost_user_msg_handler(int vid, int fd) break; case VHOST_USER_SET_MEM_TABLE: - vhost_user_set_mem_table(dev, &msg); + ret = vhost_user_set_mem_table(dev, &msg); break; case VHOST_USER_SET_LOG_BASE: @@ -994,7 +1044,7 @@ vhost_user_msg_handler(int vid, int fd) break; case VHOST_USER_GET_VRING_BASE: - ret = vhost_user_get_vring_base(dev, &msg.payload.state); + vhost_user_get_vring_base(dev, &msg.payload.state); msg.size = sizeof(msg.payload.state); send_vhost_message(fd, &msg); break; @@ -1025,10 +1075,35 @@ vhost_user_msg_handler(int vid, int fd) vhost_user_send_rarp(dev, &msg); break; + case VHOST_USER_NET_SET_MTU: + ret = vhost_user_net_set_mtu(dev, &msg); + break; + default: + ret = -1; break; } + if (msg.flags & VHOST_USER_NEED_REPLY) { + msg.payload.u64 = !!ret; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + } + + if (!(dev->flags & VIRTIO_DEV_RUNNING) && virtio_is_ready(dev)) { + dev->flags |= VIRTIO_DEV_READY; + + if (!(dev->flags & VIRTIO_DEV_RUNNING)) { + if (dev->dequeue_zero_copy) { + RTE_LOG(INFO, VHOST_CONFIG, + "dequeue zero copy is enabled\n"); + } + + if (dev->notify_ops->new_device(dev->vid) == 0) + dev->flags |= VIRTIO_DEV_RUNNING; + } + } + return 0; } diff --git 
a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index ba78d326..35ebd719 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -37,7 +37,7 @@ #include <stdint.h> #include <linux/vhost.h> -#include "rte_virtio_net.h" +#include "rte_vhost.h" /* refer to hw/virtio/vhost-user.c */ @@ -46,10 +46,18 @@ #define VHOST_USER_PROTOCOL_F_MQ 0 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 #define VHOST_USER_PROTOCOL_F_RARP 2 +#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 +#define VHOST_USER_PROTOCOL_F_NET_MTU 4 +/* + * Disable the REPLY_ACK feature to work around buggy QEMU implementations; + * QEMU v2.7 through v2.9 are known to be affected. + */ #define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ - (1ULL << VHOST_USER_PROTOCOL_F_RARP)) + (1ULL << VHOST_USER_PROTOCOL_F_RARP) | \ + (0ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ + (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU)) typedef enum VhostUserRequest { VHOST_USER_NONE = 0, @@ -72,6 +80,7 @@ typedef enum VhostUserRequest { VHOST_USER_GET_QUEUE_NUM = 17, VHOST_USER_SET_VRING_ENABLE = 18, VHOST_USER_SEND_RARP = 19, + VHOST_USER_NET_SET_MTU = 20, VHOST_USER_MAX } VhostUserRequest; @@ -98,6 +107,7 @@ typedef struct VhostUserMsg { #define VHOST_USER_VERSION_MASK 0x3 #define VHOST_USER_REPLY_MASK (0x1 << 2) +#define VHOST_USER_NEED_REPLY (0x1 << 3) uint32_t flags; uint32_t size; /* the following payload size */ union { diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 337470d6..48219e05 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -39,7 +39,7 @@ #include <rte_memcpy.h> #include <rte_ether.h> #include <rte_ip.h> -#include <rte_virtio_net.h> +#include <rte_vhost.h> #include <rte_tcp.h> #include <rte_udp.h> #include <rte_sctp.h> @@ -48,47 +48,11 @@ #include "vhost.h" #define MAX_PKT_BURST 32 -#define VHOST_LOG_PAGE 4096 - -static inline void __attribute__((always_inline)) -vhost_log_page(uint8_t *log_base, uint64_t page) -{ - log_base[page / 8] |= 1 << (page % 8); -} - -static inline void __attribute__((always_inline)) -vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) -{ - uint64_t page; - - if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || - !dev->log_base || !len)) - return; - - if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) - return; - - /* To make sure guest memory updates are committed before logging */ - rte_smp_wmb(); - - page = addr / VHOST_LOG_PAGE; - while (page * VHOST_LOG_PAGE < addr + len) { - vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); - page += 1; - } -} - -static inline void __attribute__((always_inline)) -vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint64_t offset, uint64_t len) -{ - vhost_log_write(dev, vq->log_guest_addr + offset, len); -} static bool -is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb) +is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) { - return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM; + return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; } static inline void __attribute__((always_inline)) @@ -141,6 +105,12 @@ update_shadow_used_ring(struct vhost_virtqueue *vq, vq->shadow_used_ring[i].len = len; } +/* avoid the write operation when it is not needed, to lessen cache issues */ +#define ASSIGN_UNLESS_EQUAL(var, val) do { \ + if ((var) != (val)) \ + (var) = (val); \ +} while (0) + static void virtio_enqueue_offload(struct rte_mbuf
*m_buf, struct virtio_net_hdr *net_hdr) { @@ -162,6 +132,10 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) cksum)); break; } + } else { + ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0); + ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0); + ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0); } if (m_buf->ol_flags & PKT_TX_TCP_SEG) { @@ -172,19 +146,13 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) net_hdr->gso_size = m_buf->tso_segsz; net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + m_buf->l4_len; + } else { + ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0); + ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0); + ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0); } } -static inline void -copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr, - struct virtio_net_hdr_mrg_rxbuf hdr) -{ - if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf)) - *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr; - else - *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr; -} - static inline int __attribute__((always_inline)) copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, struct rte_mbuf *m, uint16_t desc_idx, uint32_t size) @@ -194,12 +162,11 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, uint32_t cpy_len; struct vring_desc *desc; uint64_t desc_addr; - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; /* A counter to avoid desc dead loop chain */ uint16_t nr_desc = 1; desc = &descs[desc_idx]; - desc_addr = gpa_to_vva(dev, desc->addr); + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); /* * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid * performance issue with some versions of gcc (4.8.4 and 5.3.0) which @@ -210,8 +177,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, rte_prefetch0((void *)(uintptr_t)desc_addr); - virtio_enqueue_offload(m, &virtio_hdr.hdr); - copy_virtio_net_hdr(dev, desc_addr, virtio_hdr); + virtio_enqueue_offload(m, (struct virtio_net_hdr *)(uintptr_t)desc_addr); vhost_log_write(dev, desc->addr, dev->vhost_hlen); PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); @@ -239,7 +205,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, return -1; desc = &descs[desc->next]; - desc_addr = gpa_to_vva(dev, desc->addr); + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); if (unlikely(!desc_addr)) return -1; @@ -283,7 +249,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, uint32_t i, sz; LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", dev->vid, __func__, queue_id); return 0; @@ -323,7 +289,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, int err; if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) { - descs = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev, + descs = (struct vring_desc *)(uintptr_t) + rte_vhost_gpa_to_vva(dev->mem, vq->desc[desc_idx].addr); if (unlikely(!descs)) { count = i; @@ -383,7 +350,7 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq, if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { descs = (struct vring_desc *)(uintptr_t) - gpa_to_vva(dev, vq->desc[idx].addr); + rte_vhost_gpa_to_vva(dev->mem, vq->desc[idx].addr); if (unlikely(!descs)) return -1; @@ -461,7 +428,6 @@ static inline int 
__attribute__((always_inline)) copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, struct buf_vector *buf_vec, uint16_t num_buffers) { - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; uint32_t vec_idx = 0; uint64_t desc_addr; uint32_t mbuf_offset, mbuf_avail; @@ -473,7 +439,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, if (unlikely(m == NULL)) return -1; - desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr); + desc_addr = rte_vhost_gpa_to_vva(dev->mem, buf_vec[vec_idx].buf_addr); if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) return -1; @@ -482,7 +448,6 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, hdr_phys_addr = buf_vec[vec_idx].buf_addr; rte_prefetch0((void *)(uintptr_t)hdr_addr); - virtio_hdr.num_buffers = num_buffers; LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", dev->vid, num_buffers); @@ -495,7 +460,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, /* done with current desc buf, get the next one */ if (desc_avail == 0) { vec_idx++; - desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr); + desc_addr = rte_vhost_gpa_to_vva(dev->mem, + buf_vec[vec_idx].buf_addr); if (unlikely(!desc_addr)) return -1; @@ -514,8 +480,13 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, } if (hdr_addr) { - virtio_enqueue_offload(hdr_mbuf, &virtio_hdr.hdr); - copy_virtio_net_hdr(dev, hdr_addr, virtio_hdr); + struct virtio_net_hdr_mrg_rxbuf *hdr; + + hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t) + hdr_addr; + virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); + ASSIGN_UNLESS_EQUAL(hdr->num_buffers, num_buffers); + vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen); PRINT_PACKET(dev, (uintptr_t)hdr_addr, dev->vhost_hlen, 0); @@ -552,7 +523,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, uint16_t avail_head; LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", dev->vid, __func__, queue_id); return 0; @@ -663,14 +634,14 @@ parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr) switch (ethertype) { case ETHER_TYPE_IPv4: - ipv4_hdr = (struct ipv4_hdr *)l3_hdr; + ipv4_hdr = l3_hdr; *l4_proto = ipv4_hdr->next_proto_id; m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4; *l4_hdr = (char *)l3_hdr + m->l3_len; m->ol_flags |= PKT_TX_IPV4; break; case ETHER_TYPE_IPv6: - ipv6_hdr = (struct ipv6_hdr *)l3_hdr; + ipv6_hdr = l3_hdr; *l4_proto = ipv6_hdr->proto; m->l3_len = sizeof(struct ipv6_hdr); *l4_hdr = (char *)l3_hdr + m->l3_len; @@ -720,7 +691,7 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m) switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_TCPV6: - tcp_hdr = (struct tcp_hdr *)l4_hdr; + tcp_hdr = l4_hdr; m->ol_flags |= PKT_TX_TCP_SEG; m->tso_segsz = hdr->gso_size; m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2; @@ -798,7 +769,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, (desc->flags & VRING_DESC_F_INDIRECT)) return -1; - desc_addr = gpa_to_vva(dev, desc->addr); + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); if (unlikely(!desc_addr)) return -1; @@ -818,7 +789,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, if (unlikely(desc->flags & 
VRING_DESC_F_INDIRECT)) return -1; - desc_addr = gpa_to_vva(dev, desc->addr); + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); if (unlikely(!desc_addr)) return -1; @@ -882,7 +853,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) return -1; - desc_addr = gpa_to_vva(dev, desc->addr); + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); if (unlikely(!desc_addr)) return -1; @@ -905,6 +876,8 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, "allocate memory for mbuf.\n"); return -1; } + if (unlikely(dev->dequeue_zero_copy)) + rte_mbuf_refcnt_update(cur, 1); prev->next = cur; prev->data_len = mbuf_offset; @@ -1017,7 +990,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, if (!dev) return 0; - if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) { + if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", dev->vid, __func__, queue_id); return 0; @@ -1056,9 +1029,21 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, * array, to make it look like the guest actually sent such a packet. * * Check vhost_user_send_rarp() for more information. + * + * broadcast_rarp shares a cacheline in the virtio_net structure + * with some fields that are accessed during enqueue and + * rte_atomic16_cmpset() causes a write if using cmpxchg. This could + * result in false sharing between enqueue and dequeue. + * + * Prevent unnecessary false sharing by reading broadcast_rarp first + * and only performing cmpset if the read indicates it is likely to + * be set. */ - if (unlikely(rte_atomic16_cmpset((volatile uint16_t *) - &dev->broadcast_rarp.cnt, 1, 0))) { + + if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) && + rte_atomic16_cmpset((volatile uint16_t *) + &dev->broadcast_rarp.cnt, 1, 0))) { + rarp_mbuf = rte_pktmbuf_alloc(mbuf_pool); if (rarp_mbuf == NULL) { RTE_LOG(ERR, VHOST_DATA, @@ -1113,7 +1098,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, rte_prefetch0(&vq->desc[desc_indexes[i + 1]]); if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) { - desc = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev, + desc = (struct vring_desc *)(uintptr_t) + rte_vhost_gpa_to_vva(dev->mem, vq->desc[desc_indexes[i]].addr); if (unlikely(!desc)) break;
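The broadcast_rarp hunk above illustrates a general data-path pattern: rte_atomic16_cmpset() is built on a locked cmpxchg, which pulls the cacheline in for writing even when the compare fails, so issuing a plain atomic read first keeps the per-burst fast path read-only. A minimal sketch of the same pattern, assuming a hypothetical shared_state structure (consume_flag_once and its fields are illustrative names, not part of the vhost API):

#include <rte_atomic.h>
#include <rte_branch_prediction.h>

/*
 * Hypothetical state: a rarely-set flag sharing a cacheline with data
 * touched on every burst, as dev->broadcast_rarp does in virtio_net.
 */
struct shared_state {
	rte_atomic16_t flag;	/* set occasionally from the control path */
	uint16_t hot;		/* updated on every data-path burst */
};

/*
 * Consume the flag at most once without dirtying the cacheline on the
 * common path: a plain atomic read first; the cmpset (a locked cmpxchg,
 * which claims the line for writing even on failure) runs only when the
 * read says the flag is likely set.
 */
static inline int
consume_flag_once(struct shared_state *s)
{
	if (unlikely(rte_atomic16_read(&s->flag) &&
			rte_atomic16_cmpset((volatile uint16_t *)&s->flag.cnt,
				1, 0)))
		return 1;	/* we cleared it; do the one-shot work */
	return 0;
}

On the fast path the flag is almost always clear, so the burst loop only ever pays for the cheap shared read, and concurrent enqueue threads never see the line bounce.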
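Similarly, the vhost_log_page()/vhost_log_write() helpers that this patch moves from virtio_net.c into vhost.h keep the live-migration dirty log as a flat bitmap: one bit per VHOST_LOG_PAGE (4 KiB) page of guest memory, with the page number split into a byte index (page / 8) and a bit index (page % 8). A standalone restatement of that bookkeeping, as a sketch only: log_dirty_range is a hypothetical name, and the feature check and rte_smp_wmb() barrier of the real helper are omitted here.

#include <stdint.h>
#include <stddef.h>

#define VHOST_LOG_PAGE	4096	/* one log bit covers 4 KiB of guest memory */

/*
 * Mark every page touched by a write of len bytes at guest-physical
 * address addr as dirty in the migration log bitmap.
 */
static void
log_dirty_range(uint8_t *log_base, uint64_t log_size,
		uint64_t addr, uint64_t len)
{
	uint64_t page;

	if (log_base == NULL || len == 0)
		return;

	/* bail out if the range would index past the end of the bitmap */
	if (log_size <= (addr + len - 1) / VHOST_LOG_PAGE / 8)
		return;

	for (page = addr / VHOST_LOG_PAGE;
			page * VHOST_LOG_PAGE < addr + len;
			page++)
		log_base[page / 8] |= 1 << (page % 8);	/* one bit per page */
}

Moving these helpers into the header lets both vhost_user.c and virtio_net.c log dirty pages without duplicating the bitmap arithmetic, which is why the copies formerly private to virtio_net.c are deleted in the hunk above.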