Diffstat (limited to 'lib')
194 files changed, 11704 insertions, 2871 deletions
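The largest part of this series is the cryptodev API rework visible below: the AES-GCM/CCM cipher and auth algorithm pairs are replaced by a dedicated AEAD transform, IV data is described by an offset/length in the xform (pointing into the rte_crypto_op) instead of a pointer in the symmetric op, and sessions are created from a caller-supplied mempool and then initialised per device. The following sketch is not part of the patch set; it only illustrates how those pieces might fit together. The helper name, mempool, IV offset and key/digest lengths are illustrative assumptions; the function and field names themselves are taken from the diff.

    /*
     * Illustrative sketch, not part of this patch set: create and initialise
     * an AES-GCM AEAD session with the reworked cryptodev session API.
     */
    #include <string.h>
    #include <rte_crypto_sym.h>
    #include <rte_cryptodev.h>

    static int
    setup_aes_gcm_session(uint8_t dev_id, struct rte_mempool *sess_mp,
                          uint8_t *key, uint16_t key_len,
                          struct rte_cryptodev_sym_session **out)
    {
            struct rte_crypto_sym_xform xform;
            struct rte_cryptodev_sym_session *sess;

            memset(&xform, 0, sizeof(xform));
            /* AEAD xform replaces the removed AES-GCM cipher/auth pair. */
            xform.type = RTE_CRYPTO_SYM_XFORM_AEAD;
            xform.aead.op = RTE_CRYPTO_AEAD_OP_ENCRYPT;
            xform.aead.algo = RTE_CRYPTO_AEAD_AES_GCM;
            xform.aead.key.data = key;
            xform.aead.key.length = key_len;
            /*
             * The IV now lives in the crypto op; the xform only records its
             * offset from the start of rte_crypto_op and its length.
             * Placing it right after the symmetric op is an assumption of
             * this sketch (the op mempool must reserve that private space).
             */
            xform.aead.iv.offset = sizeof(struct rte_crypto_op) +
                            sizeof(struct rte_crypto_sym_op);
            xform.aead.iv.length = 12;      /* 96-bit GCM IV */
            xform.aead.digest_length = 16;
            xform.aead.aad_length = 0;

            /* Sessions are allocated from a user-provided mempool ... */
            sess = rte_cryptodev_sym_session_create(sess_mp);
            if (sess == NULL)
                    return -1;
            /* ... and initialised for a specific device afterwards. */
            if (rte_cryptodev_sym_session_init(dev_id, sess, &xform,
                            sess_mp) < 0)
                    return -1;
            *out = sess;
            return 0;
    }

Queue pairs follow the same model: rte_cryptodev_queue_pair_setup() gains a session_pool argument in this series, so the same session mempool would typically be passed there as well.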
diff --git a/lib/Makefile b/lib/Makefile index 07e1fd0c..86caba17 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -52,7 +52,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev DEPDIRS-librte_cryptodev := librte_eal librte_mempool librte_ring librte_mbuf DEPDIRS-librte_cryptodev += librte_kvargs DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += librte_eventdev -DEPDIRS-librte_eventdev := librte_eal +DEPDIRS-librte_eventdev := librte_eal librte_ring DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash @@ -68,6 +68,8 @@ DEPDIRS-librte_net := librte_mbuf librte_eal DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag DEPDIRS-librte_ip_frag := librte_eal librte_mempool librte_mbuf librte_ether DEPDIRS-librte_ip_frag += librte_hash +DIRS-$(CONFIG_RTE_LIBRTE_GRO) += librte_gro +DEPDIRS-librte_gro := librte_eal librte_mbuf librte_ether librte_net DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += librte_jobstats DEPDIRS-librte_jobstats := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_METRICS) += librte_metrics diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile index e2dacd60..59767920 100644 --- a/lib/librte_acl/Makefile +++ b/lib/librte_acl/Makefile @@ -51,15 +51,14 @@ SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_scalar.c ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),) SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_neon.c -CFLAGS_acl_run_neon.o += -flax-vector-conversions -Wno-maybe-uninitialized +CFLAGS_acl_run_neon.o += -flax-vector-conversions +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_acl_run_neon.o += -Wno-maybe-uninitialized +endif else ifeq ($(CONFIG_RTE_ARCH_PPC_64),y) SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_altivec.c else SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_sse.c -#check if flag for SSE4.1 is already on, if not set it up manually - ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSE4_1,$(CFLAGS)),) - CFLAGS_acl_run_sse.o += -msse4.1 - endif endif # diff --git a/lib/librte_acl/acl_run_altivec.h b/lib/librte_acl/acl_run_altivec.h index 7d329bcf..62fd6a22 100644 --- a/lib/librte_acl/acl_run_altivec.h +++ b/lib/librte_acl/acl_run_altivec.h @@ -104,13 +104,13 @@ resolve_priority_altivec(uint64_t transition, int n, /* * Check for any match in 4 transitions */ -static inline __attribute__((always_inline)) uint32_t +static __rte_always_inline uint32_t check_any_match_x4(uint64_t val[]) { return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH; } -static inline __attribute__((always_inline)) void +static __rte_always_inline void acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, struct acl_flow_data *flows, uint64_t transitions[]) { diff --git a/lib/librte_acl/acl_run_avx2.h b/lib/librte_acl/acl_run_avx2.h index b01a46a5..804e45af 100644 --- a/lib/librte_acl/acl_run_avx2.h +++ b/lib/librte_acl/acl_run_avx2.h @@ -86,7 +86,7 @@ static const rte_ymm_t ymm_range_base = { * tr_hi contains high 32 bits for 8 transition. * next_input contains up to 4 input bytes for 8 flows. */ -static inline __attribute__((always_inline)) ymm_t +static __rte_always_inline ymm_t transition8(ymm_t next_input, const uint64_t *trans, ymm_t *tr_lo, ymm_t *tr_hi) { const int32_t *tr; diff --git a/lib/librte_acl/acl_run_neon.c b/lib/librte_acl/acl_run_neon.c index b0144514..0b1c71c0 100644 --- a/lib/librte_acl/acl_run_neon.c +++ b/lib/librte_acl/acl_run_neon.c @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright (C) Cavium networks Ltd. 2015. + * Copyright (C) Cavium, Inc. 2015. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * diff --git a/lib/librte_acl/acl_run_neon.h b/lib/librte_acl/acl_run_neon.h index d233ff00..37881c45 100644 --- a/lib/librte_acl/acl_run_neon.h +++ b/lib/librte_acl/acl_run_neon.h @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright (C) Cavium networks Ltd. 2015. + * Copyright (C) Cavium, Inc. 2015. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -99,13 +99,13 @@ resolve_priority_neon(uint64_t transition, int n, const struct rte_acl_ctx *ctx, /* * Check for any match in 4 transitions */ -static inline __attribute__((always_inline)) uint32_t +static __rte_always_inline uint32_t check_any_match_x4(uint64_t val[]) { return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH; } -static inline __attribute__((always_inline)) void +static __rte_always_inline void acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, struct acl_flow_data *flows, uint64_t transitions[]) { @@ -124,7 +124,7 @@ acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, /* * Process 4 transitions (in 2 NEON Q registers) in parallel */ -static inline __attribute__((always_inline)) int32x4_t +static __rte_always_inline int32x4_t transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[]) { int32x4x2_t tr_hi_lo; diff --git a/lib/librte_acl/acl_run_sse.h b/lib/librte_acl/acl_run_sse.h index ad40a674..72f66e4f 100644 --- a/lib/librte_acl/acl_run_sse.h +++ b/lib/librte_acl/acl_run_sse.h @@ -149,7 +149,7 @@ acl_process_matches(xmm_t *indices, int slot, const struct rte_acl_ctx *ctx, /* * Check for any match in 4 transitions (contained in 2 SSE registers) */ -static inline __attribute__((always_inline)) void +static __rte_always_inline void acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, struct acl_flow_data *flows, xmm_t *indices1, xmm_t *indices2, xmm_t match_mask) @@ -176,7 +176,7 @@ acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, /* * Process 4 transitions (in 2 XMM registers) in parallel */ -static inline __attribute__((always_inline)) xmm_t +static __rte_always_inline xmm_t transition4(xmm_t next_input, const uint64_t *trans, xmm_t *indices1, xmm_t *indices2) { diff --git a/lib/librte_acl/rte_acl_osdep.h b/lib/librte_acl/rte_acl_osdep.h index 41f7e3d4..9e4af530 100644 --- a/lib/librte_acl/rte_acl_osdep.h +++ b/lib/librte_acl/rte_acl_osdep.h @@ -74,7 +74,6 @@ #include <rte_errno.h> #include <rte_string_fns.h> #include <rte_cpuflags.h> -#include 
<rte_log.h> #include <rte_debug.h> #endif /* _RTE_ACL_OSDEP_H_ */ diff --git a/lib/librte_bitratestats/rte_bitrate.c b/lib/librte_bitratestats/rte_bitrate.c index 193aa690..3ceb3516 100644 --- a/lib/librte_bitratestats/rte_bitrate.c +++ b/lib/librte_bitratestats/rte_bitrate.c @@ -112,7 +112,7 @@ rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data, port_data->peak_ibits = cnt_bits; delta = cnt_bits; delta -= port_data->ewma_ibits; - /* The +-50 fixes integer rounding during divison */ + /* The +-50 fixes integer rounding during division */ if (delta > 0) delta = (delta * alpha_percent + 50) / 100; else diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c index b8148808..56491eac 100644 --- a/lib/librte_cmdline/cmdline_parse.c +++ b/lib/librte_cmdline/cmdline_parse.c @@ -139,6 +139,21 @@ nb_common_chars(const char * s1, const char * s2) return i; } +/** Retrieve either static or dynamic token at a given index. */ +static cmdline_parse_token_hdr_t * +get_token(cmdline_parse_inst_t *inst, unsigned int index) +{ + cmdline_parse_token_hdr_t *token_p; + + /* check presence of static tokens first */ + if (inst->tokens[0] || !inst->f) + return inst->tokens[index]; + /* generate dynamic token */ + token_p = NULL; + inst->f(&token_p, NULL, &inst->tokens[index]); + return token_p; +} + /** * try to match the buffer with an instruction (only the first * nb_match_token tokens if != 0). Return 0 if we match all the @@ -146,27 +161,20 @@ nb_common_chars(const char * s1, const char * s2) */ static int match_inst(cmdline_parse_inst_t *inst, const char *buf, - unsigned int nb_match_token, void *resbuf, unsigned resbuf_size, - cmdline_parse_token_hdr_t - *(*dyn_tokens)[CMDLINE_PARSE_DYNAMIC_TOKENS]) + unsigned int nb_match_token, void *resbuf, unsigned resbuf_size) { - unsigned int token_num=0; cmdline_parse_token_hdr_t * token_p; unsigned int i=0; int n = 0; struct cmdline_token_hdr token_hdr; - token_p = inst->tokens[token_num]; - if (!token_p && dyn_tokens && inst->f) { - if (!(*dyn_tokens)[0]) - inst->f(&(*dyn_tokens)[0], NULL, dyn_tokens); - token_p = (*dyn_tokens)[0]; - } - if (token_p) + /* check if we match all tokens of inst */ + while (!nb_match_token || i < nb_match_token) { + token_p = get_token(inst, i); + if (!token_p) + break; memcpy(&token_hdr, token_p, sizeof(token_hdr)); - /* check if we match all tokens of inst */ - while (token_p && (!nb_match_token || i<nb_match_token)) { debug_printf("TK\n"); /* skip spaces */ while (isblank2(*buf)) { @@ -201,21 +209,6 @@ match_inst(cmdline_parse_inst_t *inst, const char *buf, debug_printf("TK parsed (len=%d)\n", n); i++; buf += n; - - token_num ++; - if (!inst->tokens[0]) { - if (token_num < (CMDLINE_PARSE_DYNAMIC_TOKENS - 1)) { - if (!(*dyn_tokens)[token_num]) - inst->f(&(*dyn_tokens)[token_num], - NULL, - dyn_tokens); - token_p = (*dyn_tokens)[token_num]; - } else - token_p = NULL; - } else - token_p = inst->tokens[token_num]; - if (token_p) - memcpy(&token_hdr, token_p, sizeof(token_hdr)); } /* does not match */ @@ -259,7 +252,6 @@ cmdline_parse(struct cmdline *cl, const char * buf) char buf[CMDLINE_PARSE_RESULT_BUFSIZE]; long double align; /* strong alignment constraint for buf */ } result, tmp_result; - cmdline_parse_token_hdr_t *dyn_tokens[CMDLINE_PARSE_DYNAMIC_TOKENS]; void (*f)(void *, struct cmdline *, void *) = NULL; void *data = NULL; int comment = 0; @@ -276,7 +268,6 @@ cmdline_parse(struct cmdline *cl, const char * buf) return CMDLINE_PARSE_BAD_ARGS; ctx = cl->ctx; - memset(&dyn_tokens, 0, 
sizeof(dyn_tokens)); /* * - look if the buffer contains at least one line @@ -322,7 +313,7 @@ cmdline_parse(struct cmdline *cl, const char * buf) /* fully parsed */ tok = match_inst(inst, buf, 0, tmp_result.buf, - sizeof(tmp_result.buf), &dyn_tokens); + sizeof(tmp_result.buf)); if (tok > 0) /* we matched at least one token */ err = CMDLINE_PARSE_BAD_ARGS; @@ -380,7 +371,6 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state, cmdline_parse_token_hdr_t *token_p; struct cmdline_token_hdr token_hdr; char tmpbuf[CMDLINE_BUFFER_SIZE], comp_buf[CMDLINE_BUFFER_SIZE]; - cmdline_parse_token_hdr_t *dyn_tokens[CMDLINE_PARSE_DYNAMIC_TOKENS]; unsigned int partial_tok_len; int comp_len = -1; int tmp_len = -1; @@ -400,7 +390,6 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state, debug_printf("%s called\n", __func__); memset(&token_hdr, 0, sizeof(token_hdr)); - memset(&dyn_tokens, 0, sizeof(dyn_tokens)); /* count the number of complete token to parse */ for (i=0 ; buf[i] ; i++) { @@ -424,23 +413,11 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state, while (inst) { /* parse the first tokens of the inst */ if (nb_token && - match_inst(inst, buf, nb_token, NULL, 0, - &dyn_tokens)) + match_inst(inst, buf, nb_token, NULL, 0)) goto next; debug_printf("instruction match\n"); - if (!inst->tokens[0]) { - if (nb_token < - (CMDLINE_PARSE_DYNAMIC_TOKENS - 1)) { - if (!dyn_tokens[nb_token]) - inst->f(&dyn_tokens[nb_token], - NULL, - &dyn_tokens); - token_p = dyn_tokens[nb_token]; - } else - token_p = NULL; - } else - token_p = inst->tokens[nb_token]; + token_p = get_token(inst, nb_token); if (token_p) memcpy(&token_hdr, token_p, sizeof(token_hdr)); @@ -531,20 +508,10 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state, inst = ctx[inst_num]; if (nb_token && - match_inst(inst, buf, nb_token, NULL, 0, &dyn_tokens)) + match_inst(inst, buf, nb_token, NULL, 0)) goto next2; - if (!inst->tokens[0]) { - if (nb_token < (CMDLINE_PARSE_DYNAMIC_TOKENS - 1)) { - if (!dyn_tokens[nb_token]) - inst->f(&dyn_tokens[nb_token], - NULL, - &dyn_tokens); - token_p = dyn_tokens[nb_token]; - } else - token_p = NULL; - } else - token_p = inst->tokens[nb_token]; + token_p = get_token(inst, nb_token); if (token_p) memcpy(&token_hdr, token_p, sizeof(token_hdr)); diff --git a/lib/librte_cmdline/cmdline_parse.h b/lib/librte_cmdline/cmdline_parse.h index 65b18d4f..13e086f2 100644 --- a/lib/librte_cmdline/cmdline_parse.h +++ b/lib/librte_cmdline/cmdline_parse.h @@ -83,9 +83,6 @@ extern "C" { /* maximum buffer size for parsed result */ #define CMDLINE_PARSE_RESULT_BUFSIZE 8192 -/* maximum number of dynamic tokens */ -#define CMDLINE_PARSE_DYNAMIC_TOKENS 128 - /** * Stores a pointer to the ops struct, and the offset: the place to * write the parsed result in the destination structure. @@ -137,20 +134,53 @@ struct cmdline; * When no tokens are defined (tokens[0] == NULL), they are retrieved * dynamically by calling f() as follows: * - * f((struct cmdline_token_hdr **)&token_hdr, - * NULL, - * (struct cmdline_token_hdr *[])tokens)); + * @code + * + * f((struct cmdline_token_hdr **)&token_p, + * NULL, + * (struct cmdline_token_hdr **)&inst->tokens[num]); + * + * @endcode * * The address of the resulting token is expected at the location pointed by * the first argument. Can be set to NULL to end the list. * * The cmdline argument (struct cmdline *) is always NULL. * - * The last argument points to the NULL-terminated list of dynamic tokens - * defined so far. 
Since token_hdr points to an index of that list, the - * current index can be derived as follows: + * The last argument points to the inst->tokens[] entry to retrieve, which + * is not necessarily inside allocated memory and should neither be read nor + * written. Its sole purpose is to deduce the token entry index of interest + * as described in the example below. + * + * Note about constraints: + * + * - Only the address of these tokens is dynamic, their storage should be + * static like normal tokens. + * - Dynamic token lists that need to maintain an internal context (e.g. in + * order to determine the next token) must store it statically also. This + * context must be reinitialized when the first token is requested, that + * is, when &inst->tokens[0] is provided as the third argument. + * - Dynamic token lists must be NULL-terminated to generate usable + * commands. + * + * @code + * + * // Assuming first and third arguments are respectively named "token_p" + * // and "token": + * + * int index = token - inst->tokens; + * + * if (!index) { + * [...] // Clean up internal context if any. + * } + * [...] // Then set up dyn_token according to index. + * + * if (no_more_tokens) + * *token_p = NULL; + * else + * *token_p = &dyn_token; * - * int index = token_hdr - &(*tokens)[0]; + * @endcode */ struct cmdline_inst { /* f(parsed_struct, data) */ diff --git a/lib/librte_cmdline/cmdline_parse_etheraddr.c b/lib/librte_cmdline/cmdline_parse_etheraddr.c index dbfe4a61..da02d2c9 100644 --- a/lib/librte_cmdline/cmdline_parse_etheraddr.c +++ b/lib/librte_cmdline/cmdline_parse_etheraddr.c @@ -65,7 +65,6 @@ #include <inttypes.h> #include <ctype.h> #include <string.h> -#include <errno.h> #include <sys/types.h> #include <net/ethernet.h> diff --git a/lib/librte_compat/rte_compat.h b/lib/librte_compat/rte_compat.h index 1c3c8d52..41e8032b 100644 --- a/lib/librte_compat/rte_compat.h +++ b/lib/librte_compat/rte_compat.h @@ -39,7 +39,7 @@ * When a symol is exported from a library to provide an API, it also provides a * calling convention (ABI) that is embodied in its name, return type, * arguments, etc. On occasion that function may need to change to accommodate - * new functionality, behavior, etc. When that occurs, it is desireable to + * new functionality, behavior, etc. When that occurs, it is desirable to * allow for backwards compatibility for a time with older binaries that are * dynamically linked to the dpdk. To support that, the __vsym and * VERSION_SYMBOL macros are created. 
They, in conjunction with the diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile index 18f5e8c5..6ac331bc 100644 --- a/lib/librte_cryptodev/Makefile +++ b/lib/librte_cryptodev/Makefile @@ -34,20 +34,22 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_cryptodev.a # library version -LIBABIVER := 2 +LIBABIVER := 3 # build flags CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) # library source files -SRCS-y += rte_cryptodev.c +SRCS-y += rte_cryptodev.c rte_cryptodev_pmd.c # export include files SYMLINK-y-include += rte_crypto.h SYMLINK-y-include += rte_crypto_sym.h SYMLINK-y-include += rte_cryptodev.h SYMLINK-y-include += rte_cryptodev_pmd.h +SYMLINK-y-include += rte_cryptodev_vdev.h +SYMLINK-y-include += rte_cryptodev_pci.h # versioning export map EXPORT_MAP := rte_cryptodev_version.map diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h index 90195188..10fe0804 100644 --- a/lib/librte_cryptodev/rte_crypto.h +++ b/lib/librte_cryptodev/rte_crypto.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2016 Intel Corporation. All rights reserved. + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -66,8 +66,6 @@ enum rte_crypto_op_status { /**< Operation completed successfully */ RTE_CRYPTO_OP_STATUS_NOT_PROCESSED, /**< Operation has not yet been processed by a crypto device */ - RTE_CRYPTO_OP_STATUS_ENQUEUED, - /**< Operation is enqueued on device */ RTE_CRYPTO_OP_STATUS_AUTH_FAILED, /**< Authentication verification failed */ RTE_CRYPTO_OP_STATUS_INVALID_SESSION, @@ -82,6 +80,16 @@ enum rte_crypto_op_status { }; /** + * Crypto operation session type. This is used to specify whether a crypto + * operation has session structure attached for immutable parameters or if all + * operation information is included in the operation data structure. + */ +enum rte_crypto_op_sess_type { + RTE_CRYPTO_OP_WITH_SESSION, /**< Session based crypto operation */ + RTE_CRYPTO_OP_SESSIONLESS /**< Session-less crypto operation */ +}; + +/** * Cryptographic Operation. * * This structure contains data relating to performing cryptographic @@ -92,32 +100,32 @@ enum rte_crypto_op_status { * rte_cryptodev_enqueue_burst() / rte_cryptodev_dequeue_burst() . */ struct rte_crypto_op { - enum rte_crypto_op_type type; + uint8_t type; /**< operation type */ - - enum rte_crypto_op_status status; + uint8_t status; /**< * operation status - this is reset to * RTE_CRYPTO_OP_STATUS_NOT_PROCESSED on allocation from mempool and * will be set to RTE_CRYPTO_OP_STATUS_SUCCESS after crypto operation * is successfully processed by a crypto PMD */ + uint8_t sess_type; + /**< operation session type */ + uint8_t reserved[5]; + /**< Reserved bytes to fill 64 bits for future additions */ struct rte_mempool *mempool; /**< crypto operation mempool which operation is allocated from */ phys_addr_t phys_addr; /**< physical address of crypto operation */ - void *opaque_data; - /**< Opaque pointer for user data */ - RTE_STD_C11 union { - struct rte_crypto_sym_op *sym; + struct rte_crypto_sym_op sym[0]; /**< Symmetric operation parameters */ }; /**< operation specific parameters */ -} __rte_cache_aligned; +}; /** * Reset the fields of a crypto operation to their default values. 
@@ -130,22 +138,15 @@ __rte_crypto_op_reset(struct rte_crypto_op *op, enum rte_crypto_op_type type) { op->type = type; op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED; + op->sess_type = RTE_CRYPTO_OP_SESSIONLESS; switch (type) { case RTE_CRYPTO_OP_TYPE_SYMMETRIC: - /** Symmetric operation structure starts after the end of the - * rte_crypto_op structure. - */ - op->sym = (struct rte_crypto_sym_op *)(op + 1); - op->type = type; - __rte_crypto_sym_op_reset(op->sym); break; default: break; } - - op->opaque_data = NULL; } /** @@ -265,8 +266,9 @@ rte_crypto_op_alloc(struct rte_mempool *mempool, enum rte_crypto_op_type type) * @param nb_ops Number of crypto operations to allocate * * @returns - * - On success returns a valid rte_crypto_op structure - * - On failure returns NULL + * - nb_ops if the number of operations requested were allocated. + * - 0 if the requested number of ops are not available. + * None are allocated in this case. */ static inline unsigned @@ -407,6 +409,8 @@ rte_crypto_op_attach_sym_session(struct rte_crypto_op *op, if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) return -1; + op->sess_type = RTE_CRYPTO_OP_WITH_SESSION; + return __rte_crypto_sym_op_attach_sym_session(op->sym, sess); } diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h index 3a408448..0ceaa917 100644 --- a/lib/librte_cryptodev/rte_crypto_sym.h +++ b/lib/librte_cryptodev/rte_crypto_sym.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2016 Intel Corporation. All rights reserved. + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -68,28 +68,12 @@ enum rte_crypto_cipher_algorithm { RTE_CRYPTO_CIPHER_AES_CBC, /**< AES algorithm in CBC mode */ - RTE_CRYPTO_CIPHER_AES_CCM, - /**< AES algorithm in CCM mode. When this cipher algorithm is used the - * *RTE_CRYPTO_AUTH_AES_CCM* element of the - * *rte_crypto_hash_algorithm* enum MUST be used to set up the related - * *rte_crypto_auth_xform* structure in the session context or in - * the op_params of the crypto operation structure in the case of a - * session-less crypto operation - */ RTE_CRYPTO_CIPHER_AES_CTR, /**< AES algorithm in Counter mode */ RTE_CRYPTO_CIPHER_AES_ECB, /**< AES algorithm in ECB mode */ RTE_CRYPTO_CIPHER_AES_F8, /**< AES algorithm in F8 mode */ - RTE_CRYPTO_CIPHER_AES_GCM, - /**< AES algorithm in GCM mode. When this cipher algorithm is used the - * *RTE_CRYPTO_AUTH_AES_GCM* or *RTE_CRYPTO_AUTH_AES_GMAC* element - * of the *rte_crypto_auth_algorithm* enum MUST be used to set up - * the related *rte_crypto_auth_setup_data* structure in the session - * context or in the op_params of the crypto operation structure - * in the case of a session-less crypto operation. - */ RTE_CRYPTO_CIPHER_AES_XTS, /**< AES algorithm in XTS mode */ @@ -159,7 +143,7 @@ struct rte_crypto_cipher_xform { struct { uint8_t *data; /**< pointer to key data */ - size_t length; /**< key length in bytes */ + uint16_t length;/**< key length in bytes */ } key; /**< Cipher key * @@ -190,6 +174,55 @@ struct rte_crypto_cipher_xform { * - Each key can be either 128 bits (16 bytes) or 256 bits (32 bytes). * - Both keys must have the same size. **/ + struct { + uint16_t offset; + /**< Starting point for Initialisation Vector or Counter, + * specified as number of bytes from start of crypto + * operation (rte_crypto_op). 
+ * + * - For block ciphers in CBC or F8 mode, or for KASUMI + * in F8 mode, or for SNOW 3G in UEA2 mode, this is the + * Initialisation Vector (IV) value. + * + * - For block ciphers in CTR mode, this is the counter. + * + * - For GCM mode, this is either the IV (if the length + * is 96 bits) or J0 (for other sizes), where J0 is as + * defined by NIST SP800-38D. Regardless of the IV + * length, a full 16 bytes needs to be allocated. + * + * - For CCM mode, the first byte is reserved, and the + * nonce should be written starting at &iv[1] (to allow + * space for the implementation to write in the flags + * in the first byte). Note that a full 16 bytes should + * be allocated, even though the length field will + * have a value less than this. + * + * - For AES-XTS, this is the 128bit tweak, i, from + * IEEE Std 1619-2007. + * + * For optimum performance, the data pointed to SHOULD + * be 8-byte aligned. + */ + uint16_t length; + /**< Length of valid IV data. + * + * - For block ciphers in CBC or F8 mode, or for KASUMI + * in F8 mode, or for SNOW 3G in UEA2 mode, this is the + * length of the IV (which must be the same as the + * block length of the cipher). + * + * - For block ciphers in CTR mode, this is the length + * of the counter (which must be the same as the block + * length of the cipher). + * + * - For GCM mode, this is either 12 (for 96-bit IVs) + * or 16, in which case data points to J0. + * + * - For CCM mode, this is the length of the nonce, + * which can be in the range 7 to 13 inclusive. + */ + } iv; /**< Initialisation vector parameters */ }; /** Symmetric Authentication / Hash Algorithms */ @@ -199,33 +232,10 @@ enum rte_crypto_auth_algorithm { RTE_CRYPTO_AUTH_AES_CBC_MAC, /**< AES-CBC-MAC algorithm. Only 128-bit keys are supported. */ - RTE_CRYPTO_AUTH_AES_CCM, - /**< AES algorithm in CCM mode. This is an authenticated cipher. When - * this hash algorithm is used, the *RTE_CRYPTO_CIPHER_AES_CCM* - * element of the *rte_crypto_cipher_algorithm* enum MUST be used to - * set up the related rte_crypto_cipher_setup_data structure in the - * session context or the corresponding parameter in the crypto - * operation data structures op_params parameter MUST be set for a - * session-less crypto operation. - */ RTE_CRYPTO_AUTH_AES_CMAC, /**< AES CMAC algorithm. */ - RTE_CRYPTO_AUTH_AES_GCM, - /**< AES algorithm in GCM mode. When this hash algorithm - * is used, the RTE_CRYPTO_CIPHER_AES_GCM element of the - * rte_crypto_cipher_algorithm enum MUST be used to set up the related - * rte_crypto_cipher_setup_data structure in the session context, or - * the corresponding parameter in the crypto operation data structures - * op_params parameter MUST be set for a session-less crypto operation. - */ RTE_CRYPTO_AUTH_AES_GMAC, - /**< AES GMAC algorithm. When this hash algorithm - * is used, the RTE_CRYPTO_CIPHER_AES_GCM element of the - * rte_crypto_cipher_algorithm enum MUST be used to set up the related - * rte_crypto_cipher_setup_data structure in the session context, or - * the corresponding parameter in the crypto operation data structures - * op_params parameter MUST be set for a session-less crypto operation. - */ + /**< AES GMAC algorithm. */ RTE_CRYPTO_AUTH_AES_XCBC_MAC, /**< AES XCBC algorithm. */ @@ -296,7 +306,7 @@ struct rte_crypto_auth_xform { struct { uint8_t *data; /**< pointer to key data */ - size_t length; /**< key length in bytes */ + uint16_t length;/**< key length in bytes */ } key; /**< Authentication key data. 
* The authentication key length MUST be less than or equal to the @@ -305,7 +315,35 @@ struct rte_crypto_auth_xform { * (for example RFC 2104, FIPS 198a). */ - uint32_t digest_length; + struct { + uint16_t offset; + /**< Starting point for Initialisation Vector or Counter, + * specified as number of bytes from start of crypto + * operation (rte_crypto_op). + * + * - For SNOW 3G in UIA2 mode, for ZUC in EIA3 mode and + * for AES-GMAC, this is the authentication + * Initialisation Vector (IV) value. + * + * - For KASUMI in F9 mode and other authentication + * algorithms, this field is not used. + * + * For optimum performance, the data pointed to SHOULD + * be 8-byte aligned. + */ + uint16_t length; + /**< Length of valid IV data. + * + * - For SNOW3G in UIA2 mode, for ZUC in EIA3 mode and + * for AES-GMAC, this is the length of the IV. + * + * - For KASUMI in F9 mode and other authentication + * algorithms, this field is not used. + * + */ + } iv; /**< Initialisation vector parameters */ + + uint16_t digest_length; /**< Length of the digest to be returned. If the verify option is set, * this specifies the length of the digest to be compared for the * session. @@ -315,42 +353,89 @@ struct rte_crypto_auth_xform { * If the value is less than the maximum length allowed by the hash, * the result shall be truncated. */ +}; - uint32_t add_auth_data_length; - /**< The length of the additional authenticated data (AAD) in bytes. - * The maximum permitted value is 65535 (2^16 - 1) bytes, unless - * otherwise specified below. - * - * This field must be specified when the hash algorithm is one of the - * following: - * - * - For SNOW 3G (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2), this is the - * length of the IV (which should be 16). - * - * - For GCM (@ref RTE_CRYPTO_AUTH_AES_GCM). In this case, this is - * the length of the Additional Authenticated Data (called A, in NIST - * SP800-38D). - * - * - For CCM (@ref RTE_CRYPTO_AUTH_AES_CCM). In this case, this is - * the length of the associated data (called A, in NIST SP800-38C). - * Note that this does NOT include the length of any padding, or the - * 18 bytes reserved at the start of the above field to store the - * block B0 and the encoded length. The maximum permitted value in - * this case is 222 bytes. - * - * @note - * For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of operation - * this field is not used and should be set to 0. Instead the length - * of the AAD data is specified in additional authentication data - * length field of the rte_crypto_sym_op_data structure - */ + +/** Symmetric AEAD Algorithms */ +enum rte_crypto_aead_algorithm { + RTE_CRYPTO_AEAD_AES_CCM = 1, + /**< AES algorithm in CCM mode. */ + RTE_CRYPTO_AEAD_AES_GCM, + /**< AES algorithm in GCM mode. 
*/ + RTE_CRYPTO_AEAD_LIST_END +}; + +/** AEAD algorithm name strings */ +extern const char * +rte_crypto_aead_algorithm_strings[]; + +/** Symmetric AEAD Operations */ +enum rte_crypto_aead_operation { + RTE_CRYPTO_AEAD_OP_ENCRYPT, + /**< Encrypt and generate digest */ + RTE_CRYPTO_AEAD_OP_DECRYPT + /**< Verify digest and decrypt */ +}; + +/** Authentication operation name strings */ +extern const char * +rte_crypto_aead_operation_strings[]; + +struct rte_crypto_aead_xform { + enum rte_crypto_aead_operation op; + /**< AEAD operation type */ + enum rte_crypto_aead_algorithm algo; + /**< AEAD algorithm selection */ + + struct { + uint8_t *data; /**< pointer to key data */ + uint16_t length;/**< key length in bytes */ + } key; + + struct { + uint16_t offset; + /**< Starting point for Initialisation Vector or Counter, + * specified as number of bytes from start of crypto + * operation (rte_crypto_op). + * + * - For GCM mode, this is either the IV (if the length + * is 96 bits) or J0 (for other sizes), where J0 is as + * defined by NIST SP800-38D. Regardless of the IV + * length, a full 16 bytes needs to be allocated. + * + * - For CCM mode, the first byte is reserved, and the + * nonce should be written starting at &iv[1] (to allow + * space for the implementation to write in the flags + * in the first byte). Note that a full 16 bytes should + * be allocated, even though the length field will + * have a value less than this. + * + * For optimum performance, the data pointed to SHOULD + * be 8-byte aligned. + */ + uint16_t length; + /**< Length of valid IV data. + * + * - For GCM mode, this is either 12 (for 96-bit IVs) + * or 16, in which case data points to J0. + * + * - For CCM mode, this is the length of the nonce, + * which can be in the range 7 to 13 inclusive. + */ + } iv; /**< Initialisation vector parameters */ + + uint16_t digest_length; + + uint16_t aad_length; + /**< The length of the additional authenticated data (AAD) in bytes. */ }; /** Crypto transformation types */ enum rte_crypto_sym_xform_type { RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED = 0, /**< No xform specified */ RTE_CRYPTO_SYM_XFORM_AUTH, /**< Authentication xform */ - RTE_CRYPTO_SYM_XFORM_CIPHER /**< Cipher xform */ + RTE_CRYPTO_SYM_XFORM_CIPHER, /**< Cipher xform */ + RTE_CRYPTO_SYM_XFORM_AEAD /**< AEAD xform */ }; /** @@ -373,20 +458,11 @@ struct rte_crypto_sym_xform { /**< Authentication / hash xform */ struct rte_crypto_cipher_xform cipher; /**< Cipher xform */ + struct rte_crypto_aead_xform aead; + /**< AEAD xform */ }; }; -/** - * Crypto operation session type. This is used to specify whether a crypto - * operation has session structure attached for immutable parameters or if all - * operation information is included in the operation data structure. - */ -enum rte_crypto_sym_op_sess_type { - RTE_CRYPTO_SYM_OP_WITH_SESSION, /**< Session based crypto operation */ - RTE_CRYPTO_SYM_OP_SESSIONLESS /**< Session-less crypto operation */ -}; - - struct rte_cryptodev_sym_session; /** @@ -423,8 +499,6 @@ struct rte_crypto_sym_op { struct rte_mbuf *m_src; /**< source mbuf */ struct rte_mbuf *m_dst; /**< destination mbuf */ - enum rte_crypto_sym_op_sess_type sess_type; - RTE_STD_C11 union { struct rte_cryptodev_sym_session *session; @@ -433,227 +507,182 @@ struct rte_crypto_sym_op { /**< Session-less API crypto operation parameters */ }; - struct { - struct { - uint32_t offset; - /**< Starting point for cipher processing, specified - * as number of bytes from start of data in the source - * buffer. 
The result of the cipher operation will be - * written back into the output buffer starting at - * this location. - * - * @note - * For SNOW 3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2, - * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8 - * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3, - * this field should be in bits. - */ - - uint32_t length; - /**< The message length, in bytes, of the source buffer - * on which the cryptographic operation will be - * computed. This must be a multiple of the block size - * if a block cipher is being used. This is also the - * same as the result length. - * - * @note - * In the case of CCM @ref RTE_CRYPTO_AUTH_AES_CCM, - * this value should not include the length of the - * padding or the length of the MAC; the driver will - * compute the actual number of bytes over which the - * encryption will occur, which will include these - * values. - * - * @note - * For AES-GMAC @ref RTE_CRYPTO_AUTH_AES_GMAC, this - * field should be set to 0. - * - * @note - * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UEA2, - * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8 - * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3, - * this field should be in bits. - */ - } data; /**< Data offsets and length for ciphering */ - - struct { - uint8_t *data; - /**< Initialisation Vector or Counter. - * - * - For block ciphers in CBC or F8 mode, or for KASUMI - * in F8 mode, or for SNOW 3G in UEA2 mode, this is the - * Initialisation Vector (IV) value. - * - * - For block ciphers in CTR mode, this is the counter. - * - * - For GCM mode, this is either the IV (if the length - * is 96 bits) or J0 (for other sizes), where J0 is as - * defined by NIST SP800-38D. Regardless of the IV - * length, a full 16 bytes needs to be allocated. - * - * - For CCM mode, the first byte is reserved, and the - * nonce should be written starting at &iv[1] (to allow - * space for the implementation to write in the flags - * in the first byte). Note that a full 16 bytes should - * be allocated, even though the length field will - * have a value less than this. - * - * - For AES-XTS, this is the 128bit tweak, i, from - * IEEE Std 1619-2007. - * - * For optimum performance, the data pointed to SHOULD - * be 8-byte aligned. - */ - phys_addr_t phys_addr; - uint16_t length; - /**< Length of valid IV data. - * - * - For block ciphers in CBC or F8 mode, or for KASUMI - * in F8 mode, or for SNOW 3G in UEA2 mode, this is the - * length of the IV (which must be the same as the - * block length of the cipher). - * - * - For block ciphers in CTR mode, this is the length - * of the counter (which must be the same as the block - * length of the cipher). - * - * - For GCM mode, this is either 12 (for 96-bit IVs) - * or 16, in which case data points to J0. - * - * - For CCM mode, this is the length of the nonce, - * which can be in the range 7 to 13 inclusive. - */ - } iv; /**< Initialisation vector parameters */ - } cipher; - - struct { - struct { - uint32_t offset; - /**< Starting point for hash processing, specified as - * number of bytes from start of packet in source - * buffer. - * - * @note - * For CCM and GCM modes of operation, this field is - * ignored. The field @ref aad field - * should be set instead. - * - * @note For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) - * mode of operation, this field is set to 0. aad data - * pointer of rte_crypto_sym_op_data structure is - * used instead - * - * @note - * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2, - * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9 - * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3, - * this field should be in bits. 
- */ - - uint32_t length; - /**< The message length, in bytes, of the source - * buffer that the hash will be computed on. - * - * @note - * For CCM and GCM modes of operation, this field is - * ignored. The field @ref aad field should be set - * instead. - * - * @note - * For AES-GMAC @ref RTE_CRYPTO_AUTH_AES_GMAC mode - * of operation, this field is set to 0. - * Auth.aad.length is used instead. - * - * @note - * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2, - * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9 - * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3, - * this field should be in bits. - */ - } data; /**< Data offsets and length for authentication */ - + RTE_STD_C11 + union { struct { - uint8_t *data; - /**< This points to the location where the digest result - * should be inserted (in the case of digest generation) - * or where the purported digest exists (in the case of - * digest verification). - * - * At session creation time, the client specified the - * digest result length with the digest_length member - * of the @ref rte_crypto_auth_xform structure. For - * physical crypto devices the caller must allocate at - * least digest_length of physically contiguous memory - * at this location. - * - * For digest generation, the digest result will - * overwrite any data at this location. - * - * @note - * For GCM (@ref RTE_CRYPTO_AUTH_AES_GCM), for - * "digest result" read "authentication tag T". - */ - phys_addr_t phys_addr; - /**< Physical address of digest */ - uint16_t length; - /**< Length of digest. This must be the same value as - * @ref rte_crypto_auth_xform.digest_length. - */ - } digest; /**< Digest parameters */ + struct { + uint32_t offset; + /**< Starting point for AEAD processing, specified as + * number of bytes from start of packet in source + * buffer. + */ + uint32_t length; + /**< The message length, in bytes, of the source buffer + * on which the cryptographic operation will be + * computed. This must be a multiple of the block size + */ + } data; /**< Data offsets and length for AEAD */ + struct { + uint8_t *data; + /**< This points to the location where the digest result + * should be inserted (in the case of digest generation) + * or where the purported digest exists (in the case of + * digest verification). + * + * At session creation time, the client specified the + * digest result length with the digest_length member + * of the @ref rte_crypto_auth_xform structure. For + * physical crypto devices the caller must allocate at + * least digest_length of physically contiguous memory + * at this location. + * + * For digest generation, the digest result will + * overwrite any data at this location. + * + * @note + * For GCM (@ref RTE_CRYPTO_AEAD_AES_GCM), for + * "digest result" read "authentication tag T". + */ + phys_addr_t phys_addr; + /**< Physical address of digest */ + } digest; /**< Digest parameters */ + struct { + uint8_t *data; + /**< Pointer to Additional Authenticated Data (AAD) + * needed for authenticated cipher mechanisms (CCM and + * GCM) + * + * Specifically for CCM (@ref RTE_CRYPTO_AEAD_AES_CCM), + * the caller should setup this field as follows: + * + * - the nonce should be written starting at an offset + * of one byte into the array, leaving room for the + * implementation to write in the flags to the first + * byte. + * + * - the additional authentication data itself should + * be written starting at an offset of 18 bytes into + * the array, leaving room for the length encoding in + * the first two bytes of the second block. 
+ * + * - the array should be big enough to hold the above + * fields, plus any padding to round this up to the + * nearest multiple of the block size (16 bytes). + * Padding will be added by the implementation. + * + * Finally, for GCM (@ref RTE_CRYPTO_AEAD_AES_GCM), the + * caller should setup this field as follows: + * + * - the AAD is written in starting at byte 0 + * - the array must be big enough to hold the AAD, plus + * any space to round this up to the nearest multiple + * of the block size (16 bytes). + * + */ + phys_addr_t phys_addr; /**< physical address */ + } aad; + /**< Additional authentication parameters */ + } aead; struct { - uint8_t *data; - /**< Pointer to Additional Authenticated Data (AAD) - * needed for authenticated cipher mechanisms (CCM and - * GCM), and to the IV for SNOW 3G authentication - * (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2). For other - * authentication mechanisms this pointer is ignored. - * - * The length of the data pointed to by this field is - * set up for the session in the @ref - * rte_crypto_auth_xform structure as part of the @ref - * rte_cryptodev_sym_session_create function call. - * This length must not exceed 65535 (2^16-1) bytes. - * - * Specifically for CCM (@ref RTE_CRYPTO_AUTH_AES_CCM), - * the caller should setup this field as follows: - * - * - the nonce should be written starting at an offset - * of one byte into the array, leaving room for the - * implementation to write in the flags to the first - * byte. - * - * - the additional authentication data itself should - * be written starting at an offset of 18 bytes into - * the array, leaving room for the length encoding in - * the first two bytes of the second block. - * - * - the array should be big enough to hold the above - * fields, plus any padding to round this up to the - * nearest multiple of the block size (16 bytes). - * Padding will be added by the implementation. - * - * Finally, for GCM (@ref RTE_CRYPTO_AUTH_AES_GCM), the - * caller should setup this field as follows: - * - * - the AAD is written in starting at byte 0 - * - the array must be big enough to hold the AAD, plus - * any space to round this up to the nearest multiple - * of the block size (16 bytes). - * - * @note - * For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of - * operation, this field is used to pass plaintext. - */ - phys_addr_t phys_addr; /**< physical address */ - uint16_t length; - /**< Length of additional authenticated data (AAD) - * in bytes - */ - } aad; - /**< Additional authentication parameters */ - } auth; -} __rte_cache_aligned; + struct { + struct { + uint32_t offset; + /**< Starting point for cipher processing, + * specified as number of bytes from start + * of data in the source buffer. + * The result of the cipher operation will be + * written back into the output buffer + * starting at this location. + * + * @note + * For SNOW 3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2, + * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8 + * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3, + * this field should be in bits. + */ + uint32_t length; + /**< The message length, in bytes, of the + * source buffer on which the cryptographic + * operation will be computed. + * This must be a multiple of the block size + * if a block cipher is being used. This is + * also the same as the result length. + * + * @note + * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UEA2, + * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8 + * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3, + * this field should be in bits. 
+ */ + } data; /**< Data offsets and length for ciphering */ + } cipher; + + struct { + struct { + uint32_t offset; + /**< Starting point for hash processing, + * specified as number of bytes from start of + * packet in source buffer. + * + * @note + * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2, + * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9 + * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3, + * this field should be in bits. + * + * @note + * For KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9, + * this offset should be such that + * data to authenticate starts at COUNT. + */ + uint32_t length; + /**< The message length, in bytes, of the source + * buffer that the hash will be computed on. + * + * @note + * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2, + * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9 + * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3, + * this field should be in bits. + * + * @note + * For KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9, + * the length should include the COUNT, + * FRESH, message, direction bit and padding + * (to be multiple of 8 bits). + */ + } data; + /**< Data offsets and length for authentication */ + + struct { + uint8_t *data; + /**< This points to the location where + * the digest result should be inserted + * (in the case of digest generation) + * or where the purported digest exists + * (in the case of digest verification). + * + * At session creation time, the client + * specified the digest result length with + * the digest_length member of the + * @ref rte_crypto_auth_xform structure. + * For physical crypto devices the caller + * must allocate at least digest_length of + * physically contiguous memory at this + * location. + * + * For digest generation, the digest result + * will overwrite any data at this location. + * + */ + phys_addr_t phys_addr; + /**< Physical address of digest */ + } digest; /**< Digest parameters */ + } auth; + }; + }; +}; /** @@ -665,8 +694,6 @@ static inline void __rte_crypto_sym_op_reset(struct rte_crypto_sym_op *op) { memset(op, 0, sizeof(*op)); - - op->sess_type = RTE_CRYPTO_SYM_OP_SESSIONLESS; } @@ -708,7 +735,6 @@ __rte_crypto_sym_op_attach_sym_session(struct rte_crypto_sym_op *sym_op, struct rte_cryptodev_sym_session *sess) { sym_op->session = sess; - sym_op->sess_type = RTE_CRYPTO_SYM_OP_WITH_SESSION; return 0; } diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c index b65cd9ce..327d7e84 100644 --- a/lib/librte_cryptodev/rte_cryptodev.c +++ b/lib/librte_cryptodev/rte_cryptodev.c @@ -47,7 +47,6 @@ #include <rte_debug.h> #include <rte_dev.h> #include <rte_interrupts.h> -#include <rte_pci.h> #include <rte_memory.h> #include <rte_memcpy.h> #include <rte_memzone.h> @@ -70,6 +69,8 @@ #include "rte_cryptodev.h" #include "rte_cryptodev_pmd.h" +static uint8_t nb_drivers; + struct rte_cryptodev rte_crypto_devices[RTE_CRYPTO_MAX_DEVS]; struct rte_cryptodev *rte_cryptodevs = &rte_crypto_devices[0]; @@ -101,18 +102,6 @@ struct rte_cryptodev_callback { uint32_t active; /**< Callback is executing */ }; -#define RTE_CRYPTODEV_VDEV_NAME ("name") -#define RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG ("max_nb_queue_pairs") -#define RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG ("max_nb_sessions") -#define RTE_CRYPTODEV_VDEV_SOCKET_ID ("socket_id") - -static const char *cryptodev_vdev_valid_params[] = { - RTE_CRYPTODEV_VDEV_NAME, - RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG, - RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG, - RTE_CRYPTODEV_VDEV_SOCKET_ID -}; - /** * The crypto cipher algorithm strings identifiers. * It could be used in application command line. 
@@ -124,11 +113,9 @@ rte_crypto_cipher_algorithm_strings[] = { [RTE_CRYPTO_CIPHER_3DES_CTR] = "3des-ctr", [RTE_CRYPTO_CIPHER_AES_CBC] = "aes-cbc", - [RTE_CRYPTO_CIPHER_AES_CCM] = "aes-ccm", [RTE_CRYPTO_CIPHER_AES_CTR] = "aes-ctr", [RTE_CRYPTO_CIPHER_AES_DOCSISBPI] = "aes-docsisbpi", [RTE_CRYPTO_CIPHER_AES_ECB] = "aes-ecb", - [RTE_CRYPTO_CIPHER_AES_GCM] = "aes-gcm", [RTE_CRYPTO_CIPHER_AES_F8] = "aes-f8", [RTE_CRYPTO_CIPHER_AES_XTS] = "aes-xts", @@ -161,9 +148,7 @@ rte_crypto_cipher_operation_strings[] = { const char * rte_crypto_auth_algorithm_strings[] = { [RTE_CRYPTO_AUTH_AES_CBC_MAC] = "aes-cbc-mac", - [RTE_CRYPTO_AUTH_AES_CCM] = "aes-ccm", [RTE_CRYPTO_AUTH_AES_CMAC] = "aes-cmac", - [RTE_CRYPTO_AUTH_AES_GCM] = "aes-gcm", [RTE_CRYPTO_AUTH_AES_GMAC] = "aes-gmac", [RTE_CRYPTO_AUTH_AES_XCBC_MAC] = "aes-xcbc-mac", @@ -189,6 +174,26 @@ rte_crypto_auth_algorithm_strings[] = { [RTE_CRYPTO_AUTH_ZUC_EIA3] = "zuc-eia3" }; +/** + * The crypto AEAD algorithm strings identifiers. + * It could be used in application command line. + */ +const char * +rte_crypto_aead_algorithm_strings[] = { + [RTE_CRYPTO_AEAD_AES_CCM] = "aes-ccm", + [RTE_CRYPTO_AEAD_AES_GCM] = "aes-gcm", +}; + +/** + * The crypto AEAD operation strings identifiers. + * It could be used in application command line. + */ +const char * +rte_crypto_aead_operation_strings[] = { + [RTE_CRYPTO_AEAD_OP_ENCRYPT] = "encrypt", + [RTE_CRYPTO_AEAD_OP_DECRYPT] = "decrypt" +}; + int rte_cryptodev_get_cipher_algo_enum(enum rte_crypto_cipher_algorithm *algo_enum, const char *algo_string) @@ -223,6 +228,23 @@ rte_cryptodev_get_auth_algo_enum(enum rte_crypto_auth_algorithm *algo_enum, return -1; } +int +rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum, + const char *algo_string) +{ + unsigned int i; + + for (i = 1; i < RTE_DIM(rte_crypto_aead_algorithm_strings); i++) { + if (strcmp(algo_string, rte_crypto_aead_algorithm_strings[i]) == 0) { + *algo_enum = (enum rte_crypto_aead_algorithm) i; + return 0; + } + } + + /* Invalid string */ + return -1; +} + /** * The crypto auth operation strings identifiers. * It could be used in application command line. 
@@ -233,111 +255,6 @@ rte_crypto_auth_operation_strings[] = { [RTE_CRYPTO_AUTH_OP_GENERATE] = "generate" }; -static uint8_t -number_of_sockets(void) -{ - int sockets = 0; - int i; - const struct rte_memseg *ms = rte_eal_get_physmem_layout(); - - for (i = 0; ((i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL)); i++) { - if (sockets < ms[i].socket_id) - sockets = ms[i].socket_id; - } - - /* Number of sockets = maximum socket_id + 1 */ - return ++sockets; -} - -/** Parse integer from integer argument */ -static int -parse_integer_arg(const char *key __rte_unused, - const char *value, void *extra_args) -{ - int *i = extra_args; - - *i = atoi(value); - if (*i < 0) { - CDEV_LOG_ERR("Argument has to be positive."); - return -1; - } - - return 0; -} - -/** Parse name */ -static int -parse_name_arg(const char *key __rte_unused, - const char *value, void *extra_args) -{ - struct rte_crypto_vdev_init_params *params = extra_args; - - if (strlen(value) >= RTE_CRYPTODEV_NAME_MAX_LEN - 1) { - CDEV_LOG_ERR("Invalid name %s, should be less than " - "%u bytes", value, - RTE_CRYPTODEV_NAME_MAX_LEN - 1); - return -1; - } - - strncpy(params->name, value, RTE_CRYPTODEV_NAME_MAX_LEN); - - return 0; -} - -int -rte_cryptodev_parse_vdev_init_params(struct rte_crypto_vdev_init_params *params, - const char *input_args) -{ - struct rte_kvargs *kvlist = NULL; - int ret = 0; - - if (params == NULL) - return -EINVAL; - - if (input_args) { - kvlist = rte_kvargs_parse(input_args, - cryptodev_vdev_valid_params); - if (kvlist == NULL) - return -1; - - ret = rte_kvargs_process(kvlist, - RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG, - &parse_integer_arg, - ¶ms->max_nb_queue_pairs); - if (ret < 0) - goto free_kvlist; - - ret = rte_kvargs_process(kvlist, - RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG, - &parse_integer_arg, - ¶ms->max_nb_sessions); - if (ret < 0) - goto free_kvlist; - - ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_SOCKET_ID, - &parse_integer_arg, - ¶ms->socket_id); - if (ret < 0) - goto free_kvlist; - - ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_NAME, - &parse_name_arg, - params); - if (ret < 0) - goto free_kvlist; - - if (params->socket_id >= number_of_sockets()) { - CDEV_LOG_ERR("Invalid socket id specified to create " - "the virtual crypto device on"); - goto free_kvlist; - } - } - -free_kvlist: - rte_kvargs_free(kvlist); - return ret; -} - const struct rte_cryptodev_symmetric_capability * rte_cryptodev_sym_capability_get(uint8_t dev_id, const struct rte_cryptodev_sym_capability_idx *idx) @@ -363,6 +280,10 @@ rte_cryptodev_sym_capability_get(uint8_t dev_id, if (idx->type == RTE_CRYPTO_SYM_XFORM_CIPHER && capability->sym.cipher.algo == idx->algo.cipher) return &capability->sym; + + if (idx->type == RTE_CRYPTO_SYM_XFORM_AEAD && + capability->sym.aead.algo == idx->algo.aead) + return &capability->sym; } return NULL; @@ -390,7 +311,7 @@ rte_cryptodev_sym_capability_check_cipher( int rte_cryptodev_sym_capability_check_auth( const struct rte_cryptodev_symmetric_capability *capability, - uint16_t key_size, uint16_t digest_size, uint16_t aad_size) + uint16_t key_size, uint16_t digest_size, uint16_t iv_size) { if (param_range_check(key_size, capability->auth.key_size)) return -1; @@ -398,12 +319,32 @@ rte_cryptodev_sym_capability_check_auth( if (param_range_check(digest_size, capability->auth.digest_size)) return -1; - if (param_range_check(aad_size, capability->auth.aad_size)) + if (param_range_check(iv_size, capability->auth.iv_size)) return -1; return 0; } +int +rte_cryptodev_sym_capability_check_aead( + const struct 
rte_cryptodev_symmetric_capability *capability, + uint16_t key_size, uint16_t digest_size, uint16_t aad_size, + uint16_t iv_size) +{ + if (param_range_check(key_size, capability->aead.key_size)) + return -1; + + if (param_range_check(digest_size, capability->aead.digest_size)) + return -1; + + if (param_range_check(aad_size, capability->aead.aad_size)) + return -1; + + if (param_range_check(iv_size, capability->aead.iv_size)) + return -1; + + return 0; +} const char * rte_cryptodev_get_feature_name(uint64_t flag) @@ -509,12 +450,12 @@ rte_cryptodev_count(void) } uint8_t -rte_cryptodev_count_devtype(enum rte_cryptodev_type type) +rte_cryptodev_device_count_by_driver(uint8_t driver_id) { uint8_t i, dev_count = 0; for (i = 0; i < rte_cryptodev_globals->max_devs; i++) - if (rte_cryptodev_globals->devs[i].dev_type == type && + if (rte_cryptodev_globals->devs[i].driver_id == driver_id && rte_cryptodev_globals->devs[i].attached == RTE_CRYPTODEV_ATTACHED) dev_count++; @@ -523,7 +464,7 @@ rte_cryptodev_count_devtype(enum rte_cryptodev_type type) } uint8_t -rte_cryptodev_devices_get(const char *dev_name, uint8_t *devices, +rte_cryptodev_devices_get(const char *driver_name, uint8_t *devices, uint8_t nb_devices) { uint8_t i, count = 0; @@ -533,15 +474,11 @@ rte_cryptodev_devices_get(const char *dev_name, uint8_t *devices, for (i = 0; i < max_devs && count < nb_devices; i++) { if (devs[i].attached == RTE_CRYPTODEV_ATTACHED) { - const struct rte_cryptodev_driver *drv = devs[i].driver; int cmp; - if (drv) - cmp = strncmp(drv->pci_drv.driver.name, - dev_name, strlen(dev_name)); - else - cmp = strncmp(devs[i].data->name, - dev_name, strlen(dev_name)); + cmp = strncmp(devs[i].device->driver->name, + driver_name, + strlen(driver_name)); if (cmp == 0) devices[count++] = devs[i].data->dev_id; @@ -662,144 +599,15 @@ rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev) if (cryptodev == NULL) return -EINVAL; - ret = rte_cryptodev_close(cryptodev->data->dev_id); - if (ret < 0) - return ret; - - cryptodev->attached = RTE_CRYPTODEV_DETACHED; - cryptodev_globals.nb_devs--; - return 0; -} - -struct rte_cryptodev * -rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size, - int socket_id) -{ - struct rte_cryptodev *cryptodev; - - /* allocate device structure */ - cryptodev = rte_cryptodev_pmd_allocate(name, socket_id); - if (cryptodev == NULL) - return NULL; - - /* allocate private device structure */ - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - cryptodev->data->dev_private = - rte_zmalloc_socket("cryptodev device private", - dev_private_size, - RTE_CACHE_LINE_SIZE, - socket_id); - - if (cryptodev->data->dev_private == NULL) - rte_panic("Cannot allocate memzone for private device" - " data"); - } - - /* initialise user call-back tail queue */ - TAILQ_INIT(&(cryptodev->link_intr_cbs)); - - return cryptodev; -} - -int -rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv, - struct rte_pci_device *pci_dev) -{ - struct rte_cryptodev_driver *cryptodrv; - struct rte_cryptodev *cryptodev; - - char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN]; - - int retval; - - cryptodrv = (struct rte_cryptodev_driver *)pci_drv; - if (cryptodrv == NULL) - return -ENODEV; - - rte_pci_device_name(&pci_dev->addr, cryptodev_name, - sizeof(cryptodev_name)); - - cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id()); - if (cryptodev == NULL) - return -ENOMEM; - - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - cryptodev->data->dev_private = - rte_zmalloc_socket( - "cryptodev private 
structure", - cryptodrv->dev_private_size, - RTE_CACHE_LINE_SIZE, - rte_socket_id()); - - if (cryptodev->data->dev_private == NULL) - rte_panic("Cannot allocate memzone for private " - "device data"); + /* Close device only if device operations have been set */ + if (cryptodev->dev_ops) { + ret = rte_cryptodev_close(cryptodev->data->dev_id); + if (ret < 0) + return ret; } - cryptodev->device = &pci_dev->device; - cryptodev->driver = cryptodrv; - - /* init user callbacks */ - TAILQ_INIT(&(cryptodev->link_intr_cbs)); - - /* Invoke PMD device initialization function */ - retval = (*cryptodrv->cryptodev_init)(cryptodrv, cryptodev); - if (retval == 0) - return 0; - - CDEV_LOG_ERR("driver %s: crypto_dev_init(vendor_id=0x%x device_id=0x%x)" - " failed", pci_drv->driver.name, - (unsigned) pci_dev->id.vendor_id, - (unsigned) pci_dev->id.device_id); - - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - rte_free(cryptodev->data->dev_private); - cryptodev->attached = RTE_CRYPTODEV_DETACHED; cryptodev_globals.nb_devs--; - - return -ENXIO; -} - -int -rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev) -{ - const struct rte_cryptodev_driver *cryptodrv; - struct rte_cryptodev *cryptodev; - char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN]; - int ret; - - if (pci_dev == NULL) - return -EINVAL; - - rte_pci_device_name(&pci_dev->addr, cryptodev_name, - sizeof(cryptodev_name)); - - cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name); - if (cryptodev == NULL) - return -ENODEV; - - cryptodrv = (const struct rte_cryptodev_driver *)pci_dev->driver; - if (cryptodrv == NULL) - return -ENODEV; - - /* Invoke PMD device uninit function */ - if (*cryptodrv->cryptodev_uninit) { - ret = (*cryptodrv->cryptodev_uninit)(cryptodrv, cryptodev); - if (ret) - return ret; - } - - /* free crypto device */ - rte_cryptodev_pmd_release_device(cryptodev); - - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - rte_free(cryptodev->data->dev_private); - - cryptodev->device = NULL; - cryptodev->driver = NULL; - cryptodev->data = NULL; - return 0; } @@ -934,10 +742,6 @@ rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id) } -static int -rte_cryptodev_sym_session_pool_create(struct rte_cryptodev *dev, - unsigned nb_objs, unsigned obj_cache_size, int socket_id); - int rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config) { @@ -968,14 +772,6 @@ rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config) return diag; } - /* Setup Session mempool for device */ - diag = rte_cryptodev_sym_session_pool_create(dev, - config->session_mp.nb_objs, - config->session_mp.cache_size, - config->socket_id); - if (diag != 0) - return diag; - return (*dev->dev_ops->dev_configure)(dev, config); } @@ -1032,8 +828,8 @@ rte_cryptodev_stop(uint8_t dev_id) return; } - dev->data->dev_started = 0; (*dev->dev_ops->dev_stop)(dev); + dev->data->dev_started = 0; } int @@ -1078,7 +874,9 @@ rte_cryptodev_close(uint8_t dev_id) int rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id, - const struct rte_cryptodev_qp_conf *qp_conf, int socket_id) + const struct rte_cryptodev_qp_conf *qp_conf, int socket_id, + struct rte_mempool *session_pool) + { struct rte_cryptodev *dev; @@ -1102,7 +900,7 @@ rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id, RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_setup, -ENOTSUP); return (*dev->dev_ops->queue_pair_setup)(dev, queue_pair_id, qp_conf, - socket_id); + socket_id, session_pool); } @@ -1163,9 +961,7 @@ rte_cryptodev_info_get(uint8_t 
dev_id, struct rte_cryptodev_info *dev_info) RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); (*dev->dev_ops->dev_infos_get)(dev, dev_info); - dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); - if (dev->driver) - dev_info->driver_name = dev->driver->pci_drv.driver.name; + dev_info->driver_name = dev->device->driver->name; } @@ -1281,142 +1077,74 @@ rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev, } -static void -rte_cryptodev_sym_session_init(struct rte_mempool *mp, - void *opaque_arg, - void *_sess, - __rte_unused unsigned i) -{ - struct rte_cryptodev_sym_session *sess = _sess; - struct rte_cryptodev *dev = opaque_arg; - - memset(sess, 0, mp->elt_size); - - sess->dev_id = dev->data->dev_id; - sess->dev_type = dev->dev_type; - sess->mp = mp; - - if (dev->dev_ops->session_initialize) - (*dev->dev_ops->session_initialize)(mp, sess); -} - -static int -rte_cryptodev_sym_session_pool_create(struct rte_cryptodev *dev, - unsigned nb_objs, unsigned obj_cache_size, int socket_id) +int +rte_cryptodev_sym_session_init(uint8_t dev_id, + struct rte_cryptodev_sym_session *sess, + struct rte_crypto_sym_xform *xforms, + struct rte_mempool *mp) { - char mp_name[RTE_CRYPTODEV_NAME_MAX_LEN]; - unsigned priv_sess_size; + struct rte_cryptodev *dev; + uint8_t index; + int ret; - unsigned n = snprintf(mp_name, sizeof(mp_name), "cdev_%d_sess_mp", - dev->data->dev_id); - if (n > sizeof(mp_name)) { - CDEV_LOG_ERR("Unable to create unique name for session mempool"); - return -ENOMEM; - } + dev = rte_cryptodev_pmd_get_dev(dev_id); - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_get_size, -ENOTSUP); - priv_sess_size = (*dev->dev_ops->session_get_size)(dev); - if (priv_sess_size == 0) { - CDEV_LOG_ERR("%s returned and invalid private session size ", - dev->data->name); - return -ENOMEM; - } + if (sess == NULL || xforms == NULL || dev == NULL) + return -EINVAL; - unsigned elt_size = sizeof(struct rte_cryptodev_sym_session) + - priv_sess_size; + index = dev->driver_id; - dev->data->session_pool = rte_mempool_lookup(mp_name); - if (dev->data->session_pool != NULL) { - if ((dev->data->session_pool->elt_size != elt_size) || - (dev->data->session_pool->cache_size < - obj_cache_size) || - (dev->data->session_pool->size < nb_objs)) { - - CDEV_LOG_ERR("%s mempool already exists with different" - " initialization parameters", mp_name); - dev->data->session_pool = NULL; - return -ENOMEM; - } - } else { - dev->data->session_pool = rte_mempool_create( - mp_name, /* mempool name */ - nb_objs, /* number of elements*/ - elt_size, /* element size*/ - obj_cache_size, /* Cache size*/ - 0, /* private data size */ - NULL, /* obj initialization constructor */ - NULL, /* obj initialization constructor arg */ - rte_cryptodev_sym_session_init, - /**< obj constructor*/ - dev, /* obj constructor arg */ - socket_id, /* socket id */ - 0); /* flags */ - - if (dev->data->session_pool == NULL) { - CDEV_LOG_ERR("%s mempool allocation failed", mp_name); - return -ENOMEM; + if (sess->sess_private_data[index] == NULL) { + ret = dev->dev_ops->session_configure(dev, xforms, sess, mp); + if (ret < 0) { + CDEV_LOG_ERR( + "dev_id %d failed to configure session details", + dev_id); + return ret; } } - CDEV_LOG_DEBUG("%s mempool created!", mp_name); return 0; } struct rte_cryptodev_sym_session * -rte_cryptodev_sym_session_create(uint8_t dev_id, - struct rte_crypto_sym_xform *xform) +rte_cryptodev_sym_session_create(struct rte_mempool *mp) { - struct rte_cryptodev *dev; struct rte_cryptodev_sym_session *sess; - void *_sess; - - if 
(!rte_cryptodev_pmd_is_valid_dev(dev_id)) { - CDEV_LOG_ERR("Invalid dev_id=%d", dev_id); - return NULL; - } - - dev = &rte_crypto_devices[dev_id]; /* Allocate a session structure from the session pool */ - if (rte_mempool_get(dev->data->session_pool, &_sess)) { - CDEV_LOG_ERR("Couldn't get object from session mempool"); + if (rte_mempool_get(mp, (void *)&sess)) { + CDEV_LOG_ERR("couldn't get object from session mempool"); return NULL; } - sess = _sess; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_configure, NULL); - if (dev->dev_ops->session_configure(dev, xform, sess->_private) == - NULL) { - CDEV_LOG_ERR("dev_id %d failed to configure session details", - dev_id); - - /* Return session to mempool */ - rte_mempool_put(sess->mp, _sess); - return NULL; - } + /* Clear device session pointer */ + memset(sess, 0, (sizeof(void *) * nb_drivers)); return sess; } int -rte_cryptodev_queue_pair_attach_sym_session(uint16_t qp_id, +rte_cryptodev_queue_pair_attach_sym_session(uint8_t dev_id, uint16_t qp_id, struct rte_cryptodev_sym_session *sess) { struct rte_cryptodev *dev; - if (!rte_cryptodev_pmd_is_valid_dev(sess->dev_id)) { - CDEV_LOG_ERR("Invalid dev_id=%d", sess->dev_id); + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%d", dev_id); return -EINVAL; } - dev = &rte_crypto_devices[sess->dev_id]; + dev = &rte_crypto_devices[dev_id]; /* The API is optional, not returning error if driver do not suuport */ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_attach_session, 0); - if (dev->dev_ops->qp_attach_session(dev, qp_id, sess->_private)) { + + void *sess_priv = get_session_private_data(sess, dev->driver_id); + + if (dev->dev_ops->qp_attach_session(dev, qp_id, sess_priv)) { CDEV_LOG_ERR("dev_id %d failed to attach qp: %d with session", - sess->dev_id, qp_id); + dev_id, qp_id); return -EPERM; } @@ -1424,53 +1152,109 @@ rte_cryptodev_queue_pair_attach_sym_session(uint16_t qp_id, } int -rte_cryptodev_queue_pair_detach_sym_session(uint16_t qp_id, +rte_cryptodev_queue_pair_detach_sym_session(uint8_t dev_id, uint16_t qp_id, struct rte_cryptodev_sym_session *sess) { struct rte_cryptodev *dev; - if (!rte_cryptodev_pmd_is_valid_dev(sess->dev_id)) { - CDEV_LOG_ERR("Invalid dev_id=%d", sess->dev_id); + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%d", dev_id); return -EINVAL; } - dev = &rte_crypto_devices[sess->dev_id]; + dev = &rte_crypto_devices[dev_id]; /* The API is optional, not returning error if driver do not suuport */ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_detach_session, 0); - if (dev->dev_ops->qp_detach_session(dev, qp_id, sess->_private)) { + + void *sess_priv = get_session_private_data(sess, dev->driver_id); + + if (dev->dev_ops->qp_detach_session(dev, qp_id, sess_priv)) { CDEV_LOG_ERR("dev_id %d failed to detach qp: %d from session", - sess->dev_id, qp_id); + dev_id, qp_id); return -EPERM; } return 0; } -struct rte_cryptodev_sym_session * -rte_cryptodev_sym_session_free(uint8_t dev_id, + +int +rte_cryptodev_sym_session_clear(uint8_t dev_id, struct rte_cryptodev_sym_session *sess) { struct rte_cryptodev *dev; - if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { - CDEV_LOG_ERR("Invalid dev_id=%d", dev_id); - return sess; - } + dev = rte_cryptodev_pmd_get_dev(dev_id); - dev = &rte_crypto_devices[dev_id]; + if (dev == NULL || sess == NULL) + return -EINVAL; - /* Check the session belongs to this device type */ - if (sess->dev_type != dev->dev_type) - return sess; + dev->dev_ops->session_clear(dev, sess); - /* Let device implementation clear 
session material */ - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_clear, sess); - dev->dev_ops->session_clear(dev, (void *)sess->_private); + return 0; +} + +int +rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess) +{ + uint8_t i; + void *sess_priv; + struct rte_mempool *sess_mp; + + if (sess == NULL) + return -EINVAL; + + /* Check that all device private data has been freed */ + for (i = 0; i < nb_drivers; i++) { + sess_priv = get_session_private_data(sess, i); + if (sess_priv != NULL) + return -EBUSY; + } /* Return session to mempool */ - rte_mempool_put(sess->mp, (void *)sess); + sess_mp = rte_mempool_from_obj(sess); + rte_mempool_put(sess_mp, sess); + + return 0; +} + +unsigned int +rte_cryptodev_get_header_session_size(void) +{ + /* + * Header contains pointers to the private data + * of all registered drivers + */ + return (sizeof(void *) * nb_drivers); +} + +unsigned int +rte_cryptodev_get_private_session_size(uint8_t dev_id) +{ + struct rte_cryptodev *dev; + unsigned int header_size = sizeof(void *) * nb_drivers; + unsigned int priv_sess_size; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) + return 0; + + dev = rte_cryptodev_pmd_get_dev(dev_id); + + if (*dev->dev_ops->session_get_size == NULL) + return 0; + + priv_sess_size = (*dev->dev_ops->session_get_size)(dev); + + /* + * If size is less than session header size, + * return the latter, as this guarantees that + * sessionless operations will work + */ + if (priv_sess_size < header_size) + return header_size; + + return priv_sess_size; - return NULL; } /** Initialise rte_crypto_op mempool element */ @@ -1572,3 +1356,58 @@ rte_cryptodev_pmd_create_dev_name(char *name, const char *dev_name_prefix) return -1; } + +TAILQ_HEAD(cryptodev_driver_list, cryptodev_driver); + +static struct cryptodev_driver_list cryptodev_driver_list = + TAILQ_HEAD_INITIALIZER(cryptodev_driver_list); + +struct cryptodev_driver { + TAILQ_ENTRY(cryptodev_driver) next; /**< Next in list. 
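For illustration only (not part of the patch): a minimal sketch of the reworked two-step session lifecycle implemented above, assuming a valid dev_id, a prepared xform chain, and a single mempool used for both the session header and the driver private data.

#include <rte_cryptodev.h>
#include <rte_mempool.h>

/* Hypothetical helper: allocate a session header and fill in the
 * per-driver private data for one device. */
static struct rte_cryptodev_sym_session *
setup_session(uint8_t dev_id, struct rte_mempool *sess_mp,
                struct rte_crypto_sym_xform *xform)
{
        struct rte_cryptodev_sym_session *sess;

        sess = rte_cryptodev_sym_session_create(sess_mp);
        if (sess == NULL)
                return NULL;

        if (rte_cryptodev_sym_session_init(dev_id, sess, xform, sess_mp) < 0) {
                rte_cryptodev_sym_session_free(sess);
                return NULL;
        }
        return sess;
}

/* Tear-down mirrors creation: clear the per-device private data first,
 * then return the header to its mempool. */
static void
teardown_session(uint8_t dev_id, struct rte_cryptodev_sym_session *sess)
{
        rte_cryptodev_sym_session_clear(dev_id, sess);
        rte_cryptodev_sym_session_free(sess);
}

Note that rte_cryptodev_sym_session_free() returns -EBUSY until every driver's private data slot has been cleared, which is why the clear step must come first.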
*/ + const struct rte_driver *driver; + uint8_t id; +}; + +int +rte_cryptodev_driver_id_get(const char *name) +{ + struct cryptodev_driver *driver; + const char *driver_name; + + if (name == NULL) { + RTE_LOG(DEBUG, CRYPTODEV, "name pointer NULL"); + return -1; + } + + TAILQ_FOREACH(driver, &cryptodev_driver_list, next) { + driver_name = driver->driver->name; + if (strncmp(driver_name, name, strlen(driver_name)) == 0) + return driver->id; + } + return -1; +} + +const char * +rte_cryptodev_driver_name_get(uint8_t driver_id) +{ + struct cryptodev_driver *driver; + + TAILQ_FOREACH(driver, &cryptodev_driver_list, next) + if (driver->id == driver_id) + return driver->driver->name; + return NULL; +} + +uint8_t +rte_cryptodev_allocate_driver(const struct rte_driver *drv) +{ + struct cryptodev_driver *driver; + + driver = malloc(sizeof(*driver)); + driver->driver = drv; + driver->id = nb_drivers; + + TAILQ_INSERT_TAIL(&cryptodev_driver_list, driver, next); + + return nb_drivers++; +} diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h index 88aeb873..7ec9c4bc 100644 --- a/lib/librte_cryptodev/rte_cryptodev.h +++ b/lib/librte_cryptodev/rte_cryptodev.h @@ -49,44 +49,7 @@ extern "C" { #include "rte_crypto.h" #include "rte_dev.h" #include <rte_common.h> - -#define CRYPTODEV_NAME_NULL_PMD crypto_null -/**< Null crypto PMD device name */ -#define CRYPTODEV_NAME_AESNI_MB_PMD crypto_aesni_mb -/**< AES-NI Multi buffer PMD device name */ -#define CRYPTODEV_NAME_AESNI_GCM_PMD crypto_aesni_gcm -/**< AES-NI GCM PMD device name */ -#define CRYPTODEV_NAME_OPENSSL_PMD crypto_openssl -/**< Open SSL Crypto PMD device name */ -#define CRYPTODEV_NAME_QAT_SYM_PMD crypto_qat -/**< Intel QAT Symmetric Crypto PMD device name */ -#define CRYPTODEV_NAME_SNOW3G_PMD crypto_snow3g -/**< SNOW 3G PMD device name */ -#define CRYPTODEV_NAME_KASUMI_PMD crypto_kasumi -/**< KASUMI PMD device name */ -#define CRYPTODEV_NAME_ZUC_PMD crypto_zuc -/**< KASUMI PMD device name */ -#define CRYPTODEV_NAME_ARMV8_PMD crypto_armv8 -/**< ARMv8 Crypto PMD device name */ -#define CRYPTODEV_NAME_SCHEDULER_PMD crypto_scheduler -/**< Scheduler Crypto PMD device name */ -#define CRYPTODEV_NAME_DPAA2_SEC_PMD cryptodev_dpaa2_sec_pmd -/**< NXP DPAA2 - SEC PMD device name */ - -/** Crypto device type */ -enum rte_cryptodev_type { - RTE_CRYPTODEV_NULL_PMD = 1, /**< Null crypto PMD */ - RTE_CRYPTODEV_AESNI_GCM_PMD, /**< AES-NI GCM PMD */ - RTE_CRYPTODEV_AESNI_MB_PMD, /**< AES-NI multi buffer PMD */ - RTE_CRYPTODEV_QAT_SYM_PMD, /**< QAT PMD Symmetric Crypto */ - RTE_CRYPTODEV_SNOW3G_PMD, /**< SNOW 3G PMD */ - RTE_CRYPTODEV_KASUMI_PMD, /**< KASUMI PMD */ - RTE_CRYPTODEV_ZUC_PMD, /**< ZUC PMD */ - RTE_CRYPTODEV_OPENSSL_PMD, /**< OpenSSL PMD */ - RTE_CRYPTODEV_ARMV8_PMD, /**< ARMv8 crypto PMD */ - RTE_CRYPTODEV_SCHEDULER_PMD, /**< Crypto Scheduler PMD */ - RTE_CRYPTODEV_DPAA2_SEC_PMD, /**< NXP DPAA2 - SEC PMD */ -}; +#include <rte_vdev.h> extern const char **rte_cyptodev_names; @@ -118,6 +81,38 @@ extern const char **rte_cyptodev_names; #define CDEV_PMD_TRACE(...) (void)0 #endif + + +/** + * A macro that points to an offset from the start + * of the crypto operation structure (rte_crypto_op) + * + * The returned pointer is cast to type t. + * + * @param c + * The crypto operation. + * @param o + * The offset from the start of the crypto operation. + * @param t + * The type to cast the result into. 
+ */ +#define rte_crypto_op_ctod_offset(c, t, o) \ + ((t)((char *)(c) + (o))) + +/** + * A macro that returns the physical address that points + * to an offset from the start of the crypto operation + * (rte_crypto_op) + * + * @param c + * The crypto operation. + * @param o + * The offset from the start of the crypto operation + * to calculate address from. + */ +#define rte_crypto_op_ctophys_offset(c, o) \ + (phys_addr_t)((c)->phys_addr + (o)) + /** * Crypto parameters range description */ @@ -137,7 +132,7 @@ struct rte_crypto_param_range { */ struct rte_cryptodev_symmetric_capability { enum rte_crypto_sym_xform_type xform_type; - /**< Transform type : Authentication / Cipher */ + /**< Transform type : Authentication / Cipher / AEAD */ RTE_STD_C11 union { struct { @@ -151,6 +146,8 @@ struct rte_cryptodev_symmetric_capability { /**< digest size range */ struct rte_crypto_param_range aad_size; /**< Additional authentication data size range */ + struct rte_crypto_param_range iv_size; + /**< Initialisation vector data size range */ } auth; /**< Symmetric Authentication transform capabilities */ struct { @@ -164,6 +161,20 @@ struct rte_cryptodev_symmetric_capability { /**< Initialisation vector data size range */ } cipher; /**< Symmetric Cipher transform capabilities */ + struct { + enum rte_crypto_aead_algorithm algo; + /**< AEAD algorithm */ + uint16_t block_size; + /**< algorithm block size */ + struct rte_crypto_param_range key_size; + /**< AEAD key size range */ + struct rte_crypto_param_range digest_size; + /**< digest size range */ + struct rte_crypto_param_range aad_size; + /**< Additional authentication data size range */ + struct rte_crypto_param_range iv_size; + /**< Initialisation vector data size range */ + } aead; }; }; @@ -185,6 +196,7 @@ struct rte_cryptodev_sym_capability_idx { union { enum rte_crypto_cipher_algorithm cipher; enum rte_crypto_auth_algorithm auth; + enum rte_crypto_aead_algorithm aead; } algo; }; @@ -226,7 +238,7 @@ rte_cryptodev_sym_capability_check_cipher( * @param capability Description of the symmetric crypto capability. * @param key_size Auth key size. * @param digest_size Auth digest size. - * @param aad_size Auth aad size. + * @param iv_size Auth initial vector size. * * @return * - Return 0 if the parameters are in range of the capability. @@ -235,7 +247,27 @@ rte_cryptodev_sym_capability_check_cipher( int rte_cryptodev_sym_capability_check_auth( const struct rte_cryptodev_symmetric_capability *capability, - uint16_t key_size, uint16_t digest_size, uint16_t aad_size); + uint16_t key_size, uint16_t digest_size, uint16_t iv_size); + +/** + * Check if key, digest, AAD and initial vector sizes are supported + * in crypto AEAD capability + * + * @param capability Description of the symmetric crypto capability. + * @param key_size AEAD key size. + * @param digest_size AEAD digest size. + * @param aad_size AEAD AAD size. + * @param iv_size AEAD IV size. + * + * @return + * - Return 0 if the parameters are in range of the capability. + * - Return -1 if the parameters are out of range of the capability. 
+ */ +int +rte_cryptodev_sym_capability_check_aead( + const struct rte_cryptodev_symmetric_capability *capability, + uint16_t key_size, uint16_t digest_size, uint16_t aad_size, + uint16_t iv_size); /** * Provide the cipher algorithm enum, given an algorithm string @@ -267,6 +299,21 @@ int rte_cryptodev_get_auth_algo_enum(enum rte_crypto_auth_algorithm *algo_enum, const char *algo_string); +/** + * Provide the AEAD algorithm enum, given an algorithm string + * + * @param algo_enum A pointer to the AEAD algorithm + * enum to be filled + * @param algo_string AEAD algorithm string + * + * @return + * - Return -1 if string is not valid + * - Return 0 is the string is valid + */ +int +rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum, + const char *algo_string); + /** Macro used at end of crypto PMD list */ #define RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST() \ { RTE_CRYPTO_OP_TYPE_UNDEFINED } @@ -321,7 +368,7 @@ rte_cryptodev_get_feature_name(uint64_t flag); /** Crypto device information */ struct rte_cryptodev_info { const char *driver_name; /**< Driver name. */ - enum rte_cryptodev_type dev_type; /**< Device type */ + uint8_t driver_id; /**< Driver identifier */ struct rte_pci_device *pci_dev; /**< PCI information. */ uint64_t feature_flags; /**< Feature flags */ @@ -385,37 +432,10 @@ struct rte_cryptodev_stats { #define RTE_CRYPTODEV_NAME_MAX_LEN (64) /**< Max length of name of crypto PMD */ -#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_QUEUE_PAIRS 8 -#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_SESSIONS 2048 - -/** - * @internal - * Initialisation parameters for virtual crypto devices - */ -struct rte_crypto_vdev_init_params { - unsigned max_nb_queue_pairs; - unsigned max_nb_sessions; - uint8_t socket_id; - char name[RTE_CRYPTODEV_NAME_MAX_LEN]; -}; /** - * Parse virtual device initialisation parameters input arguments - * @internal - * - * @params params Initialisation parameters with defaults set. - * @params input_args Command line arguments + * @deprecated * - * @return - * 0 on successful parse - * <0 on failure to parse - */ -int -rte_cryptodev_parse_vdev_init_params( - struct rte_crypto_vdev_init_params *params, - const char *input_args); - -/** * Create a virtual crypto device * * @param name Cryptodev PMD name of device to be created. @@ -426,6 +446,7 @@ rte_cryptodev_parse_vdev_init_params( * which will be between 0 and rte_cryptodev_count(). * - In the case of a failure, returns -1. */ +__rte_deprecated extern int rte_cryptodev_create_vdev(const char *name, const char *args); @@ -454,18 +475,19 @@ rte_cryptodev_count(void); /** * Get number of crypto device defined type. * - * @param type type of device. + * @param driver_id driver identifier. * * @return * Returns number of crypto device. */ extern uint8_t -rte_cryptodev_count_devtype(enum rte_cryptodev_type type); +rte_cryptodev_device_count_by_driver(uint8_t driver_id); /** - * Get number and identifiers of attached crypto device. + * Get number and identifiers of attached crypto devices that + * use the same crypto driver. * - * @param dev_name device name. + * @param driver_name driver name. * @param devices output devices identifiers. * @param nb_devices maximal number of devices. * @@ -473,7 +495,7 @@ rte_cryptodev_count_devtype(enum rte_cryptodev_type type); * Returns number of attached crypto device. 
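As a usage sketch for the new AEAD capability queries (not part of the patch): the lookup below assumes the pre-existing rte_cryptodev_sym_capability_get() helper and the RTE_CRYPTO_SYM_XFORM_AEAD / RTE_CRYPTO_AEAD_AES_GCM enums from rte_crypto_sym.h.

/* Returns non-zero if dev_id advertises AES-GCM with the given sizes. */
static int
dev_supports_aes_gcm(uint8_t dev_id, uint16_t key_sz, uint16_t digest_sz,
                uint16_t aad_sz, uint16_t iv_sz)
{
        struct rte_cryptodev_sym_capability_idx idx = {
                .type = RTE_CRYPTO_SYM_XFORM_AEAD,
                .algo.aead = RTE_CRYPTO_AEAD_AES_GCM,
        };
        const struct rte_cryptodev_symmetric_capability *cap;

        cap = rte_cryptodev_sym_capability_get(dev_id, &idx);
        if (cap == NULL)
                return 0;

        return rte_cryptodev_sym_capability_check_aead(cap, key_sz,
                        digest_sz, aad_sz, iv_sz) == 0;
}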
*/ uint8_t -rte_cryptodev_devices_get(const char *dev_name, uint8_t *devices, +rte_cryptodev_devices_get(const char *driver_name, uint8_t *devices, uint8_t nb_devices); /* * Return the NUMA socket to which a device is connected @@ -493,11 +515,6 @@ struct rte_cryptodev_config { int socket_id; /**< Socket to allocate resources on */ uint16_t nb_queue_pairs; /**< Number of queue pairs to configure on device */ - - struct { - uint32_t nb_objs; /**< Number of objects in mempool */ - uint32_t cache_size; /**< l-core object cache size */ - } session_mp; /**< Session mempool configuration */ }; /** @@ -574,6 +591,8 @@ rte_cryptodev_close(uint8_t dev_id); * *SOCKET_ID_ANY* if there is no NUMA constraint * for the DMA memory allocated for the receive * queue pair. + * @param session_pool Pointer to device session mempool, used + * for session-less operations. * * @return * - 0: Success, queue pair correctly set up. @@ -581,7 +600,8 @@ rte_cryptodev_close(uint8_t dev_id); */ extern int rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id, - const struct rte_cryptodev_qp_conf *qp_conf, int socket_id); + const struct rte_cryptodev_qp_conf *qp_conf, int socket_id, + struct rte_mempool *session_pool); /** * Start a specified queue pair of a device. It is used @@ -721,8 +741,6 @@ struct rte_cryptodev { enqueue_pkt_burst_t enqueue_burst; /**< Pointer to PMD transmit function. */ - const struct rte_cryptodev_driver *driver; - /**< Driver for this device */ struct rte_cryptodev_data *data; /**< Pointer to device data */ struct rte_cryptodev_ops *dev_ops; @@ -732,8 +750,8 @@ struct rte_cryptodev { struct rte_device *device; /**< Backing device */ - enum rte_cryptodev_type dev_type; - /**< Crypto device type */ + uint8_t driver_id; + /**< Crypto driver identifier*/ struct rte_cryptodev_cb_list link_intr_cbs; /**< User application callback for interrupts if present */ @@ -866,66 +884,100 @@ rte_cryptodev_enqueue_burst(uint8_t dev_id, uint16_t qp_id, /** Cryptodev symmetric crypto session */ struct rte_cryptodev_sym_session { - RTE_STD_C11 - struct { - uint8_t dev_id; - /**< Device Id */ - enum rte_cryptodev_type dev_type; - /** Crypto Device type session created on */ - struct rte_mempool *mp; - /**< Mempool session allocated from */ - } __rte_aligned(8); - /**< Public symmetric session details */ - - __extension__ char _private[0]; + __extension__ void *sess_private_data[0]; /**< Private session material */ }; /** - * Initialise a session for symmetric cryptographic operations. + * Create symmetric crypto session header (generic with no private data) * - * This function is used by the client to initialize immutable - * parameters of symmetric cryptographic operation. - * To perform the operation the rte_cryptodev_enqueue_burst function is - * used. Each mbuf should contain a reference to the session - * pointer returned from this function contained within it's crypto_op if a - * session-based operation is being provisioned. Memory to contain the session - * information is allocated from within mempool managed by the cryptodev. + * @param mempool Symmetric session mempool to allocate session + * objects from + * @return + * - On success return pointer to sym-session + * - On failure returns NULL + */ +struct rte_cryptodev_sym_session * +rte_cryptodev_sym_session_create(struct rte_mempool *mempool); + +/** + * Frees symmetric crypto session header, after checking that all + * the device private data has been freed, returning it + * to its original mempool. 
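A usage sketch (not part of the patch) of the new queue pair setup: the session mempool is now created by the application, sized from the header and per-device private session sizes, and passed to every queue pair for session-less operation. The element count, cache size and nb_descriptors below are illustrative values; rte_mempool_create() and struct rte_cryptodev_qp_conf are the existing APIs.

static struct rte_mempool *
create_session_pool(uint8_t dev_id, int socket_id)
{
        /* One pool can serve several PMDs if sized for the largest
         * private session data among them; here only dev_id is used. */
        unsigned int elt_size = rte_cryptodev_get_header_session_size() +
                        rte_cryptodev_get_private_session_size(dev_id);

        return rte_mempool_create("crypto_sess_mp", 2048, elt_size, 64, 0,
                        NULL, NULL, NULL, NULL, socket_id, 0);
}

static int
configure_dev(uint8_t dev_id, int socket_id, struct rte_mempool *sess_mp)
{
        struct rte_cryptodev_config conf = {
                .socket_id = socket_id,
                .nb_queue_pairs = 1,
        };
        struct rte_cryptodev_qp_conf qp_conf = { .nb_descriptors = 2048 };
        int ret;

        ret = rte_cryptodev_configure(dev_id, &conf);
        if (ret < 0)
                return ret;

        return rte_cryptodev_queue_pair_setup(dev_id, 0, &qp_conf,
                        socket_id, sess_mp);
}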
* - * The rte_cryptodev_session_free must be called to free allocated - * memory when the session is no longer required. + * @param sess Session header to be freed. * - * @param dev_id The device identifier. - * @param xform Crypto transform chain. + * @return + * - 0 if successful. + * - -EINVAL if session is NULL. + * - -EBUSY if not all device private data has been freed. + */ +int +rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess); + +/** + * Fill out private data for the device id, based on its device type. + * + * @param dev_id ID of device that we want the session to be used on + * @param sess Session where the private data will be attached to + * @param xforms Symmetric crypto transform operations to apply on flow + * processed with this session + * @param mempool Mempool where the private data is allocated. + * + * @return + * - On success, zero. + * - -EINVAL if input parameters are invalid. + * - -ENOTSUP if crypto device does not support the crypto transform. + * - -ENOMEM if the private session could not be allocated. + */ +int +rte_cryptodev_sym_session_init(uint8_t dev_id, + struct rte_cryptodev_sym_session *sess, + struct rte_crypto_sym_xform *xforms, + struct rte_mempool *mempool); +/** + * Frees private data for the device id, based on its device type, + * returning it to its mempool. + * + * @param dev_id ID of device that uses the session. + * @param sess Session containing the reference to the private data * * @return - * Pointer to the created session or NULL + * - 0 if successful. + * - -EINVAL if device is invalid or session is NULL. */ -extern struct rte_cryptodev_sym_session * -rte_cryptodev_sym_session_create(uint8_t dev_id, - struct rte_crypto_sym_xform *xform); +int +rte_cryptodev_sym_session_clear(uint8_t dev_id, + struct rte_cryptodev_sym_session *sess); /** - * Free the memory associated with a previously allocated session. + * Get the size of the header session, for all registered drivers. + * + * @return + * Size of the header session. + */ +unsigned int +rte_cryptodev_get_header_session_size(void); + +/** + * Get the size of the private session data for a device. * * @param dev_id The device identifier. - * @param session Session pointer previously allocated by - * *rte_cryptodev_sym_session_create*. * * @return - * NULL on successful freeing of session. - * Session pointer on failure to free session. + * - Size of the private data, if successful + * - 0 if device is invalid or does not have private session */ -extern struct rte_cryptodev_sym_session * -rte_cryptodev_sym_session_free(uint8_t dev_id, - struct rte_cryptodev_sym_session *session); +unsigned int +rte_cryptodev_get_private_session_size(uint8_t dev_id); /** * Attach queue pair with sym session. * - * @param qp_id Queue pair to which session will be attached. + * @param dev_id Device to which the session will be attached. + * @param qp_id Queue pair to which the session will be attached. * @param session Session pointer previously allocated by * *rte_cryptodev_sym_session_create*. * @@ -934,13 +986,14 @@ rte_cryptodev_sym_session_free(uint8_t dev_id, * - On failure, a negative value. */ int -rte_cryptodev_queue_pair_attach_sym_session(uint16_t qp_id, +rte_cryptodev_queue_pair_attach_sym_session(uint8_t dev_id, uint16_t qp_id, struct rte_cryptodev_sym_session *session); /** * Detach queue pair with sym session. * - * @param qp_id Queue pair to which session is attached. + * @param dev_id Device to which the session is attached. 
+ * @param qp_id Queue pair to which the session is attached. * @param session Session pointer previously allocated by * *rte_cryptodev_sym_session_create*. * @@ -949,9 +1002,48 @@ rte_cryptodev_queue_pair_attach_sym_session(uint16_t qp_id, * - On failure, a negative value. */ int -rte_cryptodev_queue_pair_detach_sym_session(uint16_t qp_id, +rte_cryptodev_queue_pair_detach_sym_session(uint8_t dev_id, uint16_t qp_id, struct rte_cryptodev_sym_session *session); +/** + * Provide driver identifier. + * + * @param name + * The pointer to a driver name. + * @return + * The driver type identifier or -1 if no driver found + */ +int rte_cryptodev_driver_id_get(const char *name); + +/** + * Provide driver name. + * + * @param driver_id + * The driver identifier. + * @return + * The driver name or null if no driver found + */ +const char *rte_cryptodev_driver_name_get(uint8_t driver_id); + +/** + * @internal + * Allocate Cryptodev driver. + * + * @param driver + * Pointer to rte_driver. + * @return + * The driver type identifier + */ +uint8_t rte_cryptodev_allocate_driver(const struct rte_driver *driver); + + +#define RTE_PMD_REGISTER_CRYPTO_DRIVER(drv, driver_id)\ +RTE_INIT(init_ ##driver_id);\ +static void init_ ##driver_id(void)\ +{\ + driver_id = rte_cryptodev_allocate_driver(&(drv).driver);\ +} + #ifdef __cplusplus } diff --git a/lib/librte_cryptodev/rte_cryptodev_pci.h b/lib/librte_cryptodev/rte_cryptodev_pci.h new file mode 100644 index 00000000..67eda96a --- /dev/null +++ b/lib/librte_cryptodev/rte_cryptodev_pci.h @@ -0,0 +1,92 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CRYPTODEV_PCI_H_ +#define _RTE_CRYPTODEV_PCI_H_ + +#include <rte_pci.h> +#include "rte_cryptodev.h" + +/** + * Initialisation function of a crypto driver invoked for each matching + * crypto PCI device detected during the PCI probing phase. 
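For context (illustrative, not part of the patch): how a PMD might use the new registration macro, and how an application maps between driver ids and names. The vdev driver struct and my_driver_id are hypothetical; a real PMD stores the assigned id in dev->driver_id for every device it creates.

#include <stdio.h>
#include <rte_vdev.h>
#include <rte_cryptodev.h>

/* PMD side: the driver object is defined elsewhere in the (hypothetical) PMD. */
extern struct rte_vdev_driver cryptodev_my_pmd_drv;
static uint8_t my_driver_id;

RTE_PMD_REGISTER_CRYPTO_DRIVER(cryptodev_my_pmd_drv, my_driver_id);

/* Application side: resolve names and ids at runtime. */
static void
print_driver_of(uint8_t dev_id)
{
        struct rte_cryptodev_info info;

        rte_cryptodev_info_get(dev_id, &info);
        printf("dev %u uses driver %s (id %u)\n", (unsigned int)dev_id,
                        rte_cryptodev_driver_name_get(info.driver_id),
                        (unsigned int)info.driver_id);
}

The reverse lookup, rte_cryptodev_driver_id_get("crypto_my_pmd"), lets an application select devices by driver name rather than by the removed rte_cryptodev_type enum.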
+ * + * @param dev The dev pointer is the address of the *rte_cryptodev* + * structure associated with the matching device and which + * has been [automatically] allocated in the + * *rte_crypto_devices* array. + * + * @return + * - 0: Success, the device is properly initialised by the driver. + * In particular, the driver MUST have set up the *dev_ops* pointer + * of the *dev* structure. + * - <0: Error code of the device initialisation failure. + */ +typedef int (*cryptodev_pci_init_t)(struct rte_cryptodev *dev); + +/** + * Finalisation function of a driver invoked for each matching + * PCI device detected during the PCI closing phase. + * + * @param dev The dev pointer is the address of the *rte_cryptodev* + * structure associated with the matching device and which + * has been [automatically] allocated in the + * *rte_crypto_devices* array. + * + * * @return + * - 0: Success, the device is properly finalised by the driver. + * In particular, the driver MUST free the *dev_ops* pointer + * of the *dev* structure. + * - <0: Error code of the device initialisation failure. + */ +typedef int (*cryptodev_pci_uninit_t)(struct rte_cryptodev *dev); + +/** + * @internal + * Wrapper for use by pci drivers as a .probe function to attach to a crypto + * interface. + */ +int +rte_cryptodev_pci_generic_probe(struct rte_pci_device *pci_dev, + size_t private_data_size, + cryptodev_pci_init_t dev_init); + +/** + * @internal + * Wrapper for use by pci drivers as a .remove function to detach a crypto + * interface. + */ +int +rte_cryptodev_pci_generic_remove(struct rte_pci_device *pci_dev, + cryptodev_pci_uninit_t dev_uninit); + +#endif /* _RTE_CRYPTODEV_PCI_H_ */ diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.c b/lib/librte_cryptodev/rte_cryptodev_pmd.c new file mode 100644 index 00000000..a57faadc --- /dev/null +++ b/lib/librte_cryptodev/rte_cryptodev_pmd.c @@ -0,0 +1,249 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
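A sketch (not part of the patch) of how a PCI crypto PMD could plug the wrappers declared above into its rte_pci_driver .probe/.remove callbacks; my_pci_dev_init/uninit and struct my_pci_private are hypothetical.

#include <rte_common.h>
#include <rte_pci.h>
#include "rte_cryptodev_pci.h"

struct my_pci_private { int placeholder; };     /* PMD private data layout */

static int
my_pci_dev_init(struct rte_cryptodev *dev)
{
        /* a real driver sets dev->dev_ops, dev->driver_id and the
         * enqueue/dequeue burst functions here */
        (void)dev;
        return 0;
}

static int
my_pci_dev_uninit(struct rte_cryptodev *dev)
{
        (void)dev;
        return 0;
}

static int
my_pci_probe(struct rte_pci_driver *drv __rte_unused,
                struct rte_pci_device *pci_dev)
{
        return rte_cryptodev_pci_generic_probe(pci_dev,
                        sizeof(struct my_pci_private), my_pci_dev_init);
}

static int
my_pci_remove(struct rte_pci_device *pci_dev)
{
        return rte_cryptodev_pci_generic_remove(pci_dev, my_pci_dev_uninit);
}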
+ */ + +#include <rte_malloc.h> + +#include "rte_cryptodev_vdev.h" +#include "rte_cryptodev_pci.h" +#include "rte_cryptodev_pmd.h" + +/** + * Parse name from argument + */ +static int +rte_cryptodev_vdev_parse_name_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + struct rte_crypto_vdev_init_params *params = extra_args; + + if (strlen(value) >= RTE_CRYPTODEV_NAME_MAX_LEN - 1) { + CDEV_LOG_ERR("Invalid name %s, should be less than " + "%u bytes", value, + RTE_CRYPTODEV_NAME_MAX_LEN - 1); + return -1; + } + + strncpy(params->name, value, RTE_CRYPTODEV_NAME_MAX_LEN); + + return 0; +} + +/** + * Parse integer from argument + */ +static int +rte_cryptodev_vdev_parse_integer_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + int *i = extra_args; + + *i = atoi(value); + if (*i < 0) { + CDEV_LOG_ERR("Argument has to be positive."); + return -1; + } + + return 0; +} + +struct rte_cryptodev * +rte_cryptodev_vdev_pmd_init(const char *name, size_t dev_private_size, + int socket_id, struct rte_vdev_device *vdev) +{ + struct rte_cryptodev *cryptodev; + + /* allocate device structure */ + cryptodev = rte_cryptodev_pmd_allocate(name, socket_id); + if (cryptodev == NULL) + return NULL; + + /* allocate private device structure */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + cryptodev->data->dev_private = + rte_zmalloc_socket("cryptodev device private", + dev_private_size, + RTE_CACHE_LINE_SIZE, + socket_id); + + if (cryptodev->data->dev_private == NULL) + rte_panic("Cannot allocate memzone for private device" + " data"); + } + + cryptodev->device = &vdev->device; + + /* initialise user call-back tail queue */ + TAILQ_INIT(&(cryptodev->link_intr_cbs)); + + return cryptodev; +} + +int +rte_cryptodev_vdev_parse_init_params(struct rte_crypto_vdev_init_params *params, + const char *input_args) +{ + struct rte_kvargs *kvlist = NULL; + int ret = 0; + + if (params == NULL) + return -EINVAL; + + if (input_args) { + kvlist = rte_kvargs_parse(input_args, + cryptodev_vdev_valid_params); + if (kvlist == NULL) + return -1; + + ret = rte_kvargs_process(kvlist, + RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG, + &rte_cryptodev_vdev_parse_integer_arg, + ¶ms->max_nb_queue_pairs); + if (ret < 0) + goto free_kvlist; + + ret = rte_kvargs_process(kvlist, + RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG, + &rte_cryptodev_vdev_parse_integer_arg, + ¶ms->max_nb_sessions); + if (ret < 0) + goto free_kvlist; + + ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_SOCKET_ID, + &rte_cryptodev_vdev_parse_integer_arg, + ¶ms->socket_id); + if (ret < 0) + goto free_kvlist; + + ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_NAME, + &rte_cryptodev_vdev_parse_name_arg, + params); + if (ret < 0) + goto free_kvlist; + } + +free_kvlist: + rte_kvargs_free(kvlist); + return ret; +} + +int +rte_cryptodev_pci_generic_probe(struct rte_pci_device *pci_dev, + size_t private_data_size, + cryptodev_pci_init_t dev_init) +{ + struct rte_cryptodev *cryptodev; + + char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN]; + + int retval; + + rte_pci_device_name(&pci_dev->addr, cryptodev_name, + sizeof(cryptodev_name)); + + cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id()); + if (cryptodev == NULL) + return -ENOMEM; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + cryptodev->data->dev_private = + rte_zmalloc_socket( + "cryptodev private structure", + private_data_size, + RTE_CACHE_LINE_SIZE, + rte_socket_id()); + + if (cryptodev->data->dev_private == NULL) + rte_panic("Cannot allocate memzone for 
private " + "device data"); + } + + cryptodev->device = &pci_dev->device; + + /* init user callbacks */ + TAILQ_INIT(&(cryptodev->link_intr_cbs)); + + /* Invoke PMD device initialization function */ + RTE_FUNC_PTR_OR_ERR_RET(*dev_init, -EINVAL); + retval = dev_init(cryptodev); + if (retval == 0) + return 0; + + CDEV_LOG_ERR("driver %s: crypto_dev_init(vendor_id=0x%x device_id=0x%x)" + " failed", pci_dev->device.driver->name, + (unsigned int) pci_dev->id.vendor_id, + (unsigned int) pci_dev->id.device_id); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(cryptodev->data->dev_private); + + /* free crypto device */ + rte_cryptodev_pmd_release_device(cryptodev); + + return -ENXIO; +} + +int +rte_cryptodev_pci_generic_remove(struct rte_pci_device *pci_dev, + cryptodev_pci_uninit_t dev_uninit) +{ + struct rte_cryptodev *cryptodev; + char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN]; + int ret; + + if (pci_dev == NULL) + return -EINVAL; + + rte_pci_device_name(&pci_dev->addr, cryptodev_name, + sizeof(cryptodev_name)); + + cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name); + if (cryptodev == NULL) + return -ENODEV; + + /* Invoke PMD device uninit function */ + if (dev_uninit) { + ret = dev_uninit(cryptodev); + if (ret) + return ret; + } + + /* free crypto device */ + rte_cryptodev_pmd_release_device(cryptodev); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(cryptodev->data->dev_private); + + cryptodev->device = NULL; + cryptodev->data = NULL; + + return 0; +} diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h index 17ef37c7..c983eb21 100644 --- a/lib/librte_cryptodev/rte_cryptodev_pmd.h +++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h @@ -47,7 +47,6 @@ extern "C" { #include <string.h> #include <rte_dev.h> -#include <rte_pci.h> #include <rte_malloc.h> #include <rte_mbuf.h> #include <rte_mempool.h> @@ -57,80 +56,6 @@ extern "C" { #include "rte_crypto.h" #include "rte_cryptodev.h" -struct rte_cryptodev_session { - RTE_STD_C11 - struct { - uint8_t dev_id; - enum rte_cryptodev_type type; - struct rte_mempool *mp; - } __rte_aligned(8); - - __extension__ char _private[0]; -}; - -struct rte_cryptodev_driver; - -/** - * Initialisation function of a crypto driver invoked for each matching - * crypto PCI device detected during the PCI probing phase. - * - * @param drv The pointer to the [matching] crypto driver structure - * supplied by the PMD when it registered itself. - * @param dev The dev pointer is the address of the *rte_cryptodev* - * structure associated with the matching device and which - * has been [automatically] allocated in the - * *rte_crypto_devices* array. - * - * @return - * - 0: Success, the device is properly initialised by the driver. - * In particular, the driver MUST have set up the *dev_ops* pointer - * of the *dev* structure. - * - <0: Error code of the device initialisation failure. - */ -typedef int (*cryptodev_init_t)(struct rte_cryptodev_driver *drv, - struct rte_cryptodev *dev); - -/** - * Finalisation function of a driver invoked for each matching - * PCI device detected during the PCI closing phase. - * - * @param drv The pointer to the [matching] driver structure supplied - * by the PMD when it registered itself. - * @param dev The dev pointer is the address of the *rte_cryptodev* - * structure associated with the matching device and which - * has been [automatically] allocated in the - * *rte_crypto_devices* array. 
- * - * * @return - * - 0: Success, the device is properly finalised by the driver. - * In particular, the driver MUST free the *dev_ops* pointer - * of the *dev* structure. - * - <0: Error code of the device initialisation failure. - */ -typedef int (*cryptodev_uninit_t)(const struct rte_cryptodev_driver *drv, - struct rte_cryptodev *dev); - -/** - * The structure associated with a PMD driver. - * - * Each driver acts as a PCI driver and is represented by a generic - * *crypto_driver* structure that holds: - * - * - An *rte_pci_driver* structure (which must be the first field). - * - * - The *cryptodev_init* function invoked for each matching PCI device. - * - * - The size of the private data to allocate for each matching device. - */ -struct rte_cryptodev_driver { - struct rte_pci_driver pci_drv; /**< The PMD is also a PCI driver. */ - unsigned dev_private_size; /**< Size of device private data. */ - - cryptodev_init_t cryptodev_init; /**< Device init function. */ - cryptodev_uninit_t cryptodev_uninit; /**< Device uninit function. */ -}; - - /** Global structure used for maintaining state of allocated crypto devices */ struct rte_cryptodev_global { struct rte_cryptodev *devs; /**< Device information array */ @@ -282,12 +207,13 @@ typedef int (*cryptodev_queue_pair_stop_t)(struct rte_cryptodev *dev, * @param qp_id Queue Pair Index * @param qp_conf Queue configuration structure * @param socket_id Socket Index + * @param session_pool Pointer to device session mempool * * @return Returns 0 on success. */ typedef int (*cryptodev_queue_pair_setup_t)(struct rte_cryptodev *dev, uint16_t qp_id, const struct rte_cryptodev_qp_conf *qp_conf, - int socket_id); + int socket_id, struct rte_mempool *session_pool); /** * Release memory resources allocated by given queue pair. @@ -341,39 +267,32 @@ typedef unsigned (*cryptodev_sym_get_session_private_size_t)( struct rte_cryptodev *dev); /** - * Initialize a Crypto session on a device. + * Configure a Crypto session on a device. * * @param dev Crypto device pointer * @param xform Single or chain of crypto xforms * @param priv_sess Pointer to cryptodev's private session structure + * @param mp Mempool where the private session is allocated * * @return - * - Returns private session structure on success. - * - Returns NULL on failure. + * - Returns 0 if private session structure have been created successfully. + * - Returns -EINVAL if input parameters are invalid. + * - Returns -ENOTSUP if crypto device does not support the crypto transform. + * - Returns -ENOMEM if the private session could not be allocated. */ -typedef void (*cryptodev_sym_initialize_session_t)(struct rte_mempool *mempool, - void *session_private); +typedef int (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev, + struct rte_crypto_sym_xform *xform, + struct rte_cryptodev_sym_session *session, + struct rte_mempool *mp); /** - * Configure a Crypto session on a device. + * Free driver private session data. * * @param dev Crypto device pointer - * @param xform Single or chain of crypto xforms - * @param priv_sess Pointer to cryptodev's private session structure - * - * @return - * - Returns private session structure on success. - * - Returns NULL on failure. - */ -typedef void * (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev, - struct rte_crypto_sym_xform *xform, void *session_private); - -/** - * Free Crypto session. 
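An illustrative PMD-side pair for the reworked session ops above (not part of the patch), using the get/set_session_private_data() helpers added to rte_cryptodev_pmd.h later in this patch. struct my_session is a hypothetical private layout; its memory comes from the mempool passed down by rte_cryptodev_sym_session_init().

#include <errno.h>
#include <string.h>
#include <rte_mempool.h>
#include "rte_cryptodev_pmd.h"

struct my_session { uint8_t key[32]; /* parsed xform state, ... */ };

static int
my_session_configure(struct rte_cryptodev *dev,
                struct rte_crypto_sym_xform *xform,
                struct rte_cryptodev_sym_session *sess,
                struct rte_mempool *mp)
{
        struct my_session *priv;

        if (rte_mempool_get(mp, (void **)&priv))
                return -ENOMEM;

        /* parse the xform chain into priv here ... */
        (void)xform;

        set_session_private_data(sess, dev->driver_id, priv);
        return 0;
}

static void
my_session_clear(struct rte_cryptodev *dev,
                struct rte_cryptodev_sym_session *sess)
{
        void *priv = get_session_private_data(sess, dev->driver_id);

        if (priv) {
                memset(priv, 0, sizeof(struct my_session));
                rte_mempool_put(rte_mempool_from_obj(priv), priv);
                set_session_private_data(sess, dev->driver_id, NULL);
        }
}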
- * @param session Cryptodev session structure to free + * @param sess Cryptodev session structure */ typedef void (*cryptodev_sym_free_session_t)(struct rte_cryptodev *dev, - void *session_private); + struct rte_cryptodev_sym_session *sess); /** * Optional API for drivers to attach sessions with queue pair. @@ -428,8 +347,6 @@ struct rte_cryptodev_ops { cryptodev_sym_get_session_private_size_t session_get_size; /**< Return private session. */ - cryptodev_sym_initialize_session_t session_initialize; - /**< Initialization function for private session data */ cryptodev_sym_configure_session_t session_configure; /**< Configure a Crypto session. */ cryptodev_sym_free_session_t session_clear; @@ -456,23 +373,6 @@ struct rte_cryptodev * rte_cryptodev_pmd_allocate(const char *name, int socket_id); /** - * Creates a new virtual crypto device and returns the pointer - * to that device. - * - * @param name PMD type name - * @param dev_private_size Size of crypto PMDs private data - * @param socket_id Socket to allocate resources on. - * - * @return - * - Cryptodev pointer if device is successfully created. - * - NULL if device cannot be created. - */ -struct rte_cryptodev * -rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size, - int socket_id); - - -/** * Function for internal use by dummy drivers primarily, e.g. ring-based * driver. * Release the specified cryptodev device. @@ -499,25 +399,25 @@ void rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev, enum rte_cryptodev_event_type event); /** - * Wrapper for use by pci drivers as a .probe function to attach to a crypto - * interface. - */ -int rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv, - struct rte_pci_device *pci_dev); - -/** - * Wrapper for use by pci drivers as a .remove function to detach a crypto - * interface. - */ -int rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev); - -/** * @internal * Create unique device name */ int rte_cryptodev_pmd_create_dev_name(char *name, const char *dev_name_prefix); +static inline void * +get_session_private_data(const struct rte_cryptodev_sym_session *sess, + uint8_t driver_id) { + return sess->sess_private_data[driver_id]; +} + +static inline void +set_session_private_data(struct rte_cryptodev_sym_session *sess, + uint8_t driver_id, void *private_data) +{ + sess->sess_private_data[driver_id] = private_data; +} + #ifdef __cplusplus } #endif diff --git a/lib/librte_cryptodev/rte_cryptodev_vdev.h b/lib/librte_cryptodev/rte_cryptodev_vdev.h new file mode 100644 index 00000000..94ab9d33 --- /dev/null +++ b/lib/librte_cryptodev/rte_cryptodev_vdev.h @@ -0,0 +1,100 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CRYPTODEV_VDEV_H_ +#define _RTE_CRYPTODEV_VDEV_H_ + +#include <rte_vdev.h> +#include <inttypes.h> + +#include "rte_cryptodev.h" + +#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_QUEUE_PAIRS 8 +#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_SESSIONS 2048 + +#define RTE_CRYPTODEV_VDEV_NAME ("name") +#define RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG ("max_nb_queue_pairs") +#define RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG ("max_nb_sessions") +#define RTE_CRYPTODEV_VDEV_SOCKET_ID ("socket_id") + +static const char * const cryptodev_vdev_valid_params[] = { + RTE_CRYPTODEV_VDEV_NAME, + RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG, + RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG, + RTE_CRYPTODEV_VDEV_SOCKET_ID +}; + +/** + * @internal + * Initialisation parameters for virtual crypto devices + */ +struct rte_crypto_vdev_init_params { + unsigned int max_nb_queue_pairs; + unsigned int max_nb_sessions; + uint8_t socket_id; + char name[RTE_CRYPTODEV_NAME_MAX_LEN]; +}; + +/** + * @internal + * Creates a new virtual crypto device and returns the pointer + * to that device. + * + * @param name PMD type name + * @param dev_private_size Size of crypto PMDs private data + * @param socket_id Socket to allocate resources on. + * @param vdev Pointer to virtual device structure. + * + * @return + * - Cryptodev pointer if device is successfully created. + * - NULL if device cannot be created. + */ +struct rte_cryptodev * +rte_cryptodev_vdev_pmd_init(const char *name, size_t dev_private_size, + int socket_id, struct rte_vdev_device *vdev); + +/** + * @internal + * Parse virtual device initialisation parameters input arguments + * + * @params params Initialisation parameters with defaults set. 
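A sketch (not part of the patch) of a virtual crypto PMD probe built on the helpers declared in this new header; rte_vdev_device_name()/rte_vdev_device_args() are assumed from rte_vdev.h, and struct my_vdev_private is hypothetical.

#include <errno.h>
#include <rte_lcore.h>
#include "rte_cryptodev_vdev.h"

struct my_vdev_private { unsigned int max_nb_qps; unsigned int max_nb_sess; };

static int
my_vdev_probe(struct rte_vdev_device *vdev)
{
        struct rte_crypto_vdev_init_params params = {
                .max_nb_queue_pairs = RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_QUEUE_PAIRS,
                .max_nb_sessions = RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_SESSIONS,
                .socket_id = (uint8_t)rte_socket_id(),
        };
        struct rte_cryptodev *dev;

        if (rte_cryptodev_vdev_parse_init_params(&params,
                        rte_vdev_device_args(vdev)) < 0)
                return -EINVAL;

        dev = rte_cryptodev_vdev_pmd_init(rte_vdev_device_name(vdev),
                        sizeof(struct my_vdev_private), params.socket_id,
                        vdev);
        if (dev == NULL)
                return -ENODEV;

        /* set dev->dev_ops, dev->driver_id and burst functions here */
        return 0;
}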
+ * @params input_args Command line arguments + * + * @return + * 0 on successful parse + * <0 on failure to parse + */ +int +rte_cryptodev_vdev_parse_init_params(struct rte_crypto_vdev_init_params *params, + const char *input_args); + +#endif /* _RTE_CRYPTODEV_VDEV_H_ */ diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map index 9ac510ec..e9ba88ac 100644 --- a/lib/librte_cryptodev/rte_cryptodev_version.map +++ b/lib/librte_cryptodev/rte_cryptodev_version.map @@ -6,7 +6,6 @@ DPDK_16.04 { rte_cryptodev_callback_unregister; rte_cryptodev_close; rte_cryptodev_count; - rte_cryptodev_count_devtype; rte_cryptodev_configure; rte_cryptodev_create_vdev; rte_cryptodev_get_dev_id; @@ -15,7 +14,6 @@ DPDK_16.04 { rte_cryptodev_pmd_allocate; rte_cryptodev_pmd_callback_process; rte_cryptodev_pmd_release_device; - rte_cryptodev_pmd_virtual_dev_init; rte_cryptodev_sym_session_create; rte_cryptodev_sym_session_free; rte_cryptodev_socket_id; @@ -32,21 +30,6 @@ DPDK_16.04 { local: *; }; -DPDK_16.07 { - global: - - rte_cryptodev_parse_vdev_init_params; - -} DPDK_16.04; - -DPDK_16.11 { - global: - - rte_cryptodev_pci_probe; - rte_cryptodev_pci_remove; - -} DPDK_16.07; - DPDK_17.02 { global: @@ -63,7 +46,7 @@ DPDK_17.02 { rte_crypto_cipher_algorithm_strings; rte_crypto_cipher_operation_strings; -} DPDK_16.11; +} DPDK_16.04; DPDK_17.05 { global: @@ -74,3 +57,25 @@ DPDK_17.05 { rte_cryptodev_queue_pair_detach_sym_session; } DPDK_17.02; + +DPDK_17.08 { + global: + + rte_cryptodev_allocate_driver; + rte_cryptodev_device_count_by_driver; + rte_cryptodev_driver_id_get; + rte_cryptodev_driver_name_get; + rte_cryptodev_get_aead_algo_enum; + rte_cryptodev_get_header_session_size; + rte_cryptodev_get_private_session_size; + rte_cryptodev_pci_generic_probe; + rte_cryptodev_pci_generic_remove; + rte_cryptodev_sym_capability_check_aead; + rte_cryptodev_sym_session_init; + rte_cryptodev_sym_session_clear; + rte_cryptodev_vdev_parse_init_params; + rte_cryptodev_vdev_pmd_init; + rte_crypto_aead_algorithm_strings; + rte_crypto_aead_operation_strings; + +} DPDK_17.05; diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile index 3ffb911c..b417ee7b 100644 --- a/lib/librte_distributor/Makefile +++ b/lib/librte_distributor/Makefile @@ -46,10 +46,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) := rte_distributor_v20.c SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor.c ifeq ($(CONFIG_RTE_ARCH_X86),y) SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_sse.c -# distributor SIMD algo needs SSE4.2 support -ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSE4_2,$(CFLAGS)),) -CFLAGS_rte_distributor_match_sse.o += -msse4.2 -endif else SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_generic.c endif diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c index e4dfa7f0..20ba9ffb 100644 --- a/lib/librte_distributor/rte_distributor.c +++ b/lib/librte_distributor/rte_distributor.c @@ -41,7 +41,8 @@ #include <rte_errno.h> #include <rte_string_fns.h> #include <rte_eal_memconfig.h> -#include <rte_compat.h> +#include <rte_pause.h> + #include "rte_distributor_private.h" #include "rte_distributor.h" #include "rte_distributor_v20.h" @@ -656,12 +657,10 @@ rte_distributor_create_v1705(const char *name, d->num_workers = num_workers; d->alg_type = alg_type; + d->dist_match_fn = RTE_DIST_MATCH_SCALAR; #if defined(RTE_ARCH_X86) - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2)) - d->dist_match_fn = 
RTE_DIST_MATCH_VECTOR; - else + d->dist_match_fn = RTE_DIST_MATCH_VECTOR; #endif - d->dist_match_fn = RTE_DIST_MATCH_SCALAR; /* * Set up the backog tags so they're pointing at the second cache diff --git a/lib/librte_distributor/rte_distributor_v20.c b/lib/librte_distributor/rte_distributor_v20.c index bb6c5d70..b09abecd 100644 --- a/lib/librte_distributor/rte_distributor_v20.c +++ b/lib/librte_distributor/rte_distributor_v20.c @@ -41,6 +41,8 @@ #include <rte_compat.h> #include <rte_string_fns.h> #include <rte_eal_memconfig.h> +#include <rte_pause.h> + #include "rte_distributor_v20.h" #include "rte_distributor_private.h" diff --git a/lib/librte_eal/bsdapp/contigmem/contigmem.c b/lib/librte_eal/bsdapp/contigmem/contigmem.c index da971deb..e8fb9087 100644 --- a/lib/librte_eal/bsdapp/contigmem/contigmem.c +++ b/lib/librte_eal/bsdapp/contigmem/contigmem.c @@ -50,24 +50,37 @@ __FBSDID("$FreeBSD$"); #include <vm/vm.h> #include <vm/pmap.h> +#include <vm/vm_param.h> #include <vm/vm_object.h> #include <vm/vm_page.h> #include <vm/vm_pager.h> +#include <vm/vm_phys.h> + +struct contigmem_buffer { + void *addr; + int refcnt; + struct mtx mtx; +}; + +struct contigmem_vm_handle { + int buffer_index; +}; static int contigmem_load(void); static int contigmem_unload(void); static int contigmem_physaddr(SYSCTL_HANDLER_ARGS); -static d_mmap_t contigmem_mmap; static d_mmap_single_t contigmem_mmap_single; static d_open_t contigmem_open; +static d_close_t contigmem_close; static int contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS; static int64_t contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE; static eventhandler_tag contigmem_eh_tag; -static void *contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS]; +static struct contigmem_buffer contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS]; static struct cdev *contigmem_cdev = NULL; +static int contigmem_refcnt; TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers); TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size); @@ -78,6 +91,8 @@ SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD, &contigmem_num_buffers, 0, "Number of contigmem buffers allocated"); SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, CTLFLAG_RD, &contigmem_buffer_size, 0, "Size of each contiguous buffer"); +SYSCTL_INT(_hw_contigmem, OID_AUTO, num_references, CTLFLAG_RD, + &contigmem_refcnt, 0, "Number of references to contigmem"); static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0, "physaddr"); @@ -114,42 +129,49 @@ MODULE_VERSION(contigmem, 1); static struct cdevsw contigmem_ops = { .d_name = "contigmem", .d_version = D_VERSION, - .d_mmap = contigmem_mmap, + .d_flags = D_TRACKCLOSE, .d_mmap_single = contigmem_mmap_single, .d_open = contigmem_open, + .d_close = contigmem_close, }; static int contigmem_load() { char index_string[8], description[32]; - int i; + int i, error = 0; + void *addr; if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) { printf("%d buffers requested is greater than %d allowed\n", contigmem_num_buffers, RTE_CONTIGMEM_MAX_NUM_BUFS); - return EINVAL; + error = EINVAL; + goto error; } if (contigmem_buffer_size < PAGE_SIZE || (contigmem_buffer_size & (contigmem_buffer_size - 1)) != 0) { printf("buffer size 0x%lx is not greater than PAGE_SIZE and " "power of two\n", contigmem_buffer_size); - return EINVAL; + error = EINVAL; + goto error; } for (i = 0; i < contigmem_num_buffers; i++) { - contigmem_buffers[i] = - contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO, 0, - BUS_SPACE_MAXADDR, contigmem_buffer_size, 0); - - if 
(contigmem_buffers[i] == NULL) { + addr = contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO, + 0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0); + if (addr == NULL) { printf("contigmalloc failed for buffer %d\n", i); - return ENOMEM; + error = ENOMEM; + goto error; } - printf("%2u: virt=%p phys=%p\n", i, contigmem_buffers[i], - (void *)pmap_kextract((vm_offset_t)contigmem_buffers[i])); + printf("%2u: virt=%p phys=%p\n", i, addr, + (void *)pmap_kextract((vm_offset_t)addr)); + + mtx_init(&contigmem_buffers[i].mtx, "contigmem", NULL, MTX_DEF); + contigmem_buffers[i].addr = addr; + contigmem_buffers[i].refcnt = 0; snprintf(index_string, sizeof(index_string), "%d", i); snprintf(description, sizeof(description), @@ -165,6 +187,17 @@ contigmem_load() GID_WHEEL, 0600, "contigmem"); return 0; + +error: + for (i = 0; i < contigmem_num_buffers; i++) { + if (contigmem_buffers[i].addr != NULL) + contigfree(contigmem_buffers[i].addr, + contigmem_buffer_size, M_CONTIGMEM); + if (mtx_initialized(&contigmem_buffers[i].mtx)) + mtx_destroy(&contigmem_buffers[i].mtx); + } + + return error; } static int @@ -172,16 +205,22 @@ contigmem_unload() { int i; + if (contigmem_refcnt > 0) + return EBUSY; + if (contigmem_cdev != NULL) destroy_dev(contigmem_cdev); if (contigmem_eh_tag != NULL) EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag); - for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++) - if (contigmem_buffers[i] != NULL) - contigfree(contigmem_buffers[i], contigmem_buffer_size, - M_CONTIGMEM); + for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++) { + if (contigmem_buffers[i].addr != NULL) + contigfree(contigmem_buffers[i].addr, + contigmem_buffer_size, M_CONTIGMEM); + if (mtx_initialized(&contigmem_buffers[i].mtx)) + mtx_destroy(&contigmem_buffers[i].mtx); + } return 0; } @@ -192,7 +231,7 @@ contigmem_physaddr(SYSCTL_HANDLER_ARGS) uint64_t physaddr; int index = (int)(uintptr_t)arg1; - physaddr = (uint64_t)vtophys(contigmem_buffers[index]); + physaddr = (uint64_t)vtophys(contigmem_buffers[index].addr); return sysctl_handle_64(oidp, &physaddr, 0, req); } @@ -200,22 +239,121 @@ static int contigmem_open(struct cdev *cdev, int fflags, int devtype, struct thread *td) { + + atomic_add_int(&contigmem_refcnt, 1); + return 0; } static int -contigmem_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr, - int prot, vm_memattr_t *memattr) +contigmem_close(struct cdev *cdev, int fflags, int devtype, + struct thread *td) { - *paddr = offset; + atomic_subtract_int(&contigmem_refcnt, 1); + return 0; } static int +contigmem_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, + vm_ooffset_t foff, struct ucred *cred, u_short *color) +{ + struct contigmem_vm_handle *vmh = handle; + struct contigmem_buffer *buf; + + buf = &contigmem_buffers[vmh->buffer_index]; + + atomic_add_int(&contigmem_refcnt, 1); + + mtx_lock(&buf->mtx); + if (buf->refcnt == 0) + memset(buf->addr, 0, contigmem_buffer_size); + buf->refcnt++; + mtx_unlock(&buf->mtx); + + return 0; +} + +static void +contigmem_cdev_pager_dtor(void *handle) +{ + struct contigmem_vm_handle *vmh = handle; + struct contigmem_buffer *buf; + + buf = &contigmem_buffers[vmh->buffer_index]; + + mtx_lock(&buf->mtx); + buf->refcnt--; + mtx_unlock(&buf->mtx); + + free(vmh, M_CONTIGMEM); + + atomic_subtract_int(&contigmem_refcnt, 1); +} + +static int +contigmem_cdev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, + vm_page_t *mres) +{ + vm_paddr_t paddr; + vm_page_t m_paddr, page; + vm_memattr_t memattr, memattr1; + + memattr = object->memattr; + + 
VM_OBJECT_WUNLOCK(object); + + paddr = offset; + + m_paddr = vm_phys_paddr_to_vm_page(paddr); + if (m_paddr != NULL) { + memattr1 = pmap_page_get_memattr(m_paddr); + if (memattr1 != memattr) + memattr = memattr1; + } + + if (((*mres)->flags & PG_FICTITIOUS) != 0) { + /* + * If the passed in result page is a fake page, update it with + * the new physical address. + */ + page = *mres; + VM_OBJECT_WLOCK(object); + vm_page_updatefake(page, paddr, memattr); + } else { + vm_page_t mret; + /* + * Replace the passed in reqpage page with our own fake page and + * free up the original page. + */ + page = vm_page_getfake(paddr, memattr); + VM_OBJECT_WLOCK(object); + mret = vm_page_replace(page, object, (*mres)->pindex); + KASSERT(mret == *mres, + ("invalid page replacement, old=%p, ret=%p", *mres, mret)); + vm_page_lock(mret); + vm_page_free(mret); + vm_page_unlock(mret); + *mres = page; + } + + page->valid = VM_PAGE_BITS_ALL; + + return VM_PAGER_OK; +} + +static struct cdev_pager_ops contigmem_cdev_pager_ops = { + .cdev_pg_ctor = contigmem_cdev_pager_ctor, + .cdev_pg_dtor = contigmem_cdev_pager_dtor, + .cdev_pg_fault = contigmem_cdev_pager_fault, +}; + +static int contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, struct vm_object **obj, int nprot) { + struct contigmem_vm_handle *vmh; uint64_t buffer_index; /* @@ -227,10 +365,17 @@ contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, if (buffer_index >= contigmem_num_buffers) return EINVAL; - memset(contigmem_buffers[buffer_index], 0, contigmem_buffer_size); - *offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index]); - *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset, - curthread->td_ucred); + if (size > contigmem_buffer_size) + return EINVAL; + + vmh = malloc(sizeof(*vmh), M_CONTIGMEM, M_NOWAIT | M_ZERO); + if (vmh == NULL) + return ENOMEM; + vmh->buffer_index = buffer_index; + + *offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index].addr); + *obj = cdev_pager_allocate(vmh, OBJT_DEVICE, &contigmem_cdev_pager_ops, + size, nprot, *offset, curthread->td_ucred); return 0; } diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index a0f99502..005019ed 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -48,7 +48,7 @@ LDLIBS += -lgcc_s EXPORT_MAP := rte_eal_version.map -LIBABIVER := 4 +LIBABIVER := 5 # specific to bsdapp exec-env SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c @@ -87,6 +87,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 05f0c1f9..5fa59884 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -45,7 +45,6 @@ #include <stddef.h> #include <errno.h> #include <limits.h> -#include <errno.h> #include <sys/mman.h> #include <sys/queue.h> @@ -59,6 +58,7 @@ #include <rte_errno.h> #include <rte_per_lcore.h> #include <rte_lcore.h> +#include <rte_service_component.h> #include <rte_log.h> #include <rte_random.h> #include <rte_cycles.h> @@ -69,7 +69,6 @@ #include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> -#include <rte_common.h> #include <rte_version.h> #include <rte_atomic.h> #include 
<malloc_heap.h> @@ -615,6 +614,11 @@ rte_eal_init(int argc, char **argv) rte_config.master_lcore, thread_id, cpuset, ret == 0 ? "" : "..."); + if (eal_option_device_parse()) { + rte_errno = ENODEV; + return -1; + } + if (rte_bus_scan()) { rte_eal_init_alert("Cannot scan the buses for devices\n"); rte_errno = ENODEV; @@ -653,6 +657,14 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* initialize services so vdevs register service during bus_probe. */ + ret = rte_service_init(); + if (ret) { + rte_eal_init_alert("rte_service_init() failed\n"); + rte_errno = ENOEXEC; + return -1; + } + /* Probe all the buses and devices/drivers on them */ if (rte_bus_probe()) { rte_eal_init_alert("Cannot probe devices\n"); @@ -660,6 +672,15 @@ rte_eal_init(int argc, char **argv) return -1; } + /* initialize default service/lcore mappings and start running. Ignore + * -ENOTSUP, as it indicates no service coremask passed to EAL. + */ + ret = rte_service_start_with_defaults(); + if (ret < 0 && ret != -ENOTSUP) { + rte_errno = ENOEXEC; + return -1; + } + rte_eal_mcfg_complete(); return fctret; diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c index e321461d..04eacdcc 100644 --- a/lib/librte_eal/bsdapp/eal/eal_pci.c +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c @@ -41,7 +41,6 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> -#include <stdarg.h> #include <errno.h> #include <dirent.h> #include <limits.h> @@ -52,7 +51,6 @@ #include <dev/pci/pcireg.h> #if defined(RTE_ARCH_X86) -#include <sys/types.h> #include <machine/cpufunc.h> #endif @@ -282,8 +280,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) /* FreeBSD has no NUMA support (yet) */ dev->device.numa_node = 0; - rte_pci_device_name(&dev->addr, dev->name, sizeof(dev->name)); - dev->device.name = dev->name; + pci_name_set(dev); /* FreeBSD has only one pass through driver */ dev->kdrv = RTE_KDRV_NIC_UIO; @@ -334,6 +331,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; + pci_name_set(dev2); memmove(dev2->mem_resource, dev->mem_resource, sizeof(dev->mem_resource)); @@ -396,7 +394,7 @@ rte_pci_scan(void) close(fd); - RTE_LOG(ERR, EAL, "PCI scan found %u devices\n", dev_count); + RTE_LOG(DEBUG, EAL, "PCI scan found %u devices\n", dev_count); return 0; error: diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c index 1b8cd8a6..783d68c5 100644 --- a/lib/librte_eal/bsdapp/eal/eal_thread.c +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c @@ -49,7 +49,6 @@ #include <rte_memzone.h> #include <rte_per_lcore.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_lcore.h> #include "eal_private.h" diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map index 2e48a736..aac6fd77 100644 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -193,3 +193,47 @@ DPDK_17.05 { vfio_get_group_no; } DPDK_17.02; + +DPDK_17.08 { + global: + + rte_bus_find; + rte_bus_find_by_device; + rte_bus_find_by_name; + rte_log_get_level; + +} DPDK_17.05; + +EXPERIMENTAL { + global: + + rte_eal_devargs_insert; + rte_eal_devargs_parse; + rte_eal_devargs_remove; + rte_eal_hotplug_add; + rte_eal_hotplug_remove; + rte_service_disable_on_lcore; + rte_service_dump; + rte_service_enable_on_lcore; + rte_service_get_by_id; + 
rte_service_get_by_name; + rte_service_get_count; + rte_service_get_enabled_on_lcore; + rte_service_is_running; + rte_service_lcore_add; + rte_service_lcore_count; + rte_service_lcore_del; + rte_service_lcore_list; + rte_service_lcore_reset_all; + rte_service_lcore_start; + rte_service_lcore_stop; + rte_service_probe_capability; + rte_service_register; + rte_service_reset; + rte_service_set_stats_enable; + rte_service_start; + rte_service_start_with_defaults; + rte_service_stop; + rte_service_unregister; + +} DPDK_17.08; diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index a5bd1089..e8fd67a2 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -41,10 +41,11 @@ INC += rte_eal_memconfig.h rte_malloc_heap.h INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h INC += rte_malloc.h rte_keepalive.h rte_time.h +INC += rte_service.h rte_service_component.h GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h -GENERIC_INC += rte_vect.h rte_io.h +GENERIC_INC += rte_vect.h rte_pause.h rte_io.h # defined in mk/arch/$(RTE_ARCH)/rte.vars.mk ARCH_DIR ?= $(RTE_ARCH) diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c index 79160a60..5636e9c1 100644 --- a/lib/librte_eal/common/arch/arm/rte_cpuflags.c +++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright (C) Cavium networks Ltd. 2015. + * Copyright (C) Cavium, Inc. 2015. * Copyright(c) 2015 RehiveTech. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -14,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c index 8f9baf8b..08bec2d9 100644 --- a/lib/librte_eal/common/eal_common_bus.c +++ b/lib/librte_eal/common/eal_common_bus.c @@ -1,8 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2016 NXP - * All rights reserved. + * Copyright 2016 NXP. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -50,6 +49,9 @@ rte_bus_register(struct rte_bus *bus) /* A bus should mandatorily have the scan implemented */ RTE_VERIFY(bus->scan); RTE_VERIFY(bus->probe); + RTE_VERIFY(bus->find_device); + /* Buses supporting driver plug also require unplug. 
*/ + RTE_VERIFY(!bus->plug || bus->unplug); TAILQ_INSERT_TAIL(&rte_bus_list, bus, next); RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name); @@ -89,7 +91,7 @@ rte_bus_probe(void) struct rte_bus *bus, *vbus = NULL; TAILQ_FOREACH(bus, &rte_bus_list, next) { - if (!strcmp(bus->name, "virtual")) { + if (!strcmp(bus->name, "vdev")) { vbus = bus; continue; } @@ -145,3 +147,78 @@ rte_bus_dump(FILE *f) } } } + +struct rte_bus * +rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp, + const void *data) +{ + struct rte_bus *bus = NULL; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { + if (start && bus == start) { + start = NULL; /* starting point found */ + continue; + } + if (cmp(bus, data) == 0) + break; + } + return bus; +} + +static int +cmp_rte_device(const struct rte_device *dev1, const void *_dev2) +{ + const struct rte_device *dev2 = _dev2; + + return dev1 != dev2; +} + +static int +bus_find_device(const struct rte_bus *bus, const void *_dev) +{ + struct rte_device *dev; + + dev = bus->find_device(NULL, cmp_rte_device, _dev); + return dev == NULL; +} + +struct rte_bus * +rte_bus_find_by_device(const struct rte_device *dev) +{ + return rte_bus_find(NULL, bus_find_device, (const void *)dev); +} + +static int +cmp_bus_name(const struct rte_bus *bus, const void *_name) +{ + const char *name = _name; + + return strcmp(bus->name, name); +} + +struct rte_bus * +rte_bus_find_by_name(const char *busname) +{ + return rte_bus_find(NULL, cmp_bus_name, (const void *)busname); +} + +static int +bus_can_parse(const struct rte_bus *bus, const void *_name) +{ + const char *name = _name; + + return !(bus->parse && bus->parse(name, NULL) == 0); +} + +struct rte_bus * +rte_bus_find_by_device_name(const char *str) +{ + char name[RTE_DEV_NAME_MAX_LEN]; + char *c; + + snprintf(name, sizeof(name), "%s", str); + c = strchr(name, ','); + if (c != NULL) + c[0] = '\0'; + return rte_bus_find(NULL, bus_can_parse, name); +} diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c index a400ddd0..e2512755 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -37,58 +37,210 @@ #include <inttypes.h> #include <sys/queue.h> +#include <rte_bus.h> #include <rte_dev.h> #include <rte_devargs.h> #include <rte_debug.h> -#include <rte_devargs.h> #include <rte_log.h> #include "eal_private.h" +static int cmp_detached_dev_name(const struct rte_device *dev, + const void *_name) +{ + const char *name = _name; + + /* skip attached devices */ + if (dev->driver != NULL) + return 1; + + return strcmp(dev->name, name); +} + +static int cmp_dev_name(const struct rte_device *dev, const void *_name) +{ + const char *name = _name; + + return strcmp(dev->name, name); +} + int rte_eal_dev_attach(const char *name, const char *devargs) { - struct rte_pci_addr addr; + struct rte_bus *bus; + int ret; if (name == NULL || devargs == NULL) { RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n"); return -EINVAL; } - if (eal_parse_pci_DomBDF(name, &addr) == 0) { - if (rte_pci_probe_one(&addr) < 0) - goto err; + bus = rte_bus_find_by_device_name(name); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Unable to find a bus for the device '%s'\n", + name); + return -EINVAL; + } + if (strcmp(bus->name, "pci") == 0) + return rte_eal_hotplug_add("pci", name, devargs); + if (strcmp(bus->name, "vdev") != 0) { + RTE_LOG(ERR, EAL, "Device attach is only supported for PCI and vdev devices.\n"); + return -ENOTSUP; + } - } else { - if (rte_vdev_init(name, devargs)) - goto err; + /* 
+ * If we haven't found a bus device the user meant to "hotplug" a + * virtual device instead. + */ + ret = rte_vdev_init(name, devargs); + if (ret) + RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", + name); + return ret; +} + +int rte_eal_dev_detach(struct rte_device *dev) +{ + struct rte_bus *bus; + int ret; + + if (dev == NULL) { + RTE_LOG(ERR, EAL, "Invalid device provided.\n"); + return -EINVAL; } - return 0; + bus = rte_bus_find_by_device(dev); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n", + dev->name); + return -EINVAL; + } + + if (bus->unplug == NULL) { + RTE_LOG(ERR, EAL, "Bus function not supported\n"); + return -ENOTSUP; + } -err: - RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", name); - return -EINVAL; + ret = bus->unplug(dev); + if (ret) + RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", + dev->name); + return ret; } -int rte_eal_dev_detach(const char *name) +static char * +full_dev_name(const char *bus, const char *dev, const char *args) { - struct rte_pci_addr addr; + char *name; + size_t len; + len = snprintf(NULL, 0, "%s:%s,%s", bus, dev, args) + 1; + name = calloc(1, len); if (name == NULL) { - RTE_LOG(ERR, EAL, "Invalid device provided.\n"); - return -EINVAL; + RTE_LOG(ERR, EAL, "Could not allocate full device name\n"); + return NULL; } + snprintf(name, len, "%s:%s,%s", bus, dev, args); + return name; +} - if (eal_parse_pci_DomBDF(name, &addr) == 0) { - if (rte_pci_detach(&addr) < 0) - goto err; - } else { - if (rte_vdev_uninit(name)) - goto err; +int rte_eal_hotplug_add(const char *busname, const char *devname, + const char *devargs) +{ + struct rte_bus *bus; + struct rte_device *dev; + struct rte_devargs *da; + char *name; + int ret; + + bus = rte_bus_find_by_name(busname); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname); + return -ENOENT; + } + + if (bus->plug == NULL) { + RTE_LOG(ERR, EAL, "Function plug not supported by bus (%s)\n", + bus->name); + return -ENOTSUP; + } + + name = full_dev_name(busname, devname, devargs); + if (name == NULL) + return -ENOMEM; + + da = calloc(1, sizeof(*da)); + if (da == NULL) { + ret = -ENOMEM; + goto err_name; + } + + ret = rte_eal_devargs_parse(name, da); + if (ret) + goto err_devarg; + + ret = rte_eal_devargs_insert(da); + if (ret) + goto err_devarg; + + ret = bus->scan(); + if (ret) + goto err_devarg; + + dev = bus->find_device(NULL, cmp_detached_dev_name, devname); + if (dev == NULL) { + RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n", + devname); + ret = -ENODEV; + goto err_devarg; + } + + ret = bus->plug(dev); + if (ret) { + RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", + dev->name); + goto err_devarg; } + free(name); return 0; -err: - RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", name); - return -EINVAL; +err_devarg: + if (rte_eal_devargs_remove(busname, devname)) { + free(da->args); + free(da); + } +err_name: + free(name); + return ret; +} + +int rte_eal_hotplug_remove(const char *busname, const char *devname) +{ + struct rte_bus *bus; + struct rte_device *dev; + int ret; + + bus = rte_bus_find_by_name(busname); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname); + return -ENOENT; + } + + if (bus->unplug == NULL) { + RTE_LOG(ERR, EAL, "Function unplug not supported by bus (%s)\n", + bus->name); + return -ENOTSUP; + } + + dev = bus->find_device(NULL, cmp_dev_name, devname); + if (dev == NULL) { + RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", devname); + return 
-EINVAL; + } + + ret = bus->unplug(dev); + if (ret) + RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", + dev->name); + rte_eal_devargs_remove(busname, devname); + return ret; } diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c index ffa8ad96..6ac88d6a 100644 --- a/lib/librte_eal/common/eal_common_devargs.c +++ b/lib/librte_eal/common/eal_common_devargs.c @@ -40,8 +40,9 @@ #include <stdio.h> #include <string.h> -#include <rte_pci.h> +#include <rte_dev.h> #include <rte_devargs.h> +#include <rte_tailq.h> #include "eal_private.h" /** Global list of user devices */ @@ -78,50 +79,107 @@ rte_eal_parse_devargs_str(const char *devargs_str, return 0; } +static int +bus_name_cmp(const struct rte_bus *bus, const void *name) +{ + return strncmp(bus->name, name, strlen(bus->name)); +} + +int +rte_eal_devargs_parse(const char *dev, struct rte_devargs *da) +{ + struct rte_bus *bus = NULL; + const char *devname; + const size_t maxlen = sizeof(da->name); + size_t i; + + if (dev == NULL || da == NULL) + return -EINVAL; + /* Retrieve eventual bus info */ + do { + devname = dev; + bus = rte_bus_find(bus, bus_name_cmp, dev); + if (bus == NULL) + break; + devname = dev + strlen(bus->name) + 1; + if (rte_bus_find_by_device_name(devname) == bus) + break; + } while (1); + /* Store device name */ + i = 0; + while (devname[i] != '\0' && devname[i] != ',') { + da->name[i] = devname[i]; + i++; + if (i == maxlen) { + fprintf(stderr, "WARNING: Parsing \"%s\": device name should be shorter than %zu\n", + dev, maxlen); + da->name[i - 1] = '\0'; + return -EINVAL; + } + } + da->name[i] = '\0'; + if (bus == NULL) { + bus = rte_bus_find_by_device_name(da->name); + if (bus == NULL) { + fprintf(stderr, "ERROR: failed to parse device \"%s\"\n", + da->name); + return -EFAULT; + } + } + da->bus = bus; + /* Parse eventual device arguments */ + if (devname[i] == ',') + da->args = strdup(&devname[i + 1]); + else + da->args = strdup(""); + if (da->args == NULL) { + fprintf(stderr, "ERROR: not enough memory to parse arguments\n"); + return -ENOMEM; + } + return 0; +} + +int +rte_eal_devargs_insert(struct rte_devargs *da) +{ + int ret; + + ret = rte_eal_devargs_remove(da->bus->name, da->name); + if (ret < 0) + return ret; + TAILQ_INSERT_TAIL(&devargs_list, da, next); + return 0; +} + /* store a whitelist parameter for later parsing */ int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str) { struct rte_devargs *devargs = NULL; - char *buf = NULL; - int ret; + struct rte_bus *bus = NULL; + const char *dev = devargs_str; - /* use malloc instead of rte_malloc as it's called early at init */ - devargs = malloc(sizeof(*devargs)); + /* use calloc instead of rte_zmalloc as it's called early at init */ + devargs = calloc(1, sizeof(*devargs)); if (devargs == NULL) goto fail; - memset(devargs, 0, sizeof(*devargs)); - devargs->type = devtype; - - if (rte_eal_parse_devargs_str(devargs_str, &buf, &devargs->args)) + if (rte_eal_devargs_parse(dev, devargs)) goto fail; - - switch (devargs->type) { - case RTE_DEVTYPE_WHITELISTED_PCI: - case RTE_DEVTYPE_BLACKLISTED_PCI: - /* try to parse pci identifier */ - if (eal_parse_pci_BDF(buf, &devargs->pci.addr) != 0 && - eal_parse_pci_DomBDF(buf, &devargs->pci.addr) != 0) - goto fail; - - break; - case RTE_DEVTYPE_VIRTUAL: - /* save driver name */ - ret = snprintf(devargs->virt.drv_name, - sizeof(devargs->virt.drv_name), "%s", buf); - if (ret < 0 || ret >= (int)sizeof(devargs->virt.drv_name)) - goto fail; - - break; + devargs->type = 
devtype; + bus = devargs->bus; + if (devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) + devargs->policy = RTE_DEV_BLACKLISTED; + if (bus->conf.scan_mode == RTE_BUS_SCAN_UNDEFINED) { + if (devargs->policy == RTE_DEV_WHITELISTED) + bus->conf.scan_mode = RTE_BUS_SCAN_WHITELIST; + else if (devargs->policy == RTE_DEV_BLACKLISTED) + bus->conf.scan_mode = RTE_BUS_SCAN_BLACKLIST; } - - free(buf); TAILQ_INSERT_TAIL(&devargs_list, devargs, next); return 0; fail: - free(buf); if (devargs) { free(devargs->args); free(devargs); @@ -130,6 +188,24 @@ fail: return -1; } +int +rte_eal_devargs_remove(const char *busname, const char *devname) +{ + struct rte_devargs *d; + void *tmp; + + TAILQ_FOREACH_SAFE(d, &devargs_list, next, tmp) { + if (strcmp(d->bus->name, busname) == 0 && + strcmp(d->name, devname) == 0) { + TAILQ_REMOVE(&devargs_list, d, next); + free(d->args); + free(d); + return 0; + } + } + return 1; +} + /* count the number of devices of a specified type */ unsigned int rte_eal_devargs_type_count(enum rte_devtype devtype) @@ -151,27 +227,10 @@ rte_eal_devargs_dump(FILE *f) { struct rte_devargs *devargs; - fprintf(f, "User device white list:\n"); + fprintf(f, "User device list:\n"); TAILQ_FOREACH(devargs, &devargs_list, next) { - if (devargs->type == RTE_DEVTYPE_WHITELISTED_PCI) - fprintf(f, " PCI whitelist " PCI_PRI_FMT " %s\n", - devargs->pci.addr.domain, - devargs->pci.addr.bus, - devargs->pci.addr.devid, - devargs->pci.addr.function, - devargs->args); - else if (devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) - fprintf(f, " PCI blacklist " PCI_PRI_FMT " %s\n", - devargs->pci.addr.domain, - devargs->pci.addr.bus, - devargs->pci.addr.devid, - devargs->pci.addr.function, - devargs->args); - else if (devargs->type == RTE_DEVTYPE_VIRTUAL) - fprintf(f, " VIRTUAL %s %s\n", - devargs->virt.drv_name, - devargs->args); - else - fprintf(f, " UNKNOWN %s\n", devargs->args); + fprintf(f, " [%s]: %s %s\n", + (devargs->bus ? 
devargs->bus->name : "??"), + devargs->name, devargs->args); } } diff --git a/lib/librte_eal/common/eal_common_launch.c b/lib/librte_eal/common/eal_common_launch.c index 229c3a03..137c191d 100644 --- a/lib/librte_eal/common/eal_common_launch.c +++ b/lib/librte_eal/common/eal_common_launch.c @@ -41,6 +41,7 @@ #include <rte_memzone.h> #include <rte_eal.h> #include <rte_atomic.h> +#include <rte_pause.h> #include <rte_per_lcore.h> #include <rte_lcore.h> @@ -54,7 +55,8 @@ rte_eal_wait_lcore(unsigned slave_id) return 0; while (lcore_config[slave_id].state != WAIT && - lcore_config[slave_id].state != FINISHED); + lcore_config[slave_id].state != FINISHED) + rte_pause(); rte_rmb(); diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c index 84fa0cb5..0db1555b 100644 --- a/lib/librte_eal/common/eal_common_lcore.c +++ b/lib/librte_eal/common/eal_common_lcore.c @@ -81,6 +81,7 @@ rte_eal_cpu_init(void) /* By default, each detected core is enabled */ config->lcore_role[lcore_id] = ROLE_RTE; + lcore_config[lcore_id].core_role = ROLE_RTE; lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id); lcore_config[lcore_id].socket_id = eal_cpu_socket_id(lcore_id); if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES) { diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c index ddf65b7f..0e3b9320 100644 --- a/lib/librte_eal/common/eal_common_log.c +++ b/lib/librte_eal/common/eal_common_log.c @@ -112,6 +112,15 @@ rte_get_log_level(void) return rte_log_get_global_level(); } +int +rte_log_get_level(uint32_t type) +{ + if (type >= rte_logs.dynamic_types_len) + return -1; + + return rte_logs.dynamic_types[type].loglevel; +} + /* Set global log type */ __rte_deprecated void rte_set_log_type(uint32_t type, int enable) @@ -173,13 +182,13 @@ rte_log_set_level_regexp(const char *pattern, uint32_t level) return 0; } -/* get the current loglevel for the message beeing processed */ +/* get the current loglevel for the message being processed */ int rte_log_cur_msg_loglevel(void) { return RTE_PER_LCORE(log_cur_msg).loglevel; } -/* get the current logtype for the message beeing processed */ +/* get the current logtype for the message being processed */ int rte_log_cur_msg_logtype(void) { return RTE_PER_LCORE(log_cur_msg).logtype; diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 6155752e..996877ef 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -35,7 +35,9 @@ #include <stdint.h> #include <stdlib.h> #include <stdarg.h> +#include <unistd.h> #include <inttypes.h> +#include <sys/mman.h> #include <sys/queue.h> #include <rte_memory.h> @@ -135,6 +137,16 @@ rte_eal_memdevice_init(void) return 0; } +/* Lock page in physical memory and prevent from swapping. 
*/ +int +rte_mem_lock_page(const void *virt) +{ + unsigned long virtual = (unsigned long)virt; + int page_size = getpagesize(); + unsigned long aligned = (virtual & ~(page_size - 1)); + return mlock((void *)aligned, page_size); +} + /* init memory subsystem */ int rte_eal_memory_init(void) diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index 64f4e0ad..3026e36b 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -189,7 +189,8 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, return NULL; } - if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) { + if ((socket_id != SOCKET_ID_ANY) && + (socket_id >= RTE_MAX_NUMA_NODES || socket_id < 0)) { rte_errno = EINVAL; return NULL; } diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index f470195f..1da185e5 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -47,6 +47,7 @@ #include <rte_eal.h> #include <rte_log.h> #include <rte_lcore.h> +#include <rte_tailq.h> #include <rte_version.h> #include <rte_devargs.h> #include <rte_memcpy.h> @@ -61,9 +62,11 @@ const char eal_short_options[] = "b:" /* pci-blacklist */ "c:" /* coremask */ + "s:" /* service coremask */ "d:" /* driver */ "h" /* help */ "l:" /* corelist */ + "S:" /* service corelist */ "m:" /* memory size */ "n:" /* memory channels */ "r:" /* memory ranks */ @@ -123,11 +126,67 @@ static const char *default_solib_dir = RTE_EAL_PMD_PATH; static const char dpdk_solib_path[] __attribute__((used)) = "DPDK_PLUGIN_PATH=" RTE_EAL_PMD_PATH; +TAILQ_HEAD(device_option_list, device_option); + +struct device_option { + TAILQ_ENTRY(device_option) next; + + enum rte_devtype type; + char arg[]; +}; + +static struct device_option_list devopt_list = +TAILQ_HEAD_INITIALIZER(devopt_list); static int master_lcore_parsed; static int mem_parsed; static int core_parsed; +static int +eal_option_device_add(enum rte_devtype type, const char *optarg) +{ + struct device_option *devopt; + size_t optlen; + int ret; + + optlen = strlen(optarg) + 1; + devopt = calloc(1, sizeof(*devopt) + optlen); + if (devopt == NULL) { + RTE_LOG(ERR, EAL, "Unable to allocate device option\n"); + return -ENOMEM; + } + + devopt->type = type; + ret = snprintf(devopt->arg, optlen, "%s", optarg); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Unable to copy device option\n"); + free(devopt); + return -EINVAL; + } + TAILQ_INSERT_TAIL(&devopt_list, devopt, next); + return 0; +} + +int +eal_option_device_parse(void) +{ + struct device_option *devopt; + void *tmp; + int ret = 0; + + TAILQ_FOREACH_SAFE(devopt, &devopt_list, next, tmp) { + if (ret == 0) { + ret = rte_eal_devargs_add(devopt->type, devopt->arg); + if (ret) + RTE_LOG(ERR, EAL, "Unable to parse device '%s'\n", + devopt->arg); + } + TAILQ_REMOVE(&devopt_list, devopt, next); + free(devopt); + } + return ret; +} + void eal_reset_internal_config(struct internal_config *internal_cfg) { @@ -267,6 +326,77 @@ static int xdigit2val(unsigned char c) } static int +eal_parse_service_coremask(const char *coremask) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, j, idx = 0; + unsigned int count = 0; + char c; + int val; + + if (coremask == NULL) + return -1; + /* Remove all blank characters ahead and after . + * Remove 0x/0X if exists. 
+ */ + while (isblank(*coremask)) + coremask++; + if (coremask[0] == '0' && ((coremask[1] == 'x') + || (coremask[1] == 'X'))) + coremask += 2; + i = strlen(coremask); + while ((i > 0) && isblank(coremask[i - 1])) + i--; + + if (i == 0) + return -1; + + for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) { + c = coremask[i]; + if (isxdigit(c) == 0) { + /* invalid characters */ + return -1; + } + val = xdigit2val(c); + for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; + j++, idx++) { + if ((1 << j) & val) { + /* handle master lcore already parsed */ + uint32_t lcore = idx; + if (master_lcore_parsed && + cfg->master_lcore == lcore) { + RTE_LOG(ERR, EAL, + "Error: lcore %u is master lcore, cannot use as service core\n", + idx); + return -1; + } + + if (!lcore_config[idx].detected) { + RTE_LOG(ERR, EAL, + "lcore %u unavailable\n", idx); + return -1; + } + lcore_config[idx].core_role = ROLE_SERVICE; + count++; + } + } + } + + for (; i >= 0; i--) + if (coremask[i] != '0') + return -1; + + for (; idx < RTE_MAX_LCORE; idx++) + lcore_config[idx].core_index = -1; + + if (count == 0) + return -1; + + cfg->service_lcore_count = count; + return 0; +} + +static int eal_parse_coremask(const char *coremask) { struct rte_config *cfg = rte_eal_get_configuration(); @@ -330,6 +460,72 @@ eal_parse_coremask(const char *coremask) } static int +eal_parse_service_corelist(const char *corelist) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, idx = 0; + unsigned count = 0; + char *end = NULL; + int min, max; + + if (corelist == NULL) + return -1; + + /* Remove all blank characters ahead and after */ + while (isblank(*corelist)) + corelist++; + i = strlen(corelist); + while ((i > 0) && isblank(corelist[i - 1])) + i--; + + /* Get list of cores */ + min = RTE_MAX_LCORE; + do { + while (isblank(*corelist)) + corelist++; + if (*corelist == '\0') + return -1; + errno = 0; + idx = strtoul(corelist, &end, 10); + if (errno || end == NULL) + return -1; + while (isblank(*end)) + end++; + if (*end == '-') { + min = idx; + } else if ((*end == ',') || (*end == '\0')) { + max = idx; + if (min == RTE_MAX_LCORE) + min = idx; + for (idx = min; idx <= max; idx++) { + if (cfg->lcore_role[idx] != ROLE_SERVICE) { + /* handle master lcore already parsed */ + uint32_t lcore = idx; + if (cfg->master_lcore == lcore && + master_lcore_parsed) { + RTE_LOG(ERR, EAL, + "Error: lcore %u is master lcore, cannot use as service core\n", + idx); + return -1; + } + lcore_config[idx].core_role = + ROLE_SERVICE; + count++; + } + } + min = RTE_MAX_LCORE; + } else + return -1; + corelist = end + 1; + } while (*end != '\0'); + + if (count == 0) + return -1; + + return 0; +} + +static int eal_parse_corelist(const char *corelist) { struct rte_config *cfg = rte_eal_get_configuration(); @@ -409,6 +605,13 @@ eal_parse_master_lcore(const char *arg) if (cfg->master_lcore >= RTE_MAX_LCORE) return -1; master_lcore_parsed = 1; + + /* ensure master core is not used as service core */ + if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) { + RTE_LOG(ERR, EAL, "Error: Master lcore is used as a service core.\n"); + return -1; + } + return 0; } @@ -795,20 +998,29 @@ int eal_parse_common_option(int opt, const char *optarg, struct internal_config *conf) { + static int b_used; + static int w_used; + switch (opt) { /* blacklist */ case 'b': - if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI, + if (w_used) + goto bw_used; + if (eal_option_device_add(RTE_DEVTYPE_BLACKLISTED_PCI, optarg) < 0) { return -1; } + b_used = 1; break; /* whitelist */ 
case 'w': - if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_PCI, + if (b_used) + goto bw_used; + if (eal_option_device_add(RTE_DEVTYPE_WHITELISTED_PCI, optarg) < 0) { return -1; } + w_used = 1; break; /* coremask */ case 'c': @@ -826,6 +1038,20 @@ eal_parse_common_option(int opt, const char *optarg, } core_parsed = 1; break; + /* service coremask */ + case 's': + if (eal_parse_service_coremask(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid service coremask\n"); + return -1; + } + break; + /* service corelist */ + case 'S': + if (eal_parse_service_corelist(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid service core list\n"); + return -1; + } + break; /* size of memory */ case 'm': conf->memory = atoi(optarg); @@ -901,7 +1127,7 @@ eal_parse_common_option(int opt, const char *optarg, break; case OPT_VDEV_NUM: - if (rte_eal_devargs_add(RTE_DEVTYPE_VIRTUAL, + if (eal_option_device_add(RTE_DEVTYPE_VIRTUAL, optarg) < 0) { return -1; } @@ -940,6 +1166,10 @@ eal_parse_common_option(int opt, const char *optarg, } return 0; +bw_used: + RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) " + "cannot be used at the same time\n"); + return -1; } static void @@ -978,8 +1208,10 @@ eal_adjust_config(struct internal_config *internal_cfg) internal_config.process_type = eal_proc_type_detect(); /* default master lcore is the first one */ - if (!master_lcore_parsed) + if (!master_lcore_parsed) { cfg->master_lcore = rte_get_next_lcore(-1, 0, 0); + lcore_config[cfg->master_lcore].core_role = ROLE_RTE; + } /* if no memory amounts were requested, this will result in 0 and * will be overridden later, right after eal_hugepage_info_init() */ @@ -1025,13 +1257,6 @@ eal_check_common_options(struct internal_config *internal_cfg) return -1; } - if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) != 0 && - rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_PCI) != 0) { - RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) " - "cannot be used at the same time\n"); - return -1; - } - return 0; } @@ -1052,6 +1277,7 @@ eal_common_usage(void) " ',' is used for single number separator.\n" " '( )' can be omitted for single element group,\n" " '@' can be omitted if cpus and lcores have the same value\n" + " -s SERVICE COREMASK Hexadecimal bitmask of cores to be used as service cores\n" " --"OPT_MASTER_LCORE" ID Core ID that is used as master\n" " -n CHANNELS Number of memory channels\n" " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n" diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c index b7499913..52fd38cd 100644 --- a/lib/librte_eal/common/eal_common_pci.c +++ b/lib/librte_eal/common/eal_common_pci.c @@ -2,6 +2,7 @@ * BSD LICENSE * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright 2013-2014 6WIND S.A. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,36 +31,6 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* BSD LICENSE - * - * Copyright 2013-2014 6WIND S.A. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ #include <string.h> #include <inttypes.h> @@ -102,17 +73,43 @@ const char *pci_get_sysfs_path(void) static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev) { struct rte_devargs *devargs; + struct rte_pci_addr addr; + struct rte_bus *pbus; + pbus = rte_bus_find_by_name("pci"); TAILQ_FOREACH(devargs, &devargs_list, next) { - if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI && - devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) + if (devargs->bus != pbus) continue; - if (!rte_eal_compare_pci_addr(&dev->addr, &devargs->pci.addr)) + devargs->bus->parse(devargs->name, &addr); + if (!rte_eal_compare_pci_addr(&dev->addr, &addr)) return devargs; } return NULL; } +void +pci_name_set(struct rte_pci_device *dev) +{ + struct rte_devargs *devargs; + + /* Each device has its internal, canonical name set. */ + rte_pci_device_name(&dev->addr, + dev->name, sizeof(dev->name)); + devargs = pci_devargs_lookup(dev); + dev->device.devargs = devargs; + /* In blacklist mode, if the device is not blacklisted, no + * rte_devargs exists for it. + */ + if (devargs != NULL) + /* If an rte_devargs exists, the generic rte_device uses the + * given name as its name. + */ + dev->device.name = dev->device.devargs->name; + else + /* Otherwise, it uses the internal, canonical form. 
*/ + dev->device.name = dev->name; +} + /* map a particular resource from a file */ void * pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, @@ -212,12 +209,9 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr, loc = &dev->addr; /* The device is not blacklisted; Check if driver supports it */ - if (!rte_pci_match(dr, dev)) { + if (!rte_pci_match(dr, dev)) /* Match of device and driver failed */ - RTE_LOG(DEBUG, EAL, "Driver (%s) doesn't match the device\n", - dr->driver.name); return 1; - } RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", loc->domain, loc->bus, loc->devid, loc->function, @@ -225,13 +219,18 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr, /* no initialization when blacklisted, return without error */ if (dev->device.devargs != NULL && - dev->device.devargs->type == - RTE_DEVTYPE_BLACKLISTED_PCI) { + dev->device.devargs->policy == + RTE_DEV_BLACKLISTED) { RTE_LOG(INFO, EAL, " Device is blacklisted, not" " initializing\n"); return 1; } + if (dev->device.numa_node < 0) { + RTE_LOG(WARNING, EAL, " Invalid NUMA socket, default to 0\n"); + dev->device.numa_node = 0; + } + RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, dev->id.device_id, dr->driver.name); @@ -250,7 +249,13 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr, ret = dr->probe(dr, dev); if (ret) { dev->driver = NULL; - if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) + dev->device.driver = NULL; + if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) && + /* Don't unmap if device is unsupported and + * driver needs mapped resources. + */ + !(ret > 0 && + (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES))) rte_pci_unmap_device(dev); } @@ -326,7 +331,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev) /* * Find the pci device specified by pci address, then invoke probe function of - * the driver of the devive. + * the driver of the device. 
*/ int rte_pci_probe_one(const struct rte_pci_addr *addr) @@ -413,22 +418,18 @@ rte_pci_probe(void) int probe_all = 0; int ret = 0; - if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) == 0) + if (rte_pci_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST) probe_all = 1; FOREACH_DEVICE_ON_PCIBUS(dev) { probed++; - /* set devargs in PCI structure */ - devargs = pci_devargs_lookup(dev); - if (devargs != NULL) - dev->device.devargs = devargs; - + devargs = dev->device.devargs; /* probe all or only whitelisted devices */ if (probe_all) ret = pci_probe_all_drivers(dev); else if (devargs != NULL && - devargs->type == RTE_DEVTYPE_WHITELISTED_PCI) + devargs->policy == RTE_DEV_WHITELISTED) ret = pci_probe_all_drivers(dev); if (ret < 0) { RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT @@ -474,6 +475,20 @@ rte_pci_dump(FILE *f) } } +static int +pci_parse(const char *name, void *addr) +{ + struct rte_pci_addr *out = addr; + struct rte_pci_addr pci_addr; + bool parse; + + parse = (eal_parse_pci_BDF(name, &pci_addr) == 0 || + eal_parse_pci_DomBDF(name, &pci_addr) == 0); + if (parse && addr != NULL) + *out = pci_addr; + return parse == false; +} + /* register a driver */ void rte_pci_register(struct rte_pci_driver *driver) @@ -512,13 +527,54 @@ rte_pci_remove_device(struct rte_pci_device *pci_dev) TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next); } +static struct rte_device * +pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, + const void *data) +{ + struct rte_pci_device *dev; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + if (start && &dev->device == start) { + start = NULL; /* starting point found */ + continue; + } + if (cmp(&dev->device, data) == 0) + return &dev->device; + } + + return NULL; +} + +static int +pci_plug(struct rte_device *dev) +{ + return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev)); +} + +static int +pci_unplug(struct rte_device *dev) +{ + struct rte_pci_device *pdev; + int ret; + + pdev = RTE_DEV_TO_PCI(dev); + ret = rte_pci_detach_dev(pdev); + rte_pci_remove_device(pdev); + free(pdev); + return ret; +} + struct rte_pci_bus rte_pci_bus = { .bus = { .scan = rte_pci_scan, .probe = rte_pci_probe, + .find_device = pci_find_device, + .plug = pci_plug, + .unplug = pci_unplug, + .parse = pci_parse, }, .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), }; -RTE_REGISTER_BUS(PCI_BUS_NAME, rte_pci_bus.bus); +RTE_REGISTER_BUS(pci, rte_pci_bus.bus); diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c index 12e0fcac..60526cad 100644 --- a/lib/librte_eal/common/eal_common_proc.c +++ b/lib/librte_eal/common/eal_common_proc.c @@ -46,10 +46,10 @@ rte_eal_primary_proc_alive(const char *config_file_path) if (config_file_path) config_fd = open(config_file_path, O_RDONLY); else { - char default_path[PATH_MAX+1]; - snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT, - default_config_dir, "rte"); - config_fd = open(default_path, O_RDONLY); + const char *path; + + path = eal_runtime_config_path(); + config_fd = open(path, O_RDONLY); } if (config_fd < 0) return 0; diff --git a/lib/librte_eal/common/eal_common_tailqs.c b/lib/librte_eal/common/eal_common_tailqs.c index 4f698288..55955f9e 100644 --- a/lib/librte_eal/common/eal_common_tailqs.c +++ b/lib/librte_eal/common/eal_common_tailqs.c @@ -46,7 +46,6 @@ #include <rte_eal_memconfig.h> #include <rte_per_lcore.h> #include <rte_lcore.h> -#include <rte_memory.h> #include <rte_atomic.h> #include <rte_branch_prediction.h> 
#include <rte_log.h> diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c index 72656176..ed0b16d0 100644 --- a/lib/librte_eal/common/eal_common_timer.c +++ b/lib/librte_eal/common/eal_common_timer.c @@ -41,6 +41,7 @@ #include <rte_common.h> #include <rte_log.h> #include <rte_cycles.h> +#include <rte_pause.h> #include "eal_private.h" diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c index 0037a641..f7e547a6 100644 --- a/lib/librte_eal/common/eal_common_vdev.c +++ b/lib/librte_eal/common/eal_common_vdev.c @@ -35,14 +35,20 @@ #include <stdio.h> #include <stdlib.h> #include <stdint.h> +#include <stdbool.h> #include <sys/queue.h> #include <rte_eal.h> +#include <rte_dev.h> #include <rte_bus.h> #include <rte_vdev.h> #include <rte_common.h> #include <rte_devargs.h> #include <rte_memory.h> +#include <rte_errno.h> + +/* Forward declare to access virtual bus name */ +static struct rte_bus rte_vdev_bus; /** Double linked list of virtual device drivers. */ TAILQ_HEAD(vdev_device_list, rte_vdev_device); @@ -52,14 +58,10 @@ static struct vdev_device_list vdev_device_list = struct vdev_driver_list vdev_driver_list = TAILQ_HEAD_INITIALIZER(vdev_driver_list); -static void rte_vdev_bus_register(void); - /* register a driver */ void rte_vdev_register(struct rte_vdev_driver *driver) { - rte_vdev_bus_register(); - TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next); } @@ -70,84 +72,45 @@ rte_vdev_unregister(struct rte_vdev_driver *driver) TAILQ_REMOVE(&vdev_driver_list, driver, next); } -/* - * Parse "driver" devargs without adding a dependency on rte_kvargs.h - */ -static char *parse_driver_arg(const char *args) +static int +vdev_parse(const char *name, void *addr) { - const char *c; - char *str; - - if (!args || args[0] == '\0') - return NULL; + struct rte_vdev_driver **out = addr; + struct rte_vdev_driver *driver = NULL; - c = args; - - do { - if (strncmp(c, "driver=", 7) == 0) { - c += 7; + TAILQ_FOREACH(driver, &vdev_driver_list, next) { + if (strncmp(driver->driver.name, name, + strlen(driver->driver.name)) == 0) break; - } - - c = strchr(c, ','); - if (c) - c++; - } while (c); - - if (c) - str = strdup(c); - else - str = NULL; - - return str; + if (driver->driver.alias && + strncmp(driver->driver.alias, name, + strlen(driver->driver.alias)) == 0) + break; + } + if (driver != NULL && + addr != NULL) + *out = driver; + return driver == NULL; } static int vdev_probe_all_drivers(struct rte_vdev_device *dev) { const char *name; - char *drv_name; struct rte_vdev_driver *driver; - int ret = 1; + int ret; - drv_name = parse_driver_arg(rte_vdev_device_args(dev)); - name = drv_name ? drv_name : rte_vdev_device_name(dev); + name = rte_vdev_device_name(dev); RTE_LOG(DEBUG, EAL, "Search driver %s to probe device %s\n", name, rte_vdev_device_name(dev)); - TAILQ_FOREACH(driver, &vdev_driver_list, next) { - /* - * search a driver prefix in virtual device name. - * For example, if the driver is pcap PMD, driver->name - * will be "net_pcap", but "name" will be "net_pcapN". - * So use strncmp to compare. - */ - if (!strncmp(driver->driver.name, name, - strlen(driver->driver.name))) { - dev->device.driver = &driver->driver; - ret = driver->probe(dev); - if (ret) - dev->device.driver = NULL; - goto out; - } - } - - /* Give new names precedence over aliases. 
*/ - TAILQ_FOREACH(driver, &vdev_driver_list, next) { - if (driver->driver.alias && - !strncmp(driver->driver.alias, name, - strlen(driver->driver.alias))) { - dev->device.driver = &driver->driver; - ret = driver->probe(dev); - if (ret) - dev->device.driver = NULL; - break; - } - } - -out: - free(drv_name); + if (vdev_parse(name, &driver)) + return -1; + dev->device.driver = &driver->driver; + ret = driver->probe(dev); + if (ret) + dev->device.driver = NULL; return ret; } @@ -178,13 +141,14 @@ alloc_devargs(const char *name, const char *args) if (!devargs) return NULL; - devargs->type = RTE_DEVTYPE_VIRTUAL; + devargs->bus = &rte_vdev_bus; if (args) devargs->args = strdup(args); + else + devargs->args = strdup(""); - ret = snprintf(devargs->virt.drv_name, - sizeof(devargs->virt.drv_name), "%s", name); - if (ret < 0 || ret >= (int)sizeof(devargs->virt.drv_name)) { + ret = snprintf(devargs->name, sizeof(devargs->name), "%s", name); + if (ret < 0 || ret >= (int)sizeof(devargs->name)) { free(devargs->args); free(devargs); return NULL; @@ -219,7 +183,7 @@ rte_vdev_init(const char *name, const char *args) dev->device.devargs = devargs; dev->device.numa_node = SOCKET_ID_ANY; - dev->device.name = devargs->virt.drv_name; + dev->device.name = devargs->name; ret = vdev_probe_all_drivers(dev); if (ret) { @@ -293,13 +257,12 @@ vdev_scan(void) struct rte_devargs *devargs; /* for virtual devices we scan the devargs_list populated via cmdline */ - TAILQ_FOREACH(devargs, &devargs_list, next) { - if (devargs->type != RTE_DEVTYPE_VIRTUAL) + if (devargs->bus != &rte_vdev_bus) continue; - dev = find_vdev(devargs->virt.drv_name); + dev = find_vdev(devargs->name); if (dev) continue; @@ -309,7 +272,7 @@ vdev_scan(void) dev->device.devargs = devargs; dev->device.numa_node = SOCKET_ID_ANY; - dev->device.name = devargs->virt.drv_name; + dev->device.name = devargs->name; TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); } @@ -338,21 +301,42 @@ vdev_probe(void) return 0; } -static struct rte_bus rte_vdev_bus = { - .scan = vdev_scan, - .probe = vdev_probe, -}; +static struct rte_device * +vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, + const void *data) +{ + struct rte_vdev_device *dev; -RTE_INIT(rte_vdev_bus_register); + TAILQ_FOREACH(dev, &vdev_device_list, next) { + if (start && &dev->device == start) { + start = NULL; + continue; + } + if (cmp(&dev->device, data) == 0) + return &dev->device; + } + return NULL; +} -static void rte_vdev_bus_register(void) +static int +vdev_plug(struct rte_device *dev) { - static int registered; - - if (registered) - return; + return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev)); +} - registered = 1; - rte_vdev_bus.name = RTE_STR(virtual); - rte_bus_register(&rte_vdev_bus); +static int +vdev_unplug(struct rte_device *dev) +{ + return rte_vdev_uninit(dev->name); } + +static struct rte_bus rte_vdev_bus = { + .scan = vdev_scan, + .probe = vdev_probe, + .find_device = vdev_find_device, + .plug = vdev_plug, + .unplug = vdev_unplug, + .parse = vdev_parse, +}; + +RTE_REGISTER_BUS(vdev, rte_vdev_bus); diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index a881c62e..439a2610 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -91,6 +91,7 @@ extern const struct option eal_long_options[]; int eal_parse_common_option(int opt, const char *argv, struct internal_config *conf); +int eal_option_device_parse(void); int eal_adjust_config(struct internal_config *internal_cfg); int 
eal_check_common_options(struct internal_config *internal_cfg); void eal_common_usage(void); diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 6cacce07..597d82e4 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -113,6 +113,11 @@ struct rte_pci_driver; struct rte_pci_device; /** + * Find the name of a PCI device. + */ +void pci_name_set(struct rte_pci_device *dev); + +/** * Add a PCI device to the PCI Bus (append to PCI Device list). This function * also updates the bus references of the PCI Device (and the generic device * object embedded within. @@ -338,4 +343,16 @@ int rte_eal_hugepage_attach(void); */ bool rte_eal_using_phys_addrs(void); +/** + * Find a bus capable of identifying a device. + * + * @param str + * A device identifier (PCI address, virtual PMD name, ...). + * + * @return + * A valid bus handle if found. + * NULL if no bus is able to parse this device. + */ +struct rte_bus *rte_bus_find_by_device_name(const char *str); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h index dc3a0f3b..0b70d620 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright (C) Cavium networks Ltd. 2015. + * Copyright (C) Cavium, Inc. 2015. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * diff --git a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h index 1b312b30..0a29f4bb 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h +++ b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h @@ -52,7 +52,7 @@ static inline uint16_t rte_arch_bswap16(uint16_t _x) { register uint16_t x = _x; - asm volatile ("rev16 %0,%1" + asm volatile ("rev16 %w0,%w1" : "=r" (x) : "r" (x) ); diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h index 49aead92..5425f4c7 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright (C) Cavium networks Ltd. 2015. + * Copyright (C) Cavium, Inc. 2015. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
* diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h index 867a9468..15457691 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright (C) Cavium networks Ltd. 2015. + * Copyright (C) Cavium, Inc. 2015. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * diff --git a/lib/librte_eal/common/include/arch/arm/rte_io.h b/lib/librte_eal/common/include/arch/arm/rte_io.h index 9593b424..3b63ec85 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_io.h +++ b/lib/librte_eal/common/include/arch/arm/rte_io.h @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright(c) 2016 Cavium networks. All rights reserved. + * Copyright(c) 2016 Cavium, Inc. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -14,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * diff --git a/lib/librte_eal/common/include/arch/arm/rte_io_64.h b/lib/librte_eal/common/include/arch/arm/rte_io_64.h index 0402125b..ee9b8d55 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_io_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_io_64.h @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright (C) Cavium networks Ltd. 2016. + * Copyright (C) Cavium, Inc. 2016. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
* @@ -44,7 +44,7 @@ extern "C" { #include "generic/rte_io.h" #include "rte_atomic_64.h" -static inline uint8_t __attribute__((always_inline)) +static __rte_always_inline uint8_t rte_read8_relaxed(const volatile void *addr) { uint8_t val; @@ -56,7 +56,7 @@ rte_read8_relaxed(const volatile void *addr) return val; } -static inline uint16_t __attribute__((always_inline)) +static __rte_always_inline uint16_t rte_read16_relaxed(const volatile void *addr) { uint16_t val; @@ -68,7 +68,7 @@ rte_read16_relaxed(const volatile void *addr) return val; } -static inline uint32_t __attribute__((always_inline)) +static __rte_always_inline uint32_t rte_read32_relaxed(const volatile void *addr) { uint32_t val; @@ -80,7 +80,7 @@ rte_read32_relaxed(const volatile void *addr) return val; } -static inline uint64_t __attribute__((always_inline)) +static __rte_always_inline uint64_t rte_read64_relaxed(const volatile void *addr) { uint64_t val; @@ -92,7 +92,7 @@ rte_read64_relaxed(const volatile void *addr) return val; } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write8_relaxed(uint8_t val, volatile void *addr) { asm volatile( @@ -101,7 +101,7 @@ rte_write8_relaxed(uint8_t val, volatile void *addr) : [val] "r" (val), [addr] "r" (addr)); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write16_relaxed(uint16_t val, volatile void *addr) { asm volatile( @@ -110,7 +110,7 @@ rte_write16_relaxed(uint16_t val, volatile void *addr) : [val] "r" (val), [addr] "r" (addr)); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write32_relaxed(uint32_t val, volatile void *addr) { asm volatile( @@ -119,7 +119,7 @@ rte_write32_relaxed(uint32_t val, volatile void *addr) : [val] "r" (val), [addr] "r" (addr)); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write64_relaxed(uint64_t val, volatile void *addr) { asm volatile( @@ -128,7 +128,7 @@ rte_write64_relaxed(uint64_t val, volatile void *addr) : [val] "r" (val), [addr] "r" (addr)); } -static inline uint8_t __attribute__((always_inline)) +static __rte_always_inline uint8_t rte_read8(const volatile void *addr) { uint8_t val; @@ -137,7 +137,7 @@ rte_read8(const volatile void *addr) return val; } -static inline uint16_t __attribute__((always_inline)) +static __rte_always_inline uint16_t rte_read16(const volatile void *addr) { uint16_t val; @@ -146,7 +146,7 @@ rte_read16(const volatile void *addr) return val; } -static inline uint32_t __attribute__((always_inline)) +static __rte_always_inline uint32_t rte_read32(const volatile void *addr) { uint32_t val; @@ -155,7 +155,7 @@ rte_read32(const volatile void *addr) return val; } -static inline uint64_t __attribute__((always_inline)) +static __rte_always_inline uint64_t rte_read64(const volatile void *addr) { uint64_t val; @@ -164,28 +164,28 @@ rte_read64(const volatile void *addr) return val; } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write8(uint8_t value, volatile void *addr) { rte_io_wmb(); rte_write8_relaxed(value, addr); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write16(uint16_t value, volatile void *addr) { rte_io_wmb(); rte_write16_relaxed(value, addr); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write32(uint32_t value, volatile void *addr) { rte_io_wmb(); rte_write32_relaxed(value, addr); } -static inline void 
__attribute__((always_inline)) +static __rte_always_inline void rte_write64(uint64_t value, volatile void *addr) { rte_io_wmb(); diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h index 5db66b63..b80d8ba4 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright (C) Cavium networks Ltd. 2015. + * Copyright (C) Cavium, Inc. 2015. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * diff --git a/lib/librte_eal/common/include/arch/arm/rte_pause.h b/lib/librte_eal/common/include/arch/arm/rte_pause.h new file mode 100644 index 00000000..b772ca07 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_pause.h @@ -0,0 +1,50 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2017 Cavium, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PAUSE_ARM_H_ +#define _RTE_PAUSE_ARM_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef RTE_ARCH_64 +#include <rte_pause_64.h> +#else +#include <rte_pause_32.h> +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PAUSE_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_pause_32.h b/lib/librte_eal/common/include/arch/arm/rte_pause_32.h new file mode 100644 index 00000000..ec680b5c --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_pause_32.h @@ -0,0 +1,51 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2017 Cavium, Inc. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PAUSE_ARM32_H_ +#define _RTE_PAUSE_ARM32_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <rte_common.h> +#include "generic/rte_pause.h" + +static inline void rte_pause(void) +{ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PAUSE_ARM32_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_pause_64.h b/lib/librte_eal/common/include/arch/arm/rte_pause_64.h new file mode 100644 index 00000000..2da46326 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_pause_64.h @@ -0,0 +1,52 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2017 Cavium, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PAUSE_ARM64_H_ +#define _RTE_PAUSE_ARM64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <rte_common.h> +#include "generic/rte_pause.h" + +static inline void rte_pause(void) +{ + asm volatile("yield" ::: "memory"); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PAUSE_ARM64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h index 0d077ea6..ff59509f 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright (C) Cavium networks Ltd. 2015. + * Copyright (C) Cavium, Inc. 2015. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h index 4107c998..782350d1 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2015 Cavium Networks. All rights reserved. + * Copyright(c) 2015 Cavium, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium Networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
* @@ -35,6 +35,7 @@ #include <stdint.h> #include "generic/rte_vect.h" +#include "rte_debug.h" #include "arm_neon.h" #ifdef __cplusplus @@ -76,8 +77,122 @@ vqtbl1q_u8(uint8x16_t a, uint8x16_t b) return vld1q_u8(rte_ret.u8); } + +static inline uint16_t +vaddvq_u16(uint16x8_t a) +{ + uint32x4_t m = vpaddlq_u16(a); + uint64x2_t n = vpaddlq_u32(m); + uint64x1_t o = vget_low_u64(n) + vget_high_u64(n); + + return vget_lane_u32((uint32x2_t)o, 0); +} + #endif +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70000) +static inline uint32x4_t +vcopyq_laneq_u32(uint32x4_t a, const int lane_a, + uint32x4_t b, const int lane_b) +{ + return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a); +} +#endif + +#if defined(RTE_ARCH_ARM64) +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70000) + +#if (GCC_VERSION < 40900) +typedef uint64_t poly64_t; +typedef uint64x2_t poly64x2_t; +typedef uint8_t poly128_t __attribute__((vector_size(16), aligned(16))); +#endif + +/* NEON intrinsic vreinterpretq_u64_p128() is supported since GCC version 7 */ +static inline uint64x2_t +vreinterpretq_u64_p128(poly128_t x) +{ + return (uint64x2_t)x; +} + +/* NEON intrinsic vreinterpretq_p64_u64() is supported since GCC version 7 */ +static inline poly64x2_t +vreinterpretq_p64_u64(uint64x2_t x) +{ + return (poly64x2_t)x; +} + +/* NEON intrinsic vgetq_lane_p64() is supported since GCC version 7 */ +static inline poly64_t +vgetq_lane_p64(poly64x2_t x, const int lane) +{ + RTE_ASSERT(lane >= 0 && lane <= 1); + + poly64_t *p = (poly64_t *)&x; + + return p[lane]; +} +#endif +#endif + +/* + * If (0 <= index <= 15), then call the ASIMD ext instruction on the + * 128 bit regs v0 and v1 with the appropriate index. + * + * Else returns a zero vector. + */ +static inline uint8x16_t +vextract(uint8x16_t v0, uint8x16_t v1, const int index) +{ + switch (index) { + case 0: return vextq_u8(v0, v1, 0); + case 1: return vextq_u8(v0, v1, 1); + case 2: return vextq_u8(v0, v1, 2); + case 3: return vextq_u8(v0, v1, 3); + case 4: return vextq_u8(v0, v1, 4); + case 5: return vextq_u8(v0, v1, 5); + case 6: return vextq_u8(v0, v1, 6); + case 7: return vextq_u8(v0, v1, 7); + case 8: return vextq_u8(v0, v1, 8); + case 9: return vextq_u8(v0, v1, 9); + case 10: return vextq_u8(v0, v1, 10); + case 11: return vextq_u8(v0, v1, 11); + case 12: return vextq_u8(v0, v1, 12); + case 13: return vextq_u8(v0, v1, 13); + case 14: return vextq_u8(v0, v1, 14); + case 15: return vextq_u8(v0, v1, 15); + } + return vdupq_n_u8(0); +} + +/** + * Shifts right 128 bit register by specified number of bytes + * + * Value of shift parameter must be in range 0 - 16 + */ +static inline uint64x2_t +vshift_bytes_right(uint64x2_t reg, const unsigned int shift) +{ + return vreinterpretq_u64_u8(vextract( + vreinterpretq_u8_u64(reg), + vdupq_n_u8(0), + shift)); +} + +/** + * Shifts left 128 bit register by specified number of bytes + * + * Value of shift parameter must be in range 0 - 16 + */ +static inline uint64x2_t +vshift_bytes_left(uint64x2_t reg, const unsigned int shift) +{ + return vreinterpretq_u64_u8(vextract( + vdupq_n_u8(0), + vreinterpretq_u8_u64(reg), + 16 - shift)); +} + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_io.h b/lib/librte_eal/common/include/arch/ppc_64/rte_io.h index be192da7..1f42ced5 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_io.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_io.h @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright(c) 2016 Cavium networks. All rights reserved.
+ * Copyright(c) 2016 Cavium, Inc. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -14,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h b/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h new file mode 100644 index 00000000..389682ca --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h @@ -0,0 +1,51 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Cavium, Inc. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PAUSE_PPC64_H_ +#define _RTE_PAUSE_PPC64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_pause.h" + +static inline void rte_pause(void) +{ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PAUSE_PPC64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h index af139c9d..39815d9e 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h @@ -38,6 +38,7 @@ extern "C" { #endif #include <rte_common.h> +#include <rte_pause.h> #include "generic/rte_spinlock.h" /* Fixme: Use intrinsics to implement the spinlock on Power architecture */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_cycles.h b/lib/librte_eal/common/include/arch/x86/rte_cycles.h index 5eb6ce96..1bb3e1db 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_cycles.h +++ b/lib/librte_eal/common/include/arch/x86/rte_cycles.h @@ -2,6 +2,7 @@ * BSD LICENSE * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 
+ * Copyright(c) 2013 6WIND. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,36 +31,6 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* BSD LICENSE - * - * Copyright(c) 2013 6WIND. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ #ifndef _RTE_CYCLES_X86_64_H_ #define _RTE_CYCLES_X86_64_H_ diff --git a/lib/librte_eal/common/include/arch/x86/rte_io.h b/lib/librte_eal/common/include/arch/x86/rte_io.h index c8d14043..130022d0 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_io.h +++ b/lib/librte_eal/common/include/arch/x86/rte_io.h @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright(c) 2016 Cavium networks. All rights reserved. + * Copyright(c) 2016 Cavium, Inc. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -14,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h index b9785e85..74c280c2 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h @@ -44,6 +44,7 @@ #include <stdint.h> #include <string.h> #include <rte_vect.h> +#include <rte_common.h> #ifdef __cplusplus extern "C" { @@ -64,8 +65,8 @@ extern "C" { * @return * Pointer to the destination data. 
*/ -static inline void * -rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline)); +static __rte_always_inline void * +rte_memcpy(void *dst, const void *src, size_t n); #ifdef RTE_MACHINE_CPUFLAG_AVX512F diff --git a/lib/librte_eal/common/include/arch/x86/rte_pause.h b/lib/librte_eal/common/include/arch/x86/rte_pause.h new file mode 100644 index 00000000..29130c4b --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_pause.h @@ -0,0 +1,53 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Cavium, Inc. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_PAUSE_X86_H_ +#define _RTE_PAUSE_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_pause.h" + +#include <emmintrin.h> +static inline void rte_pause(void) +{ + _mm_pause(); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PAUSE_X86_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h index 8e630c21..5675c2b4 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h +++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h @@ -43,6 +43,7 @@ extern "C" { #include "rte_cpuflags.h" #include "rte_branch_prediction.h" #include "rte_common.h" +#include "rte_pause.h" #define RTE_RTM_MAX_RETRIES (10) #define RTE_XABORT_LOCK_BUSY (0xff) diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h index 1b4b85dd..03fc991e 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_vect.h +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h @@ -45,21 +45,7 @@ #if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) -#ifdef __SSE__ -#include <xmmintrin.h> -#endif - -#ifdef __SSE2__ -#include <emmintrin.h> -#endif - -#ifdef __SSE3__ -#include <tmmintrin.h> -#endif - -#if defined(__SSE4_2__) || defined(__SSE4_1__) -#include <smmintrin.h> -#endif +#include <smmintrin.h> /* SSE4 */ #if defined(__AVX__) #include <immintrin.h> diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h b/lib/librte_eal/common/include/generic/rte_byteorder.h index e00bccbc..e5e820d3 100644 --- a/lib/librte_eal/common/include/generic/rte_byteorder.h +++ b/lib/librte_eal/common/include/generic/rte_byteorder.h @@ -74,6 +74,73 @@ #elif defined __LITTLE_ENDIAN__ #define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN #endif +#if !defined(RTE_BYTE_ORDER) +#error Unknown endianness. +#endif + +#define RTE_STATIC_BSWAP16(v) \ + ((((uint16_t)(v) & UINT16_C(0x00ff)) << 8) | \ + (((uint16_t)(v) & UINT16_C(0xff00)) >> 8)) + +#define RTE_STATIC_BSWAP32(v) \ + ((((uint32_t)(v) & UINT32_C(0x000000ff)) << 24) | \ + (((uint32_t)(v) & UINT32_C(0x0000ff00)) << 8) | \ + (((uint32_t)(v) & UINT32_C(0x00ff0000)) >> 8) | \ + (((uint32_t)(v) & UINT32_C(0xff000000)) >> 24)) + +#define RTE_STATIC_BSWAP64(v) \ + ((((uint64_t)(v) & UINT64_C(0x00000000000000ff)) << 56) | \ + (((uint64_t)(v) & UINT64_C(0x000000000000ff00)) << 40) | \ + (((uint64_t)(v) & UINT64_C(0x0000000000ff0000)) << 24) | \ + (((uint64_t)(v) & UINT64_C(0x00000000ff000000)) << 8) | \ + (((uint64_t)(v) & UINT64_C(0x000000ff00000000)) >> 8) | \ + (((uint64_t)(v) & UINT64_C(0x0000ff0000000000)) >> 24) | \ + (((uint64_t)(v) & UINT64_C(0x00ff000000000000)) >> 40) | \ + (((uint64_t)(v) & UINT64_C(0xff00000000000000)) >> 56)) + +/* + * These macros are functionally similar to rte_cpu_to_(be|le)(16|32|64)(), + * they take values in host CPU order and return them converted to the + * intended endianness. + * + * They resolve at compilation time to integer constants which can safely be + * used with static initializers, since those cannot involve function calls. + * + * On the other hand, they are not as optimized as their rte_cpu_to_*() + * counterparts, therefore applications should refrain from using them on + * variable values, particularly inside performance-sensitive code. 
+ */ +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN +#define RTE_BE16(v) (rte_be16_t)(v) +#define RTE_BE32(v) (rte_be32_t)(v) +#define RTE_BE64(v) (rte_be64_t)(v) +#define RTE_LE16(v) (rte_le16_t)(RTE_STATIC_BSWAP16(v)) +#define RTE_LE32(v) (rte_le32_t)(RTE_STATIC_BSWAP32(v)) +#define RTE_LE64(v) (rte_le64_t)(RTE_STATIC_BSWAP64(v)) +#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN +#define RTE_BE16(v) (rte_be16_t)(RTE_STATIC_BSWAP16(v)) +#define RTE_BE32(v) (rte_be32_t)(RTE_STATIC_BSWAP32(v)) +#define RTE_BE64(v) (rte_be64_t)(RTE_STATIC_BSWAP64(v)) +#define RTE_LE16(v) (rte_be16_t)(v) +#define RTE_LE32(v) (rte_be32_t)(v) +#define RTE_LE64(v) (rte_be64_t)(v) +#else +#error Unsupported endianness. +#endif + +/* + * The following types should be used when handling values according to a + * specific byte ordering, which may differ from that of the host CPU. + * + * Libraries, public APIs and applications are encouraged to use them for + * documentation purposes. + */ +typedef uint16_t rte_be16_t; /**< 16-bit big-endian value. */ +typedef uint32_t rte_be32_t; /**< 32-bit big-endian value. */ +typedef uint64_t rte_be64_t; /**< 64-bit big-endian value. */ +typedef uint16_t rte_le16_t; /**< 16-bit little-endian value. */ +typedef uint32_t rte_le32_t; /**< 32-bit little-endian value. */ +typedef uint64_t rte_le64_t; /**< 64-bit little-endian value. */ /* * An internal function to swap bytes in a 16-bit value. @@ -84,8 +151,7 @@ static inline uint16_t rte_constant_bswap16(uint16_t x) { - return (uint16_t)(((x & 0x00ffU) << 8) | - ((x & 0xff00U) >> 8)); + return RTE_STATIC_BSWAP16(x); } /* @@ -97,10 +163,7 @@ rte_constant_bswap16(uint16_t x) static inline uint32_t rte_constant_bswap32(uint32_t x) { - return ((x & 0x000000ffUL) << 24) | - ((x & 0x0000ff00UL) << 8) | - ((x & 0x00ff0000UL) >> 8) | - ((x & 0xff000000UL) >> 24); + return RTE_STATIC_BSWAP32(x); } /* @@ -112,14 +175,7 @@ rte_constant_bswap32(uint32_t x) static inline uint64_t rte_constant_bswap64(uint64_t x) { - return ((x & 0x00000000000000ffULL) << 56) | - ((x & 0x000000000000ff00ULL) << 40) | - ((x & 0x0000000000ff0000ULL) << 24) | - ((x & 0x00000000ff000000ULL) << 8) | - ((x & 0x000000ff00000000ULL) >> 8) | - ((x & 0x0000ff0000000000ULL) >> 24) | - ((x & 0x00ff000000000000ULL) >> 40) | - ((x & 0xff00000000000000ULL) >> 56); + return RTE_STATIC_BSWAP64(x); } @@ -143,65 +199,65 @@ static uint64_t rte_bswap64(uint64_t x); /** * Convert a 16-bit value from CPU order to little endian. */ -static uint16_t rte_cpu_to_le_16(uint16_t x); +static rte_le16_t rte_cpu_to_le_16(uint16_t x); /** * Convert a 32-bit value from CPU order to little endian. */ -static uint32_t rte_cpu_to_le_32(uint32_t x); +static rte_le32_t rte_cpu_to_le_32(uint32_t x); /** * Convert a 64-bit value from CPU order to little endian. */ -static uint64_t rte_cpu_to_le_64(uint64_t x); +static rte_le64_t rte_cpu_to_le_64(uint64_t x); /** * Convert a 16-bit value from CPU order to big endian. */ -static uint16_t rte_cpu_to_be_16(uint16_t x); +static rte_be16_t rte_cpu_to_be_16(uint16_t x); /** * Convert a 32-bit value from CPU order to big endian. */ -static uint32_t rte_cpu_to_be_32(uint32_t x); +static rte_be32_t rte_cpu_to_be_32(uint32_t x); /** * Convert a 64-bit value from CPU order to big endian. */ -static uint64_t rte_cpu_to_be_64(uint64_t x); +static rte_be64_t rte_cpu_to_be_64(uint64_t x); /** * Convert a 16-bit value from little endian to CPU order. 
*/ -static uint16_t rte_le_to_cpu_16(uint16_t x); +static uint16_t rte_le_to_cpu_16(rte_le16_t x); /** * Convert a 32-bit value from little endian to CPU order. */ -static uint32_t rte_le_to_cpu_32(uint32_t x); +static uint32_t rte_le_to_cpu_32(rte_le32_t x); /** * Convert a 64-bit value from little endian to CPU order. */ -static uint64_t rte_le_to_cpu_64(uint64_t x); +static uint64_t rte_le_to_cpu_64(rte_le64_t x); /** * Convert a 16-bit value from big endian to CPU order. */ -static uint16_t rte_be_to_cpu_16(uint16_t x); +static uint16_t rte_be_to_cpu_16(rte_be16_t x); /** * Convert a 32-bit value from big endian to CPU order. */ -static uint32_t rte_be_to_cpu_32(uint32_t x); +static uint32_t rte_be_to_cpu_32(rte_be32_t x); /** * Convert a 64-bit value from big endian to CPU order. */ -static uint64_t rte_be_to_cpu_64(uint64_t x); +static uint64_t rte_be_to_cpu_64(rte_be64_t x); #endif /* __DOXYGEN__ */ diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h index 0e645c2c..0df90474 100644 --- a/lib/librte_eal/common/include/generic/rte_cycles.h +++ b/lib/librte_eal/common/include/generic/rte_cycles.h @@ -2,6 +2,7 @@ * BSD LICENSE * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2013 6WIND. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,36 +31,6 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* BSD LICENSE - * - * Copyright(c) 2013 6WIND. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ #ifndef _RTE_CYCLES_H_ #define _RTE_CYCLES_H_ diff --git a/lib/librte_eal/common/include/generic/rte_io.h b/lib/librte_eal/common/include/generic/rte_io.h index d82ee695..0b88c341 100644 --- a/lib/librte_eal/common/include/generic/rte_io.h +++ b/lib/librte_eal/common/include/generic/rte_io.h @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright(c) 2016 Cavium networks. All rights reserved. 
+ * Copyright(c) 2016 Cavium, Inc. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -14,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -34,8 +34,6 @@ #ifndef _RTE_IO_H_ #define _RTE_IO_H_ -#include <rte_atomic.h> - /** * @file * I/O device memory operations @@ -264,55 +262,55 @@ rte_write64(uint64_t value, volatile void *addr); #ifndef RTE_OVERRIDE_IO_H -static inline uint8_t __attribute__((always_inline)) +static __rte_always_inline uint8_t rte_read8_relaxed(const volatile void *addr) { return *(const volatile uint8_t *)addr; } -static inline uint16_t __attribute__((always_inline)) +static __rte_always_inline uint16_t rte_read16_relaxed(const volatile void *addr) { return *(const volatile uint16_t *)addr; } -static inline uint32_t __attribute__((always_inline)) +static __rte_always_inline uint32_t rte_read32_relaxed(const volatile void *addr) { return *(const volatile uint32_t *)addr; } -static inline uint64_t __attribute__((always_inline)) +static __rte_always_inline uint64_t rte_read64_relaxed(const volatile void *addr) { return *(const volatile uint64_t *)addr; } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write8_relaxed(uint8_t value, volatile void *addr) { *(volatile uint8_t *)addr = value; } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write16_relaxed(uint16_t value, volatile void *addr) { *(volatile uint16_t *)addr = value; } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write32_relaxed(uint32_t value, volatile void *addr) { *(volatile uint32_t *)addr = value; } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write64_relaxed(uint64_t value, volatile void *addr) { *(volatile uint64_t *)addr = value; } -static inline uint8_t __attribute__((always_inline)) +static __rte_always_inline uint8_t rte_read8(const volatile void *addr) { uint8_t val; @@ -321,7 +319,7 @@ rte_read8(const volatile void *addr) return val; } -static inline uint16_t __attribute__((always_inline)) +static __rte_always_inline uint16_t rte_read16(const volatile void *addr) { uint16_t val; @@ -330,7 +328,7 @@ rte_read16(const volatile void *addr) return val; } -static inline uint32_t __attribute__((always_inline)) +static __rte_always_inline uint32_t rte_read32(const volatile void *addr) { uint32_t val; @@ -339,7 +337,7 @@ rte_read32(const volatile void *addr) return val; } -static inline uint64_t __attribute__((always_inline)) +static __rte_always_inline uint64_t rte_read64(const volatile void *addr) { uint64_t val; @@ -348,28 +346,28 @@ rte_read64(const volatile void *addr) return val; } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write8(uint8_t value, volatile void *addr) { rte_io_wmb(); rte_write8_relaxed(value, addr); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write16(uint16_t value, volatile void *addr) { rte_io_wmb(); rte_write16_relaxed(value, addr); } -static inline void __attribute__((always_inline)) +static __rte_always_inline 
void rte_write32(uint32_t value, volatile void *addr) { rte_io_wmb(); rte_write32_relaxed(value, addr); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_write64(uint64_t value, volatile void *addr) { rte_io_wmb(); diff --git a/lib/librte_eal/common/include/generic/rte_pause.h b/lib/librte_eal/common/include/generic/rte_pause.h new file mode 100644 index 00000000..a8374321 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_pause.h @@ -0,0 +1,52 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Cavium, Inc. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PAUSE_H_ +#define _RTE_PAUSE_H_ + +/** + * @file + * + * CPU pause operation. + * + */ + +/** + * Pause CPU execution for a short while + * + * This call is intended for tight loops which poll a shared resource or wait + * for an event. A short pause within the loop may reduce the power consumption. + */ +static inline void rte_pause(void); + +#endif /* _RTE_PAUSE_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_rwlock.h b/lib/librte_eal/common/include/generic/rte_rwlock.h index 7a0fdc55..fdb3113d 100644 --- a/lib/librte_eal/common/include/generic/rte_rwlock.h +++ b/lib/librte_eal/common/include/generic/rte_rwlock.h @@ -52,6 +52,7 @@ extern "C" { #include <rte_common.h> #include <rte_atomic.h> +#include <rte_pause.h> /** * The rte_rwlock_t type. diff --git a/lib/librte_eal/common/include/generic/rte_spinlock.h b/lib/librte_eal/common/include/generic/rte_spinlock.h index e51fc56b..54f83a4c 100644 --- a/lib/librte_eal/common/include/generic/rte_spinlock.h +++ b/lib/librte_eal/common/include/generic/rte_spinlock.h @@ -51,6 +51,7 @@ #ifdef RTE_FORCE_INTRINSICS #include <rte_common.h> #endif +#include <rte_pause.h> /** * The rte_spinlock_t type. 
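The generic rte_pause() declared above is intended for exactly the kind of spin-wait loops that rte_spinlock and rte_rwlock now pull it in for; on x86 it expands to _mm_pause(), on arm64 to a yield instruction, and to a no-op on armv7 and ppc_64. A minimal usage sketch, assuming a hypothetical flag written by another lcore:

    #include <rte_pause.h>

    /* Hypothetical flag set by another lcore when its work is finished. */
    static volatile int job_done;

    static void
    wait_for_job(void)
    {
            /* Tight polling loop: rte_pause() hints to the CPU that we are
             * busy-waiting, which can lower power use and let a sibling
             * hyper-thread make progress. */
            while (!job_done)
                    rte_pause();
    }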
diff --git a/lib/librte_eal/common/include/rte_alarm.h b/lib/librte_eal/common/include/rte_alarm.h index 4012cd67..c275be18 100644 --- a/lib/librte_eal/common/include/rte_alarm.h +++ b/lib/librte_eal/common/include/rte_alarm.h @@ -91,7 +91,7 @@ int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg); * the number of canceled alarm callback functions * - value greater or equal 0 and rte_errno set to EINPROGRESS, at least one * alarm could not be canceled because cancellation was requested from alarm - * callback context. Returned value is the number of succesfuly canceled + * callback context. Returned value is the number of successfully canceled * alarm callbacks * - 0 and rte_errno set to ENOENT - no alarm found * - -1 and rte_errno set to EINVAL - invalid parameter (NULL callback) diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h index 7c369692..c79368d3 100644 --- a/lib/librte_eal/common/include/rte_bus.h +++ b/lib/librte_eal/common/include/rte_bus.h @@ -1,8 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2016 NXP - * All rights reserved. + * Copyright 2016 NXP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -58,7 +57,7 @@ TAILQ_HEAD(rte_bus_list, rte_bus); /** * Bus specific scan for devices attached on the bus. - * For each bus object, the scan would be reponsible for finding devices and + * For each bus object, the scan would be responsible for finding devices and * adding them to its private device list. * * A bus should mandatorily implement this method. @@ -82,6 +81,94 @@ typedef int (*rte_bus_scan_t)(void); typedef int (*rte_bus_probe_t)(void); /** + * Device iterator to find a device on a bus. + * + * This function returns an rte_device if one of those held by the bus + * matches the data passed as parameter. + * + * If the comparison function returns zero this function should stop iterating + * over any more devices. To continue a search the device of a previous search + * can be passed via the start parameter. + * + * @param cmp + * Comparison function. + * + * @param data + * Data to compare each device against. + * + * @param start + * starting point for the iteration + * + * @return + * The first device matching the data, NULL if none exists. + */ +typedef struct rte_device * +(*rte_bus_find_device_t)(const struct rte_device *start, rte_dev_cmp_t cmp, + const void *data); + +/** + * Implementation specific probe function which is responsible for linking + * devices on that bus with applicable drivers. + * + * @param dev + * Device pointer that was returned by a previous call to find_device. + * + * @return + * 0 on success. + * !0 on error. + */ +typedef int (*rte_bus_plug_t)(struct rte_device *dev); + +/** + * Implementation specific remove function which is responsible for unlinking + * devices on that bus from assigned driver. + * + * @param dev + * Device pointer that was returned by a previous call to find_device. + * + * @return + * 0 on success. + * !0 on error. + */ +typedef int (*rte_bus_unplug_t)(struct rte_device *dev); + +/** + * Bus specific parsing function. + * Validates the syntax used in the textual representation of a device, + * If the syntax is valid and ``addr`` is not NULL, writes the bus-specific + * device representation to ``addr``. 
+ * + * @param[in] name + * device textual description + * + * @param[out] addr + * device information location address, into which parsed info + * should be written. If NULL, nothing should be written, which + * is not an error. + * + * @return + * 0 if parsing was successful. + * !0 for any error. + */ +typedef int (*rte_bus_parse_t)(const char *name, void *addr); + +/** + * Bus scan policies + */ +enum rte_bus_scan_mode { + RTE_BUS_SCAN_UNDEFINED, + RTE_BUS_SCAN_WHITELIST, + RTE_BUS_SCAN_BLACKLIST, +}; + +/** + * A structure used to configure bus operations. + */ +struct rte_bus_conf { + enum rte_bus_scan_mode scan_mode; /**< Scan policy. */ +}; + +/** * A structure describing a generic bus. */ struct rte_bus { @@ -89,6 +176,11 @@ struct rte_bus { const char *name; /**< Name of the bus */ rte_bus_scan_t scan; /**< Scan for devices attached to bus */ rte_bus_probe_t probe; /**< Probe devices on bus */ + rte_bus_find_device_t find_device; /**< Find a device on the bus */ + rte_bus_plug_t plug; /**< Probe single device for drivers */ + rte_bus_unplug_t unplug; /**< Remove single device from driver */ + rte_bus_parse_t parse; /**< Parse a device name */ + struct rte_bus_conf conf; /**< Bus configuration */ }; /** @@ -133,19 +225,68 @@ int rte_bus_probe(void); * * @param f * A valid and open output stream handle + */ +void rte_bus_dump(FILE *f); + +/** + * Bus comparison function. + * + * @param bus + * Bus under test. + * + * @param data + * Data to compare against. * * @return - * 0 in case of success - * !0 in case there is error in opening the output stream + * 0 if the bus matches the data. + * !0 if the bus does not match. + * <0 if ordering is possible and the bus is lower than the data. + * >0 if ordering is possible and the bus is greater than the data. */ -void rte_bus_dump(FILE *f); +typedef int (*rte_bus_cmp_t)(const struct rte_bus *bus, const void *data); + +/** + * Bus iterator to find a particular bus. + * + * This function compares each registered bus to find one that matches + * the data passed as parameter. + * + * If the comparison function returns zero this function will stop iterating + * over any more buses. To continue a search the bus of a previous search can + * be passed via the start parameter. + * + * @param start + * Starting point for the iteration. + * + * @param cmp + * Comparison function. + * + * @param data + * Data to pass to comparison function. + * + * @return + * A pointer to a rte_bus structure or NULL in case no bus matches + */ +struct rte_bus *rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp, + const void *data); + +/** + * Find the registered bus for a particular device. + */ +struct rte_bus *rte_bus_find_by_device(const struct rte_device *dev); + +/** + * Find the registered bus for a given name. + */ +struct rte_bus *rte_bus_find_by_name(const char *busname); /** * Helper for Bus registration. * The constructor has higher priority than PMD constructors. 
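For context, a hedged sketch of how a bus driver would typically use the RTE_REGISTER_BUS() helper defined just below; the bus name and the scan/probe callbacks are placeholders, not part of this patch:

    #include <rte_bus.h>

    /* Placeholder callbacks matching rte_bus_scan_t / rte_bus_probe_t. */
    static int my_bus_scan(void) { return 0; }
    static int my_bus_probe(void) { return 0; }

    static struct rte_bus my_bus = {
            .scan = my_bus_scan,
            .probe = my_bus_probe,
    };

    /* Runs as a constructor before PMD registration; also sets my_bus.name. */
    RTE_REGISTER_BUS(my, my_bus);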
*/ #define RTE_REGISTER_BUS(nm, bus) \ -static void __attribute__((constructor(101), used)) businitfn_ ##nm(void) \ +RTE_INIT_PRIO(businitfn_ ##nm, 101); \ +static void businitfn_ ##nm(void) \ {\ (bus).name = RTE_STR(nm);\ rte_bus_register(&bus); \ diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index e057f6e2..1afc66e3 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -66,6 +66,12 @@ extern "C" { #define RTE_STD_C11 #endif +/** Define GCC_VERSION **/ +#ifdef RTE_TOOLCHAIN_GCC +#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + \ + __GNUC_PATCHLEVEL__) +#endif + #ifdef RTE_ARCH_STRICT_ALIGN typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1))); typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1))); @@ -102,6 +108,16 @@ typedef uint16_t unaligned_uint16_t; */ #define RTE_SET_USED(x) (void)(x) +/** + * Force a function to be inlined + */ +#define __rte_always_inline inline __attribute__((always_inline)) + +/** + * Force a function to be noinlined + */ +#define __rte_noinline __attribute__((noinline)) + /*********** Macros for pointer arithmetic ********/ /** @@ -294,21 +310,6 @@ rte_align64pow2(uint64_t v) /*********** Other general functions / macros ********/ -#ifdef __SSE2__ -#include <emmintrin.h> -/** - * PAUSE instruction for tight loops (avoid busy waiting) - */ -static inline void -rte_pause (void) -{ - _mm_pause(); -} -#else -static inline void -rte_pause(void) {} -#endif - /** * Searches the input parameter for the least significant set bit * (starting from zero). @@ -326,6 +327,23 @@ rte_bsf32(uint32_t v) return __builtin_ctz(v); } +/** + * Return the rounded-up log2 of a integer. + * + * @param v + * The input parameter. + * @return + * The rounded-up log2 of the input, or 0 if the input is 0. + */ +static inline uint32_t +rte_log2_u32(uint32_t v) +{ + if (v == 0) + return 0; + v = rte_align32pow2(v); + return rte_bsf32(v); +} + #ifndef offsetof /** Return the offset of a field in a structure. */ #define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index de20c063..5386d3a2 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -115,6 +115,26 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) } while (0) /** + * Device driver. + */ +enum rte_kernel_driver { + RTE_KDRV_UNKNOWN = 0, + RTE_KDRV_IGB_UIO, + RTE_KDRV_VFIO, + RTE_KDRV_UIO_GENERIC, + RTE_KDRV_NIC_UIO, + RTE_KDRV_NONE, +}; + +/** + * Device policies. + */ +enum rte_dev_policy { + RTE_DEV_WHITELISTED, + RTE_DEV_BLACKLISTED, +}; + +/** * A generic memory resource representation. */ struct rte_mem_resource { @@ -132,6 +152,8 @@ struct rte_driver { const char *alias; /**< Driver alias. */ }; +#define RTE_DEV_NAME_MAX_LEN (32) + /** * A structure describing a generic device. */ @@ -183,13 +205,67 @@ int rte_eal_dev_attach(const char *name, const char *devargs); /** * Detach a device from its driver. * - * @param name - * Same description as for rte_eal_dev_attach(). - * Here, eal will call the driver detaching function. + * @param dev + * A pointer to a rte_device structure. + * @return + * 0 on success, negative on error. + */ +int rte_eal_dev_detach(struct rte_device *dev); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Hotplug add a given device to a specific bus. 
+ * + * @param busname + * The bus name the device is added to. + * @param devname + * The device name. Based on this device name, eal will identify a driver + * capable of handling it and pass it to the driver probing function. + * @param devargs + * Device arguments to be passed to the driver. + * @return + * 0 on success, negative on error. + */ +int rte_eal_hotplug_add(const char *busname, const char *devname, + const char *devargs); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Hotplug remove a given device from a specific bus. + * + * @param busname + * The bus name the device is removed from. + * @param devname + * The device name being removed. * @return * 0 on success, negative on error. */ -int rte_eal_dev_detach(const char *name); +int rte_eal_hotplug_remove(const char *busname, const char *devname); + +/** + * Device comparison function. + * + * This type of function is used to compare an rte_device with arbitrary + * data. + * + * @param dev + * Device handle. + * + * @param data + * Data to compare against. The type of this parameter is determined by + * the kind of comparison performed by the function. + * + * @return + * 0 if the device matches the data. + * !0 if the device does not match. + * <0 if ordering is possible and the device is lower than the data. + * >0 if ordering is possible and the device is greater than the data. + */ +typedef int (*rte_dev_cmp_t)(const struct rte_device *dev, const void *data); #define RTE_PMD_EXPORT_NAME_ARRAY(n, idx) n##idx[] diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h index 88120a1c..58d585df 100644 --- a/lib/librte_eal/common/include/rte_devargs.h +++ b/lib/librte_eal/common/include/rte_devargs.h @@ -50,7 +50,7 @@ extern "C" { #include <stdio.h> #include <sys/queue.h> -#include <rte_pci.h> +#include <rte_bus.h> /** * Type of generic device @@ -76,19 +76,12 @@ struct rte_devargs { TAILQ_ENTRY(rte_devargs) next; /** Type of device. */ enum rte_devtype type; - RTE_STD_C11 - union { - /** Used if type is RTE_DEVTYPE_*_PCI. */ - struct { - /** PCI location. */ - struct rte_pci_addr addr; - } pci; - /** Used if type is RTE_DEVTYPE_VIRTUAL. */ - struct { - /** Driver name. */ - char drv_name[32]; - } virt; - }; + /** Device policy. */ + enum rte_dev_policy policy; + /** Bus handle for the device. */ + struct rte_bus *bus; + /** Name of the device. */ + char name[RTE_DEV_NAME_MAX_LEN]; /** Arguments string as given by user or "" for no argument. */ char *args; }; @@ -128,6 +121,39 @@ int rte_eal_parse_devargs_str(const char *devargs_str, char **drvname, char **drvargs); /** + * Parse a device string. + * + * Verify that a bus is capable of handling the device passed + * in argument. Store which bus will handle the device, its name + * and the eventual device parameters. + * + * @param dev + * The device declaration string. + * @param da + * The devargs structure holding the device information. + * + * @return + * - 0 on success. + * - Negative errno on error. + */ +int +rte_eal_devargs_parse(const char *dev, + struct rte_devargs *da); + +/** + * Insert an rte_devargs in the global list. + * + * @param da + * The devargs structure to insert. + * + * @return + * - 0 on success + * - Negative on error. 
+ */ +int +rte_eal_devargs_insert(struct rte_devargs *da); + +/** * Add a device to the user device list * * For PCI devices, the format of arguments string is "PCI_ADDR" or @@ -152,6 +178,24 @@ int rte_eal_parse_devargs_str(const char *devargs_str, int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str); /** + * Remove a device from the user device list. + * Its resources are freed. + * If the devargs cannot be found, nothing happens. + * + * @param busname + * bus name of the devargs to remove. + * + * @param devname + * device name of the devargs to remove. + * + * @return + * 0 on success. + * <0 on error. + * >0 if the devargs was not within the user device list. + */ +int rte_eal_devargs_remove(const char *busname, const char *devname); + +/** * Count the number of user devices of a specified type * * @param devtype diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h index abf020bf..0e7363d7 100644 --- a/lib/librte_eal/common/include/rte_eal.h +++ b/lib/librte_eal/common/include/rte_eal.h @@ -61,6 +61,7 @@ extern "C" { enum rte_lcore_role_t { ROLE_RTE, ROLE_OFF, + ROLE_SERVICE, }; /** @@ -80,6 +81,7 @@ enum rte_proc_type_t { struct rte_config { uint32_t master_lcore; /**< Id of the master lcore */ uint32_t lcore_count; /**< Number of available logical cores. */ + uint32_t service_lcore_count;/**< Number of available service cores. */ enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */ /** Primary or secondary configuration */ @@ -185,6 +187,8 @@ int rte_eal_iopl_init(void); * * EPROTO indicates that the PCI bus is either not present, or is not * readable by the eal. + * + * ENOEXEC indicates that a service core failed to launch successfully. */ int rte_eal_init(int argc, char **argv); @@ -286,6 +290,9 @@ static inline int rte_gettid(void) #define RTE_INIT(func) \ static void __attribute__((constructor, used)) func(void) +#define RTE_INIT_PRIO(func, prio) \ +static void __attribute__((constructor(prio), used)) func(void) + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h index 2b5e0b17..b9eee702 100644 --- a/lib/librte_eal/common/include/rte_eal_memconfig.h +++ b/lib/librte_eal/common/include/rte_eal_memconfig.h @@ -39,6 +39,7 @@ #include <rte_memzone.h> #include <rte_malloc_heap.h> #include <rte_rwlock.h> +#include <rte_pause.h> #ifdef __cplusplus extern "C" { diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h index fe7b5865..50e0d0fe 100644 --- a/lib/librte_eal/common/include/rte_lcore.h +++ b/lib/librte_eal/common/include/rte_lcore.h @@ -73,6 +73,7 @@ struct lcore_config { unsigned core_id; /**< core number on socket for this lcore */ int core_index; /**< relative index, starting from 0 */ rte_cpuset_t cpuset; /**< cpu set which the lcore affinity to */ + uint8_t core_role; /**< role of core eg: OFF, RTE, SERVICE */ }; /** @@ -175,7 +176,7 @@ rte_lcore_is_enabled(unsigned lcore_id) struct rte_config *cfg = rte_eal_get_configuration(); if (lcore_id >= RTE_MAX_LCORE) return 0; - return cfg->lcore_role[lcore_id] != ROLE_OFF; + return cfg->lcore_role[lcore_id] == ROLE_RTE; } /** diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h index 34191385..ec8dba79 100644 --- a/lib/librte_eal/common/include/rte_log.h +++ b/lib/librte_eal/common/include/rte_log.h @@ -175,6 +175,16 @@ __rte_deprecated uint32_t rte_get_log_type(void); 
/** + * Get the log level for a given type. + * + * @param logtype + * The log type identifier. + * @return + * 0 on success, a negative value if logtype is invalid. + */ +int rte_log_get_level(uint32_t logtype); + +/** * Set the log level for a given type. * * @param pattern diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h index 008ce134..3d37f79b 100644 --- a/lib/librte_eal/common/include/rte_malloc.h +++ b/lib/librte_eal/common/include/rte_malloc.h @@ -327,9 +327,9 @@ rte_malloc_set_limit(const char *type, size_t max); * rte_malloc * * @param addr - * Adress obtained from a previous rte_malloc call + * Address obtained from a previous rte_malloc call * @return - * NULL on error + * RTE_BAD_PHYS_ADDR on error * otherwise return physical address of the buffer */ phys_addr_t diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h index ab64c63c..8b123391 100644 --- a/lib/librte_eal/common/include/rte_pci.h +++ b/lib/librte_eal/common/include/rte_pci.h @@ -2,6 +2,7 @@ * BSD LICENSE * * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright 2013-2014 6WIND S.A. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,36 +31,6 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* BSD LICENSE - * - * Copyright 2013-2014 6WIND S.A. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
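A short sketch of the caller-side impact of the rte_malloc_virt2phy() change above: the error sentinel is now RTE_BAD_PHYS_ADDR rather than NULL, so callers should compare against it explicitly. The buffer size and the untyped allocation are arbitrary choices for the example.

    #include <stdio.h>
    #include <rte_malloc.h>
    #include <rte_memory.h>

    static void
    check_phys_addr(void)
    {
        void *buf = rte_malloc(NULL, 128, 0);
        phys_addr_t pa = rte_malloc_virt2phy(buf);

        if (pa == RTE_BAD_PHYS_ADDR)
            printf("no physical address (e.g. running with --no-huge)\n");
        rte_free(buf);
    }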
- */ #ifndef _RTE_PCI_H_ #define _RTE_PCI_H_ @@ -92,7 +63,7 @@ const char *pci_get_sysfs_path(void); /** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ #define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 -#define PCI_PRI_STR_SIZE sizeof("XXXX:XX:XX.X") +#define PCI_PRI_STR_SIZE sizeof("XXXXXXXX:XX:XX.X") /** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */ #define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 @@ -106,9 +77,6 @@ const char *pci_get_sysfs_path(void); /** Maximum number of PCI resources. */ #define PCI_MAX_RESOURCE 6 -/** Name of PCI Bus */ -#define PCI_BUS_NAME "PCI" - /* Forward declarations */ struct rte_pci_device; struct rte_pci_driver; @@ -141,7 +109,7 @@ struct rte_pci_id { * A structure describing the location of a PCI device. */ struct rte_pci_addr { - uint16_t domain; /**< Device domain */ + uint32_t domain; /**< Device domain */ uint8_t bus; /**< Device bus */ uint8_t devid; /**< Device ID */ uint8_t function; /**< Device function. */ @@ -149,15 +117,6 @@ struct rte_pci_addr { struct rte_devargs; -enum rte_kernel_driver { - RTE_KDRV_UNKNOWN = 0, - RTE_KDRV_IGB_UIO, - RTE_KDRV_VFIO, - RTE_KDRV_UIO_GENERIC, - RTE_KDRV_NIC_UIO, - RTE_KDRV_NONE, -}; - /** * A structure describing a PCI device. */ @@ -241,6 +200,8 @@ struct rte_pci_bus { #define RTE_PCI_DRV_INTR_LSC 0x0008 /** Device driver supports device removal interrupt */ #define RTE_PCI_DRV_INTR_RMV 0x0010 +/** Device driver needs to keep mapped resources if unsupported dev detected */ +#define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020 /** * A structure describing a PCI mapping. @@ -373,10 +334,10 @@ rte_eal_compare_pci_addr(const struct rte_pci_addr *addr, if ((addr == NULL) || (addr2 == NULL)) return -1; - dev_addr = (addr->domain << 24) | (addr->bus << 16) | - (addr->devid << 8) | addr->function; - dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) | - (addr2->devid << 8) | addr2->function; + dev_addr = ((uint64_t)addr->domain << 24) | + (addr->bus << 16) | (addr->devid << 8) | addr->function; + dev_addr2 = ((uint64_t)addr2->domain << 24) | + (addr2->bus << 16) | (addr2->devid << 8) | addr2->function; if (dev_addr > dev_addr2) return 1; diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h new file mode 100644 index 00000000..7c6f7383 --- /dev/null +++ b/lib/librte_eal/common/include/rte_service.h @@ -0,0 +1,387 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_SERVICE_H_ +#define _RTE_SERVICE_H_ + +/** + * @file + * + * Service functions + * + * The service functionality provided by this header allows a DPDK component + * to indicate that it requires a function call in order for it to perform + * its processing. + * + * An example usage of this functionality would be a component that registers + * a service to perform a particular packet processing duty: for example the + * eventdev software PMD. At startup the application requests all services + * that have been registered, and the cores in the service-coremask run the + * required services. The EAL removes these number of cores from the available + * runtime cores, and dedicates them to performing service-core workloads. The + * application has access to the remaining lcores as normal. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include<stdio.h> +#include <stdint.h> +#include <sys/queue.h> + +#include <rte_lcore.h> + +/* forward declaration only. Definition in rte_service_private.h */ +struct rte_service_spec; + +#define RTE_SERVICE_NAME_MAX 32 + +/* Capabilities of a service. + * + * Use the *rte_service_probe_capability* function to check if a service is + * capable of a specific capability. + */ +/** When set, the service is capable of having multiple threads run it at the + * same time. + */ +#define RTE_SERVICE_CAP_MT_SAFE (1 << 0) + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Return the number of services registered. + * + * The number of services registered can be passed to *rte_service_get_by_id*, + * enabling the application to retrieve the specification of each service. + * + * @return The number of services registered. + */ +uint32_t rte_service_get_count(void); + + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Return the specification of a service by integer id. + * + * This function provides the specification of a service. This can be used by + * the application to understand what the service represents. The service + * must not be modified by the application directly, only passed to the various + * rte_service_* functions. + * + * @param id The integer id of the service to retrieve + * @retval non-zero A valid pointer to the service_spec + * @retval NULL Invalid *id* provided. + */ +struct rte_service_spec *rte_service_get_by_id(uint32_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Return the specification of a service by name. + * + * This function provides the specification of a service using the service name + * as lookup key. This can be used by the application to understand what the + * service represents. The service must not be modified by the application + * directly, only passed to the various rte_service_* functions. + * + * @param name The name of the service to retrieve + * @retval non-zero A valid pointer to the service_spec + * @retval NULL Invalid *name* provided. 
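A minimal sketch of how an application could enumerate the registered services using the lookup functions in this header; rte_service_get_name() and rte_service_probe_capability() are declared a little further down in the same file.

    #include <stdio.h>
    #include <rte_service.h>

    static void
    list_services(void)
    {
        uint32_t i;
        const uint32_t count = rte_service_get_count();

        for (i = 0; i < count; i++) {
            struct rte_service_spec *s = rte_service_get_by_id(i);
            if (s == NULL)
                continue;
            printf("service %u: %s, MT safe: %d\n", i,
                rte_service_get_name(s),
                rte_service_probe_capability(s,
                    RTE_SERVICE_CAP_MT_SAFE) != 0);
        }
    }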
+ */
+struct rte_service_spec *rte_service_get_by_name(const char *name);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Return the name of the service.
+ *
+ * @return A pointer to the name of the service. The returned pointer remains
+ *         in ownership of the service, and the application must not free it.
+ */
+const char *rte_service_get_name(const struct rte_service_spec *service);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Check if a service has a specific capability.
+ *
+ * This function returns whether *service* implements *capability*.
+ * See the RTE_SERVICE_CAP_* defines for a list of valid capabilities.
+ * @retval 1 Capability supported by this service instance
+ * @retval 0 Capability not supported by this service instance
+ */
+int32_t rte_service_probe_capability(const struct rte_service_spec *service,
+		uint32_t capability);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Enable a core to run a service.
+ *
+ * Each core can be added or removed from running specific services. This
+ * function adds *lcore* to the set of cores that will run *service*.
+ *
+ * If multiple cores are enabled on a service, an atomic is used to ensure that
+ * only one core runs the service at a time. The exception to this is when
+ * a service indicates that it is multi-thread safe by setting the capability
+ * called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set,
+ * the service function can be run on multiple threads at the same time.
+ *
+ * @retval 0 lcore added successfully
+ * @retval -EINVAL An invalid service or lcore was provided.
+ */
+int32_t rte_service_enable_on_lcore(struct rte_service_spec *service,
+		uint32_t lcore);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Disable a core from running a service.
+ *
+ * Each core can be added or removed from running specific services. This
+ * function removes *lcore* from the set of cores that will run *service*.
+ *
+ * @retval 0 Lcore removed successfully
+ * @retval -EINVAL An invalid service or lcore was provided.
+ */
+int32_t rte_service_disable_on_lcore(struct rte_service_spec *service,
+		uint32_t lcore);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Return whether an lcore is enabled for the service.
+ *
+ * This function allows the application to query if *lcore* is currently set to
+ * run *service*.
+ *
+ * @retval 1 Service is enabled on this lcore
+ * @retval 0 Service is disabled on this lcore
+ * @retval -EINVAL An invalid service or lcore was provided.
+ */
+int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service,
+		uint32_t lcore);
+
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Enable *service* to run.
+ *
+ * This function switches on a service during runtime.
+ * @retval 0 The service was successfully started
+ */
+int32_t rte_service_start(struct rte_service_spec *service);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Disable *service*.
+ *
+ * Switch off a service, so it is not run until *rte_service_start* is
+ * called on it again.
+ * @retval 0 Service successfully switched off
+ */
+int32_t rte_service_stop(struct rte_service_spec *service);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Return whether *service* is currently running.
+ *
+ * This function returns true if the service has been started using
+ * *rte_service_start*, AND a service core is mapped to the service. This
+ * function can be used to ensure that the service will be run.
+ *
+ * @retval 1 Service is currently running, and has a service lcore mapped
+ * @retval 0 Service is currently stopped, or no service lcore is mapped
+ * @retval -EINVAL Invalid service pointer provided
+ */
+int32_t rte_service_is_running(const struct rte_service_spec *service);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Start a service core.
+ *
+ * Starting a core makes the core begin polling. Any services assigned to it
+ * will be run as fast as possible.
+ *
+ * @retval 0 Success
+ * @retval -EINVAL Failed to start core. The *lcore_id* passed in is not
+ *          currently assigned to be a service core.
+ */
+int32_t rte_service_lcore_start(uint32_t lcore_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Stop a service core.
+ *
+ * Stopping a core makes the core become idle, but it remains assigned as a
+ * service core.
+ *
+ * @retval 0 Success
+ * @retval -EINVAL Invalid *lcore_id* provided
+ * @retval -EALREADY Already stopped core
+ * @retval -EBUSY Failed to stop core, as it would cause a service to not
+ *          be run, as this is the only core currently running the service.
+ *          The application must stop the service first, and then stop the
+ *          lcore.
+ */
+int32_t rte_service_lcore_stop(uint32_t lcore_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Add an lcore to the list of service cores.
+ *
+ * This function can be used at runtime in order to modify the service core
+ * mask.
+ *
+ * @retval 0 Success
+ * @retval -EBUSY lcore is busy, and not available for service core duty
+ * @retval -EALREADY lcore is already added to the service core list
+ * @retval -EINVAL Invalid lcore provided
+ */
+int32_t rte_service_lcore_add(uint32_t lcore);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Remove an lcore from the list of service cores.
+ *
+ * This can fail if the core is not stopped, see *rte_service_lcore_stop*.
+ *
+ * @retval 0 Success
+ * @retval -EBUSY Lcore is not stopped, stop the service core before removing.
+ * @retval -EINVAL Invalid lcore provided
+ */
+int32_t rte_service_lcore_del(uint32_t lcore);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the number of service cores currently available.
+ *
+ * This function returns the integer count of service cores available. The
+ * service core count can be used in mapping logic when creating mappings
+ * from service cores to services.
+ *
+ * See *rte_service_lcore_list* for details on retrieving the lcore_id of each
+ * service core.
+ *
+ * @return The number of service cores currently configured.
+ */
+int32_t rte_service_lcore_count(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reset all service core mappings. This does not remove the service cores
+ * from duty, it just unmaps all services from the cores and stops the service
+ * cores. The runstate of services is not modified.
+ *
+ * @retval 0 Success
+ */
+int32_t rte_service_lcore_reset_all(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Enable or disable statistics collection for *service*.
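Tying the calls documented above together, a minimal application-side sketch that takes one lcore as a service core, maps a service onto it and starts both. Error handling is reduced to simple return checks and the lcore id is whatever the caller chooses.

    #include <errno.h>
    #include <rte_service.h>

    static int
    run_service_on_lcore(struct rte_service_spec *s, uint32_t lcore_id)
    {
        int ret = rte_service_lcore_add(lcore_id);
        if (ret && ret != -EALREADY)
            return ret;
        ret = rte_service_enable_on_lcore(s, lcore_id);
        if (ret)
            return ret;
        ret = rte_service_start(s);
        if (ret)
            return ret;
        return rte_service_lcore_start(lcore_id);
    }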
+ * + * This function enables per core, per-service cycle count collection. + * @param service The service to enable statistics gathering on. + * @param enable Zero to disable statistics, non-zero to enable. + * @retval 0 Success + * @retval -EINVAL Invalid service pointer passed + */ +int32_t rte_service_set_stats_enable(struct rte_service_spec *service, + int32_t enable); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve the list of currently enabled service cores. + * + * This function fills in an application supplied array, with each element + * indicating the lcore_id of a service core. + * + * Adding and removing service cores can be performed using + * *rte_service_lcore_add* and *rte_service_lcore_del*. + * @param [out] array An array of at least *rte_service_lcore_count* items. + * If statically allocating the buffer, use RTE_MAX_LCORE. + * @param [out] n The size of *array*. + * @retval >=0 Number of service cores that have been populated in the array + * @retval -ENOMEM The provided array is not large enough to fill in the + * service core list. No items have been populated, call this function + * with a size of at least *rte_service_core_count* items. + */ +int32_t rte_service_lcore_list(uint32_t array[], uint32_t n); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Dumps any information available about the service. If service is NULL, + * dumps info for all services. + */ +int32_t rte_service_dump(FILE *f, struct rte_service_spec *service); + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_SERVICE_H_ */ diff --git a/lib/librte_eal/common/include/rte_service_component.h b/lib/librte_eal/common/include/rte_service_component.h new file mode 100644 index 00000000..7a946a1e --- /dev/null +++ b/lib/librte_eal/common/include/rte_service_component.h @@ -0,0 +1,144 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef _RTE_SERVICE_PRIVATE_H_
+#define _RTE_SERVICE_PRIVATE_H_
+
+/* This file specifies the internal service specification.
+ * Include this file if you are writing a component that requires CPU cycles to
+ * operate, and you wish to run the component using service cores.
+ */
+
+#include <rte_service.h>
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Signature of callback function to run a service.
+ */
+typedef int32_t (*rte_service_func)(void *args);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * The specification of a service.
+ *
+ * This struct contains metadata about the service itself, the callback
+ * function to run one iteration of the service, a userdata pointer, flags etc.
+ */
+struct rte_service_spec {
+	/** The name of the service. This should be used by the application to
+	 * understand what purpose this service provides.
+	 */
+	char name[RTE_SERVICE_NAME_MAX];
+	/** The callback to invoke to run one iteration of the service. */
+	rte_service_func callback;
+	/** The userdata pointer provided to the service callback. */
+	void *callback_userdata;
+	/** Flags to indicate the capabilities of this service. See defines in
+	 * the public header file for values of RTE_SERVICE_CAP_*
+	 */
+	uint32_t capabilities;
+	/** NUMA socket ID that this service is affinitized to */
+	int socket_id;
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a new service.
+ *
+ * A service represents a component that requires CPU time periodically to
+ * achieve its purpose.
+ *
+ * For example the eventdev SW PMD requires CPU cycles to perform its
+ * scheduling. This can be achieved by registering it as a service, and the
+ * application can then assign CPU resources to it using
+ * *rte_service_enable_on_lcore*.
+ *
+ * @param spec The specification of the service to register
+ * @retval 0 Successfully registered the service.
+ *         -EINVAL Attempted to register an invalid service (eg, no callback
+ *         set)
+ */
+int32_t rte_service_register(const struct rte_service_spec *spec);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Unregister a service.
+ *
+ * The service being removed must be stopped before calling this function.
+ *
+ * @retval 0 The service was successfully unregistered.
+ * @retval -EBUSY The service is currently running, stop the service before
+ *         calling unregister. No action has been taken.
+ */
+int32_t rte_service_unregister(struct rte_service_spec *service);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Private function to allow EAL to initialize default mappings.
+ *
+ * This function iterates over all the services, and maps them to the available
+ * cores. Based on the capabilities of the services, they are set to run on the
+ * available cores in a round-robin manner.
+ *
+ * @retval 0 Success
+ * @retval -ENOTSUP No service lcores in use
+ * @retval -EINVAL Error while iterating over services
+ * @retval -ENODEV Error in enabling service lcore on a service
+ * @retval -ENOEXEC Error when starting services
+ */
+int32_t rte_service_start_with_defaults(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Initialize the service library.
+ *
+ * In order to use the service library, it must be initialized. EAL initializes
+ * the library at startup.
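On the component side, a minimal sketch of filling in an rte_service_spec and registering it; the service name, the callback and the MT-safe capability are assumptions chosen purely for illustration.

    #include <stdio.h>
    #include <string.h>
    #include <rte_lcore.h>
    #include <rte_service_component.h>

    static int32_t
    my_service_run(void *userdata)
    {
        /* one iteration of the component's work */
        (void)userdata;
        return 0;
    }

    static int
    my_service_setup(void *priv)
    {
        struct rte_service_spec spec;

        memset(&spec, 0, sizeof(spec));
        snprintf(spec.name, sizeof(spec.name), "my_component_service");
        spec.callback = my_service_run;
        spec.callback_userdata = priv;
        spec.capabilities = RTE_SERVICE_CAP_MT_SAFE;
        spec.socket_id = rte_socket_id();
        return rte_service_register(&spec);
    }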
+ * + * @retval 0 Success + * @retval -EALREADY Service library is already initialized + */ +int32_t rte_service_init(void); + +#endif /* _RTE_SERVICE_PRIVATE_H_ */ diff --git a/lib/librte_eal/common/include/rte_time.h b/lib/librte_eal/common/include/rte_time.h index 28c6274c..373c41ac 100644 --- a/lib/librte_eal/common/include/rte_time.h +++ b/lib/librte_eal/common/include/rte_time.h @@ -52,7 +52,7 @@ struct rte_timecounter { uint64_t nsec_mask; /** Sub-nanoseconds count. */ uint64_t nsec_frac; - /** Bitmask for two's complement substraction of non-64 bit counters. */ + /** Bitmask for two's complement subtraction of non-64 bit counters. */ uint64_t cc_mask; /** Cycle to nanosecond divisor (power of two). */ uint32_t cc_shift; diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h index e6b678ea..29f5a523 100644 --- a/lib/librte_eal/common/include/rte_vdev.h +++ b/lib/librte_eal/common/include/rte_vdev.h @@ -46,11 +46,18 @@ struct rte_vdev_device { struct rte_device device; /**< Inherit core device */ }; +/** + * @internal + * Helper macro for drivers that need to convert to struct rte_vdev_device. + */ +#define RTE_DEV_TO_VDEV(ptr) \ + container_of(ptr, struct rte_vdev_device, device) + static inline const char * rte_vdev_device_name(const struct rte_vdev_device *dev) { - if (dev && dev->device.devargs) - return dev->device.devargs->virt.drv_name; + if (dev && dev->device.name) + return dev->device.name; return NULL; } diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index c36d8526..a69a7075 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -61,12 +61,12 @@ extern "C" { /** * Minor version/month number i.e. the mm in yy.mm.z */ -#define RTE_VER_MONTH 5 +#define RTE_VER_MONTH 8 /** * Patch level number i.e. 
the z in yy.mm.z */ -#define RTE_VER_MINOR 1 +#define RTE_VER_MINOR 0 /** * Extra string to be appended to version number diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c index 42568e1d..15076905 100644 --- a/lib/librte_eal/common/malloc_elem.c +++ b/lib/librte_eal/common/malloc_elem.c @@ -51,7 +51,7 @@ #define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE) /* - * initialise a general malloc_elem header structure + * Initialize a general malloc_elem header structure */ void malloc_elem_init(struct malloc_elem *elem, @@ -69,7 +69,7 @@ malloc_elem_init(struct malloc_elem *elem, } /* - * initialise a dummy malloc_elem header for the end-of-memseg marker + * Initialize a dummy malloc_elem header for the end-of-memseg marker */ void malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev) @@ -228,7 +228,7 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align, elem->pad = old_elem_size; /* put a dummy header in padding, to point to real element header */ - if (elem->pad > 0){ /* pad will be at least 64-bytes, as everything + if (elem->pad > 0) { /* pad will be at least 64-bytes, as everything * is cache-line aligned */ new_elem->pad = elem->pad; new_elem->state = ELEM_PAD; @@ -314,17 +314,16 @@ malloc_elem_free(struct malloc_elem *elem) int malloc_elem_resize(struct malloc_elem *elem, size_t size) { - const size_t new_size = size + MALLOC_ELEM_OVERHEAD; + const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD; /* if we request a smaller size, then always return ok */ - const size_t current_size = elem->size - elem->pad; - if (current_size >= new_size) + if (elem->size >= new_size) return 0; struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size); rte_spinlock_lock(&elem->heap->lock); if (next ->state != ELEM_FREE) goto err_return; - if (current_size + next->size < new_size) + if (elem->size + next->size < new_size) goto err_return; /* we now know the element fits, so remove from free list, @@ -333,7 +332,7 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size) elem_free_list_remove(next); join_elem(elem, next); - if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD){ + if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) { /* now we have a big block together. Lets cut it down a bit, by splitting */ struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size); split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE); diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c index 9765d1bd..cdd69560 100644 --- a/lib/librte_eal/common/rte_keepalive.c +++ b/lib/librte_eal/common/rte_keepalive.c @@ -38,7 +38,6 @@ #include <rte_log.h> #include <rte_keepalive.h> #include <rte_malloc.h> -#include <rte_cycles.h> struct rte_keepalive { /** Core Liveness. 
*/ diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index f4a88352..5c0627bf 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -253,6 +253,8 @@ rte_malloc_virt2phy(const void *addr) { const struct malloc_elem *elem = malloc_elem_from_data(addr); if (elem == NULL) - return 0; + return RTE_BAD_PHYS_ADDR; + if (elem->ms->phys_addr == RTE_BAD_PHYS_ADDR) + return RTE_BAD_PHYS_ADDR; return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr); } diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c new file mode 100644 index 00000000..7efb76dc --- /dev/null +++ b/lib/librte_eal/common/rte_service.c @@ -0,0 +1,706 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <unistd.h> +#include <inttypes.h> +#include <limits.h> +#include <string.h> +#include <dirent.h> + +#include <rte_service.h> +#include "include/rte_service_component.h" + +#include <rte_eal.h> +#include <rte_lcore.h> +#include <rte_common.h> +#include <rte_debug.h> +#include <rte_cycles.h> +#include <rte_atomic.h> +#include <rte_memory.h> +#include <rte_malloc.h> + +#define RTE_SERVICE_NUM_MAX 64 + +#define SERVICE_F_REGISTERED (1 << 0) +#define SERVICE_F_STATS_ENABLED (1 << 1) + +/* runstates for services and lcores, denoting if they are active or not */ +#define RUNSTATE_STOPPED 0 +#define RUNSTATE_RUNNING 1 + +/* internal representation of a service */ +struct rte_service_spec_impl { + /* public part of the struct */ + struct rte_service_spec spec; + + /* atomic lock that when set indicates a service core is currently + * running this service callback. When not set, a core may take the + * lock and then run the service callback. 
+ */ + rte_atomic32_t execute_lock; + + /* API set/get-able variables */ + int32_t runstate; + uint8_t internal_flags; + + /* per service statistics */ + uint32_t num_mapped_cores; + uint64_t calls; + uint64_t cycles_spent; +} __rte_cache_aligned; + +/* the internal values of a service core */ +struct core_state { + /* map of services IDs are run on this core */ + uint64_t service_mask; + uint8_t runstate; /* running or stopped */ + uint8_t is_service_core; /* set if core is currently a service core */ + + /* extreme statistics */ + uint64_t calls_per_service[RTE_SERVICE_NUM_MAX]; +} __rte_cache_aligned; + +static uint32_t rte_service_count; +static struct rte_service_spec_impl *rte_services; +static struct core_state *lcore_states; +static uint32_t rte_service_library_initialized; + +int32_t rte_service_init(void) +{ + if (rte_service_library_initialized) { + printf("service library init() called, init flag %d\n", + rte_service_library_initialized); + return -EALREADY; + } + + rte_services = rte_calloc("rte_services", RTE_SERVICE_NUM_MAX, + sizeof(struct rte_service_spec_impl), + RTE_CACHE_LINE_SIZE); + if (!rte_services) { + printf("error allocating rte services array\n"); + return -ENOMEM; + } + + lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE, + sizeof(struct core_state), RTE_CACHE_LINE_SIZE); + if (!lcore_states) { + printf("error allocating core states array\n"); + return -ENOMEM; + } + + int i; + int count = 0; + struct rte_config *cfg = rte_eal_get_configuration(); + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (lcore_config[i].core_role == ROLE_SERVICE) { + if ((unsigned int)i == cfg->master_lcore) + continue; + rte_service_lcore_add(i); + count++; + } + } + + rte_service_library_initialized = 1; + return 0; +} + +/* returns 1 if service is registered and has not been unregistered + * Returns 0 if service never registered, or has been unregistered + */ +static inline int +service_valid(uint32_t id) +{ + return !!(rte_services[id].internal_flags & SERVICE_F_REGISTERED); +} + +/* returns 1 if statistics should be colleced for service + * Returns 0 if statistics should not be collected for service + */ +static inline int +service_stats_enabled(struct rte_service_spec_impl *impl) +{ + return !!(impl->internal_flags & SERVICE_F_STATS_ENABLED); +} + +static inline int +service_mt_safe(struct rte_service_spec_impl *s) +{ + return s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE; +} + +int32_t rte_service_set_stats_enable(struct rte_service_spec *service, + int32_t enabled) +{ + struct rte_service_spec_impl *impl = + (struct rte_service_spec_impl *)service; + if (!impl) + return -EINVAL; + + if (enabled) + impl->internal_flags |= SERVICE_F_STATS_ENABLED; + else + impl->internal_flags &= ~(SERVICE_F_STATS_ENABLED); + + return 0; +} + +uint32_t +rte_service_get_count(void) +{ + return rte_service_count; +} + +struct rte_service_spec * +rte_service_get_by_id(uint32_t id) +{ + struct rte_service_spec *service = NULL; + if (id < rte_service_count) + service = (struct rte_service_spec *)&rte_services[id]; + + return service; +} + +struct rte_service_spec *rte_service_get_by_name(const char *name) +{ + struct rte_service_spec *service = NULL; + int i; + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if (service_valid(i) && + strcmp(name, rte_services[i].spec.name) == 0) { + service = (struct rte_service_spec *)&rte_services[i]; + break; + } + } + + return service; +} + +const char * +rte_service_get_name(const struct rte_service_spec *service) +{ + return service->name; +} + 
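As a usage sketch for the statistics path implemented here, an application could switch collection on for one service and later dump the counters for all services; the sequence below assumes the mapped service cores run for some time between the two calls.

    #include <stdio.h>
    #include <rte_service.h>

    static void
    dump_service_stats(struct rte_service_spec *s)
    {
        rte_service_set_stats_enable(s, 1);
        /* ... let the mapped service cores run for a while ... */
        rte_service_dump(stdout, NULL); /* NULL dumps all services */
    }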
+int32_t +rte_service_probe_capability(const struct rte_service_spec *service, + uint32_t capability) +{ + return service->capabilities & capability; +} + +int32_t +rte_service_is_running(const struct rte_service_spec *spec) +{ + const struct rte_service_spec_impl *impl = + (const struct rte_service_spec_impl *)spec; + if (!impl) + return -EINVAL; + + return (impl->runstate == RUNSTATE_RUNNING) && + (impl->num_mapped_cores > 0); +} + +int32_t +rte_service_register(const struct rte_service_spec *spec) +{ + uint32_t i; + int32_t free_slot = -1; + + if (spec->callback == NULL || strlen(spec->name) == 0) + return -EINVAL; + + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if (!service_valid(i)) { + free_slot = i; + break; + } + } + + if ((free_slot < 0) || (i == RTE_SERVICE_NUM_MAX)) + return -ENOSPC; + + struct rte_service_spec_impl *s = &rte_services[free_slot]; + s->spec = *spec; + s->internal_flags |= SERVICE_F_REGISTERED; + + rte_smp_wmb(); + rte_service_count++; + + return 0; +} + +int32_t +rte_service_unregister(struct rte_service_spec *spec) +{ + struct rte_service_spec_impl *s = NULL; + struct rte_service_spec_impl *spec_impl = + (struct rte_service_spec_impl *)spec; + + uint32_t i; + uint32_t service_id; + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if (&rte_services[i] == spec_impl) { + s = spec_impl; + service_id = i; + break; + } + } + + if (!s) + return -EINVAL; + + rte_service_count--; + rte_smp_wmb(); + + s->internal_flags &= ~(SERVICE_F_REGISTERED); + + for (i = 0; i < RTE_MAX_LCORE; i++) + lcore_states[i].service_mask &= ~(UINT64_C(1) << service_id); + + memset(&rte_services[service_id], 0, + sizeof(struct rte_service_spec_impl)); + + return 0; +} + +int32_t +rte_service_start(struct rte_service_spec *service) +{ + struct rte_service_spec_impl *s = + (struct rte_service_spec_impl *)service; + s->runstate = RUNSTATE_RUNNING; + rte_smp_wmb(); + return 0; +} + +int32_t +rte_service_stop(struct rte_service_spec *service) +{ + struct rte_service_spec_impl *s = + (struct rte_service_spec_impl *)service; + s->runstate = RUNSTATE_STOPPED; + rte_smp_wmb(); + return 0; +} + +static int32_t +rte_service_runner_func(void *arg) +{ + RTE_SET_USED(arg); + uint32_t i; + const int lcore = rte_lcore_id(); + struct core_state *cs = &lcore_states[lcore]; + + while (lcore_states[lcore].runstate == RUNSTATE_RUNNING) { + const uint64_t service_mask = cs->service_mask; + for (i = 0; i < rte_service_count; i++) { + struct rte_service_spec_impl *s = &rte_services[i]; + if (s->runstate != RUNSTATE_RUNNING || + !(service_mask & (UINT64_C(1) << i))) + continue; + + /* check do we need cmpset, if MT safe or <= 1 core + * mapped, atomic ops are not required. 
+ */ + const int need_cmpset = !((service_mt_safe(s) == 0) && + (s->num_mapped_cores > 1)); + uint32_t *lock = (uint32_t *)&s->execute_lock; + + if (need_cmpset || rte_atomic32_cmpset(lock, 0, 1)) { + void *userdata = s->spec.callback_userdata; + + if (service_stats_enabled(s)) { + uint64_t start = rte_rdtsc(); + s->spec.callback(userdata); + uint64_t end = rte_rdtsc(); + s->cycles_spent += end - start; + cs->calls_per_service[i]++; + s->calls++; + } else + s->spec.callback(userdata); + + if (need_cmpset) + rte_atomic32_clear(&s->execute_lock); + } + } + + rte_smp_rmb(); + } + + lcore_config[lcore].state = WAIT; + + return 0; +} + +int32_t +rte_service_lcore_count(void) +{ + int32_t count = 0; + uint32_t i; + for (i = 0; i < RTE_MAX_LCORE; i++) + count += lcore_states[i].is_service_core; + return count; +} + +int32_t +rte_service_lcore_list(uint32_t array[], uint32_t n) +{ + uint32_t count = rte_service_lcore_count(); + if (count > n) + return -ENOMEM; + + if (!array) + return -EINVAL; + + uint32_t i; + uint32_t idx = 0; + for (i = 0; i < RTE_MAX_LCORE; i++) { + struct core_state *cs = &lcore_states[i]; + if (cs->is_service_core) { + array[idx] = i; + idx++; + } + } + + return count; +} + +int32_t +rte_service_start_with_defaults(void) +{ + /* create a default mapping from cores to services, then start the + * services to make them transparent to unaware applications. + */ + uint32_t i; + int ret; + uint32_t count = rte_service_get_count(); + + int32_t lcore_iter = 0; + uint32_t ids[RTE_MAX_LCORE]; + int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE); + + if (lcore_count == 0) + return -ENOTSUP; + + for (i = 0; (int)i < lcore_count; i++) + rte_service_lcore_start(ids[i]); + + for (i = 0; i < count; i++) { + struct rte_service_spec *s = rte_service_get_by_id(i); + if (!s) + return -EINVAL; + + /* do 1:1 core mapping here, with each service getting + * assigned a single core by default. 
Adding multiple services + * should multiplex to a single core, or 1:1 if there are the + * same amount of services as service-cores + */ + ret = rte_service_enable_on_lcore(s, ids[lcore_iter]); + if (ret) + return -ENODEV; + + lcore_iter++; + if (lcore_iter >= lcore_count) + lcore_iter = 0; + + ret = rte_service_start(s); + if (ret) + return -ENOEXEC; + } + + return 0; +} + +static int32_t +service_update(struct rte_service_spec *service, uint32_t lcore, + uint32_t *set, uint32_t *enabled) +{ + uint32_t i; + int32_t sid = -1; + + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if ((struct rte_service_spec *)&rte_services[i] == service && + service_valid(i)) { + sid = i; + break; + } + } + + if (sid == -1 || lcore >= RTE_MAX_LCORE) + return -EINVAL; + + if (!lcore_states[lcore].is_service_core) + return -EINVAL; + + uint64_t sid_mask = UINT64_C(1) << sid; + if (set) { + if (*set) { + lcore_states[lcore].service_mask |= sid_mask; + rte_services[sid].num_mapped_cores++; + } else { + lcore_states[lcore].service_mask &= ~(sid_mask); + rte_services[sid].num_mapped_cores--; + } + } + + if (enabled) + *enabled = (lcore_states[lcore].service_mask & (sid_mask)); + + rte_smp_wmb(); + + return 0; +} + +int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service, + uint32_t lcore) +{ + uint32_t enabled; + int ret = service_update(service, lcore, 0, &enabled); + if (ret == 0) + return enabled; + return -EINVAL; +} + +int32_t +rte_service_enable_on_lcore(struct rte_service_spec *service, uint32_t lcore) +{ + uint32_t on = 1; + return service_update(service, lcore, &on, 0); +} + +int32_t +rte_service_disable_on_lcore(struct rte_service_spec *service, uint32_t lcore) +{ + uint32_t off = 0; + return service_update(service, lcore, &off, 0); +} + +int32_t rte_service_lcore_reset_all(void) +{ + /* loop over cores, reset all to mask 0 */ + uint32_t i; + for (i = 0; i < RTE_MAX_LCORE; i++) { + lcore_states[i].service_mask = 0; + lcore_states[i].is_service_core = 0; + lcore_states[i].runstate = RUNSTATE_STOPPED; + } + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) + rte_services[i].num_mapped_cores = 0; + + rte_smp_wmb(); + + return 0; +} + +static void +set_lcore_state(uint32_t lcore, int32_t state) +{ + /* mark core state in hugepage backed config */ + struct rte_config *cfg = rte_eal_get_configuration(); + cfg->lcore_role[lcore] = state; + + /* mark state in process local lcore_config */ + lcore_config[lcore].core_role = state; + + /* update per-lcore optimized state tracking */ + lcore_states[lcore].is_service_core = (state == ROLE_SERVICE); +} + +int32_t +rte_service_lcore_add(uint32_t lcore) +{ + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + if (lcore_states[lcore].is_service_core) + return -EALREADY; + + set_lcore_state(lcore, ROLE_SERVICE); + + /* ensure that after adding a core the mask and state are defaults */ + lcore_states[lcore].service_mask = 0; + lcore_states[lcore].runstate = RUNSTATE_STOPPED; + + rte_smp_wmb(); + return 0; +} + +int32_t +rte_service_lcore_del(uint32_t lcore) +{ + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + + struct core_state *cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -EINVAL; + + if (cs->runstate != RUNSTATE_STOPPED) + return -EBUSY; + + set_lcore_state(lcore, ROLE_RTE); + + rte_smp_wmb(); + return 0; +} + +int32_t +rte_service_lcore_start(uint32_t lcore) +{ + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + + struct core_state *cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -EINVAL; + + if (cs->runstate == RUNSTATE_RUNNING) + 
return -EALREADY; + + /* set core to run state first, and then launch otherwise it will + * return immediately as runstate keeps it in the service poll loop + */ + lcore_states[lcore].runstate = RUNSTATE_RUNNING; + + int ret = rte_eal_remote_launch(rte_service_runner_func, 0, lcore); + /* returns -EBUSY if the core is already launched, 0 on success */ + return ret; +} + +int32_t +rte_service_lcore_stop(uint32_t lcore) +{ + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + + if (lcore_states[lcore].runstate == RUNSTATE_STOPPED) + return -EALREADY; + + uint32_t i; + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + int32_t enabled = + lcore_states[i].service_mask & (UINT64_C(1) << i); + int32_t service_running = rte_services[i].runstate != + RUNSTATE_STOPPED; + int32_t only_core = rte_services[i].num_mapped_cores == 1; + + /* if the core is mapped, and the service is running, and this + * is the only core that is mapped, the service would cease to + * run if this core stopped, so fail instead. + */ + if (enabled && service_running && only_core) + return -EBUSY; + } + + lcore_states[lcore].runstate = RUNSTATE_STOPPED; + + return 0; +} + +static void +rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s, + uint64_t all_cycles, uint32_t reset) +{ + /* avoid divide by zero */ + if (all_cycles == 0) + all_cycles = 1; + + int calls = 1; + if (s->calls != 0) + calls = s->calls; + + fprintf(f, " %s: stats %d\tcalls %"PRIu64"\tcycles %" + PRIu64"\tavg: %"PRIu64"\n", + s->spec.name, service_stats_enabled(s), s->calls, + s->cycles_spent, s->cycles_spent / calls); + + if (reset) { + s->cycles_spent = 0; + s->calls = 0; + } +} + +static void +service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset) +{ + uint32_t i; + struct core_state *cs = &lcore_states[lcore]; + + fprintf(f, "%02d\t", lcore); + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if (!service_valid(i)) + continue; + fprintf(f, "%"PRIu64"\t", cs->calls_per_service[i]); + if (reset) + cs->calls_per_service[i] = 0; + } + fprintf(f, "\n"); +} + +int32_t rte_service_dump(FILE *f, struct rte_service_spec *service) +{ + uint32_t i; + + uint64_t total_cycles = 0; + for (i = 0; i < rte_service_count; i++) { + if (!service_valid(i)) + continue; + total_cycles += rte_services[i].cycles_spent; + } + + if (service) { + struct rte_service_spec_impl *s = + (struct rte_service_spec_impl *)service; + fprintf(f, "Service %s Summary\n", s->spec.name); + uint32_t reset = 0; + rte_service_dump_one(f, s, total_cycles, reset); + return 0; + } + + fprintf(f, "Services Summary\n"); + for (i = 0; i < rte_service_count; i++) { + uint32_t reset = 1; + rte_service_dump_one(f, &rte_services[i], total_cycles, reset); + } + + fprintf(f, "Service Cores Summary\n"); + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (lcore_config[i].core_role != ROLE_SERVICE) + continue; + + uint32_t reset = 0; + service_dump_calls_per_lcore(f, i, reset); + } + + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 640afd08..90bca4d6 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -37,7 +37,7 @@ ARCH_DIR ?= $(RTE_ARCH) EXPORT_MAP := rte_eal_version.map VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) -LIBABIVER := 4 +LIBABIVER := 5 VPATH += $(RTE_SDK)/lib/librte_eal/common @@ -50,6 +50,9 @@ LDLIBS += -ldl LDLIBS += -lpthread LDLIBS += -lgcc_s LDLIBS += -lrt +ifeq ($(CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES),y) +LDLIBS += -lnuma +endif # specific to linuxapp exec-env 
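The new -lnuma dependency above is only pulled in when NUMA-aware hugepage allocation is enabled in the build configuration; a sketch of the relevant option is shown below (the config file location follows the usual DPDK layout, and the libnuma development headers are assumed to be installed).

    # in config/common_linuxapp, or a local target config
    CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=y

With the option enabled, per-node --socket-mem requests are honored even when the hugepages have to be allocated at runtime.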
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c @@ -96,6 +99,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 7c78f2dc..48f12f44 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -46,7 +46,6 @@ #include <stddef.h> #include <errno.h> #include <limits.h> -#include <errno.h> #include <sys/mman.h> #include <sys/queue.h> #include <sys/stat.h> @@ -64,6 +63,7 @@ #include <rte_errno.h> #include <rte_per_lcore.h> #include <rte_lcore.h> +#include <rte_service_component.h> #include <rte_log.h> #include <rte_random.h> #include <rte_cycles.h> @@ -74,7 +74,6 @@ #include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> -#include <rte_common.h> #include <rte_version.h> #include <rte_atomic.h> #include <malloc_heap.h> @@ -890,6 +889,11 @@ rte_eal_init(int argc, char **argv) return -1; } + if (eal_option_device_parse()) { + rte_errno = ENODEV; + return -1; + } + if (rte_bus_scan()) { rte_eal_init_alert("Cannot scan the buses for devices\n"); rte_errno = ENODEV; @@ -932,6 +936,14 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* initialize services so vdevs register service during bus_probe. */ + ret = rte_service_init(); + if (ret) { + rte_eal_init_alert("rte_service_init() failed\n"); + rte_errno = ENOEXEC; + return -1; + } + /* Probe all the buses and devices/drivers on them */ if (rte_bus_probe()) { rte_eal_init_alert("Cannot probe devices\n"); @@ -939,6 +951,15 @@ rte_eal_init(int argc, char **argv) return -1; } + /* initialize default service/lcore mappings and start running. Ignore + * -ENOTSUP, as it indicates no service coremask passed to EAL. + */ + ret = rte_service_start_with_defaults(); + if (ret < 0 && ret != -ENOTSUP) { + rte_errno = ENOEXEC; + return -1; + } + rte_eal_mcfg_complete(); return fctret; diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 2e3bd12a..3e9ac41e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -64,6 +64,7 @@ #include <rte_malloc.h> #include <rte_errno.h> #include <rte_spinlock.h> +#include <rte_pause.h> #include "eal_private.h" #include "eal_vfio.h" diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index ebe06833..52791282 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -2,6 +2,7 @@ * BSD LICENSE * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2013 6WIND. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,36 +31,6 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* BSD LICENSE - * - * Copyright(c) 2013 6WIND. 
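In terms of how these rte_eal_init() changes surface to users, a sketch of a command line that reserves one lcore for service cores; the -s service core mask option is an assumption here, and without any service cores rte_service_start_with_defaults() returns -ENOTSUP, which EAL deliberately ignores.

    # lcores 0-3 for the application, lcore 4 taken as a service core
    ./my_app -l 0-4 -s 0x10 -- <application args>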
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ #define _FILE_OFFSET_BITS 64 #include <errno.h> @@ -70,7 +41,6 @@ #include <stdint.h> #include <inttypes.h> #include <string.h> -#include <stdarg.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> @@ -78,11 +48,14 @@ #include <sys/file.h> #include <unistd.h> #include <limits.h> -#include <errno.h> #include <sys/ioctl.h> #include <sys/time.h> #include <signal.h> #include <setjmp.h> +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES +#include <numa.h> +#include <numaif.h> +#endif #include <rte_log.h> #include <rte_memory.h> @@ -137,6 +110,13 @@ test_phys_addrs_available(void) if (rte_xen_dom0_supported()) return; + if (!rte_eal_has_hugepages()) { + RTE_LOG(ERR, EAL, + "Started without hugepages support, physical addresses not available\n"); + phys_addrs_available = false; + return; + } + physaddr = rte_mem_virt2phy(&tmp); if (physaddr == RTE_BAD_PHYS_ADDR) { RTE_LOG(ERR, EAL, @@ -147,16 +127,6 @@ test_phys_addrs_available(void) } } -/* Lock page in physical memory and prevent from swapping. */ -int -rte_mem_lock_page(const void *virt) -{ - unsigned long virtual = (unsigned long)virt; - int page_size = getpagesize(); - unsigned long aligned = (virtual & ~ (page_size - 1)); - return mlock((void*)aligned, page_size); -} - /* * Get physical address of any mapped virtual address in the current process. */ @@ -387,6 +357,14 @@ static int huge_wrap_sigsetjmp(void) return sigsetjmp(huge_jmpenv, 1); } +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES +/* Callback for numa library. */ +void numa_error(char *where) +{ + RTE_LOG(ERR, EAL, "%s failed: %s\n", where, strerror(errno)); +} +#endif + /* * Mmap all hugepages of hugepage table: it first open a file in * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the @@ -395,18 +373,78 @@ static int huge_wrap_sigsetjmp(void) * map continguous physical blocks in contiguous virtual blocks. 
*/ static unsigned -map_all_hugepages(struct hugepage_file *hugepg_tbl, - struct hugepage_info *hpi, int orig) +map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, + uint64_t *essential_memory __rte_unused, int orig) { int fd; unsigned i; void *virtaddr; void *vma_addr = NULL; size_t vma_len = 0; +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + int node_id = -1; + int essential_prev = 0; + int oldpolicy; + struct bitmask *oldmask = numa_allocate_nodemask(); + bool have_numa = true; + unsigned long maxnode = 0; + + /* Check if kernel supports NUMA. */ + if (numa_available() != 0) { + RTE_LOG(DEBUG, EAL, "NUMA is not supported.\n"); + have_numa = false; + } + + if (orig && have_numa) { + RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n"); + if (get_mempolicy(&oldpolicy, oldmask->maskp, + oldmask->size + 1, 0, 0) < 0) { + RTE_LOG(ERR, EAL, + "Failed to get current mempolicy: %s. " + "Assuming MPOL_DEFAULT.\n", strerror(errno)); + oldpolicy = MPOL_DEFAULT; + } + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + if (internal_config.socket_mem[i]) + maxnode = i + 1; + } +#endif for (i = 0; i < hpi->num_pages[0]; i++) { uint64_t hugepage_sz = hpi->hugepage_sz; +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + if (maxnode) { + unsigned int j; + + for (j = 0; j < maxnode; j++) + if (essential_memory[j]) + break; + + if (j == maxnode) { + node_id = (node_id + 1) % maxnode; + while (!internal_config.socket_mem[node_id]) { + node_id++; + node_id %= maxnode; + } + essential_prev = 0; + } else { + node_id = j; + essential_prev = essential_memory[j]; + + if (essential_memory[j] < hugepage_sz) + essential_memory[j] = 0; + else + essential_memory[j] -= hugepage_sz; + } + + RTE_LOG(DEBUG, EAL, + "Setting policy MPOL_PREFERRED for socket %d\n", + node_id); + numa_set_preferred(node_id); + } +#endif + if (orig) { hugepg_tbl[i].file_id = i; hugepg_tbl[i].size = hugepage_sz; @@ -461,7 +499,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, if (fd < 0) { RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); - return i; + goto out; } /* map the segment, and populate page tables, @@ -472,7 +510,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); close(fd); - return i; + goto out; } if (orig) { @@ -497,7 +535,12 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, munmap(virtaddr, hugepage_sz); close(fd); unlink(hugepg_tbl[i].filepath); - return i; +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + if (maxnode) + essential_memory[node_id] = + essential_prev; +#endif + goto out; } *(int *)virtaddr = 0; } @@ -508,7 +551,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, RTE_LOG(DEBUG, EAL, "%s(): Locking file failed:%s \n", __func__, strerror(errno)); close(fd); - return i; + goto out; } close(fd); @@ -517,6 +560,22 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, vma_len -= hugepage_sz; } +out: +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + if (maxnode) { + RTE_LOG(DEBUG, EAL, + "Restoring previous memory policy: %d\n", oldpolicy); + if (oldpolicy == MPOL_DEFAULT) { + numa_set_localalloc(); + } else if (set_mempolicy(oldpolicy, oldmask->maskp, + oldmask->size + 1) < 0) { + RTE_LOG(ERR, EAL, "Failed to restore mempolicy: %s\n", + strerror(errno)); + numa_set_localalloc(); + } + } + numa_free_cpumask(oldmask); +#endif return i; } @@ -551,8 +610,8 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) f = fopen("/proc/self/numa_maps", "r"); if (f == NULL) { - RTE_LOG(NOTICE, EAL, 
"cannot open /proc/self/numa_maps," - " consider that all memory is in socket_id 0\n"); + RTE_LOG(NOTICE, EAL, "NUMA support not available" + " consider that all memory is in socket_id 0\n"); return 0; } @@ -601,6 +660,11 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) if (hugepg_tbl[i].orig_va == va) { hugepg_tbl[i].socket_id = socket_id; hp_count++; +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + RTE_LOG(DEBUG, EAL, + "Hugepage %s is on socket %d\n", + hugepg_tbl[i].filepath, socket_id); +#endif } } } @@ -995,7 +1059,7 @@ rte_eal_hugepage_init(void) strerror(errno)); return -1; } - mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; + mcfg->memseg[0].phys_addr = RTE_BAD_PHYS_ADDR; mcfg->memseg[0].addr = addr; mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; mcfg->memseg[0].len = internal_config.memory; @@ -1039,6 +1103,11 @@ rte_eal_hugepage_init(void) huge_register_sigbus(); + /* make a copy of socket_mem, needed for balanced allocation. */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + memory[i] = internal_config.socket_mem[i]; + + /* map all hugepages and sort them */ for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){ unsigned pages_old, pages_new; @@ -1056,7 +1125,8 @@ rte_eal_hugepage_init(void) /* map all hugepages available */ pages_old = hpi->num_pages[0]; - pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1); + pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, + memory, 1); if (pages_new < pages_old) { RTE_LOG(DEBUG, EAL, "%d not %d hugepages of size %u MB allocated\n", @@ -1099,7 +1169,7 @@ rte_eal_hugepage_init(void) sizeof(struct hugepage_file), cmp_physaddr); /* remap all hugepages */ - if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) != + if (map_all_hugepages(&tmp_hp[hp_offset], hpi, NULL, 0) != hpi->num_pages[0]) { RTE_LOG(ERR, EAL, "Failed to remap %u MB pages\n", (unsigned)(hpi->hugepage_sz / 0x100000)); diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index 595622b2..8951ce74 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -310,22 +310,20 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) dev->max_vfs = (uint16_t)tmp; } - /* get numa node */ + /* get numa node, default to 0 if not present */ snprintf(filename, sizeof(filename), "%s/numa_node", dirname); - if (access(filename, R_OK) != 0) { - /* if no NUMA support, set default to 0 */ - dev->device.numa_node = 0; + + if (access(filename, F_OK) != -1) { + if (eal_parse_sysfs_value(filename, &tmp) == 0) + dev->device.numa_node = tmp; + else + dev->device.numa_node = -1; } else { - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->device.numa_node = tmp; + dev->device.numa_node = 0; } - rte_pci_device_name(addr, dev->name, sizeof(dev->name)); - dev->device.name = dev->name; + pci_name_set(dev); /* parse resources */ snprintf(filename, sizeof(filename), "%s/resource", dirname); @@ -373,6 +371,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; + pci_name_set(dev2); memmove(dev2->mem_resource, dev->mem_resource, sizeof(dev->mem_resource)); free(dev); @@ -430,10 +429,10 @@ parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) /* now convert to int values */ errno = 0; - addr->domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); - addr->bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); - addr->devid = 
(uint8_t)strtoul(splitaddr.devid, NULL, 16); - addr->function = (uint8_t)strtoul(splitaddr.function, NULL, 10); + addr->domain = strtoul(splitaddr.domain, NULL, 16); + addr->bus = strtoul(splitaddr.bus, NULL, 16); + addr->devid = strtoul(splitaddr.devid, NULL, 16); + addr->function = strtoul(splitaddr.function, NULL, 10); if (errno != 0) goto error; diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c index 2be13195..aa9d96ed 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -214,7 +214,7 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) intr_idx = VFIO_PCI_NUM_IRQS; /* get interrupt type from internal config (MSI-X by default, can be - * overriden from the command line + * overridden from the command line */ switch (internal_config.vfio_intr_mode) { case RTE_INTR_MODE_MSIX: diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c index 9f88530e..6481eeea 100644 --- a/lib/librte_eal/linuxapp/eal/eal_thread.c +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c @@ -49,7 +49,6 @@ #include <rte_memzone.h> #include <rte_per_lcore.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_lcore.h> #include "eal_private.h" @@ -184,7 +183,14 @@ eal_thread_loop(__attribute__((unused)) void *arg) ret = lcore_config[lcore_id].f(fct_arg); lcore_config[lcore_id].ret = ret; rte_wmb(); - lcore_config[lcore_id].state = FINISHED; + + /* when a service core returns, it should go directly to WAIT + * state, because the application will not lcore_wait() for it. + */ + if (lcore_config[lcore_id].core_role == ROLE_SERVICE) + lcore_config[lcore_id].state = WAIT; + else + lcore_config[lcore_id].state = FINISHED; } /* never reached */ diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c index bddbdb07..19db1cb5 100644 --- a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c @@ -38,7 +38,6 @@ #include <stdint.h> #include <inttypes.h> #include <string.h> -#include <stdarg.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> @@ -46,7 +45,6 @@ #include <sys/file.h> #include <unistd.h> #include <limits.h> -#include <errno.h> #include <sys/ioctl.h> #include <sys/time.h> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map index 670bab3a..3a8f1540 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -198,3 +198,47 @@ DPDK_17.05 { vfio_get_group_no; } DPDK_17.02; + +DPDK_17.08 { + global: + + rte_bus_find; + rte_bus_find_by_device; + rte_bus_find_by_name; + rte_log_get_level; + +} DPDK_17.05; + +EXPERIMENTAL { + global: + + rte_eal_devargs_insert; + rte_eal_devargs_parse; + rte_eal_devargs_remove; + rte_eal_hotplug_add; + rte_eal_hotplug_remove; + rte_service_disable_on_lcore; + rte_service_dump; + rte_service_enable_on_lcore; + rte_service_get_by_id; + rte_service_get_by_name; + rte_service_get_count; + rte_service_get_enabled_on_lcore; + rte_service_is_running; + rte_service_lcore_add; + rte_service_lcore_count; + rte_service_lcore_del; + rte_service_lcore_list; + rte_service_lcore_reset_all; + rte_service_lcore_start; + rte_service_lcore_stop; + rte_service_probe_capability; + rte_service_register; + rte_service_reset; + rte_service_set_stats_enable; + rte_service_start; + rte_service_start_with_defaults; + 
rte_service_stop; + rte_service_unregister; + +} DPDK_17.08; diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index b9d427c5..07a19a31 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -170,6 +170,37 @@ igbuio_pci_irqhandler(int irq, struct uio_info *info) return IRQ_HANDLED; } +/** + * This gets called while opening uio device file. + */ +static int +igbuio_pci_open(struct uio_info *info, struct inode *inode) +{ + struct rte_uio_pci_dev *udev = info->priv; + struct pci_dev *dev = udev->pdev; + + pci_reset_function(dev); + + /* set bus master, which was cleared by the reset function */ + pci_set_master(dev); + + return 0; +} + +static int +igbuio_pci_release(struct uio_info *info, struct inode *inode) +{ + struct rte_uio_pci_dev *udev = info->priv; + struct pci_dev *dev = udev->pdev; + + /* stop the device from further DMA */ + pci_clear_master(dev); + + pci_reset_function(dev); + + return 0; +} + #ifdef CONFIG_XEN_DOM0 static int igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma) @@ -372,6 +403,8 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) udev->info.version = "0.1"; udev->info.handler = igbuio_pci_irqhandler; udev->info.irqcontrol = igbuio_pci_irqcontrol; + udev->info.open = igbuio_pci_open; + udev->info.release = igbuio_pci_release; #ifdef CONFIG_XEN_DOM0 /* check if the driver run on Xen Dom0 */ if (xen_initial_domain()) diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c index d558af20..1c30d12b 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c @@ -1357,7 +1357,7 @@ static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, * @hw: pointer to the HW structure * * In the case of serdes shut down sfp and PCS on driver unload - * when management pass thru is not enabled. + * when management pass through is not enabled. 
**/ void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw) { diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c index 5f1f3a6b..99338c5c 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c @@ -1133,7 +1133,7 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, /* initialize pointer to rings */ ring = q_vector->ring; - /* intialize ITR */ + /* initialize ITR */ if (rxr_count) { /* rx or rx/tx vector */ if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h index 4c52da3c..e0a03542 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h @@ -1165,7 +1165,7 @@ static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bit #define pci_register_driver pci_module_init /* - * Most of the dma compat code is copied/modifed from the 2.4.37 + * Most of the dma compat code is copied/modified from the 2.4.37 * /include/linux/libata-compat.h header file */ /* These definitions mirror those in pci.h, so they can be used diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c index f00fe796..4808d06e 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c @@ -718,7 +718,7 @@ s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw) * @vmdq: VMDq pool to assign * * Puts an ethernet address into a receive address register, or - * finds the rar that it is aleady in; adds to the pool list + * finds the rar that it is already in; adds to the pool list **/ s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) { diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c index 88b33fa0..2c861de5 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c @@ -3007,7 +3007,7 @@ u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw) * @vmdq: VMDq pool to assign * * Puts an ethernet address into a receive address register, or - * finds the rar that it is aleady in; adds to the pool list + * finds the rar that it is already in; adds to the pool list **/ s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) { diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h index 4c7a6408..f62a7b56 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h @@ -1108,7 +1108,7 @@ static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bit #define pci_register_driver pci_module_init /* - * Most of the dma compat code is copied/modifed from the 2.4.37 + * Most of the dma compat code is copied/modified from the 2.4.37 * /include/linux/libata-compat.h header file */ /* These definitions mirror those in pci.h, so they can be used diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c index f601d62e..4d9a0887 100644 --- a/lib/librte_efd/rte_efd.c +++ b/lib/librte_efd/rte_efd.c @@ -53,6 +53,8 @@ #include "rte_efd.h" #if defined(RTE_ARCH_X86) #include "rte_efd_x86.h" +#elif defined(RTE_ARCH_ARM64) +#include "rte_efd_arm64.h" #endif 
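
The EFD hunks below select a NEON lookup at table creation time and dispatch to efd_lookup_internal_neon() at lookup time. What that routine vectorises, four output bits per iteration, is the usual per-group EFD bit recovery; a hedged scalar sketch of that computation, with the internal types and the EFD_LOOKUPTBL_SHIFT constant replaced by plain parameters, might look like this:

#include <stdint.h>

/* Scalar equivalent of one EFD group lookup (illustrative only). */
static uint32_t
efd_group_lookup_scalar(const uint16_t *hash_idx,
            const uint16_t *lookup_table,
            unsigned int value_num_bits,
            unsigned int lookuptbl_shift,
            uint32_t hash_val_a, uint32_t hash_val_b)
{
    uint32_t value = 0;
    unsigned int i;

    for (i = 0; i < value_num_bits; i++) {
        /* Per-bit hash: the stored multiplier selects a table slot. */
        uint32_t h = hash_val_a + (uint32_t)hash_idx[i] * hash_val_b;
        uint32_t slot = h >> lookuptbl_shift;

        /* Bit i of the value comes from that slot of lookup_table[i]. */
        value |= ((uint32_t)(lookup_table[i] >> slot) & 0x1) << i;
    }
    return value;
}
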
#define EFD_KEY(key_idx, table) (table->keys + ((key_idx) * table->key_len)) @@ -103,6 +105,7 @@ allocated memory enum efd_lookup_internal_function { EFD_LOOKUP_SCALAR = 0, EFD_LOOKUP_AVX2, + EFD_LOOKUP_NEON, EFD_LOOKUP_NUM }; @@ -674,6 +677,16 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len, table->lookup_fn = EFD_LOOKUP_AVX2; else #endif +#if defined(RTE_ARCH_ARM64) + /* + * For less than or equal to 16 bits, scalar function performs better + * than vectorised version + */ + if (RTE_EFD_VALUE_NUM_BITS > 16 && + rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) + table->lookup_fn = EFD_LOOKUP_NEON; + else +#endif table->lookup_fn = EFD_LOOKUP_SCALAR; /* @@ -1271,6 +1284,15 @@ efd_lookup_internal(const struct efd_online_group_entry * const group, group->lookup_table, hash_val_a, hash_val_b); + break; +#endif +#if defined(RTE_ARCH_ARM64) + case EFD_LOOKUP_NEON: + return efd_lookup_internal_neon(group->hash_idx, + group->lookup_table, + hash_val_a, + hash_val_b); + break; #endif case EFD_LOOKUP_SCALAR: /* Fall-through */ diff --git a/lib/librte_efd/rte_efd_arm64.h b/lib/librte_efd/rte_efd_arm64.h new file mode 100644 index 00000000..63289ac4 --- /dev/null +++ b/lib/librte_efd/rte_efd_arm64.h @@ -0,0 +1,76 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2017. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * rte_efd_arm64.h + * This file holds all arm64 specific EFD functions + */ + +#ifndef __RTE_EFD_ARM64_H__ +#define __RTE_EFD_ARM64_H__ + +#include <rte_vect.h> + +static inline efd_value_t +efd_lookup_internal_neon(const efd_hashfunc_t *group_hash_idx, + const efd_lookuptbl_t *group_lookup_table, + const uint32_t hash_val_a, const uint32_t hash_val_b) +{ + efd_value_t value = 0; + uint32_t i = 0; + uint32x4_t vhash_val_a = vmovq_n_u32(hash_val_a); + uint32x4_t vhash_val_b = vmovq_n_u32(hash_val_b); + int32x4_t vshift = {0, 1, 2, 3}; + uint32x4_t vmask = vdupq_n_u32(0x1); + int32x4_t vincr = vdupq_n_s32(4); + + for (; i < RTE_EFD_VALUE_NUM_BITS; i += 4) { + uint32x4_t vhash_idx = vshll_n_u16( + vld1_u16((uint16_t const *)&group_hash_idx[i]), 0); + uint32x4_t vlookup_table = vshll_n_u16( + vld1_u16((uint16_t const *)&group_lookup_table[i]), 0); + uint32x4_t vhash = vaddq_u32(vhash_val_a, + vmulq_u32(vhash_idx, vhash_val_b)); + int32x4_t vbucket_idx = vnegq_s32(vreinterpretq_s32_u32( + vshrq_n_u32(vhash, EFD_LOOKUPTBL_SHIFT))); + uint32x4_t vresult = vshlq_u32(vlookup_table, vbucket_idx); + + vresult = vandq_u32(vresult, vmask); + vresult = vshlq_u32(vresult, vshift); + value |= vaddvq_u32(vresult); + vshift = vaddq_s32(vshift, vincr); + } + + return value; +} + +#endif /* __RTE_EFD_ARM64_H__ */ diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile index 93fdde10..db692ae4 100644 --- a/lib/librte_ether/Makefile +++ b/lib/librte_ether/Makefile @@ -1,6 +1,6 @@ # BSD LICENSE # -# Copyright(c) 2010-2016 Intel Corporation. All rights reserved. +# Copyright(c) 2010-2017 Intel Corporation. All rights reserved. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -45,6 +45,7 @@ LIBABIVER := 6 SRCS-y += rte_ethdev.c SRCS-y += rte_flow.c +SRCS-y += rte_tm.c # # Export include files @@ -56,5 +57,7 @@ SYMLINK-y-include += rte_eth_ctrl.h SYMLINK-y-include += rte_dev_info.h SYMLINK-y-include += rte_flow.h SYMLINK-y-include += rte_flow_driver.h +SYMLINK-y-include += rte_tm.h +SYMLINK-y-include += rte_tm_driver.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 83898a8f..0597641e 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -72,7 +72,6 @@ static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data"; struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS]; static struct rte_eth_dev_data *rte_eth_dev_data; static uint8_t eth_dev_last_created_port; -static uint8_t nb_ports; /* spinlock for eth device callbacks */ static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER; @@ -129,6 +128,7 @@ struct rte_eth_dev_callback { TAILQ_ENTRY(rte_eth_dev_callback) next; /**< Callbacks list */ rte_eth_dev_cb_fn cb_fn; /**< Callback address */ void *cb_arg; /**< Parameter for callback */ + void *ret_param; /**< Return parameter */ enum rte_eth_event_type event; /**< Interrupt event type */ uint32_t active; /**< Callback is executing */ }; @@ -178,9 +178,11 @@ rte_eth_dev_allocated(const char *name) unsigned i; for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - if ((rte_eth_devices[i].state == RTE_ETH_DEV_ATTACHED) && - strcmp(rte_eth_devices[i].data->name, name) == 0) - return &rte_eth_devices[i]; + if (rte_eth_devices[i].state == RTE_ETH_DEV_ATTACHED && + rte_eth_devices[i].device) { + if (!strcmp(rte_eth_devices[i].device->name, name)) + return &rte_eth_devices[i]; + } } return NULL; } @@ -207,7 +209,6 @@ eth_dev_get(uint8_t port_id) TAILQ_INIT(&(eth_dev->link_intr_cbs)); eth_dev_last_created_port = port_id; - nb_ports++; return eth_dev; } @@ -280,7 +281,6 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) return -EINVAL; eth_dev->state = RTE_ETH_DEV_UNUSED; - nb_ports--; return 0; } @@ -288,7 +288,8 @@ int rte_eth_dev_is_valid_port(uint8_t port_id) { if (port_id >= RTE_MAX_ETHPORTS || - rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED) + (rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED && + rte_eth_devices[port_id].state != RTE_ETH_DEV_DEFERRED)) return 0; else return 1; @@ -304,13 +305,21 @@ rte_eth_dev_socket_id(uint8_t port_id) uint8_t rte_eth_dev_count(void) { - return nb_ports; + uint8_t p; + uint8_t count; + + count = 0; + + RTE_ETH_FOREACH_DEV(p) + count++; + + return count; } int rte_eth_dev_get_name_by_port(uint8_t port_id, char *name) { - char *tmp; + const char *tmp; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -321,7 +330,7 @@ rte_eth_dev_get_name_by_port(uint8_t port_id, char *name) /* shouldn't check 'rte_eth_devices[i].data', * because it might be overwritten by VDEV PMD */ - tmp = rte_eth_dev_data[port_id].name; + tmp = rte_eth_devices[port_id].device->name; strcpy(name, tmp); return 0; } @@ -329,6 +338,7 @@ rte_eth_dev_get_name_by_port(uint8_t port_id, char *name) int rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) { + int ret; int i; if (name == NULL) { @@ -336,16 +346,14 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) return -EINVAL; } - if (!nb_ports) - return -ENODEV; - - *port_id = RTE_MAX_ETHPORTS; RTE_ETH_FOREACH_DEV(i) { - if (!strncmp(name, - rte_eth_dev_data[i].name, strlen(name))) { + if (!rte_eth_devices[i].device) + continue; + ret = strncmp(name, rte_eth_devices[i].device->name, + strlen(name)); + if (ret == 0) { *port_id = i; - return 0; } } @@ -359,16 +367,6 @@ rte_eth_dev_is_detachable(uint8_t port_id) RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - switch (rte_eth_devices[port_id].data->kdrv) { - case RTE_KDRV_IGB_UIO: - case RTE_KDRV_UIO_GENERIC: - case RTE_KDRV_NIC_UIO: - case RTE_KDRV_NONE: - case RTE_KDRV_VFIO: - break; - default: - return -ENOTSUP; - } dev_flags = rte_eth_devices[port_id].data->dev_flags; if ((dev_flags & 
RTE_ETH_DEV_DETACHABLE) && (!(dev_flags & RTE_ETH_DEV_BONDED_SLAVE))) @@ -438,12 +436,14 @@ rte_eth_dev_detach(uint8_t port_id, char *name) if (rte_eth_dev_is_detachable(port_id)) goto err; - snprintf(name, sizeof(rte_eth_devices[port_id].data->name), - "%s", rte_eth_devices[port_id].data->name); - ret = rte_eal_dev_detach(name); + snprintf(name, RTE_DEV_NAME_MAX_LEN, "%s", + rte_eth_devices[port_id].device->name); + + ret = rte_eal_dev_detach(rte_eth_devices[port_id].device); if (ret < 0) goto err; + rte_eth_devices[port_id].state = RTE_ETH_DEV_UNUSED; return 0; err: @@ -753,13 +753,13 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, if ((dev_conf->intr_conf.lsc == 1) && (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) { RTE_PMD_DEBUG_TRACE("driver %s does not support lsc\n", - dev->data->drv_name); + dev->device->driver->name); return -EINVAL; } if ((dev_conf->intr_conf.rmv == 1) && (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) { RTE_PMD_DEBUG_TRACE("driver %s does not support rmv\n", - dev->data->drv_name); + dev->device->driver->name); return -EINVAL; } @@ -1900,7 +1900,7 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info) RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); (*dev->dev_ops->dev_infos_get)(dev, dev_info); - dev_info->driver_name = dev->data->drv_name; + dev_info->driver_name = dev->device->driver->name; dev_info->nb_rx_queues = dev->data->nb_rx_queues; dev_info->nb_tx_queues = dev->data->nb_tx_queues; } @@ -1975,6 +1975,7 @@ int rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on) { struct rte_eth_dev *dev; + int ret; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); dev = &rte_eth_devices[port_id]; @@ -1990,7 +1991,23 @@ rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on) } RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP); - return (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on); + ret = (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on); + if (ret == 0) { + struct rte_vlan_filter_conf *vfc; + int vidx; + int vbit; + + vfc = &dev->data->vlan_filter_conf; + vidx = vlan_id / 64; + vbit = vlan_id % 64; + + if (on) + vfc->ids[vidx] |= UINT64_C(1) << vbit; + else + vfc->ids[vidx] &= ~(UINT64_C(1) << vbit); + } + + return ret; } int @@ -2351,6 +2368,7 @@ get_mac_addr_index(uint8_t port_id, const struct ether_addr *addr) struct rte_eth_dev *dev = &rte_eth_devices[port_id]; unsigned i; + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); rte_eth_dev_info_get(port_id, &dev_info); for (i = 0; i < dev_info.max_mac_addrs; i++) @@ -2718,12 +2736,13 @@ rte_eth_dev_callback_unregister(uint8_t port_id, return ret; } -void +int _rte_eth_dev_callback_process(struct rte_eth_dev *dev, - enum rte_eth_event_type event, void *cb_arg) + enum rte_eth_event_type event, void *cb_arg, void *ret_param) { struct rte_eth_dev_callback *cb_lst; struct rte_eth_dev_callback dev_cb; + int rc = 0; rte_spinlock_lock(&rte_eth_dev_cb_lock); TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) { @@ -2733,14 +2752,17 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev, cb_lst->active = 1; if (cb_arg != NULL) dev_cb.cb_arg = cb_arg; + if (ret_param != NULL) + dev_cb.ret_param = ret_param; rte_spinlock_unlock(&rte_eth_dev_cb_lock); - dev_cb.cb_fn(dev->data->port_id, dev_cb.event, - dev_cb.cb_arg); + rc = dev_cb.cb_fn(dev->data->port_id, dev_cb.event, + dev_cb.cb_arg, dev_cb.ret_param); rte_spinlock_lock(&rte_eth_dev_cb_lock); cb_lst->active = 0; } rte_spinlock_unlock(&rte_eth_dev_cb_lock); + return rc; } 
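
With the reworked callback path above, user callbacks now return an int and receive an extra ret_param pointer through which the PMD can hand data back (the matching typedef change to rte_eth_dev_cb_fn appears further down in rte_ethdev.h). A minimal hedged sketch of an application callback written against the new prototype, with illustrative names only:

#include <stdio.h>
#include <stdint.h>
#include <rte_ethdev.h>

static int
app_event_cb(uint8_t port_id, enum rte_eth_event_type event,
         void *cb_arg, void *ret_param)
{
    (void)cb_arg;
    (void)ret_param;    /* filled by the PMD for events that carry data back */

    printf("port %u: event %d\n", (unsigned int)port_id, (int)event);
    return 0;           /* propagated back through _rte_eth_dev_callback_process() */
}

/* Registration itself is unchanged, e.g.:
 * rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
 *                               app_event_cb, NULL);
 */
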
int @@ -2789,7 +2811,7 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, const struct rte_memzone *mz; snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - dev->data->drv_name, ring_name, + dev->device->driver->name, ring_name, dev->data->port_id, queue_id); mz = rte_memzone_lookup(z_name); @@ -2872,128 +2894,6 @@ rte_eth_dev_rx_intr_disable(uint8_t port_id, return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id); } -#ifdef RTE_NIC_BYPASS -int rte_eth_dev_bypass_init(uint8_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_init, -ENOTSUP); - (*dev->dev_ops->bypass_init)(dev); - return 0; -} - -int -rte_eth_dev_bypass_state_show(uint8_t port_id, uint32_t *state) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_show, -ENOTSUP); - (*dev->dev_ops->bypass_state_show)(dev, state); - return 0; -} - -int -rte_eth_dev_bypass_state_set(uint8_t port_id, uint32_t *new_state) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_set, -ENOTSUP); - (*dev->dev_ops->bypass_state_set)(dev, new_state); - return 0; -} - -int -rte_eth_dev_bypass_event_show(uint8_t port_id, uint32_t event, uint32_t *state) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_show, -ENOTSUP); - (*dev->dev_ops->bypass_event_show)(dev, event, state); - return 0; -} - -int -rte_eth_dev_bypass_event_store(uint8_t port_id, uint32_t event, uint32_t state) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_event_set, -ENOTSUP); - (*dev->dev_ops->bypass_event_set)(dev, event, state); - return 0; -} - -int -rte_eth_dev_wd_timeout_store(uint8_t port_id, uint32_t timeout) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_timeout_set, -ENOTSUP); - (*dev->dev_ops->bypass_wd_timeout_set)(dev, timeout); - return 0; -} - -int -rte_eth_dev_bypass_ver_show(uint8_t port_id, uint32_t *ver) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_ver_show, -ENOTSUP); - (*dev->dev_ops->bypass_ver_show)(dev, ver); - return 0; -} - -int -rte_eth_dev_bypass_wd_timeout_show(uint8_t port_id, uint32_t *wd_timeout) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_timeout_show, -ENOTSUP); - (*dev->dev_ops->bypass_wd_timeout_show)(dev, wd_timeout); - return 0; -} - -int -rte_eth_dev_bypass_wd_reset(uint8_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_reset, -ENOTSUP); - (*dev->dev_ops->bypass_wd_reset)(dev); - return 0; -} -#endif int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type) @@ 
-3472,3 +3372,40 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id, -ENOTSUP); return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en); } + +static void +rte_eth_dev_adjust_nb_desc(uint16_t *nb_desc, + const struct rte_eth_desc_lim *desc_lim) +{ + if (desc_lim->nb_align != 0) + *nb_desc = RTE_ALIGN_CEIL(*nb_desc, desc_lim->nb_align); + + if (desc_lim->nb_max != 0) + *nb_desc = RTE_MIN(*nb_desc, desc_lim->nb_max); + + *nb_desc = RTE_MAX(*nb_desc, desc_lim->nb_min); +} + +int +rte_eth_dev_adjust_nb_rx_tx_desc(uint8_t port_id, + uint16_t *nb_rx_desc, + uint16_t *nb_tx_desc) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + + rte_eth_dev_info_get(port_id, &dev_info); + + if (nb_rx_desc != NULL) + rte_eth_dev_adjust_nb_desc(nb_rx_desc, &dev_info.rx_desc_lim); + + if (nb_tx_desc != NULL) + rte_eth_dev_adjust_nb_desc(nb_tx_desc, &dev_info.tx_desc_lim); + + return 0; +} diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 0f38b45f..0adf3274 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -118,7 +118,7 @@ * - NIC queue statistics mappings * * Any other configuration will not be stored and will need to be re-entered - * after a call to rte_eth_dev_start(). + * before a call to rte_eth_dev_start(). * * Finally, a network application can close an Ethernet device by invoking the * rte_eth_dev_close() function. @@ -172,8 +172,6 @@ extern "C" { #include <stdint.h> -#include <rte_dev.h> - /* Use this macro to check if LRO API is supported */ #define RTE_ETHDEV_HAS_LRO_SUPPORT @@ -374,6 +372,14 @@ enum rte_vlan_type { }; /** + * A structure used to describe a vlan filter. + * If the bit corresponding to a VID is set, such VID is on. + */ +struct rte_vlan_filter_conf { + uint64_t ids[64]; +}; + +/** * A structure used to configure the Receive Side Scaling (RSS) feature * of an Ethernet port. * If not NULL, the *rss_key* pointer of the *rss_conf* structure points @@ -629,6 +635,24 @@ struct rte_eth_vmdq_dcb_conf { /**< Selects a queue in a pool */ }; +/** + * A structure used to configure the VMDQ feature of an Ethernet port when + * not combined with the DCB feature. + * + * Using this feature, packets are routed to a pool of queues. By default, + * the pool selection is based on the MAC address, the vlan id in the + * vlan tag as specified in the pool_map array. + * Passing the ETH_VMDQ_ACCEPT_UNTAG in the rx_mode field allows pool + * selection using only the MAC address. MAC address to pool mapping is done + * using the rte_eth_dev_mac_addr_add function, with the pool parameter + * corresponding to the pool id. + * + * Queue selection within the selected pool will be done using RSS when + * it is enabled or revert to the first queue of the pool if not. + * + * A default pool may be used, if desired, to route all traffic which + * does not match the vlan filter rules or any pool MAC address. 
+ */ struct rte_eth_vmdq_rx_conf { enum rte_eth_nb_pools nb_queue_pools; /**< VMDq only mode, 8 or 64 pools */ uint8_t enable_default_pool; /**< If non-zero, use a default pool */ @@ -901,6 +925,10 @@ struct rte_eth_conf { #define DEV_TX_OFFLOAD_IPIP_TNL_TSO 0x00000800 /**< Used for tunneling packet. */ #define DEV_TX_OFFLOAD_GENEVE_TNL_TSO 0x00001000 /**< Used for tunneling packet. */ #define DEV_TX_OFFLOAD_MACSEC_INSERT 0x00002000 +#define DEV_TX_OFFLOAD_MT_LOCKFREE 0x00004000 +/**< Multiple threads can invoke rte_eth_tx_burst() concurrently on the same + * tx queue without SW lock. + */ struct rte_pci_device; @@ -1048,6 +1076,8 @@ TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback); } \ } while (0) +#define RTE_ETH_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device) + /** * l2 tunnel configuration. */ @@ -1381,59 +1411,6 @@ typedef int (*eth_l2_tunnel_offload_set_t) uint8_t en); /**< @internal enable/disable the l2 tunnel offload functions */ -#ifdef RTE_NIC_BYPASS - -enum { - RTE_BYPASS_MODE_NONE, - RTE_BYPASS_MODE_NORMAL, - RTE_BYPASS_MODE_BYPASS, - RTE_BYPASS_MODE_ISOLATE, - RTE_BYPASS_MODE_NUM, -}; - -#define RTE_BYPASS_MODE_VALID(x) \ - ((x) > RTE_BYPASS_MODE_NONE && (x) < RTE_BYPASS_MODE_NUM) - -enum { - RTE_BYPASS_EVENT_NONE, - RTE_BYPASS_EVENT_START, - RTE_BYPASS_EVENT_OS_ON = RTE_BYPASS_EVENT_START, - RTE_BYPASS_EVENT_POWER_ON, - RTE_BYPASS_EVENT_OS_OFF, - RTE_BYPASS_EVENT_POWER_OFF, - RTE_BYPASS_EVENT_TIMEOUT, - RTE_BYPASS_EVENT_NUM -}; - -#define RTE_BYPASS_EVENT_VALID(x) \ - ((x) > RTE_BYPASS_EVENT_NONE && (x) < RTE_BYPASS_MODE_NUM) - -enum { - RTE_BYPASS_TMT_OFF, /* timeout disabled. */ - RTE_BYPASS_TMT_1_5_SEC, /* timeout for 1.5 seconds */ - RTE_BYPASS_TMT_2_SEC, /* timeout for 2 seconds */ - RTE_BYPASS_TMT_3_SEC, /* timeout for 3 seconds */ - RTE_BYPASS_TMT_4_SEC, /* timeout for 4 seconds */ - RTE_BYPASS_TMT_8_SEC, /* timeout for 8 seconds */ - RTE_BYPASS_TMT_16_SEC, /* timeout for 16 seconds */ - RTE_BYPASS_TMT_32_SEC, /* timeout for 32 seconds */ - RTE_BYPASS_TMT_NUM -}; - -#define RTE_BYPASS_TMT_VALID(x) \ - ((x) == RTE_BYPASS_TMT_OFF || \ - ((x) > RTE_BYPASS_TMT_OFF && (x) < RTE_BYPASS_TMT_NUM)) - -typedef void (*bypass_init_t)(struct rte_eth_dev *dev); -typedef int32_t (*bypass_state_set_t)(struct rte_eth_dev *dev, uint32_t *new_state); -typedef int32_t (*bypass_state_show_t)(struct rte_eth_dev *dev, uint32_t *state); -typedef int32_t (*bypass_event_set_t)(struct rte_eth_dev *dev, uint32_t state, uint32_t event); -typedef int32_t (*bypass_event_show_t)(struct rte_eth_dev *dev, uint32_t event_shift, uint32_t *event); -typedef int32_t (*bypass_wd_timeout_set_t)(struct rte_eth_dev *dev, uint32_t timeout); -typedef int32_t (*bypass_wd_timeout_show_t)(struct rte_eth_dev *dev, uint32_t *wd_timeout); -typedef int32_t (*bypass_ver_show_t)(struct rte_eth_dev *dev, uint32_t *ver); -typedef int32_t (*bypass_wd_reset_t)(struct rte_eth_dev *dev); -#endif typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev, enum rte_filter_type filter_type, @@ -1441,6 +1418,9 @@ typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev, void *arg); /**< @internal Take operations to assigned filter type on an Ethernet device */ +typedef int (*eth_tm_ops_get_t)(struct rte_eth_dev *dev, void *ops); +/**< @internal Get Traffic Management (TM) operations on an Ethernet device */ + typedef int (*eth_get_dcb_info)(struct rte_eth_dev *dev, struct rte_eth_dcb_info *dcb_info); /**< @internal Get dcb information on an Ethernet device */ @@ -1460,7 +1440,7 @@ struct eth_dev_ops { 
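
The DEV_TX_OFFLOAD_MT_LOCKFREE flag introduced above advertises that several threads may call rte_eth_tx_burst() on the same Tx queue without taking a software lock. A hedged sketch of how an application might probe for it before sharing a queue (the helper name is illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <rte_ethdev.h>

static bool
port_tx_is_mt_lockfree(uint8_t port_id)
{
    struct rte_eth_dev_info info;

    /* Query the Tx offload capability mask reported by the PMD. */
    rte_eth_dev_info_get(port_id, &info);
    return (info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE) != 0;
}
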
eth_promiscuous_enable_t promiscuous_enable; /**< Promiscuous ON. */ eth_promiscuous_disable_t promiscuous_disable;/**< Promiscuous OFF. */ eth_allmulticast_enable_t allmulticast_enable;/**< RX multicast ON. */ - eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OF. */ + eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OFF. */ eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address. */ eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address. */ eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address. */ @@ -1540,18 +1520,6 @@ struct eth_dev_ops { eth_get_eeprom_t get_eeprom; /**< Get eeprom data. */ eth_set_eeprom_t set_eeprom; /**< Set eeprom. */ - /* bypass control */ -#ifdef RTE_NIC_BYPASS - bypass_init_t bypass_init; - bypass_state_set_t bypass_state_set; - bypass_state_show_t bypass_state_show; - bypass_event_set_t bypass_event_set; - bypass_event_show_t bypass_event_show; - bypass_wd_timeout_set_t bypass_wd_timeout_set; - bypass_wd_timeout_show_t bypass_wd_timeout_show; - bypass_ver_show_t bypass_ver_show; - bypass_wd_reset_t bypass_wd_reset; -#endif eth_filter_ctrl_t filter_ctrl; /**< common filter control. */ @@ -1573,6 +1541,9 @@ struct eth_dev_ops { /**< Get extended device statistic values by ID. */ eth_xstats_get_names_by_id_t xstats_get_names_by_id; /**< Get name of extended device statistics by ID. */ + + eth_tm_ops_get_t tm_ops_get; + /**< Get Traffic Management (TM) operations. */ }; /** @@ -1644,6 +1615,7 @@ struct rte_eth_rxtx_callback { enum rte_eth_dev_state { RTE_ETH_DEV_UNUSED = 0, RTE_ETH_DEV_ATTACHED, + RTE_ETH_DEV_DEFERRED, }; /** @@ -1687,7 +1659,7 @@ struct rte_eth_dev_sriov { }; #define RTE_ETH_DEV_SRIOV(dev) ((dev)->data->sriov) -#define RTE_ETH_NAME_MAX_LEN (32) +#define RTE_ETH_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN /** * @internal @@ -1737,7 +1709,8 @@ struct rte_eth_dev_data { uint32_t dev_flags; /**< Capabilities */ enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ int numa_node; /**< NUMA node connection */ - const char *drv_name; /**< Driver name */ + struct rte_vlan_filter_conf vlan_filter_conf; + /**< VLAN filter configuration. */ }; /** Device supports hotplug detach */ @@ -1777,13 +1750,12 @@ uint8_t rte_eth_find_next(uint8_t port_id); /** * Get the total number of Ethernet devices that have been successfully - * initialized by the [matching] Ethernet driver during the PCI probing phase. - * All devices whose port identifier is in the range - * [0, rte_eth_dev_count() - 1] can be operated on by network applications - * immediately after invoking rte_eal_init(). - * If the application unplugs a port using hotplug function, The enabled port - * numbers may be noncontiguous. In the case, the applications need to manage - * enabled port by using the ``RTE_ETH_FOREACH_DEV()`` macro. + * initialized by the matching Ethernet driver during the PCI probing phase + * and that are available for applications to use. These devices must be + * accessed by using the ``RTE_ETH_FOREACH_DEV()`` macro to deal with + * non-contiguous ranges of devices. + * These non-contiguous ranges can be created by calls to hotplug functions or + * by some PMDs. * * @return * - The total number of usable Ethernet devices. @@ -1859,7 +1831,8 @@ int rte_eth_dev_attach(const char *devargs, uint8_t *port_id); * @param port_id * The port identifier of the device to detach. * @param devname - * A pointer to a device name actually detached. + * A pointer to a buffer that will be filled with the device name. 
+ * This buffer must be at least RTE_DEV_NAME_MAX_LEN long. * @return * 0 on success and devname is filled, negative on error */ @@ -2358,7 +2331,7 @@ rte_eth_xstats_get_names_by_id(uint8_t port_id, * @param port_id * The port identifier of the Ethernet device. * @param ids - * A pointer to an ids array passed by application. This tells wich + * A pointer to an ids array passed by application. This tells which * statistics values function should retrieve. This parameter * can be set to NULL if n is 0. In this case function will retrieve * all avalible statistics. @@ -2997,6 +2970,10 @@ static inline int rte_eth_tx_descriptor_status(uint8_t port_id, * rte_eth_tx_burst() function must [attempt to] free the *rte_mbuf* buffers * of those packets whose transmission was effectively completed. * + * If the PMD is DEV_TX_OFFLOAD_MT_LOCKFREE capable, multiple threads can + * invoke this function concurrently on the same tx queue without SW lock. + * @see rte_eth_dev_info_get, struct rte_eth_txconf::txq_flags + * * @param port_id * The port identifier of the Ethernet device. * @param queue_id @@ -3266,7 +3243,7 @@ rte_eth_tx_buffer_flush(uint8_t port_id, uint16_t queue_id, * causing N packets to be sent, and the error callback to be called for * the rest. */ -static inline uint16_t __attribute__((always_inline)) +static __rte_always_inline uint16_t rte_eth_tx_buffer(uint8_t port_id, uint16_t queue_id, struct rte_eth_dev_tx_buffer *buffer, struct rte_mbuf *tx_pkt) { @@ -3401,8 +3378,8 @@ enum rte_eth_event_type { RTE_ETH_EVENT_MAX /**< max value of this enum */ }; -typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, \ - enum rte_eth_event_type event, void *cb_arg); +typedef int (*rte_eth_dev_cb_fn)(uint8_t port_id, + enum rte_eth_event_type event, void *cb_arg, void *ret_param); /**< user application callback to be registered for interrupts */ @@ -3419,11 +3396,6 @@ typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, \ * @param cb_arg * Pointer to the parameters for the registered callback. * - * The user data is overwritten in the case of RTE_ETH_EVENT_VF_MBOX. - * This even occurs when a message from the VF is received by the PF. - * The user data is overwritten with struct rte_pmd_ixgbe_mb_event_param. - * This struct is defined in rte_pmd_ixgbe.h. - * * @return * - On success, zero. * - On failure, a negative value. @@ -3463,15 +3435,17 @@ int rte_eth_dev_callback_unregister(uint8_t port_id, * @param event * Eth device interrupt event type. * @param cb_arg - * Update callback parameter to pass data back to user application. + * callback parameter. + * @param ret_param + * To pass data back to user application. * This allows the user application to decide if a particular function * is permitted or not. * * @return - * void + * int */ -void _rte_eth_dev_callback_process(struct rte_eth_dev *dev, - enum rte_eth_event_type event, void *cb_arg); +int _rte_eth_dev_callback_process(struct rte_eth_dev *dev, + enum rte_eth_event_type event, void *cb_arg, void *ret_param); /** * When there is no rx packet coming in Rx Queue for a long time, we can @@ -3827,171 +3801,6 @@ int rte_eth_mirror_rule_reset(uint8_t port_id, int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, uint16_t tx_rate); -/** - * Initialize bypass logic. This function needs to be called before - * executing any other bypass API. - * - * @param port - * The port identifier of the Ethernet device. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. 
- */ -int rte_eth_dev_bypass_init(uint8_t port); - -/** - * Return bypass state. - * - * @param port - * The port identifier of the Ethernet device. - * @param state - * The return bypass state. - * - (1) Normal mode - * - (2) Bypass mode - * - (3) Isolate mode - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_bypass_state_show(uint8_t port, uint32_t *state); - -/** - * Set bypass state - * - * @param port - * The port identifier of the Ethernet device. - * @param new_state - * The current bypass state. - * - (1) Normal mode - * - (2) Bypass mode - * - (3) Isolate mode - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_bypass_state_set(uint8_t port, uint32_t *new_state); - -/** - * Return bypass state when given event occurs. - * - * @param port - * The port identifier of the Ethernet device. - * @param event - * The bypass event - * - (1) Main power on (power button is pushed) - * - (2) Auxiliary power on (power supply is being plugged) - * - (3) Main power off (system shutdown and power supply is left plugged in) - * - (4) Auxiliary power off (power supply is being unplugged) - * - (5) Display or set the watchdog timer - * @param state - * The bypass state when given event occurred. - * - (1) Normal mode - * - (2) Bypass mode - * - (3) Isolate mode - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_bypass_event_show(uint8_t port, uint32_t event, uint32_t *state); - -/** - * Set bypass state when given event occurs. - * - * @param port - * The port identifier of the Ethernet device. - * @param event - * The bypass event - * - (1) Main power on (power button is pushed) - * - (2) Auxiliary power on (power supply is being plugged) - * - (3) Main power off (system shutdown and power supply is left plugged in) - * - (4) Auxiliary power off (power supply is being unplugged) - * - (5) Display or set the watchdog timer - * @param state - * The assigned state when given event occurs. - * - (1) Normal mode - * - (2) Bypass mode - * - (3) Isolate mode - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_bypass_event_store(uint8_t port, uint32_t event, uint32_t state); - -/** - * Set bypass watchdog timeout count. - * - * @param port - * The port identifier of the Ethernet device. - * @param timeout - * The timeout to be set. - * - (0) 0 seconds (timer is off) - * - (1) 1.5 seconds - * - (2) 2 seconds - * - (3) 3 seconds - * - (4) 4 seconds - * - (5) 8 seconds - * - (6) 16 seconds - * - (7) 32 seconds - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_wd_timeout_store(uint8_t port, uint32_t timeout); - -/** - * Get bypass firmware version. - * - * @param port - * The port identifier of the Ethernet device. - * @param ver - * The firmware version - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_bypass_ver_show(uint8_t port, uint32_t *ver); - -/** - * Return bypass watchdog timeout in seconds - * - * @param port - * The port identifier of the Ethernet device. - * @param wd_timeout - * The return watchdog timeout. 
"0" represents timer expired - * - (0) 0 seconds (timer is off) - * - (1) 1.5 seconds - * - (2) 2 seconds - * - (3) 3 seconds - * - (4) 4 seconds - * - (5) 8 seconds - * - (6) 16 seconds - * - (7) 32 seconds - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_bypass_wd_timeout_show(uint8_t port, uint32_t *wd_timeout); - -/** - * Reset bypass watchdog timer - * - * @param port - * The port identifier of the Ethernet device. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_bypass_wd_reset(uint8_t port); - /** * Configuration of Receive Side Scaling hash computation of Ethernet device. * @@ -4587,7 +4396,7 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id, * @param port_id * pointer to port identifier of the device * @return -* - (0) if successful. +* - (0) if successful and port_id is filled. * - (-ENODEV or -EINVAL) on failure. */ int @@ -4607,6 +4416,26 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id); int rte_eth_dev_get_name_by_port(uint8_t port_id, char *name); +/** + * Check that numbers of Rx and Tx descriptors satisfy descriptors limits from + * the ethernet device information, otherwise adjust them to boundaries. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param nb_rx_desc + * A pointer to a uint16_t where the number of receive + * descriptors stored. + * @param nb_tx_desc + * A pointer to a uint16_t where the number of transmit + * descriptors stored. + * @return + * - (0) if successful. + * - (-ENOTSUP, -ENODEV or -EINVAL) on failure. + */ +int rte_eth_dev_adjust_nb_rx_tx_desc(uint8_t port_id, + uint16_t *nb_rx_desc, + uint16_t *nb_tx_desc); + #ifdef __cplusplus } #endif diff --git a/lib/librte_ether/rte_ethdev_pci.h b/lib/librte_ether/rte_ethdev_pci.h index d3bc03cf..56b10721 100644 --- a/lib/librte_ether/rte_ethdev_pci.h +++ b/lib/librte_ether/rte_ethdev_pci.h @@ -45,9 +45,6 @@ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. * @param pci_dev * The *pci_dev* pointer is the address of the *rte_pci_device* structure. - * - * @return - * - 0 on success, negative on error */ static inline void rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, @@ -69,7 +66,6 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, eth_dev->data->kdrv = pci_dev->kdrv; eth_dev->data->numa_node = pci_dev->device.numa_node; - eth_dev->data->drv_name = pci_dev->driver->driver.name; } /** @@ -118,7 +114,6 @@ rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size) } eth_dev->device = &dev->device; - eth_dev->intr_handle = &dev->intr_handle; rte_eth_copy_pci_info(eth_dev, dev); return eth_dev; } @@ -134,6 +129,12 @@ rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev) eth_dev->data->dev_private = NULL; + /* + * Secondary process will check the name to attach. + * Clear this field to avoid attaching a released ports. 
+ */ + eth_dev->data->name[0] = '\0'; + eth_dev->device = NULL; eth_dev->intr_handle = NULL; } diff --git a/lib/librte_ether/rte_ethdev_vdev.h b/lib/librte_ether/rte_ethdev_vdev.h index fa2cb61e..4d2c3e2b 100644 --- a/lib/librte_ether/rte_ethdev_vdev.h +++ b/lib/librte_ether/rte_ethdev_vdev.h @@ -77,7 +77,6 @@ rte_eth_vdev_allocate(struct rte_vdev_device *dev, size_t private_data_size) eth_dev->data->kdrv = RTE_KDRV_NONE; eth_dev->data->numa_node = dev->device.numa_node; - eth_dev->data->drv_name = dev->device.driver->name; return eth_dev; } diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map index d6726bb1..42837285 100644 --- a/lib/librte_ether/rte_ether_version.map +++ b/lib/librte_ether/rte_ether_version.map @@ -1,7 +1,6 @@ DPDK_2.2 { global: - _rte_eth_dev_callback_process; rte_eth_add_rx_callback; rte_eth_add_tx_callback; rte_eth_allmulticast_disable; @@ -10,14 +9,6 @@ DPDK_2.2 { rte_eth_dev_allocate; rte_eth_dev_allocated; rte_eth_dev_attach; - rte_eth_dev_bypass_event_show; - rte_eth_dev_bypass_event_store; - rte_eth_dev_bypass_init; - rte_eth_dev_bypass_state_set; - rte_eth_dev_bypass_state_show; - rte_eth_dev_bypass_ver_show; - rte_eth_dev_bypass_wd_reset; - rte_eth_dev_bypass_wd_timeout_show; rte_eth_dev_callback_register; rte_eth_dev_callback_unregister; rte_eth_dev_close; @@ -70,7 +61,6 @@ DPDK_2.2 { rte_eth_dev_uc_all_hash_table_set; rte_eth_dev_uc_hash_table_set; rte_eth_dev_vlan_filter; - rte_eth_dev_wd_timeout_store; rte_eth_dma_zone_reserve; rte_eth_led_off; rte_eth_led_on; @@ -151,8 +141,49 @@ DPDK_17.05 { rte_eth_dev_attach_secondary; rte_eth_find_next; + rte_eth_tx_done_cleanup; rte_eth_xstats_get_by_id; rte_eth_xstats_get_id_by_name; rte_eth_xstats_get_names_by_id; } DPDK_17.02; + +DPDK_17.08 { + global: + + _rte_eth_dev_callback_process; + rte_eth_dev_adjust_nb_rx_tx_desc; + rte_flow_copy; + rte_flow_isolate; + rte_tm_capabilities_get; + rte_tm_get_leaf_nodes; + rte_tm_hierarchy_commit; + rte_tm_level_capabilities_get; + rte_tm_mark_ip_dscp; + rte_tm_mark_ip_ecn; + rte_tm_mark_vlan_dei; + rte_tm_node_add; + rte_tm_node_capabilities_get; + rte_tm_node_cman_update; + rte_tm_node_delete; + rte_tm_node_parent_update; + rte_tm_node_resume; + rte_tm_node_shaper_update; + rte_tm_node_shared_shaper_update; + rte_tm_node_shared_wred_context_update; + rte_tm_node_stats_read; + rte_tm_node_stats_update; + rte_tm_node_suspend; + rte_tm_node_type_get; + rte_tm_node_wfq_weight_mode_update; + rte_tm_node_wred_context_update; + rte_tm_shaper_profile_add; + rte_tm_shaper_profile_delete; + rte_tm_shared_shaper_add_update; + rte_tm_shared_shaper_delete; + rte_tm_shared_wred_context_add_update; + rte_tm_shared_wred_context_delete; + rte_tm_wred_profile_add; + rte_tm_wred_profile_delete; + +} DPDK_17.05; diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c index aaa70d68..2001fbbf 100644 --- a/lib/librte_ether/rte_flow.c +++ b/lib/librte_ether/rte_flow.c @@ -31,14 +31,81 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <errno.h> +#include <stddef.h> #include <stdint.h> +#include <string.h> +#include <rte_common.h> #include <rte_errno.h> #include <rte_branch_prediction.h> #include "rte_ethdev.h" #include "rte_flow_driver.h" #include "rte_flow.h" +/** + * Flow elements description tables. + */ +struct rte_flow_desc_data { + const char *name; + size_t size; +}; + +/** Generate flow_item[] entry. 
*/ +#define MK_FLOW_ITEM(t, s) \ + [RTE_FLOW_ITEM_TYPE_ ## t] = { \ + .name = # t, \ + .size = s, \ + } + +/** Information about known flow pattern items. */ +static const struct rte_flow_desc_data rte_flow_desc_item[] = { + MK_FLOW_ITEM(END, 0), + MK_FLOW_ITEM(VOID, 0), + MK_FLOW_ITEM(INVERT, 0), + MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)), + MK_FLOW_ITEM(PF, 0), + MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)), + MK_FLOW_ITEM(PORT, sizeof(struct rte_flow_item_port)), + MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)), /* +pattern[] */ + MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)), + MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)), + MK_FLOW_ITEM(IPV4, sizeof(struct rte_flow_item_ipv4)), + MK_FLOW_ITEM(IPV6, sizeof(struct rte_flow_item_ipv6)), + MK_FLOW_ITEM(ICMP, sizeof(struct rte_flow_item_icmp)), + MK_FLOW_ITEM(UDP, sizeof(struct rte_flow_item_udp)), + MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)), + MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)), + MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)), + MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)), + MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)), + MK_FLOW_ITEM(E_TAG, sizeof(struct rte_flow_item_e_tag)), + MK_FLOW_ITEM(NVGRE, sizeof(struct rte_flow_item_nvgre)), +}; + +/** Generate flow_action[] entry. */ +#define MK_FLOW_ACTION(t, s) \ + [RTE_FLOW_ACTION_TYPE_ ## t] = { \ + .name = # t, \ + .size = s, \ + } + +/** Information about known flow actions. */ +static const struct rte_flow_desc_data rte_flow_desc_action[] = { + MK_FLOW_ACTION(END, 0), + MK_FLOW_ACTION(VOID, 0), + MK_FLOW_ACTION(PASSTHRU, 0), + MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)), + MK_FLOW_ACTION(FLAG, 0), + MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)), + MK_FLOW_ACTION(DROP, 0), + MK_FLOW_ACTION(COUNT, 0), + MK_FLOW_ACTION(DUP, sizeof(struct rte_flow_action_dup)), + MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)), /* +queue[] */ + MK_FLOW_ACTION(PF, 0), + MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)), +}; + /* Get generic flow operations structure from a port. */ const struct rte_flow_ops * rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error) @@ -157,3 +224,185 @@ rte_flow_query(uint8_t port_id, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, rte_strerror(ENOSYS)); } + +/* Restrict ingress traffic to the defined flow rules. */ +int +rte_flow_isolate(uint8_t port_id, + int set, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (!ops) + return -rte_errno; + if (likely(!!ops->isolate)) + return ops->isolate(dev, set, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/** Compute storage space needed by item specification. */ +static void +flow_item_spec_size(const struct rte_flow_item *item, + size_t *size, size_t *pad) +{ + if (!item->spec) { + *size = 0; + goto empty; + } + switch (item->type) { + union { + const struct rte_flow_item_raw *raw; + } spec; + + /* Not a fall-through */ + case RTE_FLOW_ITEM_TYPE_RAW: + spec.raw = item->spec; + *size = offsetof(struct rte_flow_item_raw, pattern) + + spec.raw->length * sizeof(*spec.raw->pattern); + break; + default: + *size = rte_flow_desc_item[item->type].size; + break; + } +empty: + *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size; +} + +/** Compute storage space needed by action configuration. 
*/ +static void +flow_action_conf_size(const struct rte_flow_action *action, + size_t *size, size_t *pad) +{ + if (!action->conf) { + *size = 0; + goto empty; + } + switch (action->type) { + union { + const struct rte_flow_action_rss *rss; + } conf; + + /* Not a fall-through. */ + case RTE_FLOW_ACTION_TYPE_RSS: + conf.rss = action->conf; + *size = offsetof(struct rte_flow_action_rss, queue) + + conf.rss->num * sizeof(*conf.rss->queue); + break; + default: + *size = rte_flow_desc_action[action->type].size; + break; + } +empty: + *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size; +} + +/** Store a full rte_flow description. */ +size_t +rte_flow_copy(struct rte_flow_desc *desc, size_t len, + const struct rte_flow_attr *attr, + const struct rte_flow_item *items, + const struct rte_flow_action *actions) +{ + struct rte_flow_desc *fd = NULL; + size_t tmp; + size_t pad; + size_t off1 = 0; + size_t off2 = 0; + size_t size = 0; + +store: + if (items) { + const struct rte_flow_item *item; + + item = items; + if (fd) + fd->items = (void *)&fd->data[off1]; + do { + struct rte_flow_item *dst = NULL; + + if ((size_t)item->type >= + RTE_DIM(rte_flow_desc_item) || + !rte_flow_desc_item[item->type].name) { + rte_errno = ENOTSUP; + return 0; + } + if (fd) + dst = memcpy(fd->data + off1, item, + sizeof(*item)); + off1 += sizeof(*item); + flow_item_spec_size(item, &tmp, &pad); + if (item->spec) { + if (fd) + dst->spec = memcpy(fd->data + off2, + item->spec, tmp); + off2 += tmp + pad; + } + if (item->last) { + if (fd) + dst->last = memcpy(fd->data + off2, + item->last, tmp); + off2 += tmp + pad; + } + if (item->mask) { + if (fd) + dst->mask = memcpy(fd->data + off2, + item->mask, tmp); + off2 += tmp + pad; + } + off2 = RTE_ALIGN_CEIL(off2, sizeof(double)); + } while ((item++)->type != RTE_FLOW_ITEM_TYPE_END); + off1 = RTE_ALIGN_CEIL(off1, sizeof(double)); + } + if (actions) { + const struct rte_flow_action *action; + + action = actions; + if (fd) + fd->actions = (void *)&fd->data[off1]; + do { + struct rte_flow_action *dst = NULL; + + if ((size_t)action->type >= + RTE_DIM(rte_flow_desc_action) || + !rte_flow_desc_action[action->type].name) { + rte_errno = ENOTSUP; + return 0; + } + if (fd) + dst = memcpy(fd->data + off1, action, + sizeof(*action)); + off1 += sizeof(*action); + flow_action_conf_size(action, &tmp, &pad); + if (action->conf) { + if (fd) + dst->conf = memcpy(fd->data + off2, + action->conf, tmp); + off2 += tmp + pad; + } + off2 = RTE_ALIGN_CEIL(off2, sizeof(double)); + } while ((action++)->type != RTE_FLOW_ACTION_TYPE_END); + } + if (fd != NULL) + return size; + off1 = RTE_ALIGN_CEIL(off1, sizeof(double)); + tmp = RTE_ALIGN_CEIL(offsetof(struct rte_flow_desc, data), + sizeof(double)); + size = tmp + off1 + off2; + if (size > len) + return size; + fd = desc; + if (fd != NULL) { + *fd = (const struct rte_flow_desc) { + .size = size, + .attr = *attr, + }; + tmp -= offsetof(struct rte_flow_desc, data); + off2 = tmp + off1; + off1 = tmp; + goto store; + } + return 0; +} diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h index c47edbc9..bba6169f 100644 --- a/lib/librte_ether/rte_flow.h +++ b/lib/librte_ether/rte_flow.h @@ -297,6 +297,18 @@ enum rte_flow_item_type { * See struct rte_flow_item_gre. */ RTE_FLOW_ITEM_TYPE_GRE, + + /** + * [META] + * + * Fuzzy pattern match, expect faster than default. + * + * This is for device that support fuzzy matching option. + * Usually a fuzzy matching is fast but the cost is accuracy. + * + * See struct rte_flow_item_fuzzy. 
+ */ + RTE_FLOW_ITEM_TYPE_FUZZY, }; /** @@ -429,7 +441,7 @@ static const struct rte_flow_item_raw rte_flow_item_raw_mask = { struct rte_flow_item_eth { struct ether_addr dst; /**< Destination MAC. */ struct ether_addr src; /**< Source MAC. */ - uint16_t type; /**< EtherType. */ + rte_be16_t type; /**< EtherType. */ }; /** Default mask for RTE_FLOW_ITEM_TYPE_ETH. */ @@ -437,7 +449,7 @@ struct rte_flow_item_eth { static const struct rte_flow_item_eth rte_flow_item_eth_mask = { .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", - .type = 0x0000, + .type = RTE_BE16(0x0000), }; #endif @@ -450,15 +462,15 @@ static const struct rte_flow_item_eth rte_flow_item_eth_mask = { * RTE_FLOW_ITEM_TYPE_VLAN. */ struct rte_flow_item_vlan { - uint16_t tpid; /**< Tag protocol identifier. */ - uint16_t tci; /**< Tag control information. */ + rte_be16_t tpid; /**< Tag protocol identifier. */ + rte_be16_t tci; /**< Tag control information. */ }; /** Default mask for RTE_FLOW_ITEM_TYPE_VLAN. */ #ifndef __cplusplus static const struct rte_flow_item_vlan rte_flow_item_vlan_mask = { - .tpid = 0x0000, - .tci = 0xffff, + .tpid = RTE_BE16(0x0000), + .tci = RTE_BE16(0xffff), }; #endif @@ -477,8 +489,8 @@ struct rte_flow_item_ipv4 { #ifndef __cplusplus static const struct rte_flow_item_ipv4 rte_flow_item_ipv4_mask = { .hdr = { - .src_addr = 0xffffffff, - .dst_addr = 0xffffffff, + .src_addr = RTE_BE32(0xffffffff), + .dst_addr = RTE_BE32(0xffffffff), }, }; #endif @@ -540,8 +552,8 @@ struct rte_flow_item_udp { #ifndef __cplusplus static const struct rte_flow_item_udp rte_flow_item_udp_mask = { .hdr = { - .src_port = 0xffff, - .dst_port = 0xffff, + .src_port = RTE_BE16(0xffff), + .dst_port = RTE_BE16(0xffff), }, }; #endif @@ -559,8 +571,8 @@ struct rte_flow_item_tcp { #ifndef __cplusplus static const struct rte_flow_item_tcp rte_flow_item_tcp_mask = { .hdr = { - .src_port = 0xffff, - .dst_port = 0xffff, + .src_port = RTE_BE16(0xffff), + .dst_port = RTE_BE16(0xffff), }, }; #endif @@ -578,8 +590,8 @@ struct rte_flow_item_sctp { #ifndef __cplusplus static const struct rte_flow_item_sctp rte_flow_item_sctp_mask = { .hdr = { - .src_port = 0xffff, - .dst_port = 0xffff, + .src_port = RTE_BE16(0xffff), + .dst_port = RTE_BE16(0xffff), }, }; #endif @@ -609,14 +621,14 @@ static const struct rte_flow_item_vxlan rte_flow_item_vxlan_mask = { * Matches a E-tag header. */ struct rte_flow_item_e_tag { - uint16_t tpid; /**< Tag protocol identifier (0x893F). */ + rte_be16_t tpid; /**< Tag protocol identifier (0x893F). */ /** * E-Tag control information (E-TCI). * E-PCP (3b), E-DEI (1b), ingress E-CID base (12b). */ - uint16_t epcp_edei_in_ecid_b; + rte_be16_t epcp_edei_in_ecid_b; /** Reserved (2b), GRP (2b), E-CID base (12b). */ - uint16_t rsvd_grp_ecid_b; + rte_be16_t rsvd_grp_ecid_b; uint8_t in_ecid_e; /**< Ingress E-CID ext. */ uint8_t ecid_e; /**< E-CID ext. */ }; @@ -624,13 +636,7 @@ struct rte_flow_item_e_tag { /** Default mask for RTE_FLOW_ITEM_TYPE_E_TAG. */ #ifndef __cplusplus static const struct rte_flow_item_e_tag rte_flow_item_e_tag_mask = { -#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN - .rsvd_grp_ecid_b = 0x3fff, -#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN - .rsvd_grp_ecid_b = 0xff3f, -#else -#error Unsupported endianness. -#endif + .rsvd_grp_ecid_b = RTE_BE16(0x3fff), }; #endif @@ -646,8 +652,8 @@ struct rte_flow_item_nvgre { * * c_k_s_rsvd0_ver must have value 0x2000 according to RFC 7637. */ - uint16_t c_k_s_rsvd0_ver; - uint16_t protocol; /**< Protocol type (0x6558). 
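Since the spec/mask fields above are now declared with the big-endian types, applications must supply values in network byte order. A short sketch (illustrative only; the 0x0800 IPv4 EtherType and the UDP port are arbitrary examples):

#include <rte_byteorder.h>
#include <rte_flow.h>

/* Compile-time constants can use RTE_BE16()/RTE_BE32(). */
static const struct rte_flow_item_eth eth_spec = {
	.type = RTE_BE16(0x0800), /* IPv4 EtherType, already byte-swapped */
};

/* Runtime values go through rte_cpu_to_be_16()/_32(). */
static void
udp_spec_set_dst_port(struct rte_flow_item_udp *spec, uint16_t dst_port)
{
	spec->hdr.dst_port = rte_cpu_to_be_16(dst_port);
}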
*/ + rte_be16_t c_k_s_rsvd0_ver; + rte_be16_t protocol; /**< Protocol type (0x6558). */ + uint8_t tni[3]; /**< Virtual subnet ID. */ + uint8_t flow_id; /**< Flow ID. */ }; @@ -689,14 +695,42 @@ struct rte_flow_item_gre { * Checksum (1b), reserved 0 (12b), version (3b). * Refer to RFC 2784. */ - uint16_t c_rsvd0_ver; - uint16_t protocol; /**< Protocol type. */ + rte_be16_t c_rsvd0_ver; + rte_be16_t protocol; /**< Protocol type. */ }; /** Default mask for RTE_FLOW_ITEM_TYPE_GRE. */ #ifndef __cplusplus static const struct rte_flow_item_gre rte_flow_item_gre_mask = { - .protocol = 0xffff, + .protocol = RTE_BE16(0xffff), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_FUZZY + * + * Fuzzy pattern match, expected to be faster than default. + * + * This is for devices that support a fuzzy match option. + * A fuzzy match is usually fast, but the cost is accuracy. + * e.g. a signature match only compares a pattern's hash value, so + * two different patterns may have the same hash value. + * + * The matching accuracy level can be configured through the threshold. + * The driver can divide the threshold range and map it to the different + * accuracy levels that the device supports. + * + * Threshold 0 means perfect match (no fuzziness), while threshold + * 0xffffffff means fuzziest match. + */ +struct rte_flow_item_fuzzy { + uint32_t thresh; /**< Accuracy threshold. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_FUZZY. */ +#ifndef __cplusplus +static const struct rte_flow_item_fuzzy rte_flow_item_fuzzy_mask = { + .thresh = 0xffffffff, }; #endif @@ -1191,6 +1225,90 @@ rte_flow_query(uint8_t port_id, void *data, struct rte_flow_error *error); +/** + * Restrict ingress traffic to the defined flow rules. + * + * Isolated mode guarantees that all ingress traffic comes from defined flow + * rules only (current and future). + * + * Besides making ingress more deterministic, it allows PMDs to safely reuse + * resources otherwise assigned to handle the remaining traffic, such as + * global RSS configuration settings, VLAN filters, MAC address entries, + * legacy filter API rules and so on in order to expand the set of possible + * flow rule types. + * + * Calling this function as soon as possible after device initialization, + * ideally before the first call to rte_eth_dev_configure(), is recommended + * to avoid possible failures due to conflicting settings. + * + * Once effective, leaving isolated mode may not be possible depending on + * PMD implementation. + * + * Additionally, the following functionality has no effect on the underlying + * port and may return errors such as ENOTSUP ("not supported"): + * + * - Toggling promiscuous mode. + * - Toggling allmulticast mode. + * - Configuring MAC addresses. + * - Configuring multicast addresses. + * - Configuring VLAN filters. + * - Configuring Rx filters through the legacy API (e.g. FDIR). + * - Configuring global RSS settings. + * + * @param port_id + * Port identifier of Ethernet device. + * @param set + * Nonzero to enter isolated mode, attempt to leave it otherwise. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_isolate(uint8_t port_id, int set, struct rte_flow_error *error); + +/** + * Generic flow representation. + * + * This form is sufficient to describe an rte_flow independently from any + * PMD implementation and allows for replayability and identification.
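A minimal usage sketch for rte_flow_isolate(), assuming it is called right after the port is probed and before rte_eth_dev_configure() as recommended above (the helper name and error handling are illustrative):

#include <stdio.h>
#include <rte_errno.h>
#include <rte_flow.h>

static int
enter_isolated_mode(uint8_t port_id)
{
	struct rte_flow_error error;

	if (rte_flow_isolate(port_id, 1, &error) != 0) {
		printf("port %u: cannot enter isolated mode: %s\n",
		       (unsigned int)port_id,
		       error.message ? error.message : rte_strerror(rte_errno));
		return -rte_errno;
	}
	return 0;
}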
+ */ +struct rte_flow_desc { + size_t size; /**< Allocated space including data[]. */ + struct rte_flow_attr attr; /**< Attributes. */ + struct rte_flow_item *items; /**< Items. */ + struct rte_flow_action *actions; /**< Actions. */ + uint8_t data[]; /**< Storage for items/actions. */ +}; + +/** + * Copy an rte_flow rule description. + * + * @param[in] fd + * Flow rule description. + * @param[in] len + * Total size of allocated data for the flow description. + * @param[in] attr + * Flow rule attributes. + * @param[in] items + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * + * @return + * If len is greater or equal to the size of the flow, the total size of the + * flow description and its data. + * If len is lower than the size of the flow, the number of bytes that would + * have been written to desc had it been sufficient. Nothing is written. + */ +size_t +rte_flow_copy(struct rte_flow_desc *fd, size_t len, + const struct rte_flow_attr *attr, + const struct rte_flow_item *items, + const struct rte_flow_action *actions); + #ifdef __cplusplus } #endif diff --git a/lib/librte_ether/rte_flow_driver.h b/lib/librte_ether/rte_flow_driver.h index da5749d5..4d95391d 100644 --- a/lib/librte_ether/rte_flow_driver.h +++ b/lib/librte_ether/rte_flow_driver.h @@ -120,6 +120,11 @@ struct rte_flow_ops { enum rte_flow_action_type, void *, struct rte_flow_error *); + /** See rte_flow_isolate(). */ + int (*isolate) + (struct rte_eth_dev *, + int, + struct rte_flow_error *); }; /** diff --git a/lib/librte_ether/rte_tm.c b/lib/librte_ether/rte_tm.c new file mode 100644 index 00000000..71679650 --- /dev/null +++ b/lib/librte_ether/rte_tm.c @@ -0,0 +1,438 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> + +#include <rte_errno.h> +#include "rte_ethdev.h" +#include "rte_tm_driver.h" +#include "rte_tm.h" + +/* Get generic traffic manager operations structure from a port. */ +const struct rte_tm_ops * +rte_tm_ops_get(uint8_t port_id, struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_tm_ops *ops; + + if (!rte_eth_dev_is_valid_port(port_id)) { + rte_tm_error_set(error, + ENODEV, + RTE_TM_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(ENODEV)); + return NULL; + } + + if ((dev->dev_ops->tm_ops_get == NULL) || + (dev->dev_ops->tm_ops_get(dev, &ops) != 0) || + (ops == NULL)) { + rte_tm_error_set(error, + ENOSYS, + RTE_TM_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(ENOSYS)); + return NULL; + } + + return ops; +} + +#define RTE_TM_FUNC(port_id, func) \ +({ \ + const struct rte_tm_ops *ops = \ + rte_tm_ops_get(port_id, error); \ + if (ops == NULL) \ + return -rte_errno; \ + \ + if (ops->func == NULL) \ + return -rte_tm_error_set(error, \ + ENOSYS, \ + RTE_TM_ERROR_TYPE_UNSPECIFIED, \ + NULL, \ + rte_strerror(ENOSYS)); \ + \ + ops->func; \ +}) + +/* Get number of leaf nodes */ +int +rte_tm_get_number_of_leaf_nodes(uint8_t port_id, + uint32_t *n_leaf_nodes, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_tm_ops *ops = + rte_tm_ops_get(port_id, error); + + if (ops == NULL) + return -rte_errno; + + if (n_leaf_nodes == NULL) { + rte_tm_error_set(error, + EINVAL, + RTE_TM_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(EINVAL)); + return -rte_errno; + } + + *n_leaf_nodes = dev->data->nb_tx_queues; + return 0; +} + +/* Check node type (leaf or non-leaf) */ +int +rte_tm_node_type_get(uint8_t port_id, + uint32_t node_id, + int *is_leaf, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_type_get)(dev, + node_id, is_leaf, error); +} + +/* Get capabilities */ +int rte_tm_capabilities_get(uint8_t port_id, + struct rte_tm_capabilities *cap, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, capabilities_get)(dev, + cap, error); +} + +/* Get level capabilities */ +int rte_tm_level_capabilities_get(uint8_t port_id, + uint32_t level_id, + struct rte_tm_level_capabilities *cap, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, level_capabilities_get)(dev, + level_id, cap, error); +} + +/* Get node capabilities */ +int rte_tm_node_capabilities_get(uint8_t port_id, + uint32_t node_id, + struct rte_tm_node_capabilities *cap, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_capabilities_get)(dev, + node_id, cap, error); +} + +/* Add WRED profile */ +int rte_tm_wred_profile_add(uint8_t port_id, + uint32_t wred_profile_id, + struct rte_tm_wred_params *profile, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, wred_profile_add)(dev, + wred_profile_id, profile, error); +} + +/* Delete WRED profile */ +int rte_tm_wred_profile_delete(uint8_t port_id, + uint32_t wred_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, wred_profile_delete)(dev, + wred_profile_id, error); +} + +/* Add/update shared WRED context */ +int rte_tm_shared_wred_context_add_update(uint8_t 
port_id, + uint32_t shared_wred_context_id, + uint32_t wred_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shared_wred_context_add_update)(dev, + shared_wred_context_id, wred_profile_id, error); +} + +/* Delete shared WRED context */ +int rte_tm_shared_wred_context_delete(uint8_t port_id, + uint32_t shared_wred_context_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shared_wred_context_delete)(dev, + shared_wred_context_id, error); +} + +/* Add shaper profile */ +int rte_tm_shaper_profile_add(uint8_t port_id, + uint32_t shaper_profile_id, + struct rte_tm_shaper_params *profile, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shaper_profile_add)(dev, + shaper_profile_id, profile, error); +} + +/* Delete WRED profile */ +int rte_tm_shaper_profile_delete(uint8_t port_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shaper_profile_delete)(dev, + shaper_profile_id, error); +} + +/* Add shared shaper */ +int rte_tm_shared_shaper_add_update(uint8_t port_id, + uint32_t shared_shaper_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shared_shaper_add_update)(dev, + shared_shaper_id, shaper_profile_id, error); +} + +/* Delete shared shaper */ +int rte_tm_shared_shaper_delete(uint8_t port_id, + uint32_t shared_shaper_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shared_shaper_delete)(dev, + shared_shaper_id, error); +} + +/* Add node to port traffic manager hierarchy */ +int rte_tm_node_add(uint8_t port_id, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + uint32_t level_id, + struct rte_tm_node_params *params, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_add)(dev, + node_id, parent_node_id, priority, weight, level_id, + params, error); +} + +/* Delete node from traffic manager hierarchy */ +int rte_tm_node_delete(uint8_t port_id, + uint32_t node_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_delete)(dev, + node_id, error); +} + +/* Suspend node */ +int rte_tm_node_suspend(uint8_t port_id, + uint32_t node_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_suspend)(dev, + node_id, error); +} + +/* Resume node */ +int rte_tm_node_resume(uint8_t port_id, + uint32_t node_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_resume)(dev, + node_id, error); +} + +/* Commit the initial port traffic manager hierarchy */ +int rte_tm_hierarchy_commit(uint8_t port_id, + int clear_on_fail, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, hierarchy_commit)(dev, + clear_on_fail, error); +} + +/* Update node parent */ +int rte_tm_node_parent_update(uint8_t port_id, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = 
&rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_parent_update)(dev, + node_id, parent_node_id, priority, weight, error); +} + +/* Update node private shaper */ +int rte_tm_node_shaper_update(uint8_t port_id, + uint32_t node_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_shaper_update)(dev, + node_id, shaper_profile_id, error); +} + +/* Update node shared shapers */ +int rte_tm_node_shared_shaper_update(uint8_t port_id, + uint32_t node_id, + uint32_t shared_shaper_id, + int add, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_shared_shaper_update)(dev, + node_id, shared_shaper_id, add, error); +} + +/* Update node stats */ +int rte_tm_node_stats_update(uint8_t port_id, + uint32_t node_id, + uint64_t stats_mask, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_stats_update)(dev, + node_id, stats_mask, error); +} + +/* Update WFQ weight mode */ +int rte_tm_node_wfq_weight_mode_update(uint8_t port_id, + uint32_t node_id, + int *wfq_weight_mode, + uint32_t n_sp_priorities, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_wfq_weight_mode_update)(dev, + node_id, wfq_weight_mode, n_sp_priorities, error); +} + +/* Update node congestion management mode */ +int rte_tm_node_cman_update(uint8_t port_id, + uint32_t node_id, + enum rte_tm_cman_mode cman, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_cman_update)(dev, + node_id, cman, error); +} + +/* Update node private WRED context */ +int rte_tm_node_wred_context_update(uint8_t port_id, + uint32_t node_id, + uint32_t wred_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_wred_context_update)(dev, + node_id, wred_profile_id, error); +} + +/* Update node shared WRED context */ +int rte_tm_node_shared_wred_context_update(uint8_t port_id, + uint32_t node_id, + uint32_t shared_wred_context_id, + int add, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_shared_wred_context_update)(dev, + node_id, shared_wred_context_id, add, error); +} + +/* Read and/or clear stats counters for specific node */ +int rte_tm_node_stats_read(uint8_t port_id, + uint32_t node_id, + struct rte_tm_node_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_stats_read)(dev, + node_id, stats, stats_mask, clear, error); +} + +/* Packet marking - VLAN DEI */ +int rte_tm_mark_vlan_dei(uint8_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, mark_vlan_dei)(dev, + mark_green, mark_yellow, mark_red, error); +} + +/* Packet marking - IPv4/IPv6 ECN */ +int rte_tm_mark_ip_ecn(uint8_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, mark_ip_ecn)(dev, + mark_green, mark_yellow, mark_red, error); +} + +/* Packet marking - IPv4/IPv6 DSCP */ +int 
rte_tm_mark_ip_dscp(uint8_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, mark_ip_dscp)(dev, + mark_green, mark_yellow, mark_red, error); +} diff --git a/lib/librte_ether/rte_tm.h b/lib/librte_ether/rte_tm.h new file mode 100644 index 00000000..ebbfa1ee --- /dev/null +++ b/lib/librte_ether/rte_tm.h @@ -0,0 +1,1912 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2017 Cavium. + * Copyright(c) 2017 NXP. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TM_H__ +#define __INCLUDE_RTE_TM_H__ + +/** + * @file + * RTE Generic Traffic Manager API + * + * This interface provides the ability to configure the traffic manager in a + * generic way. It includes features such as: hierarchical scheduling, + * traffic shaping, congestion management, packet marking, etc. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + */ + +#include <stdint.h> + +#include <rte_common.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Ethernet framing overhead. + * + * Overhead fields per Ethernet frame: + * 1. Preamble: 7 bytes; + * 2. Start of Frame Delimiter (SFD): 1 byte; + * 3. Inter-Frame Gap (IFG): 12 bytes. + * + * One of the typical values for the *pkt_length_adjust* field of the shaper + * profile. + * + * @see struct rte_tm_shaper_params + */ +#define RTE_TM_ETH_FRAMING_OVERHEAD 20 + +/** + * Ethernet framing overhead including the Frame Check Sequence (FCS) field. + * Useful when FCS is generated and added at the end of the Ethernet frame on + * TX side without any SW intervention. + * + * One of the typical values for the pkt_length_adjust field of the shaper + * profile. + * + * @see struct rte_tm_shaper_params + */ +#define RTE_TM_ETH_FRAMING_OVERHEAD_FCS 24 + +/** + * Invalid WRED profile ID. 
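Tying this back to rte_tm.c above: RTE_TM_FUNC() is a GCC statement expression that resolves the driver callback or returns early, so each thin wrapper behaves roughly as if written out by hand. A conceptual (not literal) expansion of rte_tm_node_suspend():

int
rte_tm_node_suspend(uint8_t port_id, uint32_t node_id,
		    struct rte_tm_error *error)
{
	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
	const struct rte_tm_ops *ops = rte_tm_ops_get(port_id, error);

	if (ops == NULL)
		return -rte_errno;
	if (ops->node_suspend == NULL)
		return -rte_tm_error_set(error, ENOSYS,
					 RTE_TM_ERROR_TYPE_UNSPECIFIED,
					 NULL, rte_strerror(ENOSYS));
	return ops->node_suspend(dev, node_id, error);
}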
+ * + * @see struct rte_tm_node_params + * @see rte_tm_node_add() + * @see rte_tm_node_wred_context_update() + */ +#define RTE_TM_WRED_PROFILE_ID_NONE UINT32_MAX + +/** + *Invalid shaper profile ID. + * + * @see struct rte_tm_node_params + * @see rte_tm_node_add() + * @see rte_tm_node_shaper_update() + */ +#define RTE_TM_SHAPER_PROFILE_ID_NONE UINT32_MAX + +/** + * Node ID for the parent of the root node. + * + * @see rte_tm_node_add() + */ +#define RTE_TM_NODE_ID_NULL UINT32_MAX + +/** + * Node level ID used to disable level ID checking. + * + * @see rte_tm_node_add() + */ +#define RTE_TM_NODE_LEVEL_ID_ANY UINT32_MAX + +/** + * Color + */ +enum rte_tm_color { + RTE_TM_GREEN = 0, /**< Green */ + RTE_TM_YELLOW, /**< Yellow */ + RTE_TM_RED, /**< Red */ + RTE_TM_COLORS /**< Number of colors */ +}; + +/** + * Node statistics counter type + */ +enum rte_tm_stats_type { + /** Number of packets scheduled from current node. */ + RTE_TM_STATS_N_PKTS = 1 << 0, + + /** Number of bytes scheduled from current node. */ + RTE_TM_STATS_N_BYTES = 1 << 1, + + /** Number of green packets dropped by current leaf node. */ + RTE_TM_STATS_N_PKTS_GREEN_DROPPED = 1 << 2, + + /** Number of yellow packets dropped by current leaf node. */ + RTE_TM_STATS_N_PKTS_YELLOW_DROPPED = 1 << 3, + + /** Number of red packets dropped by current leaf node. */ + RTE_TM_STATS_N_PKTS_RED_DROPPED = 1 << 4, + + /** Number of green bytes dropped by current leaf node. */ + RTE_TM_STATS_N_BYTES_GREEN_DROPPED = 1 << 5, + + /** Number of yellow bytes dropped by current leaf node. */ + RTE_TM_STATS_N_BYTES_YELLOW_DROPPED = 1 << 6, + + /** Number of red bytes dropped by current leaf node. */ + RTE_TM_STATS_N_BYTES_RED_DROPPED = 1 << 7, + + /** Number of packets currently waiting in the packet queue of current + * leaf node. + */ + RTE_TM_STATS_N_PKTS_QUEUED = 1 << 8, + + /** Number of bytes currently waiting in the packet queue of current + * leaf node. + */ + RTE_TM_STATS_N_BYTES_QUEUED = 1 << 9, +}; + +/** + * Node statistics counters + */ +struct rte_tm_node_stats { + /** Number of packets scheduled from current node. */ + uint64_t n_pkts; + + /** Number of bytes scheduled from current node. */ + uint64_t n_bytes; + + /** Statistics counters for leaf nodes only. */ + struct { + /** Number of packets dropped by current leaf node per each + * color. + */ + uint64_t n_pkts_dropped[RTE_TM_COLORS]; + + /** Number of bytes dropped by current leaf node per each + * color. + */ + uint64_t n_bytes_dropped[RTE_TM_COLORS]; + + /** Number of packets currently waiting in the packet queue of + * current leaf node. + */ + uint64_t n_pkts_queued; + + /** Number of bytes currently waiting in the packet queue of + * current leaf node. + */ + uint64_t n_bytes_queued; + } leaf; +}; + +/** + * Traffic manager dynamic updates + */ +enum rte_tm_dynamic_update_type { + /** Dynamic parent node update. The new parent node is located on same + * hierarchy level as the former parent node. Consequently, the node + * whose parent is changed preserves its hierarchy level. + */ + RTE_TM_UPDATE_NODE_PARENT_KEEP_LEVEL = 1 << 0, + + /** Dynamic parent node update. The new parent node is located on + * different hierarchy level than the former parent node. Consequently, + * the node whose parent is changed also changes its hierarchy level. + */ + RTE_TM_UPDATE_NODE_PARENT_CHANGE_LEVEL = 1 << 1, + + /** Dynamic node add/delete. */ + RTE_TM_UPDATE_NODE_ADD_DELETE = 1 << 2, + + /** Suspend/resume nodes. 
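As an illustration of the rte_tm_node_stats layout above (a sketch, not part of the patch), per-color leaf drop counters can be accumulated into a single total:

#include <rte_tm.h>

/* Sum per-color packet drops for a leaf node; assumes the PMD exposes the
 * RTE_TM_STATS_N_PKTS_*_DROPPED counters in its stats_mask. */
static uint64_t
leaf_total_pkt_drops(const struct rte_tm_node_stats *stats)
{
	uint64_t total = 0;
	int color;

	for (color = 0; color < RTE_TM_COLORS; color++)
		total += stats->leaf.n_pkts_dropped[color];
	return total;
}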
*/ + RTE_TM_UPDATE_NODE_SUSPEND_RESUME = 1 << 3, + + /** Dynamic switch between byte-based and packet-based WFQ weights. */ + RTE_TM_UPDATE_NODE_WFQ_WEIGHT_MODE = 1 << 4, + + /** Dynamic update on number of SP priorities. */ + RTE_TM_UPDATE_NODE_N_SP_PRIORITIES = 1 << 5, + + /** Dynamic update of congestion management mode for leaf nodes. */ + RTE_TM_UPDATE_NODE_CMAN = 1 << 6, + + /** Dynamic update of the set of enabled stats counter types. */ + RTE_TM_UPDATE_NODE_STATS = 1 << 7, +}; + +/** + * Traffic manager capabilities + */ +struct rte_tm_capabilities { + /** Maximum number of nodes. */ + uint32_t n_nodes_max; + + /** Maximum number of levels (i.e. number of nodes connecting the root + * node with any leaf node, including the root and the leaf). + */ + uint32_t n_levels_max; + + /** When non-zero, this flag indicates that all the non-leaf nodes + * (with the exception of the root node) have identical capability set. + */ + int non_leaf_nodes_identical; + + /** When non-zero, this flag indicates that all the leaf nodes have + * identical capability set. + */ + int leaf_nodes_identical; + + /** Maximum number of shapers, either private or shared. In case the + * implementation does not share any resources between private and + * shared shapers, it is typically equal to the sum of + * *shaper_private_n_max* and *shaper_shared_n_max*. The + * value of zero indicates that traffic shaping is not supported. + */ + uint32_t shaper_n_max; + + /** Maximum number of private shapers. Indicates the maximum number of + * nodes that can concurrently have their private shaper enabled. The + * value of zero indicates that private shapers are not supported. + */ + uint32_t shaper_private_n_max; + + /** Maximum number of private shapers that support dual rate shaping. + * Indicates the maximum number of nodes that can concurrently have + * their private shaper enabled with dual rate support. Only valid when + * private shapers are supported. The value of zero indicates that dual + * rate shaping is not available for private shapers. The maximum value + * is *shaper_private_n_max*. + */ + int shaper_private_dual_rate_n_max; + + /** Minimum committed/peak rate (bytes per second) for any private + * shaper. Valid only when private shapers are supported. + */ + uint64_t shaper_private_rate_min; + + /** Maximum committed/peak rate (bytes per second) for any private + * shaper. Valid only when private shapers are supported. + */ + uint64_t shaper_private_rate_max; + + /** Maximum number of shared shapers. The value of zero indicates that + * shared shapers are not supported. + */ + uint32_t shaper_shared_n_max; + + /** Maximum number of nodes that can share the same shared shaper. + * Only valid when shared shapers are supported. + */ + uint32_t shaper_shared_n_nodes_per_shaper_max; + + /** Maximum number of shared shapers a node can be part of. This + * parameter indicates that there is at least one node that can be + * configured with this many shared shapers, which might not be true for + * all the nodes. Only valid when shared shapers are supported, in which + * case it ranges from 1 to *shaper_shared_n_max*. + */ + uint32_t shaper_shared_n_shapers_per_node_max; + + /** Maximum number of shared shapers that can be configured with dual + * rate shaping. The value of zero indicates that dual rate shaping + * support is not available for shared shapers. + */ + uint32_t shaper_shared_dual_rate_n_max; + + /** Minimum committed/peak rate (bytes per second) for any shared + * shaper. 
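For example (illustrative only), an application would typically validate a desired private-shaper rate against these capability fields before building its hierarchy:

#include <rte_tm.h>

/* Return non-zero when rate_bps (bytes per second) fits the reported
 * private shaper limits of this port. */
static int
private_shaper_rate_ok(uint8_t port_id, uint64_t rate_bps)
{
	struct rte_tm_capabilities cap;
	struct rte_tm_error error;

	if (rte_tm_capabilities_get(port_id, &cap, &error) != 0)
		return 0;
	if (cap.shaper_private_n_max == 0)
		return 0; /* no private shapers available */
	return rate_bps >= cap.shaper_private_rate_min &&
	       rate_bps <= cap.shaper_private_rate_max;
}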
Only valid when shared shapers are supported. + */ + uint64_t shaper_shared_rate_min; + + /** Maximum committed/peak rate (bytes per second) for any shared + * shaper. Only valid when shared shapers are supported. + */ + uint64_t shaper_shared_rate_max; + + /** Minimum value allowed for packet length adjustment for any private + * or shared shaper. + */ + int shaper_pkt_length_adjust_min; + + /** Maximum value allowed for packet length adjustment for any private + * or shared shaper. + */ + int shaper_pkt_length_adjust_max; + + /** Maximum number of children nodes. This parameter indicates that + * there is at least one non-leaf node that can be configured with this + * many children nodes, which might not be true for all the non-leaf + * nodes. + */ + uint32_t sched_n_children_max; + + /** Maximum number of supported priority levels. This parameter + * indicates that there is at least one non-leaf node that can be + * configured with this many priority levels for managing its children + * nodes, which might not be true for all the non-leaf nodes. The value + * of zero is invalid. The value of 1 indicates that only priority 0 is + * supported, which essentially means that Strict Priority (SP) + * algorithm is not supported. + */ + uint32_t sched_sp_n_priorities_max; + + /** Maximum number of sibling nodes that can have the same priority at + * any given time, i.e. maximum size of the WFQ sibling node group. This + * parameter indicates there is at least one non-leaf node that meets + * this condition, which might not be true for all the non-leaf nodes. + * The value of zero is invalid. The value of 1 indicates that WFQ + * algorithm is not supported. The maximum value is + * *sched_n_children_max*. + */ + uint32_t sched_wfq_n_children_per_group_max; + + /** Maximum number of priority levels that can have more than one child + * node at any given time, i.e. maximum number of WFQ sibling node + * groups that have two or more members. This parameter indicates there + * is at least one non-leaf node that meets this condition, which might + * not be true for all the non-leaf nodes. The value of zero states that + * WFQ algorithm is not supported. The value of 1 indicates that + * (*sched_sp_n_priorities_max* - 1) priority levels have at most one + * child node, so there can be only one priority level with two or + * more sibling nodes making up a WFQ group. The maximum value is: + * min(floor(*sched_n_children_max* / 2), *sched_sp_n_priorities_max*). + */ + uint32_t sched_wfq_n_groups_max; + + /** Maximum WFQ weight. The value of 1 indicates that all sibling nodes + * with same priority have the same WFQ weight, so WFQ is reduced to FQ. + */ + uint32_t sched_wfq_weight_max; + + /** Head drop algorithm support. When non-zero, this parameter + * indicates that there is at least one leaf node that supports the head + * drop algorithm, which might not be true for all the leaf nodes. + */ + int cman_head_drop_supported; + + /** Maximum number of WRED contexts, either private or shared. In case + * the implementation does not share any resources between private and + * shared WRED contexts, it is typically equal to the sum of + * *cman_wred_context_private_n_max* and + * *cman_wred_context_shared_n_max*. The value of zero indicates that + * WRED is not supported. + */ + uint32_t cman_wred_context_n_max; + + /** Maximum number of private WRED contexts. Indicates the maximum + * number of leaf nodes that can concurrently have their private WRED + * context enabled. 
The value of zero indicates that private WRED + * contexts are not supported. + */ + uint32_t cman_wred_context_private_n_max; + + /** Maximum number of shared WRED contexts. The value of zero + * indicates that shared WRED contexts are not supported. + */ + uint32_t cman_wred_context_shared_n_max; + + /** Maximum number of leaf nodes that can share the same WRED context. + * Only valid when shared WRED contexts are supported. + */ + uint32_t cman_wred_context_shared_n_nodes_per_context_max; + + /** Maximum number of shared WRED contexts a leaf node can be part of. + * This parameter indicates that there is at least one leaf node that + * can be configured with this many shared WRED contexts, which might + * not be true for all the leaf nodes. Only valid when shared WRED + * contexts are supported, in which case it ranges from 1 to + * *cman_wred_context_shared_n_max*. + */ + uint32_t cman_wred_context_shared_n_contexts_per_node_max; + + /** Support for VLAN DEI packet marking (per color). */ + int mark_vlan_dei_supported[RTE_TM_COLORS]; + + /** Support for IPv4/IPv6 ECN marking of TCP packets (per color). */ + int mark_ip_ecn_tcp_supported[RTE_TM_COLORS]; + + /** Support for IPv4/IPv6 ECN marking of SCTP packets (per color). */ + int mark_ip_ecn_sctp_supported[RTE_TM_COLORS]; + + /** Support for IPv4/IPv6 DSCP packet marking (per color). */ + int mark_ip_dscp_supported[RTE_TM_COLORS]; + + /** Set of supported dynamic update operations. + * @see enum rte_tm_dynamic_update_type + */ + uint64_t dynamic_update_mask; + + /** Set of supported statistics counter types. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; +}; + +/** + * Traffic manager level capabilities + */ +struct rte_tm_level_capabilities { + /** Maximum number of nodes for the current hierarchy level. */ + uint32_t n_nodes_max; + + /** Maximum number of non-leaf nodes for the current hierarchy level. + * The value of 0 indicates that current level only supports leaf + * nodes. The maximum value is *n_nodes_max*. + */ + uint32_t n_nodes_nonleaf_max; + + /** Maximum number of leaf nodes for the current hierarchy level. The + * value of 0 indicates that current level only supports non-leaf + * nodes. The maximum value is *n_nodes_max*. + */ + uint32_t n_nodes_leaf_max; + + /** When non-zero, this flag indicates that all the non-leaf nodes on + * this level have identical capability set. Valid only when + * *n_nodes_nonleaf_max* is non-zero. + */ + int non_leaf_nodes_identical; + + /** When non-zero, this flag indicates that all the leaf nodes on this + * level have identical capability set. Valid only when + * *n_nodes_leaf_max* is non-zero. + */ + int leaf_nodes_identical; + + RTE_STD_C11 + union { + /** Items valid only for the non-leaf nodes on this level. */ + struct { + /** Private shaper support. When non-zero, it indicates + * there is at least one non-leaf node on this level + * with private shaper support, which may not be the + * case for all the non-leaf nodes on this level. + */ + int shaper_private_supported; + + /** Dual rate support for private shaper. Valid only + * when private shaper is supported for the non-leaf + * nodes on the current level. When non-zero, it + * indicates there is at least one non-leaf node on this + * level with dual rate private shaper support, which + * may not be the case for all the non-leaf nodes on + * this level. + */ + int shaper_private_dual_rate_supported; + + /** Minimum committed/peak rate (bytes per second) for + * private shapers of the non-leaf nodes of this level. 
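A sketch of how the per-level capabilities are meant to be consumed (names illustrative; n_levels_max would come from struct rte_tm_capabilities):

#include <inttypes.h>
#include <stdio.h>
#include <rte_tm.h>

/* Print which hierarchy levels can host leaf nodes (level 0 is the root). */
static void
dump_leaf_levels(uint8_t port_id, uint32_t n_levels_max)
{
	struct rte_tm_level_capabilities lcap;
	struct rte_tm_error error;
	uint32_t level_id;

	for (level_id = 0; level_id < n_levels_max; level_id++) {
		if (rte_tm_level_capabilities_get(port_id, level_id,
						  &lcap, &error) != 0)
			continue;
		if (lcap.n_nodes_leaf_max > 0)
			printf("level %" PRIu32 ": up to %" PRIu32 " leaf nodes\n",
			       level_id, lcap.n_nodes_leaf_max);
	}
}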
+ * Valid only when private shaper is supported on this + * level. + */ + uint64_t shaper_private_rate_min; + + /** Maximum committed/peak rate (bytes per second) for + * private shapers of the non-leaf nodes on this level. + * Valid only when private shaper is supported on this + * level. + */ + uint64_t shaper_private_rate_max; + + /** Maximum number of shared shapers that any non-leaf + * node on this level can be part of. The value of zero + * indicates that shared shapers are not supported by + * the non-leaf nodes on this level. When non-zero, it + * indicates there is at least one non-leaf node on this + * level that meets this condition, which may not be the + * case for all the non-leaf nodes on this level. + */ + uint32_t shaper_shared_n_max; + + /** Maximum number of children nodes. This parameter + * indicates that there is at least one non-leaf node on + * this level that can be configured with this many + * children nodes, which might not be true for all the + * non-leaf nodes on this level. + */ + uint32_t sched_n_children_max; + + /** Maximum number of supported priority levels. This + * parameter indicates that there is at least one + * non-leaf node on this level that can be configured + * with this many priority levels for managing its + * children nodes, which might not be true for all the + * non-leaf nodes on this level. The value of zero is + * invalid. The value of 1 indicates that only priority + * 0 is supported, which essentially means that Strict + * Priority (SP) algorithm is not supported on this + * level. + */ + uint32_t sched_sp_n_priorities_max; + + /** Maximum number of sibling nodes that can have the + * same priority at any given time, i.e. maximum size of + * the WFQ sibling node group. This parameter indicates + * there is at least one non-leaf node on this level + * that meets this condition, which may not be true for + * all the non-leaf nodes on this level. The value of + * zero is invalid. The value of 1 indicates that WFQ + * algorithm is not supported on this level. The maximum + * value is *sched_n_children_max*. + */ + uint32_t sched_wfq_n_children_per_group_max; + + /** Maximum number of priority levels that can have + * more than one child node at any given time, i.e. + * maximum number of WFQ sibling node groups that + * have two or more members. This parameter indicates + * there is at least one non-leaf node on this level + * that meets this condition, which might not be true + * for all the non-leaf nodes. The value of zero states + * that WFQ algorithm is not supported on this level. + * The value of 1 indicates that + * (*sched_sp_n_priorities_max* - 1) priority levels on + * this level have at most one child node, so there can + * be only one priority level with two or more sibling + * nodes making up a WFQ group on this level. The + * maximum value is: + * min(floor(*sched_n_children_max* / 2), + * *sched_sp_n_priorities_max*). + */ + uint32_t sched_wfq_n_groups_max; + + /** Maximum WFQ weight. The value of 1 indicates that + * all sibling nodes on this level with same priority + * have the same WFQ weight, so on this level WFQ is + * reduced to FQ. + */ + uint32_t sched_wfq_weight_max; + + /** Mask of statistics counter types supported by the + * non-leaf nodes on this level. Every supported + * statistics counter type is supported by at least one + * non-leaf node on this level, which may not be true + * for all the non-leaf nodes on this level. 
+ * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; + } nonleaf; + + /** Items valid only for the leaf nodes on this level. */ + struct { + /** Private shaper support. When non-zero, it indicates + * there is at least one leaf node on this level with + * private shaper support, which may not be the case for + * all the leaf nodes on this level. + */ + int shaper_private_supported; + + /** Dual rate support for private shaper. Valid only + * when private shaper is supported for the leaf nodes + * on this level. When non-zero, it indicates there is + * at least one leaf node on this level with dual rate + * private shaper support, which may not be the case for + * all the leaf nodes on this level. + */ + int shaper_private_dual_rate_supported; + + /** Minimum committed/peak rate (bytes per second) for + * private shapers of the leaf nodes of this level. + * Valid only when private shaper is supported for the + * leaf nodes on this level. + */ + uint64_t shaper_private_rate_min; + + /** Maximum committed/peak rate (bytes per second) for + * private shapers of the leaf nodes on this level. + * Valid only when private shaper is supported for the + * leaf nodes on this level. + */ + uint64_t shaper_private_rate_max; + + /** Maximum number of shared shapers that any leaf node + * on this level can be part of. The value of zero + * indicates that shared shapers are not supported by + * the leaf nodes on this level. When non-zero, it + * indicates there is at least one leaf node on this + * level that meets this condition, which may not be the + * case for all the leaf nodes on this level. + */ + uint32_t shaper_shared_n_max; + + /** Head drop algorithm support. When non-zero, this + * parameter indicates that there is at least one leaf + * node on this level that supports the head drop + * algorithm, which might not be true for all the leaf + * nodes on this level. + */ + int cman_head_drop_supported; + + /** Private WRED context support. When non-zero, it + * indicates there is at least one node on this level + * with private WRED context support, which may not be + * true for all the leaf nodes on this level. + */ + int cman_wred_context_private_supported; + + /** Maximum number of shared WRED contexts that any + * leaf node on this level can be part of. The value of + * zero indicates that shared WRED contexts are not + * supported by the leaf nodes on this level. When + * non-zero, it indicates there is at least one leaf + * node on this level that meets this condition, which + * may not be the case for all the leaf nodes on this + * level. + */ + uint32_t cman_wred_context_shared_n_max; + + /** Mask of statistics counter types supported by the + * leaf nodes on this level. Every supported statistics + * counter type is supported by at least one leaf node + * on this level, which may not be true for all the leaf + * nodes on this level. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; + } leaf; + }; +}; + +/** + * Traffic manager node capabilities + */ +struct rte_tm_node_capabilities { + /** Private shaper support for the current node. */ + int shaper_private_supported; + + /** Dual rate shaping support for private shaper of current node. + * Valid only when private shaper is supported by the current node. + */ + int shaper_private_dual_rate_supported; + + /** Minimum committed/peak rate (bytes per second) for private + * shaper of current node. Valid only when private shaper is supported + * by the current node. 
+ */ + uint64_t shaper_private_rate_min; + + /** Maximum committed/peak rate (bytes per second) for private + * shaper of current node. Valid only when private shaper is supported + * by the current node. + */ + uint64_t shaper_private_rate_max; + + /** Maximum number of shared shapers the current node can be part of. + * The value of zero indicates that shared shapers are not supported by + * the current node. + */ + uint32_t shaper_shared_n_max; + + RTE_STD_C11 + union { + /** Items valid only for non-leaf nodes. */ + struct { + /** Maximum number of children nodes. */ + uint32_t sched_n_children_max; + + /** Maximum number of supported priority levels. The + * value of zero is invalid. The value of 1 indicates + * that only priority 0 is supported, which essentially + * means that Strict Priority (SP) algorithm is not + * supported. + */ + uint32_t sched_sp_n_priorities_max; + + /** Maximum number of sibling nodes that can have the + * same priority at any given time, i.e. maximum size + * of the WFQ sibling node group. The value of zero + * is invalid. The value of 1 indicates that WFQ + * algorithm is not supported. The maximum value is + * *sched_n_children_max*. + */ + uint32_t sched_wfq_n_children_per_group_max; + + /** Maximum number of priority levels that can have + * more than one child node at any given time, i.e. + * maximum number of WFQ sibling node groups that have + * two or more members. The value of zero states that + * WFQ algorithm is not supported. The value of 1 + * indicates that (*sched_sp_n_priorities_max* - 1) + * priority levels have at most one child node, so there + * can be only one priority level with two or more + * sibling nodes making up a WFQ group. The maximum + * value is: min(floor(*sched_n_children_max* / 2), + * *sched_sp_n_priorities_max*). + */ + uint32_t sched_wfq_n_groups_max; + + /** Maximum WFQ weight. The value of 1 indicates that + * all sibling nodes with same priority have the same + * WFQ weight, so WFQ is reduced to FQ. + */ + uint32_t sched_wfq_weight_max; + } nonleaf; + + /** Items valid only for leaf nodes. */ + struct { + /** Head drop algorithm support for current node. */ + int cman_head_drop_supported; + + /** Private WRED context support for current node. */ + int cman_wred_context_private_supported; + + /** Maximum number of shared WRED contexts the current + * node can be part of. The value of zero indicates that + * shared WRED contexts are not supported by the current + * node. + */ + uint32_t cman_wred_context_shared_n_max; + } leaf; + }; + + /** Mask of statistics counter types supported by the current node. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; +}; + +/** + * Congestion management (CMAN) mode + * + * This is used for controlling the admission of packets into a packet queue or + * group of packet queues on congestion. On request of writing a new packet + * into the current queue while the queue is full, the *tail drop* algorithm + * drops the new packet while leaving the queue unmodified, as opposed to *head + * drop* algorithm, which drops the packet at the head of the queue (the oldest + * packet waiting in the queue) and admits the new packet at the tail of the + * queue. + * + * The *Random Early Detection (RED)* algorithm works by proactively dropping + * more and more input packets as the queue occupancy builds up. When the queue + * is full or almost full, RED effectively works as *tail drop*. The *Weighted + * RED* algorithm uses a separate set of RED thresholds for each packet color. 
+ */ +enum rte_tm_cman_mode { + RTE_TM_CMAN_TAIL_DROP = 0, /**< Tail drop */ + RTE_TM_CMAN_HEAD_DROP, /**< Head drop */ + RTE_TM_CMAN_WRED, /**< Weighted Random Early Detection (WRED) */ +}; + +/** + * Random Early Detection (RED) profile + */ +struct rte_tm_red_params { + /** Minimum queue threshold */ + uint16_t min_th; + + /** Maximum queue threshold */ + uint16_t max_th; + + /** Inverse of packet marking probability maximum value (maxp), i.e. + * maxp_inv = 1 / maxp + */ + uint16_t maxp_inv; + + /** Negated log2 of queue weight (wq), i.e. wq = 1 / (2 ^ wq_log2) */ + uint16_t wq_log2; +}; + +/** + * Weighted RED (WRED) profile + * + * Multiple WRED contexts can share the same WRED profile. Each leaf node with + * WRED enabled as its congestion management mode has zero or one private WRED + * context (only one leaf node using it) and/or zero, one or several shared + * WRED contexts (multiple leaf nodes use the same WRED context). A private + * WRED context is used to perform congestion management for a single leaf + * node, while a shared WRED context is used to perform congestion management + * for a group of leaf nodes. + */ +struct rte_tm_wred_params { + /** One set of RED parameters per packet color */ + struct rte_tm_red_params red_params[RTE_TM_COLORS]; +}; + +/** + * Token bucket + */ +struct rte_tm_token_bucket { + /** Token bucket rate (bytes per second) */ + uint64_t rate; + + /** Token bucket size (bytes), a.k.a. max burst size */ + uint64_t size; +}; + +/** + * Shaper (rate limiter) profile + * + * Multiple shaper instances can share the same shaper profile. Each node has + * zero or one private shaper (only one node using it) and/or zero, one or + * several shared shapers (multiple nodes use the same shaper instance). + * A private shaper is used to perform traffic shaping for a single node, while + * a shared shaper is used to perform traffic shaping for a group of nodes. + * + * Single rate shapers use a single token bucket. A single rate shaper can be + * configured by setting the rate of the committed bucket to zero, which + * effectively disables this bucket. The peak bucket is used to limit the rate + * and the burst size for the current shaper. + * + * Dual rate shapers use both the committed and the peak token buckets. The + * rate of the peak bucket has to be bigger than zero, as well as greater than + * or equal to the rate of the committed bucket. + */ +struct rte_tm_shaper_params { + /** Committed token bucket */ + struct rte_tm_token_bucket committed; + + /** Peak token bucket */ + struct rte_tm_token_bucket peak; + + /** Signed value to be added to the length of each packet for the + * purpose of shaping. Can be used to correct the packet length with + * the framing overhead bytes that are also consumed on the wire (e.g. + * RTE_TM_ETH_FRAMING_OVERHEAD_FCS). + */ + int32_t pkt_length_adjust; +}; + +/** + * Node parameters + * + * Each non-leaf node has multiple inputs (its children nodes) and single output + * (which is input to its parent node). It arbitrates its inputs using Strict + * Priority (SP) and Weighted Fair Queuing (WFQ) algorithms to schedule input + * packets to its output while observing its shaping (rate limiting) + * constraints. + * + * Algorithms such as Weighted Round Robin (WRR), Byte-level WRR, Deficit WRR + * (DWRR), etc. are considered approximations of the WFQ ideal and are + * assimilated to WFQ, although an associated implementation-dependent trade-off + * on accuracy, performance and resource usage might exist. 
+ * + * Children nodes with different priorities are scheduled using the SP algorithm + * based on their priority, with zero (0) as the highest priority. Children with + * the same priority are scheduled using the WFQ algorithm according to their + * weights. The WFQ weight of a given child node is relative to the sum of the + * weights of all its sibling nodes that have the same priority, with one (1) as + * the lowest weight. For each SP priority, the WFQ weight mode can be set as + * either byte-based or packet-based. + * + * Each leaf node sits on top of a TX queue of the current Ethernet port. Hence, + * the leaf nodes are predefined, with their node IDs set to 0 .. (N-1), where N + * is the number of TX queues configured for the current Ethernet port. The + * non-leaf nodes have their IDs generated by the application. + */ +struct rte_tm_node_params { + /** Shaper profile for the private shaper. The absence of the private + * shaper for the current node is indicated by setting this parameter + * to RTE_TM_SHAPER_PROFILE_ID_NONE. + */ + uint32_t shaper_profile_id; + + /** User allocated array of valid shared shaper IDs. */ + uint32_t *shared_shaper_id; + + /** Number of shared shaper IDs in the *shared_shaper_id* array. */ + uint32_t n_shared_shapers; + + RTE_STD_C11 + union { + /** Parameters only valid for non-leaf nodes. */ + struct { + /** WFQ weight mode for each SP priority. When NULL, it + * indicates that WFQ is to be used for all priorities. + * When non-NULL, it points to a pre-allocated array of + * *n_sp_priorities* values, with non-zero value for + * byte-mode and zero for packet-mode. + */ + int *wfq_weight_mode; + + /** Number of SP priorities. */ + uint32_t n_sp_priorities; + } nonleaf; + + /** Parameters only valid for leaf nodes. */ + struct { + /** Congestion management mode */ + enum rte_tm_cman_mode cman; + + /** WRED parameters (only valid when *cman* is set to + * WRED). + */ + struct { + /** WRED profile for private WRED context. The + * absence of a private WRED context for the + * current leaf node is indicated by value + * RTE_TM_WRED_PROFILE_ID_NONE. + */ + uint32_t wred_profile_id; + + /** User allocated array of shared WRED context + * IDs. When set to NULL, it indicates that the + * current leaf node should not currently be + * part of any shared WRED contexts. + */ + uint32_t *shared_wred_context_id; + + /** Number of elements in the + * *shared_wred_context_id* array. Only valid + * when *shared_wred_context_id* is non-NULL, + * in which case it should be non-zero. + */ + uint32_t n_shared_wred_contexts; + } wred; + } leaf; + }; + + /** Mask of statistics counter types to be enabled for this node. This + * needs to be a subset of the statistics counter types available for + * the current node. Any statistics counter type not included in this + * set is to be disabled for the current node. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; +}; + +/** + * Verbose error types. + * + * Most of them provide the type of the object referenced by struct + * rte_tm_error::cause. + */ +enum rte_tm_error_type { + RTE_TM_ERROR_TYPE_NONE, /**< No error. */ + RTE_TM_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. 
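Putting struct rte_tm_node_params together with rte_tm_node_add() from rte_tm.c above, a leaf node for TX queue 0 could be attached as follows (a sketch; parent_id is an application-chosen non-leaf node assumed to have been added already):

#include <rte_tm.h>

static int
add_leaf_node(uint8_t port_id, uint32_t parent_id, struct rte_tm_error *error)
{
	struct rte_tm_node_params np = {
		.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE,
		.leaf = {
			.cman = RTE_TM_CMAN_TAIL_DROP,
			.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE,
		},
		.stats_mask = RTE_TM_STATS_N_PKTS | RTE_TM_STATS_N_BYTES,
	};

	/* Leaf node IDs are predefined as 0 .. nb_tx_queues - 1. */
	return rte_tm_node_add(port_id, 0 /* TX queue 0 */, parent_id,
			       0 /* highest priority */, 1 /* lowest weight */,
			       RTE_TM_NODE_LEVEL_ID_ANY, &np, error);
}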
*/ + RTE_TM_ERROR_TYPE_CAPABILITIES, + RTE_TM_ERROR_TYPE_LEVEL_ID, + RTE_TM_ERROR_TYPE_WRED_PROFILE, + RTE_TM_ERROR_TYPE_WRED_PROFILE_GREEN, + RTE_TM_ERROR_TYPE_WRED_PROFILE_YELLOW, + RTE_TM_ERROR_TYPE_WRED_PROFILE_RED, + RTE_TM_ERROR_TYPE_WRED_PROFILE_ID, + RTE_TM_ERROR_TYPE_SHARED_WRED_CONTEXT_ID, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_RATE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_SIZE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_RATE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PKT_ADJUST_LEN, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID, + RTE_TM_ERROR_TYPE_SHARED_SHAPER_ID, + RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID, + RTE_TM_ERROR_TYPE_NODE_PRIORITY, + RTE_TM_ERROR_TYPE_NODE_WEIGHT, + RTE_TM_ERROR_TYPE_NODE_PARAMS, + RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID, + RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID, + RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS, + RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE, + RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES, + RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN, + RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID, + RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID, + RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS, + RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS, + RTE_TM_ERROR_TYPE_NODE_ID, +}; + +/** + * Verbose error structure definition. + * + * This object is normally allocated by applications and set by PMDs, the + * message points to a constant string which does not need to be freed by + * the application, however its pointer can be considered valid only as long + * as its associated DPDK port remains configured. Closing the underlying + * device or unloading the PMD invalidates it. + * + * Both cause and message may be NULL regardless of the error type. + */ +struct rte_tm_error { + enum rte_tm_error_type type; /**< Cause field and error type. */ + const void *cause; /**< Object responsible for the error. */ + const char *message; /**< Human-readable error message. */ +}; + +/** + * Traffic manager get number of leaf nodes + * + * Each leaf node sits on on top of a TX queue of the current Ethernet port. + * Therefore, the set of leaf nodes is predefined, their number is always equal + * to N (where N is the number of TX queues configured for the current port) + * and their IDs are 0 .. (N-1). + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] n_leaf_nodes + * Number of leaf nodes for the current port. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_get_number_of_leaf_nodes(uint8_t port_id, + uint32_t *n_leaf_nodes, + struct rte_tm_error *error); + +/** + * Traffic manager node ID validate and type (i.e. leaf or non-leaf) get + * + * The leaf nodes have predefined IDs in the range of 0 .. (N-1), where N is + * the number of TX queues of the current Ethernet port. The non-leaf nodes + * have their IDs generated by the application outside of the above range, + * which is reserved for leaf nodes. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID value. Needs to be valid. + * @param[out] is_leaf + * Set to non-zero value when node is leaf and to zero otherwise (non-leaf). + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. 
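A small sketch combining the two calls documented above (illustrative; it assumes the predefined leaf node IDs are already valid for the port, which may depend on the PMD):

#include <rte_tm.h>

/* Check that node IDs 0 .. N-1 are indeed reported as leaf nodes. */
static int
check_leaf_node_ids(uint8_t port_id, struct rte_tm_error *error)
{
	uint32_t n_leaf_nodes, node_id;
	int is_leaf;

	if (rte_tm_get_number_of_leaf_nodes(port_id, &n_leaf_nodes, error) != 0)
		return -1;
	for (node_id = 0; node_id < n_leaf_nodes; node_id++) {
		if (rte_tm_node_type_get(port_id, node_id, &is_leaf, error) != 0)
			return -1;
		if (!is_leaf)
			return -1;
	}
	return 0;
}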
+ */ +int +rte_tm_node_type_get(uint8_t port_id, + uint32_t node_id, + int *is_leaf, + struct rte_tm_error *error); + +/** + * Traffic manager capabilities get + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] cap + * Traffic manager capabilities. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_capabilities_get(uint8_t port_id, + struct rte_tm_capabilities *cap, + struct rte_tm_error *error); + +/** + * Traffic manager level capabilities get + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] level_id + * The hierarchy level identifier. The value of 0 identifies the level of the + * root node. + * @param[out] cap + * Traffic manager level capabilities. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_level_capabilities_get(uint8_t port_id, + uint32_t level_id, + struct rte_tm_level_capabilities *cap, + struct rte_tm_error *error); + +/** + * Traffic manager node capabilities get + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] cap + * Traffic manager node capabilities. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_node_capabilities_get(uint8_t port_id, + uint32_t node_id, + struct rte_tm_node_capabilities *cap, + struct rte_tm_error *error); + +/** + * Traffic manager WRED profile add + * + * Create a new WRED profile with ID set to *wred_profile_id*. The new profile + * is used to create one or several WRED contexts. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] wred_profile_id + * WRED profile ID for the new profile. Needs to be unused. + * @param[in] profile + * WRED profile parameters. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_n_max + */ +int +rte_tm_wred_profile_add(uint8_t port_id, + uint32_t wred_profile_id, + struct rte_tm_wred_params *profile, + struct rte_tm_error *error); + +/** + * Traffic manager WRED profile delete + * + * Delete an existing WRED profile. This operation fails when there is + * currently at least one user (i.e. WRED context) of this WRED profile. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] wred_profile_id + * WRED profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_n_max + */ +int +rte_tm_wred_profile_delete(uint8_t port_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager shared WRED context add or update + * + * When *shared_wred_context_id* is invalid, a new WRED context with this ID is + * created by using the WRED profile identified by *wred_profile_id*. 
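A minimal sketch of checking port capabilities before creating WRED profiles; it relies only on rte_tm_capabilities_get() above and the cman_wred_context_n_max field referenced by the @see tags, so the helper name is illustrative.

#include <rte_tm.h>

/* Illustrative helper: return non-zero when the port advertises at least
 * one WRED context, i.e. rte_tm_wred_profile_add() has a chance to succeed.
 */
static int
port_supports_wred(uint8_t port_id)
{
	struct rte_tm_capabilities cap;
	struct rte_tm_error err;

	if (rte_tm_capabilities_get(port_id, &cap, &err) != 0)
		return 0;

	return cap.cman_wred_context_n_max > 0;
}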
+ * + * When *shared_wred_context_id* is valid, this WRED context is no longer using + * the profile previously assigned to it and is updated to use the profile + * identified by *wred_profile_id*. + * + * A valid shared WRED context can be assigned to several hierarchy leaf nodes + * configured to use WRED as the congestion management mode. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shared_wred_context_id + * Shared WRED context ID + * @param[in] wred_profile_id + * WRED profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max + */ +int +rte_tm_shared_wred_context_add_update(uint8_t port_id, + uint32_t shared_wred_context_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager shared WRED context delete + * + * Delete an existing shared WRED context. This operation fails when there is + * currently at least one user (i.e. hierarchy leaf node) of this shared WRED + * context. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shared_wred_context_id + * Shared WRED context ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max + */ +int +rte_tm_shared_wred_context_delete(uint8_t port_id, + uint32_t shared_wred_context_id, + struct rte_tm_error *error); + +/** + * Traffic manager shaper profile add + * + * Create a new shaper profile with ID set to *shaper_profile_id*. The new + * shaper profile is used to create one or several shapers. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shaper_profile_id + * Shaper profile ID for the new profile. Needs to be unused. + * @param[in] profile + * Shaper profile parameters. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_n_max + */ +int +rte_tm_shaper_profile_add(uint8_t port_id, + uint32_t shaper_profile_id, + struct rte_tm_shaper_params *profile, + struct rte_tm_error *error); + +/** + * Traffic manager shaper profile delete + * + * Delete an existing shaper profile. This operation fails when there is + * currently at least one user (i.e. shaper) of this shaper profile. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shaper_profile_id + * Shaper profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_n_max + */ +int +rte_tm_shaper_profile_delete(uint8_t port_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager shared shaper add or update + * + * When *shared_shaper_id* is not a valid shared shaper ID, a new shared shaper + * with this ID is created using the shaper profile identified by + * *shaper_profile_id*. 
+ * + * When *shared_shaper_id* is a valid shared shaper ID, this shared shaper is + * no longer using the shaper profile previously assigned to it and is updated + * to use the shaper profile identified by *shaper_profile_id*. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shared_shaper_id + * Shared shaper ID + * @param[in] shaper_profile_id + * Shaper profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_shared_n_max + */ +int +rte_tm_shared_shaper_add_update(uint8_t port_id, + uint32_t shared_shaper_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager shared shaper delete + * + * Delete an existing shared shaper. This operation fails when there is + * currently at least one user (i.e. hierarchy node) of this shared shaper. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shared_shaper_id + * Shared shaper ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_shared_n_max + */ +int +rte_tm_shared_shaper_delete(uint8_t port_id, + uint32_t shared_shaper_id, + struct rte_tm_error *error); + +/** + * Traffic manager node add + * + * Create new node and connect it as child of an existing node. The new node is + * further identified by *node_id*, which needs to be unused by any of the + * existing nodes. The parent node is identified by *parent_node_id*, which + * needs to be the valid ID of an existing non-leaf node. The parent node is + * going to use the provided SP *priority* and WFQ *weight* to schedule its new + * child node. + * + * This function has to be called for both leaf and non-leaf nodes. In the case + * of leaf nodes (i.e. *node_id* is within the range of 0 .. (N-1), with N as + * the number of configured TX queues of the current port), the leaf node is + * configured rather than created (as the set of leaf nodes is predefined) and + * it is also connected as child of an existing node. + * + * The first node that is added becomes the root node and all the nodes that + * are subsequently added have to be added as descendants of the root node. The + * parent of the root node has to be specified as RTE_TM_NODE_ID_NULL and there + * can only be one node with this parent ID (i.e. the root node). Further + * restrictions for root node: needs to be non-leaf, its private shaper profile + * needs to be valid and single rate, cannot use any shared shapers. + * + * When called before rte_tm_hierarchy_commit() invocation, this function is + * typically used to define the initial start-up hierarchy for the port. + * Provided that dynamic hierarchy updates are supported by the current port (as + * advertised in the port capability set), this function can be also called + * after the rte_tm_hierarchy_commit() invocation. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be unused by any of the existing nodes. + * @param[in] parent_node_id + * Parent node ID. Needs to be the valid. + * @param[in] priority + * Node priority. The highest node priority is zero. Used by the SP algorithm + * running on the parent of the current node for scheduling this child node. 
+ * @param[in] weight + * Node weight. The node weight is relative to the weight sum of all siblings + * that have the same priority. The lowest weight is one. Used by the WFQ + * algorithm running on the parent of the current node for scheduling this + * child node. + * @param[in] level_id + * Level ID that should be met by this node. The hierarchy level of the + * current node is already fully specified through its parent node (i.e. the + * level of this node is equal to the level of its parent node plus one), + * therefore the reason for providing this parameter is to enable the + * application to perform step-by-step checking of the node level during + * successive invocations of this function. When not desired, this check can + * be disabled by assigning value RTE_TM_NODE_LEVEL_ID_ANY to this parameter. + * @param[in] params + * Node parameters. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see rte_tm_hierarchy_commit() + * @see RTE_TM_UPDATE_NODE_ADD_DELETE + * @see RTE_TM_NODE_LEVEL_ID_ANY + * @see struct rte_tm_capabilities + */ +int +rte_tm_node_add(uint8_t port_id, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + uint32_t level_id, + struct rte_tm_node_params *params, + struct rte_tm_error *error); + +/** + * Traffic manager node delete + * + * Delete an existing node. This operation fails when this node currently has + * at least one user (i.e. child node). + * + * When called before rte_tm_hierarchy_commit() invocation, this function is + * typically used to define the initial start-up hierarchy for the port. + * Provided that dynamic hierarchy updates are supported by the current port (as + * advertised in the port capability set), this function can be also called + * after the rte_tm_hierarchy_commit() invocation. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see RTE_TM_UPDATE_NODE_ADD_DELETE + */ +int +rte_tm_node_delete(uint8_t port_id, + uint32_t node_id, + struct rte_tm_error *error); + +/** + * Traffic manager node suspend + * + * Suspend an existing node. While the node is in suspended state, no packet is + * scheduled from this node and its descendants. The node exits the suspended + * state through the node resume operation. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see rte_tm_node_resume() + * @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME + */ +int +rte_tm_node_suspend(uint8_t port_id, + uint32_t node_id, + struct rte_tm_error *error); + +/** + * Traffic manager node resume + * + * Resume an existing node that is currently in suspended state. The node + * entered the suspended state as result of a previous node suspend operation. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. 
+ * + * @see rte_tm_node_suspend() + * @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME + */ +int +rte_tm_node_resume(uint8_t port_id, + uint32_t node_id, + struct rte_tm_error *error); + +/** + * Traffic manager hierarchy commit + * + * This function is called during the port initialization phase (before the + * Ethernet port is started) to freeze the start-up hierarchy. + * + * This function typically performs the following steps: + * a) It validates the start-up hierarchy that was previously defined for the + * current port through successive rte_tm_node_add() invocations; + * b) Assuming successful validation, it performs all the necessary port + * specific configuration operations to install the specified hierarchy on + * the current port, with immediate effect once the port is started. + * + * This function fails when the currently configured hierarchy is not supported + * by the Ethernet port, in which case the user can abort or try out another + * hierarchy configuration (e.g. a hierarchy with less leaf nodes), which can be + * build from scratch (when *clear_on_fail* is enabled) or by modifying the + * existing hierarchy configuration (when *clear_on_fail* is disabled). + * + * Note that this function can still fail due to other causes (e.g. not enough + * memory available in the system, etc), even though the specified hierarchy is + * supported in principle by the current port. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] clear_on_fail + * On function call failure, hierarchy is cleared when this parameter is + * non-zero and preserved when this parameter is equal to zero. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see rte_tm_node_add() + * @see rte_tm_node_delete() + */ +int +rte_tm_hierarchy_commit(uint8_t port_id, + int clear_on_fail, + struct rte_tm_error *error); + +/** + * Traffic manager node parent update + * + * Restriction for root node: its parent cannot be changed. + * + * This function can only be called after the rte_tm_hierarchy_commit() + * invocation. Its success depends on the port support for this operation, as + * advertised through the port capability set. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[in] parent_node_id + * Node ID for the new parent. Needs to be valid. + * @param[in] priority + * Node priority. The highest node priority is zero. Used by the SP algorithm + * running on the parent of the current node for scheduling this child node. + * @param[in] weight + * Node weight. The node weight is relative to the weight sum of all siblings + * that have the same priority. The lowest weight is zero. Used by the WFQ + * algorithm running on the parent of the current node for scheduling this + * child node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see RTE_TM_UPDATE_NODE_PARENT_KEEP_LEVEL + * @see RTE_TM_UPDATE_NODE_PARENT_CHANGE_LEVEL + */ +int +rte_tm_node_parent_update(uint8_t port_id, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + struct rte_tm_error *error); + +/** + * Traffic manager node private shaper update + * + * Restriction for the root node: its private shaper profile needs to be valid + * and single rate. 
+ * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[in] shaper_profile_id + * Shaper profile ID for the private shaper of the current node. Needs to be + * either valid shaper profile ID or RTE_TM_SHAPER_PROFILE_ID_NONE, with + * the latter disabling the private shaper of the current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_private_n_max + */ +int +rte_tm_node_shaper_update(uint8_t port_id, + uint32_t node_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager node shared shapers update + * + * Restriction for root node: cannot use any shared rate shapers. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[in] shared_shaper_id + * Shared shaper ID. Needs to be valid. + * @param[in] add + * Set to non-zero value to add this shared shaper to current node or to zero + * to delete this shared shaper from current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_shared_n_max + */ +int +rte_tm_node_shared_shaper_update(uint8_t port_id, + uint32_t node_id, + uint32_t shared_shaper_id, + int add, + struct rte_tm_error *error); + +/** + * Traffic manager node enabled statistics counters update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[in] stats_mask + * Mask of statistics counter types to be enabled for the current node. This + * needs to be a subset of the statistics counter types available for the + * current node. Any statistics counter type not included in this set is to + * be disabled for the current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_tm_stats_type + * @see RTE_TM_UPDATE_NODE_STATS + */ +int +rte_tm_node_stats_update(uint8_t port_id, + uint32_t node_id, + uint64_t stats_mask, + struct rte_tm_error *error); + +/** + * Traffic manager node WFQ weight mode update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid leaf node ID. + * @param[in] wfq_weight_mode + * WFQ weight mode for each SP priority. When NULL, it indicates that WFQ is + * to be used for all priorities. When non-NULL, it points to a pre-allocated + * array of *n_sp_priorities* values, with non-zero value for byte-mode and + * zero for packet-mode. + * @param[in] n_sp_priorities + * Number of SP priorities. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see RTE_TM_UPDATE_NODE_WFQ_WEIGHT_MODE + * @see RTE_TM_UPDATE_NODE_N_SP_PRIORITIES + */ +int +rte_tm_node_wfq_weight_mode_update(uint8_t port_id, + uint32_t node_id, + int *wfq_weight_mode, + uint32_t n_sp_priorities, + struct rte_tm_error *error); + +/** + * Traffic manager node congestion management mode update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid leaf node ID. 
+ * @param[in] cman + * Congestion management mode. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see RTE_TM_UPDATE_NODE_CMAN + */ +int +rte_tm_node_cman_update(uint8_t port_id, + uint32_t node_id, + enum rte_tm_cman_mode cman, + struct rte_tm_error *error); + +/** + * Traffic manager node private WRED context update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid leaf node ID. + * @param[in] wred_profile_id + * WRED profile ID for the private WRED context of the current node. Needs to + * be either valid WRED profile ID or RTE_TM_WRED_PROFILE_ID_NONE, with the + * latter disabling the private WRED context of the current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_private_n_max +*/ +int +rte_tm_node_wred_context_update(uint8_t port_id, + uint32_t node_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager node shared WRED context update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid leaf node ID. + * @param[in] shared_wred_context_id + * Shared WRED context ID. Needs to be valid. + * @param[in] add + * Set to non-zero value to add this shared WRED context to current node or + * to zero to delete this shared WRED context from current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max + */ +int +rte_tm_node_shared_wred_context_update(uint8_t port_id, + uint32_t node_id, + uint32_t shared_wred_context_id, + int add, + struct rte_tm_error *error); + +/** + * Traffic manager node statistics counters read + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] stats + * When non-NULL, it contains the current value for the statistics counters + * enabled for the current node. + * @param[out] stats_mask + * When non-NULL, it contains the mask of statistics counter types that are + * currently enabled for this node, indicating which of the counters + * retrieved with the *stats* structure are valid. + * @param[in] clear + * When this parameter has a non-zero value, the statistics counters are + * cleared (i.e. set to zero) immediately after they have been read, + * otherwise the statistics counters are left untouched. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_tm_stats_type + */ +int +rte_tm_node_stats_read(uint8_t port_id, + uint32_t node_id, + struct rte_tm_node_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_tm_error *error); + +/** + * Traffic manager packet marking - VLAN DEI (IEEE 802.1Q) + * + * IEEE 802.1p maps the traffic class to the VLAN Priority Code Point (PCP) + * field (3 bits), while IEEE 802.1q maps the drop priority to the VLAN Drop + * Eligible Indicator (DEI) field (1 bit), which was previously named Canonical + * Format Indicator (CFI). 
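A minimal sketch of enabling and polling per-node counters with rte_tm_node_stats_update() and rte_tm_node_stats_read(); RTE_TM_STATS_N_PKTS and RTE_TM_STATS_N_BYTES are assumed members of enum rte_tm_stats_type defined earlier in this header.

#include <rte_tm.h>

/* Illustrative only: enable packet/byte counters on a leaf node and read
 * them with clear-on-read semantics.
 */
static int
poll_leaf_counters(uint8_t port_id, uint32_t leaf_node_id,
	struct rte_tm_node_stats *stats)
{
	struct rte_tm_error err;
	uint64_t mask = RTE_TM_STATS_N_PKTS | RTE_TM_STATS_N_BYTES;
	int ret;

	ret = rte_tm_node_stats_update(port_id, leaf_node_id, mask, &err);
	if (ret)
		return ret;

	/* Read the enabled counters back and reset them afterwards; *mask*
	 * is overwritten with the set of counters actually reported.
	 */
	return rte_tm_node_stats_read(port_id, leaf_node_id, stats, &mask,
		1 /* clear */, &err);
}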
+ * + * All VLAN frames of a given color get their DEI bit set if marking is enabled + * for this color; otherwise, their DEI bit is left as is (either set or not). + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mark_green + * Set to non-zero value to enable marking of green packets and to zero to + * disable it. + * @param[in] mark_yellow + * Set to non-zero value to enable marking of yellow packets and to zero to + * disable it. + * @param[in] mark_red + * Set to non-zero value to enable marking of red packets and to zero to + * disable it. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::mark_vlan_dei_supported + */ +int +rte_tm_mark_vlan_dei(uint8_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +/** + * Traffic manager packet marking - IPv4 / IPv6 ECN (IETF RFC 3168) + * + * IETF RFCs 2474 and 3168 reorganize the IPv4 Type of Service (TOS) field + * (8 bits) and the IPv6 Traffic Class (TC) field (8 bits) into Differentiated + * Services Codepoint (DSCP) field (6 bits) and Explicit Congestion + * Notification (ECN) field (2 bits). The DSCP field is typically used to + * encode the traffic class and/or drop priority (RFC 2597), while the ECN + * field is used by RFC 3168 to implement a congestion notification mechanism + * to be leveraged by transport layer protocols such as TCP and SCTP that have + * congestion control mechanisms. + * + * When congestion is experienced, as alternative to dropping the packet, + * routers can change the ECN field of input packets from 2'b01 or 2'b10 + * (values indicating that source endpoint is ECN-capable) to 2'b11 (meaning + * that congestion is experienced). The destination endpoint can use the + * ECN-Echo (ECE) TCP flag to relay the congestion indication back to the + * source endpoint, which acknowledges it back to the destination endpoint with + * the Congestion Window Reduced (CWR) TCP flag. + * + * All IPv4/IPv6 packets of a given color with ECN set to 2’b01 or 2’b10 + * carrying TCP or SCTP have their ECN set to 2’b11 if the marking feature is + * enabled for the current color, otherwise the ECN field is left as is. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mark_green + * Set to non-zero value to enable marking of green packets and to zero to + * disable it. + * @param[in] mark_yellow + * Set to non-zero value to enable marking of yellow packets and to zero to + * disable it. + * @param[in] mark_red + * Set to non-zero value to enable marking of red packets and to zero to + * disable it. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::mark_ip_ecn_tcp_supported + * @see struct rte_tm_capabilities::mark_ip_ecn_sctp_supported + */ +int +rte_tm_mark_ip_ecn(uint8_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +/** + * Traffic manager packet marking - IPv4 / IPv6 DSCP (IETF RFC 2597) + * + * IETF RFC 2597 maps the traffic class and the drop priority to the IPv4/IPv6 + * Differentiated Services Codepoint (DSCP) field (6 bits). 
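A minimal sketch of what per-color marking enablement looks like in practice: signal congestion through ECN-CE on out-of-profile (yellow and red) packets while leaving green packets untouched.

#include <rte_tm.h>

static int
enable_ecn_marking(uint8_t port_id)
{
	struct rte_tm_error err;

	return rte_tm_mark_ip_ecn(port_id,
		0 /* mark_green */,
		1 /* mark_yellow */,
		1 /* mark_red */,
		&err);
}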
Here are the DSCP + * values proposed by this RFC: + * + * <pre> Class 1 Class 2 Class 3 Class 4 </pre> + * <pre> +----------+----------+----------+----------+</pre> + * <pre>Low Drop Prec | 001010 | 010010 | 011010 | 100010 |</pre> + * <pre>Medium Drop Prec | 001100 | 010100 | 011100 | 100100 |</pre> + * <pre>High Drop Prec | 001110 | 010110 | 011110 | 100110 |</pre> + * <pre> +----------+----------+----------+----------+</pre> + * + * There are 4 traffic classes (classes 1 .. 4) encoded by DSCP bits 1 and 2, + * as well as 3 drop priorities (low/medium/high) encoded by DSCP bits 3 and 4. + * + * All IPv4/IPv6 packets have their color marked into DSCP bits 3 and 4 as + * follows: green mapped to Low Drop Precedence (2’b01), yellow to Medium + * (2’b10) and red to High (2’b11). Marking needs to be explicitly enabled + * for each color; when not enabled for a given color, the DSCP field of all + * packets with that color is left as is. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mark_green + * Set to non-zero value to enable marking of green packets and to zero to + * disable it. + * @param[in] mark_yellow + * Set to non-zero value to enable marking of yellow packets and to zero to + * disable it. + * @param[in] mark_red + * Set to non-zero value to enable marking of red packets and to zero to + * disable it. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::mark_ip_dscp_supported + */ +int +rte_tm_mark_ip_dscp(uint8_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_TM_H__ */ diff --git a/lib/librte_ether/rte_tm_driver.h b/lib/librte_ether/rte_tm_driver.h new file mode 100644 index 00000000..a5b698fe --- /dev/null +++ b/lib/librte_ether/rte_tm_driver.h @@ -0,0 +1,366 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TM_DRIVER_H__ +#define __INCLUDE_RTE_TM_DRIVER_H__ + +/** + * @file + * RTE Generic Traffic Manager API (Driver Side) + * + * This file provides implementation helpers for internal use by PMDs, they + * are not intended to be exposed to applications and are not subject to ABI + * versioning. + */ + +#include <stdint.h> + +#include <rte_errno.h> +#include "rte_ethdev.h" +#include "rte_tm.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** @internal Traffic manager node ID validate and type get */ +typedef int (*rte_tm_node_type_get_t)(struct rte_eth_dev *dev, + uint32_t node_id, + int *is_leaf, + struct rte_tm_error *error); + +/** @internal Traffic manager capabilities get */ +typedef int (*rte_tm_capabilities_get_t)(struct rte_eth_dev *dev, + struct rte_tm_capabilities *cap, + struct rte_tm_error *error); + +/** @internal Traffic manager level capabilities get */ +typedef int (*rte_tm_level_capabilities_get_t)(struct rte_eth_dev *dev, + uint32_t level_id, + struct rte_tm_level_capabilities *cap, + struct rte_tm_error *error); + +/** @internal Traffic manager node capabilities get */ +typedef int (*rte_tm_node_capabilities_get_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_node_capabilities *cap, + struct rte_tm_error *error); + +/** @internal Traffic manager WRED profile add */ +typedef int (*rte_tm_wred_profile_add_t)(struct rte_eth_dev *dev, + uint32_t wred_profile_id, + struct rte_tm_wred_params *profile, + struct rte_tm_error *error); + +/** @internal Traffic manager WRED profile delete */ +typedef int (*rte_tm_wred_profile_delete_t)(struct rte_eth_dev *dev, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shared WRED context add */ +typedef int (*rte_tm_shared_wred_context_add_update_t)( + struct rte_eth_dev *dev, + uint32_t shared_wred_context_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shared WRED context delete */ +typedef int (*rte_tm_shared_wred_context_delete_t)( + struct rte_eth_dev *dev, + uint32_t shared_wred_context_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shaper profile add */ +typedef int (*rte_tm_shaper_profile_add_t)(struct rte_eth_dev *dev, + uint32_t shaper_profile_id, + struct rte_tm_shaper_params *profile, + struct rte_tm_error *error); + +/** @internal Traffic manager shaper profile delete */ +typedef int (*rte_tm_shaper_profile_delete_t)(struct rte_eth_dev *dev, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shared shaper add/update */ +typedef int (*rte_tm_shared_shaper_add_update_t)(struct rte_eth_dev *dev, + uint32_t shared_shaper_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shared shaper delete */ +typedef int (*rte_tm_shared_shaper_delete_t)(struct rte_eth_dev *dev, + uint32_t shared_shaper_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node 
add */ +typedef int (*rte_tm_node_add_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + uint32_t level_id, + struct rte_tm_node_params *params, + struct rte_tm_error *error); + +/** @internal Traffic manager node delete */ +typedef int (*rte_tm_node_delete_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node suspend */ +typedef int (*rte_tm_node_suspend_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node resume */ +typedef int (*rte_tm_node_resume_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_error *error); + +/** @internal Traffic manager hierarchy commit */ +typedef int (*rte_tm_hierarchy_commit_t)(struct rte_eth_dev *dev, + int clear_on_fail, + struct rte_tm_error *error); + +/** @internal Traffic manager node parent update */ +typedef int (*rte_tm_node_parent_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + struct rte_tm_error *error); + +/** @internal Traffic manager node shaper update */ +typedef int (*rte_tm_node_shaper_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node shaper update */ +typedef int (*rte_tm_node_shared_shaper_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t shared_shaper_id, + int32_t add, + struct rte_tm_error *error); + +/** @internal Traffic manager node stats update */ +typedef int (*rte_tm_node_stats_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint64_t stats_mask, + struct rte_tm_error *error); + +/** @internal Traffic manager node WFQ weight mode update */ +typedef int (*rte_tm_node_wfq_weight_mode_update_t)( + struct rte_eth_dev *dev, + uint32_t node_id, + int *wfq_weigth_mode, + uint32_t n_sp_priorities, + struct rte_tm_error *error); + +/** @internal Traffic manager node congestion management mode update */ +typedef int (*rte_tm_node_cman_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + enum rte_tm_cman_mode cman, + struct rte_tm_error *error); + +/** @internal Traffic manager node WRED context update */ +typedef int (*rte_tm_node_wred_context_update_t)( + struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node WRED context update */ +typedef int (*rte_tm_node_shared_wred_context_update_t)( + struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t shared_wred_context_id, + int add, + struct rte_tm_error *error); + +/** @internal Traffic manager read stats counters for specific node */ +typedef int (*rte_tm_node_stats_read_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_node_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_tm_error *error); + +/** @internal Traffic manager packet marking - VLAN DEI */ +typedef int (*rte_tm_mark_vlan_dei_t)(struct rte_eth_dev *dev, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +/** @internal Traffic manager packet marking - IPv4/IPv6 ECN */ +typedef int (*rte_tm_mark_ip_ecn_t)(struct rte_eth_dev *dev, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +/** @internal Traffic manager packet marking - IPv4/IPv6 DSCP */ +typedef int (*rte_tm_mark_ip_dscp_t)(struct rte_eth_dev *dev, + int mark_green, + int mark_yellow, 
+ int mark_red, + struct rte_tm_error *error); + +struct rte_tm_ops { + /** Traffic manager node type get */ + rte_tm_node_type_get_t node_type_get; + + /** Traffic manager capabilities_get */ + rte_tm_capabilities_get_t capabilities_get; + /** Traffic manager level capabilities_get */ + rte_tm_level_capabilities_get_t level_capabilities_get; + /** Traffic manager node capabilities get */ + rte_tm_node_capabilities_get_t node_capabilities_get; + + /** Traffic manager WRED profile add */ + rte_tm_wred_profile_add_t wred_profile_add; + /** Traffic manager WRED profile delete */ + rte_tm_wred_profile_delete_t wred_profile_delete; + /** Traffic manager shared WRED context add/update */ + rte_tm_shared_wred_context_add_update_t + shared_wred_context_add_update; + /** Traffic manager shared WRED context delete */ + rte_tm_shared_wred_context_delete_t + shared_wred_context_delete; + + /** Traffic manager shaper profile add */ + rte_tm_shaper_profile_add_t shaper_profile_add; + /** Traffic manager shaper profile delete */ + rte_tm_shaper_profile_delete_t shaper_profile_delete; + /** Traffic manager shared shaper add/update */ + rte_tm_shared_shaper_add_update_t shared_shaper_add_update; + /** Traffic manager shared shaper delete */ + rte_tm_shared_shaper_delete_t shared_shaper_delete; + + /** Traffic manager node add */ + rte_tm_node_add_t node_add; + /** Traffic manager node delete */ + rte_tm_node_delete_t node_delete; + /** Traffic manager node suspend */ + rte_tm_node_suspend_t node_suspend; + /** Traffic manager node resume */ + rte_tm_node_resume_t node_resume; + /** Traffic manager hierarchy commit */ + rte_tm_hierarchy_commit_t hierarchy_commit; + + /** Traffic manager node parent update */ + rte_tm_node_parent_update_t node_parent_update; + /** Traffic manager node shaper update */ + rte_tm_node_shaper_update_t node_shaper_update; + /** Traffic manager node shared shaper update */ + rte_tm_node_shared_shaper_update_t node_shared_shaper_update; + /** Traffic manager node stats update */ + rte_tm_node_stats_update_t node_stats_update; + /** Traffic manager node WFQ weight mode update */ + rte_tm_node_wfq_weight_mode_update_t node_wfq_weight_mode_update; + /** Traffic manager node congestion management mode update */ + rte_tm_node_cman_update_t node_cman_update; + /** Traffic manager node WRED context update */ + rte_tm_node_wred_context_update_t node_wred_context_update; + /** Traffic manager node shared WRED context update */ + rte_tm_node_shared_wred_context_update_t + node_shared_wred_context_update; + /** Traffic manager read statistics counters for current node */ + rte_tm_node_stats_read_t node_stats_read; + + /** Traffic manager packet marking - VLAN DEI */ + rte_tm_mark_vlan_dei_t mark_vlan_dei; + /** Traffic manager packet marking - IPv4/IPv6 ECN */ + rte_tm_mark_ip_ecn_t mark_ip_ecn; + /** Traffic manager packet marking - IPv4/IPv6 DSCP */ + rte_tm_mark_ip_dscp_t mark_ip_dscp; +}; + +/** + * Initialize generic error structure. + * + * This function also sets rte_errno to a given value. + * + * @param[out] error + * Pointer to error structure (may be NULL). + * @param[in] code + * Related error code (rte_errno). + * @param[in] type + * Cause field and error type. + * @param[in] cause + * Object responsible for the error. + * @param[in] message + * Human-readable error message. + * + * @return + * Error code. 
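A minimal sketch of the driver side: a hypothetical PMD implementing the node_type_get callback, reporting failures through rte_tm_error_set() (defined just below) and plugging the callback into a const rte_tm_ops instance. The use of dev->data->nb_tx_queues as the leaf count, the single non-leaf root with ID nb_tx_queues, and the negated return value are assumptions made for illustration.

#include <errno.h>
#include "rte_tm_driver.h"

static int
my_pmd_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
	int *is_leaf, struct rte_tm_error *error)
{
	uint16_t n_txq = dev->data->nb_tx_queues;

	/* This toy driver exposes exactly one non-leaf root node, with ID
	 * equal to n_txq; leaf IDs 0 .. (n_txq - 1) map to the TX queues.
	 */
	if (is_leaf == NULL || node_id > n_txq)
		return -rte_tm_error_set(error, EINVAL,
			RTE_TM_ERROR_TYPE_NODE_ID, NULL,
			"node id out of range");

	*is_leaf = node_id < n_txq;
	return 0;
}

static const struct rte_tm_ops my_pmd_tm_ops = {
	.node_type_get = my_pmd_node_type_get,
	/* remaining callbacks omitted; unimplemented ones left NULL */
};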
+ */ +static inline int +rte_tm_error_set(struct rte_tm_error *error, + int code, + enum rte_tm_error_type type, + const void *cause, + const char *message) +{ + if (error) { + *error = (struct rte_tm_error){ + .type = type, + .cause = cause, + .message = message, + }; + } + rte_errno = code; + return code; +} + +/** + * Get generic traffic manager operations structure from a port + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] error + * Error details + * + * @return + * The traffic manager operations structure associated with port_id on + * success, NULL otherwise. + */ +const struct rte_tm_ops * +rte_tm_ops_get(uint8_t port_id, struct rte_tm_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_TM_DRIVER_H__ */ diff --git a/lib/librte_eventdev/Makefile b/lib/librte_eventdev/Makefile index e06346a6..410578a1 100644 --- a/lib/librte_eventdev/Makefile +++ b/lib/librte_eventdev/Makefile @@ -1,6 +1,6 @@ # BSD LICENSE # -# Copyright(c) 2016 Cavium networks. All rights reserved. +# Copyright(c) 2016 Cavium, Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -12,7 +12,7 @@ # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. -# * Neither the name of Cavium networks nor the names of its +# * Neither the name of Cavium, Inc nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # @@ -34,7 +34,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_eventdev.a # library version -LIBABIVER := 1 +LIBABIVER := 2 # build flags CFLAGS += -O3 @@ -42,10 +42,14 @@ CFLAGS += $(WERROR_FLAGS) # library source files SRCS-y += rte_eventdev.c +SRCS-y += rte_event_ring.c # export include files SYMLINK-y-include += rte_eventdev.h SYMLINK-y-include += rte_eventdev_pmd.h +SYMLINK-y-include += rte_eventdev_pmd_pci.h +SYMLINK-y-include += rte_eventdev_pmd_vdev.h +SYMLINK-y-include += rte_event_ring.h # versioning export map EXPORT_MAP := rte_eventdev_version.map diff --git a/lib/librte_eventdev/rte_event_ring.c b/lib/librte_eventdev/rte_event_ring.c new file mode 100644 index 00000000..b14c2127 --- /dev/null +++ b/lib/librte_eventdev/rte_event_ring.c @@ -0,0 +1,207 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/queue.h> +#include <string.h> + +#include <rte_tailq.h> +#include <rte_memzone.h> +#include <rte_rwlock.h> +#include <rte_eal_memconfig.h> +#include "rte_event_ring.h" + +TAILQ_HEAD(rte_event_ring_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_event_ring_tailq = { + .name = RTE_TAILQ_EVENT_RING_NAME, +}; +EAL_REGISTER_TAILQ(rte_event_ring_tailq) + +int +rte_event_ring_init(struct rte_event_ring *r, const char *name, + unsigned int count, unsigned int flags) +{ + /* compilation-time checks */ + RTE_BUILD_BUG_ON((sizeof(struct rte_event_ring) & + RTE_CACHE_LINE_MASK) != 0); + + /* init the ring structure */ + return rte_ring_init(&r->r, name, count, flags); +} + +/* create the ring */ +struct rte_event_ring * +rte_event_ring_create(const char *name, unsigned int count, int socket_id, + unsigned int flags) +{ + char mz_name[RTE_MEMZONE_NAMESIZE]; + struct rte_event_ring *r; + struct rte_tailq_entry *te; + const struct rte_memzone *mz; + ssize_t ring_size; + int mz_flags = 0; + struct rte_event_ring_list *ring_list = NULL; + const unsigned int requested_count = count; + int ret; + + ring_list = RTE_TAILQ_CAST(rte_event_ring_tailq.head, + rte_event_ring_list); + + /* for an exact size ring, round up from count to a power of two */ + if (flags & RING_F_EXACT_SZ) + count = rte_align32pow2(count + 1); + else if (!rte_is_power_of_2(count)) { + rte_errno = EINVAL; + return NULL; + } + + ring_size = sizeof(*r) + (count * sizeof(struct rte_event)); + + ret = snprintf(mz_name, sizeof(mz_name), "%s%s", + RTE_RING_MZ_PREFIX, name); + if (ret < 0 || ret >= (int)sizeof(mz_name)) { + rte_errno = ENAMETOOLONG; + return NULL; + } + + te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, RING, "Cannot reserve memory for tailq\n"); + rte_errno = ENOMEM; + return NULL; + } + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* + * reserve a memory zone for this ring. 
If we can't get rte_config or + * we are secondary process, the memzone_reserve function will set + * rte_errno for us appropriately - hence no check in this this function + */ + mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags); + if (mz != NULL) { + r = mz->addr; + /* + * no need to check return value here, we already checked the + * arguments above + */ + rte_event_ring_init(r, name, requested_count, flags); + + te->data = (void *) r; + r->r.memzone = mz; + + TAILQ_INSERT_TAIL(ring_list, te, next); + } else { + r = NULL; + RTE_LOG(ERR, RING, "Cannot reserve memory\n"); + rte_free(te); + } + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return r; +} + + +struct rte_event_ring * +rte_event_ring_lookup(const char *name) +{ + struct rte_tailq_entry *te; + struct rte_event_ring *r = NULL; + struct rte_event_ring_list *ring_list; + + ring_list = RTE_TAILQ_CAST(rte_event_ring_tailq.head, + rte_event_ring_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + + TAILQ_FOREACH(te, ring_list, next) { + r = (struct rte_event_ring *) te->data; + if (strncmp(name, r->r.name, RTE_RING_NAMESIZE) == 0) + break; + } + + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + + return r; +} + +/* free the ring */ +void +rte_event_ring_free(struct rte_event_ring *r) +{ + struct rte_event_ring_list *ring_list = NULL; + struct rte_tailq_entry *te; + + if (r == NULL) + return; + + /* + * Ring was not created with rte_event_ring_create, + * therefore, there is no memzone to free. + */ + if (r->r.memzone == NULL) { + RTE_LOG(ERR, RING, + "Cannot free ring (not created with rte_event_ring_create()"); + return; + } + + if (rte_memzone_free(r->r.memzone) != 0) { + RTE_LOG(ERR, RING, "Cannot free memory\n"); + return; + } + + ring_list = RTE_TAILQ_CAST(rte_event_ring_tailq.head, + rte_event_ring_list); + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find out tailq entry */ + TAILQ_FOREACH(te, ring_list, next) { + if (te->data == (void *) r) + break; + } + + if (te == NULL) { + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return; + } + + TAILQ_REMOVE(ring_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + rte_free(te); +} diff --git a/lib/librte_eventdev/rte_event_ring.h b/lib/librte_eventdev/rte_event_ring.h new file mode 100644 index 00000000..ea9b6885 --- /dev/null +++ b/lib/librte_eventdev/rte_event_ring.h @@ -0,0 +1,308 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * RTE Event Ring + * + * This provides a ring implementation for passing rte_event structures + * from one core to another. + */ + +#ifndef _RTE_EVENT_RING_ +#define _RTE_EVENT_RING_ + +#include <stdint.h> + +#include <rte_common.h> +#include <rte_memory.h> +#include <rte_malloc.h> +#include <rte_ring.h> +#include "rte_eventdev.h" + +#define RTE_TAILQ_EVENT_RING_NAME "RTE_EVENT_RING" + +/** + * Generic ring structure for passing rte_event objects from core to core. + * + * Based on the primitives given in the rte_ring library. Designed to be + * used inside software eventdev implementations and by applications + * directly as needed. + */ +struct rte_event_ring { + struct rte_ring r; +}; + +/** + * Returns the number of events in the ring + * + * @param r + * pointer to the event ring + * @return + * the number of events in the ring + */ +static __rte_always_inline unsigned int +rte_event_ring_count(const struct rte_event_ring *r) +{ + return rte_ring_count(&r->r); +} + +/** + * Returns the amount of free space in the ring + * + * @param r + * pointer to the event ring + * @return + * the number of free slots in the ring, i.e. the number of events that + * can be successfully enqueued before dequeue must be called + */ +static __rte_always_inline unsigned int +rte_event_ring_free_count(const struct rte_event_ring *r) +{ + return rte_ring_free_count(&r->r); +} + +/** + * Enqueue a set of events onto a ring + * + * Note: this API enqueues by copying the events themselves onto the ring, + * rather than just placing a pointer to each event onto the ring. This + * means that statically-allocated events can safely be enqueued by this + * API. + * + * @param r + * pointer to the event ring + * @param events + * pointer to an array of struct rte_event objects + * @param n + * number of events in the array to enqueue + * @param free_space + * if non-null, is updated to indicate the amount of free space in the + * ring once the enqueue has completed. + * @return + * the number of elements, n', enqueued to the ring, 0 <= n' <= n + */ +static __rte_always_inline unsigned int +rte_event_ring_enqueue_burst(struct rte_event_ring *r, + const struct rte_event *events, + unsigned int n, uint16_t *free_space) +{ + uint32_t prod_head, prod_next; + uint32_t free_entries; + + n = __rte_ring_move_prod_head(&r->r, r->r.prod.single, n, + RTE_RING_QUEUE_VARIABLE, + &prod_head, &prod_next, &free_entries); + if (n == 0) + goto end; + + ENQUEUE_PTRS(&r->r, &r[1], prod_head, events, n, struct rte_event); + rte_smp_wmb(); + + update_tail(&r->r.prod, prod_head, prod_next, 1); +end: + if (free_space != NULL) + *free_space = free_entries - n; + return n; +} + +/** + * Dequeue a set of events from a ring + * + * Note: this API does not work with pointers to events, rather it copies + * the events themselves to the destination ``events`` buffer. 
+ * + * @param r + * pointer to the event ring + * @param events + * pointer to an array to hold the struct rte_event objects + * @param n + * number of events that can be held in the ``events`` array + * @param available + * if non-null, is updated to indicate the number of events remaining in + * the ring once the dequeue has completed + * @return + * the number of elements, n', dequeued from the ring, 0 <= n' <= n + */ +static __rte_always_inline unsigned int +rte_event_ring_dequeue_burst(struct rte_event_ring *r, + struct rte_event *events, + unsigned int n, uint16_t *available) +{ + uint32_t cons_head, cons_next; + uint32_t entries; + + n = __rte_ring_move_cons_head(&r->r, r->r.cons.single, n, + RTE_RING_QUEUE_VARIABLE, + &cons_head, &cons_next, &entries); + if (n == 0) + goto end; + + DEQUEUE_PTRS(&r->r, &r[1], cons_head, events, n, struct rte_event); + rte_smp_rmb(); + + update_tail(&r->r.cons, cons_head, cons_next, 1); + +end: + if (available != NULL) + *available = entries - n; + return n; +} + +/* + * Initializes an already-allocated ring structure + * + * @param r + * pointer to the ring memory to be initialized + * @param name + * name to be given to the ring + * @param count + * the number of elements to be stored in the ring. If the flag + * ``RING_F_EXACT_SZ`` is not set, this must be a power of 2, and the actual + * usable space in the ring will be ``count - 1`` entries. If the flag + * ``RING_F_EXACT_SZ`` is set, the this can be any value up to the ring size + * limit - 1, and the usable space will be exactly that requested. + * @param flags + * An OR of the following: + * - RING_F_SP_ENQ: If this flag is set, the default behavior when + * using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()`` + * is "single-producer". Otherwise, it is "multi-producers". + * - RING_F_SC_DEQ: If this flag is set, the default behavior when + * using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()`` + * is "single-consumer". Otherwise, it is "multi-consumers". + * - RING_F_EXACT_SZ: If this flag is set, the ``count`` parameter is to + * be taken as the exact usable size of the ring, and as such does not + * need to be a power of 2. The underlying ring memory should be a + * power-of-2 size greater than the count value. + * @return + * 0 on success, or a negative value on error. + */ +int +rte_event_ring_init(struct rte_event_ring *r, const char *name, + unsigned int count, unsigned int flags); + +/* + * Create an event ring structure + * + * This function allocates memory and initializes an event ring inside that + * memory. + * + * @param name + * name to be given to the ring + * @param count + * the number of elements to be stored in the ring. If the flag + * ``RING_F_EXACT_SZ`` is not set, this must be a power of 2, and the actual + * usable space in the ring will be ``count - 1`` entries. If the flag + * ``RING_F_EXACT_SZ`` is set, the this can be any value up to the ring size + * limit - 1, and the usable space will be exactly that requested. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of + * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA + * constraint for the reserved zone. + * @param flags + * An OR of the following: + * - RING_F_SP_ENQ: If this flag is set, the default behavior when + * using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()`` + * is "single-producer". Otherwise, it is "multi-producers". 
+ * - RING_F_SC_DEQ: If this flag is set, the default behavior when + * using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()`` + * is "single-consumer". Otherwise, it is "multi-consumers". + * - RING_F_EXACT_SZ: If this flag is set, the ``count`` parameter is to + * be taken as the exact usable size of the ring, and as such does not + * need to be a power of 2. The underlying ring memory should be a + * power-of-2 size greater than the count value. + * @return + * On success, the pointer to the new allocated ring. NULL on error with + * rte_errno set appropriately. Possible errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - count provided is not a power of 2 + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_event_ring * +rte_event_ring_create(const char *name, unsigned int count, int socket_id, + unsigned int flags); + +/** + * Search for an event ring based on its name + * + * @param name + * The name of the ring. + * @return + * The pointer to the ring matching the name, or NULL if not found, + * with rte_errno set appropriately. Possible rte_errno values include: + * - ENOENT - required entry not available to return. + */ +struct rte_event_ring * +rte_event_ring_lookup(const char *name); + +/** + * De-allocate all memory used by the ring. + * + * @param r + * Ring to free + */ +void +rte_event_ring_free(struct rte_event_ring *r); + +/** + * Return the size of the event ring. + * + * @param r + * A pointer to the ring structure. + * @return + * The size of the data store used by the ring. + * NOTE: this is not the same as the usable space in the ring. To query that + * use ``rte_ring_get_capacity()``. + */ +static inline unsigned int +rte_event_ring_get_size(const struct rte_event_ring *r) +{ + return rte_ring_get_size(&r->r); +} + +/** + * Return the number of elements which can be stored in the event ring. + * + * @param r + * A pointer to the ring structure. + * @return + * The usable size of the ring. + */ +static inline unsigned int +rte_event_ring_get_capacity(const struct rte_event_ring *r) +{ + return rte_ring_get_capacity(&r->r); +} +#endif diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c index 20afc3f0..bbb38050 100644 --- a/lib/librte_eventdev/rte_eventdev.c +++ b/lib/librte_eventdev/rte_eventdev.c @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright(c) 2016 Cavium networks. All rights reserved. + * Copyright(c) 2016 Cavium, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
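For reference, a minimal usage sketch of the event ring API added above; the ring name, size, flags and event field values are illustrative assumptions and not part of the patch:

#include <stdio.h>
#include <rte_lcore.h>
#include <rte_event_ring.h>

static void
event_ring_example(void)
{
	struct rte_event_ring *ring;
	struct rte_event in[8], out[8];
	uint16_t free_space, available;
	unsigned int i, n;

	/* 1024 usable slots; single producer/consumer for this sketch */
	ring = rte_event_ring_create("example_ring", 1024, rte_socket_id(),
			RING_F_SP_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ);
	if (ring == NULL)
		return;

	for (i = 0; i < 8; i++) {
		in[i].event = 0;		/* clear the metadata word */
		in[i].op = RTE_EVENT_OP_NEW;
		in[i].queue_id = 0;
		in[i].u64 = i;			/* example payload */
	}

	/* events are copied by value, so a stack-allocated array is fine */
	n = rte_event_ring_enqueue_burst(ring, in, 8, &free_space);
	n = rte_event_ring_dequeue_burst(ring, out, n, &available);
	printf("moved %u events, %u still in ring\n", n, available);

	rte_event_ring_free(ring);
}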
* @@ -45,7 +45,6 @@ #include <rte_log.h> #include <rte_debug.h> #include <rte_dev.h> -#include <rte_pci.h> #include <rte_memory.h> #include <rte_memcpy.h> #include <rte_memzone.h> @@ -126,8 +125,6 @@ rte_event_dev_info_get(uint8_t dev_id, struct rte_event_dev_info *dev_info) dev_info->dequeue_timeout_ns = dev->data->dev_conf.dequeue_timeout_ns; dev_info->dev = dev->dev; - if (dev->driver) - dev_info->driver_name = dev->driver->pci_drv.driver.name; return 0; } @@ -301,7 +298,7 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports) sizeof(dev->data->links_map[0]) * nb_ports * RTE_EVENT_MAX_QUEUES_PER_DEV, RTE_CACHE_LINE_SIZE); - if (dev->data->links_map == NULL) { + if (links_map == NULL) { dev->data->nb_ports = 0; RTE_EDEV_LOG_ERR("failed to realloc mem for port_map," "nb_ports %u", nb_ports); @@ -369,9 +366,10 @@ rte_event_dev_configure(uint8_t dev_id, /* Check dequeue_timeout_ns value is in limit */ if (!(dev_conf->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)) { - if (dev_conf->dequeue_timeout_ns < info.min_dequeue_timeout_ns + if (dev_conf->dequeue_timeout_ns && + (dev_conf->dequeue_timeout_ns < info.min_dequeue_timeout_ns || dev_conf->dequeue_timeout_ns > - info.max_dequeue_timeout_ns) { + info.max_dequeue_timeout_ns)) { RTE_EDEV_LOG_ERR("dev%d invalid dequeue_timeout_ns=%d" " min_dequeue_timeout_ns=%d max_dequeue_timeout_ns=%d", dev_id, dev_conf->dequeue_timeout_ns, @@ -429,8 +427,9 @@ rte_event_dev_configure(uint8_t dev_id, dev_id); return -EINVAL; } - if (dev_conf->nb_event_port_dequeue_depth > - info.max_event_port_dequeue_depth) { + if ((info.event_dev_cap & RTE_EVENT_DEV_CAP_BURST_MODE) && + (dev_conf->nb_event_port_dequeue_depth > + info.max_event_port_dequeue_depth)) { RTE_EDEV_LOG_ERR("dev%d nb_dq_depth=%d > max_dq_depth=%d", dev_id, dev_conf->nb_event_port_dequeue_depth, info.max_event_port_dequeue_depth); @@ -443,8 +442,9 @@ rte_event_dev_configure(uint8_t dev_id, dev_id); return -EINVAL; } - if (dev_conf->nb_event_port_enqueue_depth > - info.max_event_port_enqueue_depth) { + if ((info.event_dev_cap & RTE_EVENT_DEV_CAP_BURST_MODE) && + (dev_conf->nb_event_port_enqueue_depth > + info.max_event_port_enqueue_depth)) { RTE_EDEV_LOG_ERR("dev%d nb_enq_depth=%d > max_enq_depth=%d", dev_id, dev_conf->nb_event_port_enqueue_depth, info.max_event_port_enqueue_depth); @@ -1174,10 +1174,6 @@ rte_event_pmd_release(struct rte_eventdev *eventdev) if (eventdev == NULL) return -EINVAL; - ret = rte_event_dev_close(eventdev->data->dev_id); - if (ret < 0) - return ret; - eventdev->attached = RTE_EVENTDEV_DETACHED; eventdev_globals.nb_devs--; @@ -1202,144 +1198,3 @@ rte_event_pmd_release(struct rte_eventdev *eventdev) eventdev->data = NULL; return 0; } - -struct rte_eventdev * -rte_event_pmd_vdev_init(const char *name, size_t dev_private_size, - int socket_id) -{ - struct rte_eventdev *eventdev; - - /* Allocate device structure */ - eventdev = rte_event_pmd_allocate(name, socket_id); - if (eventdev == NULL) - return NULL; - - /* Allocate private device structure */ - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - eventdev->data->dev_private = - rte_zmalloc_socket("eventdev device private", - dev_private_size, - RTE_CACHE_LINE_SIZE, - socket_id); - - if (eventdev->data->dev_private == NULL) - rte_panic("Cannot allocate memzone for private device" - " data"); - } - - return eventdev; -} - -int -rte_event_pmd_vdev_uninit(const char *name) -{ - struct rte_eventdev *eventdev; - - if (name == NULL) - return -EINVAL; - - eventdev = rte_event_pmd_get_named_dev(name); - if 
(eventdev == NULL) - return -ENODEV; - - /* Free the event device */ - rte_event_pmd_release(eventdev); - - return 0; -} - -int -rte_event_pmd_pci_probe(struct rte_pci_driver *pci_drv, - struct rte_pci_device *pci_dev) -{ - struct rte_eventdev_driver *eventdrv; - struct rte_eventdev *eventdev; - - char eventdev_name[RTE_EVENTDEV_NAME_MAX_LEN]; - - int retval; - - eventdrv = (struct rte_eventdev_driver *)pci_drv; - if (eventdrv == NULL) - return -ENODEV; - - rte_pci_device_name(&pci_dev->addr, eventdev_name, - sizeof(eventdev_name)); - - eventdev = rte_event_pmd_allocate(eventdev_name, - pci_dev->device.numa_node); - if (eventdev == NULL) - return -ENOMEM; - - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - eventdev->data->dev_private = - rte_zmalloc_socket( - "eventdev private structure", - eventdrv->dev_private_size, - RTE_CACHE_LINE_SIZE, - rte_socket_id()); - - if (eventdev->data->dev_private == NULL) - rte_panic("Cannot allocate memzone for private " - "device data"); - } - - eventdev->dev = &pci_dev->device; - eventdev->driver = eventdrv; - - /* Invoke PMD device initialization function */ - retval = (*eventdrv->eventdev_init)(eventdev); - if (retval == 0) - return 0; - - RTE_EDEV_LOG_ERR("driver %s: (vendor_id=0x%x device_id=0x%x)" - " failed", pci_drv->driver.name, - (unsigned int) pci_dev->id.vendor_id, - (unsigned int) pci_dev->id.device_id); - - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - rte_free(eventdev->data->dev_private); - - eventdev->attached = RTE_EVENTDEV_DETACHED; - eventdev_globals.nb_devs--; - - return -ENXIO; -} - -int -rte_event_pmd_pci_remove(struct rte_pci_device *pci_dev) -{ - const struct rte_eventdev_driver *eventdrv; - struct rte_eventdev *eventdev; - char eventdev_name[RTE_EVENTDEV_NAME_MAX_LEN]; - int ret; - - if (pci_dev == NULL) - return -EINVAL; - - rte_pci_device_name(&pci_dev->addr, eventdev_name, - sizeof(eventdev_name)); - - eventdev = rte_event_pmd_get_named_dev(eventdev_name); - if (eventdev == NULL) - return -ENODEV; - - eventdrv = (const struct rte_eventdev_driver *)pci_dev->driver; - if (eventdrv == NULL) - return -ENODEV; - - /* Invoke PMD device un-init function */ - if (*eventdrv->eventdev_uninit) { - ret = (*eventdrv->eventdev_uninit)(eventdev); - if (ret) - return ret; - } - - /* Free event device */ - rte_event_pmd_release(eventdev); - - eventdev->dev = NULL; - eventdev->driver = NULL; - - return 0; -} diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h index 20e7293e..128bc522 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright 2016 Cavium. + * Copyright 2016 Cavium, Inc. * Copyright 2016 Intel Corporation. * Copyright 2016 NXP. * @@ -15,7 +15,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -199,20 +199,6 @@ * operation. Instead, Event drivers export Poll-Mode enqueue and dequeue * functions to applications. 
* - * An event driven based application has following typical workflow on fastpath: - * \code{.c} - * while (1) { - * - * rte_event_schedule(dev_id); - * - * rte_event_dequeue(...); - * - * (event processing) - * - * rte_event_enqueue(...); - * } - * \endcode - * * The events are injected to event device through *enqueue* operation by * event producers in the system. The typical event producers are ethdev * subsystem for generating packet events, CPU(SW) for generating events based @@ -237,6 +223,15 @@ * indicates the device is centralized and thus needs a dedicated scheduling * thread that repeatedly calls rte_event_schedule(). * + * An event driven worker thread has following typical workflow on fastpath: + * \code{.c} + * while (1) { + * rte_event_dequeue_burst(...); + * (event processing) + * rte_event_enqueue_burst(...); + * } + * \endcode + * */ #ifdef __cplusplus @@ -279,6 +274,14 @@ struct rte_mbuf; /* we just use mbuf pointers; no need to include rte_mbuf.h */ * * @see RTE_EVENT_QUEUE_CFG_* values */ +#define RTE_EVENT_DEV_CAP_BURST_MODE (1ULL << 4) +/**< Event device is capable of operating in burst mode for enqueue(forward, + * release) and dequeue operation. If this capability is not set, application + * still uses the rte_event_dequeue_burst() and rte_event_enqueue_burst() but + * PMD accepts only one event at a time. + * + * @see rte_event_dequeue_burst() rte_event_enqueue_burst() + */ /* Event device priority levels */ #define RTE_EVENT_DEV_PRIORITY_HIGHEST 0 @@ -409,6 +412,7 @@ struct rte_event_dev_config { * This value should be in the range of *min_dequeue_timeout_ns* and * *max_dequeue_timeout_ns* which previously provided in * rte_event_dev_info_get() + * The value 0 is allowed, in which case, default dequeue timeout used. * @see RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT */ int32_t nb_events_limit; @@ -438,14 +442,16 @@ struct rte_event_dev_config { /**< Maximum number of events can be dequeued at a time from an * event port by this device. * This value cannot exceed the *max_event_port_dequeue_depth* - * which previously provided in rte_event_dev_info_get() + * which previously provided in rte_event_dev_info_get(). + * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable. * @see rte_event_port_setup() */ uint32_t nb_event_port_enqueue_depth; /**< Maximum number of events can be enqueued at a time from an * event port by this device. * This value cannot exceed the *max_event_port_enqueue_depth* - * which previously provided in rte_event_dev_info_get() + * which previously provided in rte_event_dev_info_get(). + * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable. * @see rte_event_port_setup() */ uint32_t event_dev_cfg; @@ -521,9 +527,11 @@ rte_event_dev_configure(uint8_t dev_id, struct rte_event_queue_conf { uint32_t nb_atomic_flows; /**< The maximum number of active flows this queue can track at any - * given time. The value must be in the range of - * [1 - nb_event_queue_flows)] which previously provided in - * rte_event_dev_info_get(). + * given time. If the queue is configured for atomic scheduling (by + * applying the RTE_EVENT_QUEUE_CFG_ALL_TYPES or + * RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY flags to event_queue_cfg), then the + * value must be in the range of [1, nb_event_queue_flows], which was + * previously provided in rte_event_dev_configure(). 
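A hedged sketch of how an application might configure a device under the semantics described above: the port depth limits are only enforced for RTE_EVENT_DEV_CAP_BURST_MODE capable devices, and a dequeue_timeout_ns of 0 now selects the PMD default. The resource counts chosen here are illustrative, not recommendations:

#include <string.h>
#include <rte_eventdev.h>

static int
configure_example(uint8_t dev_id)
{
	struct rte_event_dev_info info;
	struct rte_event_dev_config cfg;

	if (rte_event_dev_info_get(dev_id, &info) < 0)
		return -1;

	memset(&cfg, 0, sizeof(cfg));
	cfg.nb_event_queues = info.max_event_queues;
	cfg.nb_event_ports = info.max_event_ports;
	cfg.nb_events_limit = info.max_num_events;
	cfg.nb_event_queue_flows = info.max_event_queue_flows;
	cfg.dequeue_timeout_ns = 0; /* 0 = use the PMD default timeout */

	if (info.event_dev_cap & RTE_EVENT_DEV_CAP_BURST_MODE) {
		cfg.nb_event_port_dequeue_depth =
				info.max_event_port_dequeue_depth;
		cfg.nb_event_port_enqueue_depth =
				info.max_event_port_enqueue_depth;
	} else {
		/* ignored by non-burst devices, but keep them sane */
		cfg.nb_event_port_dequeue_depth = 1;
		cfg.nb_event_port_enqueue_depth = 1;
	}

	return rte_event_dev_configure(dev_id, &cfg);
}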
*/ uint32_t nb_atomic_order_sequences; /**< The maximum number of outstanding events waiting to be @@ -533,8 +541,11 @@ struct rte_event_queue_conf { * scheduler cannot schedule the events from this queue and invalid * event will be returned from dequeue until one or more entries are * freed up/released. - * The value must be in the range of [1 - nb_event_queue_flows)] - * which previously supplied to rte_event_dev_configure(). + * If the queue is configured for ordered scheduling (by applying the + * RTE_EVENT_QUEUE_CFG_ALL_TYPES or RTE_EVENT_QUEUE_CFG_ORDERED_ONLY + * flags to event_queue_cfg), then the value must be in the range of + * [1, nb_event_queue_flows], which was previously supplied to + * rte_event_dev_configure(). */ uint32_t event_queue_cfg; /**< Queue cfg flags(EVENT_QUEUE_CFG_) */ uint8_t priority; @@ -642,12 +653,14 @@ struct rte_event_port_conf { uint16_t dequeue_depth; /**< Configure number of bulk dequeues for this event port. * This value cannot exceed the *nb_event_port_dequeue_depth* - * which previously supplied to rte_event_dev_configure() + * which previously supplied to rte_event_dev_configure(). + * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable. */ uint16_t enqueue_depth; /**< Configure number of bulk enqueues for this event port. * This value cannot exceed the *nb_event_port_enqueue_depth* - * which previously supplied to rte_event_dev_configure() + * which previously supplied to rte_event_dev_configure(). + * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable. */ }; @@ -1052,6 +1065,10 @@ struct rte_eventdev { /**< Pointer to PMD enqueue function. */ event_enqueue_burst_t enqueue_burst; /**< Pointer to PMD enqueue burst function. */ + event_enqueue_burst_t enqueue_new_burst; + /**< Pointer to PMD enqueue burst function(op new variant) */ + event_enqueue_burst_t enqueue_forward_burst; + /**< Pointer to PMD enqueue burst function(op forward variant) */ event_dequeue_t dequeue; /**< Pointer to PMD dequeue function. */ event_dequeue_burst_t dequeue_burst; @@ -1063,8 +1080,6 @@ struct rte_eventdev { /**< Functions exported by PMD */ struct rte_device *dev; /**< Device info. 
supplied by probing */ - const struct rte_eventdev_driver *driver; - /**< Driver for this device */ RTE_STD_C11 uint8_t attached : 1; @@ -1092,6 +1107,34 @@ rte_event_schedule(uint8_t dev_id) (*dev->schedule)(dev); } +static __rte_always_inline uint16_t +__rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id, + const struct rte_event ev[], uint16_t nb_events, + const event_enqueue_burst_t fn) +{ + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + +#ifdef RTE_LIBRTE_EVENTDEV_DEBUG + if (dev_id >= RTE_EVENT_MAX_DEVS || !rte_eventdevs[dev_id].attached) { + rte_errno = -EINVAL; + return 0; + } + + if (port_id >= dev->data->nb_ports) { + rte_errno = -EINVAL; + return 0; + } +#endif + /* + * Allow zero cost non burst mode routine invocation if application + * requests nb_events as const one + */ + if (nb_events == 1) + return (*dev->enqueue)(dev->data->ports[port_id], ev); + else + return fn(dev->data->ports[port_id], ev, nb_events); +} + /** * Enqueue a burst of events objects or an event object supplied in *rte_event* * structure on an event device designated by its *dev_id* through the event @@ -1135,30 +1178,108 @@ static inline uint16_t rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id, const struct rte_event ev[], uint16_t nb_events) { - struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; -#ifdef RTE_LIBRTE_EVENTDEV_DEBUG - if (dev_id >= RTE_EVENT_MAX_DEVS || !rte_eventdevs[dev_id].attached) { - rte_errno = -EINVAL; - return 0; - } + return __rte_event_enqueue_burst(dev_id, port_id, ev, nb_events, + dev->enqueue_burst); +} - if (port_id >= dev->data->nb_ports) { - rte_errno = -EINVAL; - return 0; - } -#endif +/** + * Enqueue a burst of events objects of operation type *RTE_EVENT_OP_NEW* on + * an event device designated by its *dev_id* through the event port specified + * by *port_id*. + * + * Provides the same functionality as rte_event_enqueue_burst(), expect that + * application can use this API when the all objects in the burst contains + * the enqueue operation of the type *RTE_EVENT_OP_NEW*. This specialized + * function can provide the additional hint to the PMD and optimize if possible. + * + * The rte_event_enqueue_new_burst() result is undefined if the enqueue burst + * has event object of operation type != RTE_EVENT_OP_NEW. + * + * @param dev_id + * The identifier of the device. + * @param port_id + * The identifier of the event port. + * @param ev + * Points to an array of *nb_events* objects of type *rte_event* structure + * which contain the event object enqueue operations to be processed. + * @param nb_events + * The number of event objects to enqueue, typically number of + * rte_event_port_enqueue_depth() available for this port. + * + * @return + * The number of event objects actually enqueued on the event device. The + * return value can be less than the value of the *nb_events* parameter when + * the event devices queue is full or if invalid parameters are specified in a + * *rte_event*. If the return value is less than *nb_events*, the remaining + * events at the end of ev[] are not consumed and the caller has to take care + * of them, and rte_errno is set accordingly. Possible errno values include: + * - -EINVAL The port ID is invalid, device ID is invalid, an event's queue + * ID is invalid, or an event's sched type doesn't match the + * capabilities of the destination queue. + * - -ENOSPC The event port was backpressured and unable to enqueue + * one or more events. 
This error code is only applicable to + * closed systems. + * @see rte_event_port_enqueue_depth() rte_event_enqueue_burst() + */ +static inline uint16_t +rte_event_enqueue_new_burst(uint8_t dev_id, uint8_t port_id, + const struct rte_event ev[], uint16_t nb_events) +{ + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; - /* - * Allow zero cost non burst mode routine invocation if application - * requests nb_events as const one - */ - if (nb_events == 1) - return (*dev->enqueue)( - dev->data->ports[port_id], ev); - else - return (*dev->enqueue_burst)( - dev->data->ports[port_id], ev, nb_events); + return __rte_event_enqueue_burst(dev_id, port_id, ev, nb_events, + dev->enqueue_new_burst); +} + +/** + * Enqueue a burst of events objects of operation type *RTE_EVENT_OP_FORWARD* + * on an event device designated by its *dev_id* through the event port + * specified by *port_id*. + * + * Provides the same functionality as rte_event_enqueue_burst(), expect that + * application can use this API when the all objects in the burst contains + * the enqueue operation of the type *RTE_EVENT_OP_FORWARD*. This specialized + * function can provide the additional hint to the PMD and optimize if possible. + * + * The rte_event_enqueue_new_burst() result is undefined if the enqueue burst + * has event object of operation type != RTE_EVENT_OP_FORWARD. + * + * @param dev_id + * The identifier of the device. + * @param port_id + * The identifier of the event port. + * @param ev + * Points to an array of *nb_events* objects of type *rte_event* structure + * which contain the event object enqueue operations to be processed. + * @param nb_events + * The number of event objects to enqueue, typically number of + * rte_event_port_enqueue_depth() available for this port. + * + * @return + * The number of event objects actually enqueued on the event device. The + * return value can be less than the value of the *nb_events* parameter when + * the event devices queue is full or if invalid parameters are specified in a + * *rte_event*. If the return value is less than *nb_events*, the remaining + * events at the end of ev[] are not consumed and the caller has to take care + * of them, and rte_errno is set accordingly. Possible errno values include: + * - -EINVAL The port ID is invalid, device ID is invalid, an event's queue + * ID is invalid, or an event's sched type doesn't match the + * capabilities of the destination queue. + * - -ENOSPC The event port was backpressured and unable to enqueue + * one or more events. This error code is only applicable to + * closed systems. + * @see rte_event_port_enqueue_depth() rte_event_enqueue_burst() + */ +static inline uint16_t +rte_event_enqueue_forward_burst(uint8_t dev_id, uint8_t port_id, + const struct rte_event ev[], uint16_t nb_events) +{ + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + + return __rte_event_enqueue_burst(dev_id, port_id, ev, nb_events, + dev->enqueue_forward_burst); } /** diff --git a/lib/librte_eventdev/rte_eventdev_pmd.h b/lib/librte_eventdev/rte_eventdev_pmd.h index 4005b3c9..3d72acf3 100644 --- a/lib/librte_eventdev/rte_eventdev_pmd.h +++ b/lib/librte_eventdev/rte_eventdev_pmd.h @@ -1,6 +1,6 @@ /* * - * Copyright(c) 2016 Cavium networks. All rights reserved. + * Copyright(c) 2016 Cavium, Inc. All rights reserved. 
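A sketch of the fast-path pattern the specialised enqueue variants above are meant for; dev_id, port_id, next_queue and the burst size are arbitrary here, and retry handling for a short enqueue is omitted:

#include <rte_common.h>
#include <rte_eventdev.h>

static void
worker_loop(uint8_t dev_id, uint8_t port_id, uint8_t next_queue)
{
	struct rte_event ev[32];
	uint16_t nb, i;

	while (1) {
		nb = rte_event_dequeue_burst(dev_id, port_id, ev,
				RTE_DIM(ev), 0);
		for (i = 0; i < nb; i++) {
			/* ... application processing of ev[i] ... */
			ev[i].op = RTE_EVENT_OP_FORWARD;
			ev[i].queue_id = next_queue;
		}
		/* every event carries OP_FORWARD, so the hinting variant
		 * is legal; for OP_NEW events use enqueue_new_burst() */
		if (nb != 0)
			rte_event_enqueue_forward_burst(dev_id, port_id,
					ev, nb);
	}
}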
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -12,7 +12,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -46,11 +46,10 @@ extern "C" { #include <string.h> +#include <rte_common.h> #include <rte_dev.h> -#include <rte_pci.h> -#include <rte_malloc.h> #include <rte_log.h> -#include <rte_common.h> +#include <rte_malloc.h> #include "rte_eventdev.h" @@ -87,60 +86,6 @@ extern "C" { #define RTE_EVENTDEV_DETACHED (0) #define RTE_EVENTDEV_ATTACHED (1) -/** - * Initialisation function of a event driver invoked for each matching - * event PCI device detected during the PCI probing phase. - * - * @param dev - * The dev pointer is the address of the *rte_eventdev* structure associated - * with the matching device and which has been [automatically] allocated in - * the *rte_event_devices* array. - * - * @return - * - 0: Success, the device is properly initialised by the driver. - * In particular, the driver MUST have set up the *dev_ops* pointer - * of the *dev* structure. - * - <0: Error code of the device initialisation failure. - */ -typedef int (*eventdev_init_t)(struct rte_eventdev *dev); - -/** - * Finalisation function of a driver invoked for each matching - * PCI device detected during the PCI closing phase. - * - * @param dev - * The dev pointer is the address of the *rte_eventdev* structure associated - * with the matching device and which has been [automatically] allocated in - * the *rte_event_devices* array. - * - * @return - * - 0: Success, the device is properly finalised by the driver. - * In particular, the driver MUST free the *dev_ops* pointer - * of the *dev* structure. - * - <0: Error code of the device initialisation failure. - */ -typedef int (*eventdev_uninit_t)(struct rte_eventdev *dev); - -/** - * The structure associated with a PMD driver. - * - * Each driver acts as a PCI driver and is represented by a generic - * *event_driver* structure that holds: - * - * - An *rte_pci_driver* structure (which must be the first field). - * - * - The *eventdev_init* function invoked for each matching PCI device. - * - * - The size of the private data to allocate for each matching device. - */ -struct rte_eventdev_driver { - struct rte_pci_driver pci_drv; /**< The PMD is also a PCI driver. */ - unsigned int dev_private_size; /**< Size of device private data. */ - - eventdev_init_t eventdev_init; /**< Device init function. */ - eventdev_uninit_t eventdev_uninit; /**< Device uninit function. */ -}; - /** Global structure used for maintaining state of allocated event devices */ struct rte_eventdev_global { uint8_t nb_devs; /**< Number of devices found */ @@ -550,48 +495,6 @@ rte_event_pmd_allocate(const char *name, int socket_id); int rte_event_pmd_release(struct rte_eventdev *eventdev); -/** - * Creates a new virtual event device and returns the pointer to that device. - * - * @param name - * PMD type name - * @param dev_private_size - * Size of event PMDs private data - * @param socket_id - * Socket to allocate resources on. - * - * @return - * - Eventdev pointer if device is successfully created. - * - NULL if device cannot be created. 
- */ -struct rte_eventdev * -rte_event_pmd_vdev_init(const char *name, size_t dev_private_size, - int socket_id); - -/** - * Destroy the given virtual event device - * - * @param name - * PMD type name - * @return - * - 0 on success, negative on error - */ -int -rte_event_pmd_vdev_uninit(const char *name); - -/** - * Wrapper for use by pci drivers as a .probe function to attach to a event - * interface. - */ -int rte_event_pmd_pci_probe(struct rte_pci_driver *pci_drv, - struct rte_pci_device *pci_dev); - -/** - * Wrapper for use by pci drivers as a .remove function to detach a event - * interface. - */ -int rte_event_pmd_pci_remove(struct rte_pci_device *pci_dev); - #ifdef __cplusplus } #endif diff --git a/lib/librte_eventdev/rte_eventdev_pmd_pci.h b/lib/librte_eventdev/rte_eventdev_pmd_pci.h new file mode 100644 index 00000000..b6bd7319 --- /dev/null +++ b/lib/librte_eventdev/rte_eventdev_pmd_pci.h @@ -0,0 +1,162 @@ +/* + * + * Copyright(c) 2016-2017 Cavium, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_EVENTDEV_PMD_PCI_H_ +#define _RTE_EVENTDEV_PMD_PCI_H_ + +/** @file + * RTE Eventdev PCI PMD APIs + * + * @note + * These API are from event PCI PMD only and user applications should not call + * them directly. + */ + + +#ifdef __cplusplus +extern "C" { +#endif + +#include <string.h> + +#include <rte_eal.h> +#include <rte_lcore.h> +#include <rte_pci.h> + +#include "rte_eventdev_pmd.h" + +typedef int (*eventdev_pmd_pci_callback_t)(struct rte_eventdev *dev); + +/** + * @internal + * Wrapper for use by pci drivers as a .probe function to attach to a event + * interface. 
+ */ +static inline int +rte_event_pmd_pci_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev, + size_t private_data_size, + eventdev_pmd_pci_callback_t devinit) +{ + struct rte_eventdev *eventdev; + + char eventdev_name[RTE_EVENTDEV_NAME_MAX_LEN]; + + int retval; + + if (devinit == NULL) + return -EINVAL; + + rte_pci_device_name(&pci_dev->addr, eventdev_name, + sizeof(eventdev_name)); + + eventdev = rte_event_pmd_allocate(eventdev_name, + pci_dev->device.numa_node); + if (eventdev == NULL) + return -ENOMEM; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eventdev->data->dev_private = + rte_zmalloc_socket( + "eventdev private structure", + private_data_size, + RTE_CACHE_LINE_SIZE, + rte_socket_id()); + + if (eventdev->data->dev_private == NULL) + rte_panic("Cannot allocate memzone for private " + "device data"); + } + + eventdev->dev = &pci_dev->device; + + /* Invoke PMD device initialization function */ + retval = devinit(eventdev); + if (retval == 0) + return 0; + + RTE_EDEV_LOG_ERR("driver %s: (vendor_id=0x%x device_id=0x%x)" + " failed", pci_drv->driver.name, + (unsigned int) pci_dev->id.vendor_id, + (unsigned int) pci_dev->id.device_id); + + rte_event_pmd_release(eventdev); + + return -ENXIO; +} + + +/** + * @internal + * Wrapper for use by pci drivers as a .remove function to detach a event + * interface. + */ +static inline int +rte_event_pmd_pci_remove(struct rte_pci_device *pci_dev, + eventdev_pmd_pci_callback_t devuninit) +{ + struct rte_eventdev *eventdev; + char eventdev_name[RTE_EVENTDEV_NAME_MAX_LEN]; + int ret = 0; + + if (pci_dev == NULL) + return -EINVAL; + + rte_pci_device_name(&pci_dev->addr, eventdev_name, + sizeof(eventdev_name)); + + eventdev = rte_event_pmd_get_named_dev(eventdev_name); + if (eventdev == NULL) + return -ENODEV; + + ret = rte_event_dev_close(eventdev->data->dev_id); + if (ret < 0) + return ret; + + /* Invoke PMD device un-init function */ + if (devuninit) + ret = devuninit(eventdev); + if (ret) + return ret; + + /* Free event device */ + rte_event_pmd_release(eventdev); + + eventdev->dev = NULL; + + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_EVENTDEV_PMD_PCI_H_ */ diff --git a/lib/librte_eventdev/rte_eventdev_pmd_vdev.h b/lib/librte_eventdev/rte_eventdev_pmd_vdev.h new file mode 100644 index 00000000..135e8b80 --- /dev/null +++ b/lib/librte_eventdev/rte_eventdev_pmd_vdev.h @@ -0,0 +1,134 @@ +/* + * + * Copyright(c) 2016-2017 Cavium, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
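A sketch of how a PCI event PMD might plug these wrappers into its rte_pci_driver; the my_* names, the private structure and the PCI ids below are hypothetical and not taken from any real driver:

#include <rte_pci.h>
#include <rte_eventdev_pmd_pci.h>

struct my_evdev_private { int dummy; };

static int my_evdev_init(struct rte_eventdev *dev)   { (void)dev; return 0; }
static int my_evdev_uninit(struct rte_eventdev *dev) { (void)dev; return 0; }

static int
my_evdev_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
{
	return rte_event_pmd_pci_probe(pci_drv, pci_dev,
			sizeof(struct my_evdev_private), my_evdev_init);
}

static int
my_evdev_pci_remove(struct rte_pci_device *pci_dev)
{
	return rte_event_pmd_pci_remove(pci_dev, my_evdev_uninit);
}

static const struct rte_pci_id my_evdev_pci_ids[] = {
	{ RTE_PCI_DEVICE(0x1234, 0x5678) }, /* placeholder vendor/device id */
	{ .vendor_id = 0 },
};

static struct rte_pci_driver my_evdev_pmd = {
	.id_table = my_evdev_pci_ids,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
	.probe = my_evdev_pci_probe,
	.remove = my_evdev_pci_remove,
};

RTE_PMD_REGISTER_PCI(event_my_pmd, my_evdev_pmd);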
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_EVENTDEV_PMD_VDEV_H_ +#define _RTE_EVENTDEV_PMD_VDEV_H_ + +/** @file + * RTE Eventdev VDEV PMD APIs + * + * @note + * These API are from event VDEV PMD only and user applications should not call + * them directly. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <string.h> + +#include <rte_debug.h> +#include <rte_eal.h> +#include <rte_vdev.h> + +#include "rte_eventdev_pmd.h" + +/** + * @internal + * Creates a new virtual event device and returns the pointer to that device. + * + * @param name + * PMD type name + * @param dev_private_size + * Size of event PMDs private data + * @param socket_id + * Socket to allocate resources on. + * + * @return + * - Eventdev pointer if device is successfully created. + * - NULL if device cannot be created. + */ +static inline struct rte_eventdev * +rte_event_pmd_vdev_init(const char *name, size_t dev_private_size, + int socket_id) +{ + + struct rte_eventdev *eventdev; + + /* Allocate device structure */ + eventdev = rte_event_pmd_allocate(name, socket_id); + if (eventdev == NULL) + return NULL; + + /* Allocate private device structure */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eventdev->data->dev_private = + rte_zmalloc_socket("eventdev device private", + dev_private_size, + RTE_CACHE_LINE_SIZE, + socket_id); + + if (eventdev->data->dev_private == NULL) + rte_panic("Cannot allocate memzone for private device" + " data"); + } + + return eventdev; +} + +/** + * @internal + * Destroy the given virtual event device + * + * @param name + * PMD type name + * @return + * - 0 on success, negative on error + */ +static inline int +rte_event_pmd_vdev_uninit(const char *name) +{ + int ret; + struct rte_eventdev *eventdev; + + if (name == NULL) + return -EINVAL; + + eventdev = rte_event_pmd_get_named_dev(name); + if (eventdev == NULL) + return -ENODEV; + + ret = rte_event_dev_close(eventdev->data->dev_id); + if (ret < 0) + return ret; + + /* Free the event device */ + rte_event_pmd_release(eventdev); + + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_EVENTDEV_PMD_VDEV_H_ */ diff --git a/lib/librte_eventdev/rte_eventdev_version.map b/lib/librte_eventdev/rte_eventdev_version.map index 1fa6b333..4c48e5f0 100644 --- a/lib/librte_eventdev/rte_eventdev_version.map +++ b/lib/librte_eventdev/rte_eventdev_version.map @@ -42,3 +42,12 @@ DPDK_17.05 { local: *; }; + +DPDK_17.08 { + global: + + rte_event_ring_create; + rte_event_ring_free; + rte_event_ring_init; + rte_event_ring_lookup; +} DPDK_17.05; diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile new file mode 100644 index 00000000..747eeec9 --- /dev/null +++ b/lib/librte_gro/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2017 Intel Corporation. All rights reserved. +# All rights reserved. 
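A matching sketch for a virtual (vdev) event PMD built on the helpers above; the sw_evdev_* names and the private structure are hypothetical:

#include <errno.h>
#include <rte_lcore.h>
#include <rte_vdev.h>
#include <rte_eventdev_pmd_vdev.h>

struct sw_evdev_private { int dummy; };

static int
sw_evdev_probe(struct rte_vdev_device *vdev)
{
	struct rte_eventdev *dev;

	dev = rte_event_pmd_vdev_init(rte_vdev_device_name(vdev),
			sizeof(struct sw_evdev_private), rte_socket_id());
	if (dev == NULL)
		return -EFAULT;

	/* a real PMD would set dev->dev_ops plus the enqueue/dequeue
	 * function pointers here */
	return 0;
}

static int
sw_evdev_remove(struct rte_vdev_device *vdev)
{
	return rte_event_pmd_vdev_uninit(rte_vdev_device_name(vdev));
}

static struct rte_vdev_driver sw_evdev_drv = {
	.probe = sw_evdev_probe,
	.remove = sw_evdev_remove,
};

RTE_PMD_REGISTER_VDEV(event_sw_example, sw_evdev_drv);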
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_gro.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := rte_gro_version.map + +LIBABIVER := 1 + +# source files +SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c +SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c new file mode 100644 index 00000000..61a04232 --- /dev/null +++ b/lib/librte_gro/gro_tcp4.c @@ -0,0 +1,505 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_ip.h> +#include <rte_tcp.h> + +#include "gro_tcp4.h" + +void * +gro_tcp4_tbl_create(uint16_t socket_id, + uint16_t max_flow_num, + uint16_t max_item_per_flow) +{ + struct gro_tcp4_tbl *tbl; + size_t size; + uint32_t entries_num, i; + + entries_num = max_flow_num * max_item_per_flow; + entries_num = RTE_MIN(entries_num, GRO_TCP4_TBL_MAX_ITEM_NUM); + + if (entries_num == 0) + return NULL; + + tbl = rte_zmalloc_socket(__func__, + sizeof(struct gro_tcp4_tbl), + RTE_CACHE_LINE_SIZE, + socket_id); + if (tbl == NULL) + return NULL; + + size = sizeof(struct gro_tcp4_item) * entries_num; + tbl->items = rte_zmalloc_socket(__func__, + size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (tbl->items == NULL) { + rte_free(tbl); + return NULL; + } + tbl->max_item_num = entries_num; + + size = sizeof(struct gro_tcp4_key) * entries_num; + tbl->keys = rte_zmalloc_socket(__func__, + size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (tbl->keys == NULL) { + rte_free(tbl->items); + rte_free(tbl); + return NULL; + } + /* INVALID_ARRAY_INDEX indicates empty key */ + for (i = 0; i < entries_num; i++) + tbl->keys[i].start_index = INVALID_ARRAY_INDEX; + tbl->max_key_num = entries_num; + + return tbl; +} + +void +gro_tcp4_tbl_destroy(void *tbl) +{ + struct gro_tcp4_tbl *tcp_tbl = tbl; + + if (tcp_tbl) { + rte_free(tcp_tbl->items); + rte_free(tcp_tbl->keys); + } + rte_free(tcp_tbl); +} + +/* + * merge two TCP/IPv4 packets without updating checksums. + * If cmp is larger than 0, append the new packet to the + * original packet. Otherwise, pre-pend the new packet to + * the original packet. 
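A small illustration of the sizing rule in gro_tcp4_tbl_create() above; the flow and item counts are arbitrary, and since gro_tcp4.h is an internal header only GRO-internal code would call this directly:

#include <rte_lcore.h>
#include "gro_tcp4.h"

static void *
tcp4_tbl_example(void)
{
	/* 4 flows x 256 items per flow gives 1024 entries for both the
	 * item and key arrays, clamped to GRO_TCP4_TBL_MAX_ITEM_NUM */
	return gro_tcp4_tbl_create(rte_socket_id(), 4, 256);
}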
+ */ +static inline int +merge_two_tcp4_packets(struct gro_tcp4_item *item_src, + struct rte_mbuf *pkt, + uint16_t ip_id, + uint32_t sent_seq, + int cmp) +{ + struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; + uint16_t tcp_datalen; + + if (cmp > 0) { + pkt_head = item_src->firstseg; + pkt_tail = pkt; + } else { + pkt_head = pkt; + pkt_tail = item_src->firstseg; + } + + /* check if the packet length will be beyond the max value */ + tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len - + pkt_tail->l3_len - pkt_tail->l4_len; + if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen > + TCP4_MAX_L3_LENGTH) + return 0; + + /* remove packet header for the tail packet */ + rte_pktmbuf_adj(pkt_tail, + pkt_tail->l2_len + + pkt_tail->l3_len + + pkt_tail->l4_len); + + /* chain two packets together */ + if (cmp > 0) { + item_src->lastseg->next = pkt; + item_src->lastseg = rte_pktmbuf_lastseg(pkt); + /* update IP ID to the larger value */ + item_src->ip_id = ip_id; + } else { + lastseg = rte_pktmbuf_lastseg(pkt); + lastseg->next = item_src->firstseg; + item_src->firstseg = pkt; + /* update sent_seq to the smaller value */ + item_src->sent_seq = sent_seq; + } + item_src->nb_merged++; + + /* update mbuf metadata for the merged packet */ + pkt_head->nb_segs += pkt_tail->nb_segs; + pkt_head->pkt_len += pkt_tail->pkt_len; + + return 1; +} + +static inline int +check_seq_option(struct gro_tcp4_item *item, + struct tcp_hdr *tcp_hdr, + uint16_t tcp_hl, + uint16_t tcp_dl, + uint16_t ip_id, + uint32_t sent_seq) +{ + struct rte_mbuf *pkt0 = item->firstseg; + struct ipv4_hdr *ipv4_hdr0; + struct tcp_hdr *tcp_hdr0; + uint16_t tcp_hl0, tcp_dl0; + uint16_t len; + + ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) + + pkt0->l2_len); + tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len); + tcp_hl0 = pkt0->l4_len; + + /* check if TCP option fields equal. If not, return 0. 
*/ + len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr); + if ((tcp_hl != tcp_hl0) || + ((len > 0) && (memcmp(tcp_hdr + 1, + tcp_hdr0 + 1, + len) != 0))) + return 0; + + /* check if the two packets are neighbors */ + tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0; + if ((sent_seq == (item->sent_seq + tcp_dl0)) && + (ip_id == (item->ip_id + 1))) + /* append the new packet */ + return 1; + else if (((sent_seq + tcp_dl) == item->sent_seq) && + ((ip_id + item->nb_merged) == item->ip_id)) + /* pre-pend the new packet */ + return -1; + else + return 0; +} + +static inline uint32_t +find_an_empty_item(struct gro_tcp4_tbl *tbl) +{ + uint32_t i; + uint32_t max_item_num = tbl->max_item_num; + + for (i = 0; i < max_item_num; i++) + if (tbl->items[i].firstseg == NULL) + return i; + return INVALID_ARRAY_INDEX; +} + +static inline uint32_t +find_an_empty_key(struct gro_tcp4_tbl *tbl) +{ + uint32_t i; + uint32_t max_key_num = tbl->max_key_num; + + for (i = 0; i < max_key_num; i++) + if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX) + return i; + return INVALID_ARRAY_INDEX; +} + +static inline uint32_t +insert_new_item(struct gro_tcp4_tbl *tbl, + struct rte_mbuf *pkt, + uint16_t ip_id, + uint32_t sent_seq, + uint32_t prev_idx, + uint64_t start_time) +{ + uint32_t item_idx; + + item_idx = find_an_empty_item(tbl); + if (item_idx == INVALID_ARRAY_INDEX) + return INVALID_ARRAY_INDEX; + + tbl->items[item_idx].firstseg = pkt; + tbl->items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt); + tbl->items[item_idx].start_time = start_time; + tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX; + tbl->items[item_idx].sent_seq = sent_seq; + tbl->items[item_idx].ip_id = ip_id; + tbl->items[item_idx].nb_merged = 1; + tbl->item_num++; + + /* if the previous packet exists, chain the new one with it */ + if (prev_idx != INVALID_ARRAY_INDEX) { + tbl->items[item_idx].next_pkt_idx = + tbl->items[prev_idx].next_pkt_idx; + tbl->items[prev_idx].next_pkt_idx = item_idx; + } + + return item_idx; +} + +static inline uint32_t +delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx, + uint32_t prev_item_idx) +{ + uint32_t next_idx = tbl->items[item_idx].next_pkt_idx; + + /* set NULL to firstseg to indicate it's an empty item */ + tbl->items[item_idx].firstseg = NULL; + tbl->item_num--; + if (prev_item_idx != INVALID_ARRAY_INDEX) + tbl->items[prev_item_idx].next_pkt_idx = next_idx; + + return next_idx; +} + +static inline uint32_t +insert_new_key(struct gro_tcp4_tbl *tbl, + struct tcp4_key *key_src, + uint32_t item_idx) +{ + struct tcp4_key *key_dst; + uint32_t key_idx; + + key_idx = find_an_empty_key(tbl); + if (key_idx == INVALID_ARRAY_INDEX) + return INVALID_ARRAY_INDEX; + + key_dst = &(tbl->keys[key_idx].key); + + ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr)); + ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr)); + key_dst->ip_src_addr = key_src->ip_src_addr; + key_dst->ip_dst_addr = key_src->ip_dst_addr; + key_dst->recv_ack = key_src->recv_ack; + key_dst->src_port = key_src->src_port; + key_dst->dst_port = key_src->dst_port; + + /* non-INVALID_ARRAY_INDEX value indicates this key is valid */ + tbl->keys[key_idx].start_index = item_idx; + tbl->key_num++; + + return key_idx; +} + +static inline int +is_same_key(struct tcp4_key k1, struct tcp4_key k2) +{ + if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0) + return 0; + + if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0) + return 0; + + return ((k1.ip_src_addr == k2.ip_src_addr) && + (k1.ip_dst_addr == 
k2.ip_dst_addr) && + (k1.recv_ack == k2.recv_ack) && + (k1.src_port == k2.src_port) && + (k1.dst_port == k2.dst_port)); +} + +/* + * update packet length for the flushed packet. + */ +static inline void +update_header(struct gro_tcp4_item *item) +{ + struct ipv4_hdr *ipv4_hdr; + struct rte_mbuf *pkt = item->firstseg; + + ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + + pkt->l2_len); + ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len - + pkt->l2_len); +} + +int32_t +gro_tcp4_reassemble(struct rte_mbuf *pkt, + struct gro_tcp4_tbl *tbl, + uint64_t start_time) +{ + struct ether_hdr *eth_hdr; + struct ipv4_hdr *ipv4_hdr; + struct tcp_hdr *tcp_hdr; + uint32_t sent_seq; + uint16_t tcp_dl, ip_id; + + struct tcp4_key key; + uint32_t cur_idx, prev_idx, item_idx; + uint32_t i, max_key_num; + int cmp; + + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len); + tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len); + + /* + * if FIN, SYN, RST, PSH, URG, ECE or + * CWR is set, return immediately. + */ + if (tcp_hdr->tcp_flags != TCP_ACK_FLAG) + return -1; + /* if payload length is 0, return immediately */ + tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len - + pkt->l4_len; + if (tcp_dl == 0) + return -1; + + ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id); + sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); + + ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr)); + ether_addr_copy(&(eth_hdr->d_addr), &(key.eth_daddr)); + key.ip_src_addr = ipv4_hdr->src_addr; + key.ip_dst_addr = ipv4_hdr->dst_addr; + key.src_port = tcp_hdr->src_port; + key.dst_port = tcp_hdr->dst_port; + key.recv_ack = tcp_hdr->recv_ack; + + /* search for a key */ + max_key_num = tbl->max_key_num; + for (i = 0; i < max_key_num; i++) { + if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) && + is_same_key(tbl->keys[i].key, key)) + break; + } + + /* can't find a key, so insert a new key and a new item. */ + if (i == tbl->max_key_num) { + item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq, + INVALID_ARRAY_INDEX, start_time); + if (item_idx == INVALID_ARRAY_INDEX) + return -1; + if (insert_new_key(tbl, &key, item_idx) == + INVALID_ARRAY_INDEX) { + /* + * fail to insert a new key, so + * delete the inserted item + */ + delete_item(tbl, item_idx, INVALID_ARRAY_INDEX); + return -1; + } + return 0; + } + + /* traverse all packets in the item group to find one to merge */ + cur_idx = tbl->keys[i].start_index; + prev_idx = cur_idx; + do { + cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr, + pkt->l4_len, tcp_dl, ip_id, sent_seq); + if (cmp) { + if (merge_two_tcp4_packets(&(tbl->items[cur_idx]), + pkt, ip_id, + sent_seq, cmp)) + return 1; + /* + * fail to merge two packets since the packet + * length will be greater than the max value. + * So insert the packet into the item group. + */ + if (insert_new_item(tbl, pkt, ip_id, sent_seq, + prev_idx, start_time) == + INVALID_ARRAY_INDEX) + return -1; + return 0; + } + prev_idx = cur_idx; + cur_idx = tbl->items[cur_idx].next_pkt_idx; + } while (cur_idx != INVALID_ARRAY_INDEX); + + /* + * can't find a packet in the item group to merge, + * so insert the packet into the item group. 
+ */ + if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx, + start_time) == INVALID_ARRAY_INDEX) + return -1; + + return 0; +} + +uint16_t +gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, + uint64_t flush_timestamp, + struct rte_mbuf **out, + uint16_t nb_out) +{ + uint16_t k = 0; + uint32_t i, j; + uint32_t max_key_num = tbl->max_key_num; + + for (i = 0; i < max_key_num; i++) { + /* all keys have been checked, return immediately */ + if (tbl->key_num == 0) + return k; + + j = tbl->keys[i].start_index; + while (j != INVALID_ARRAY_INDEX) { + if (tbl->items[j].start_time <= flush_timestamp) { + out[k++] = tbl->items[j].firstseg; + if (tbl->items[j].nb_merged > 1) + update_header(&(tbl->items[j])); + /* + * delete the item and get + * the next packet index + */ + j = delete_item(tbl, j, + INVALID_ARRAY_INDEX); + + /* + * delete the key as all of + * packets are flushed + */ + if (j == INVALID_ARRAY_INDEX) { + tbl->keys[i].start_index = + INVALID_ARRAY_INDEX; + tbl->key_num--; + } else + /* update start_index of the key */ + tbl->keys[i].start_index = j; + + if (k == nb_out) + return k; + } else + /* + * left packets of this key won't be + * timeout, so go to check other keys. + */ + break; + } + } + return k; +} + +uint32_t +gro_tcp4_tbl_pkt_count(void *tbl) +{ + struct gro_tcp4_tbl *gro_tbl = tbl; + + if (gro_tbl) + return gro_tbl->item_num; + + return 0; +} diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h new file mode 100644 index 00000000..f41dcee3 --- /dev/null +++ b/lib/librte_gro/gro_tcp4.h @@ -0,0 +1,210 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _GRO_TCP4_H_ +#define _GRO_TCP4_H_ + +#define INVALID_ARRAY_INDEX 0xffffffffUL +#define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) + +/* + * the max L3 length of a TCP/IPv4 packet. The L3 length + * is the sum of ipv4 header, tcp header and L4 payload. 
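A sketch of the return-value handling for gro_tcp4_reassemble(), mirroring what rte_gro_reassemble_burst() does further below; the gro_tcp4_* calls are internal and applications are expected to use the public rte_gro_* API, so this helper is illustrative only:

#include <rte_cycles.h>
#include <rte_mbuf.h>
#include "gro_tcp4.h"

static uint16_t
tcp4_gro_cycle(struct gro_tcp4_tbl *tbl, struct rte_mbuf **pkts,
		uint16_t nb_pkts, struct rte_mbuf **out, uint16_t nb_out)
{
	uint64_t now = rte_rdtsc();
	uint16_t i, k = 0;
	int32_t ret;

	for (i = 0; i < nb_pkts; i++) {
		ret = gro_tcp4_reassemble(pkts[i], tbl, now);
		if (ret < 0 && k < nb_out)
			/* not GRO-able (control flags set, no payload, or
			 * table full): pass the mbuf through unchanged */
			out[k++] = pkts[i];
		/* ret > 0: merged into an existing item, mbuf now chained;
		 * ret == 0: stored in the table until flushed */
	}

	/* flush everything inserted at or before 'now' */
	k += gro_tcp4_tbl_timeout_flush(tbl, now, out + k, nb_out - k);
	return k;
}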
+ */ +#define TCP4_MAX_L3_LENGTH UINT16_MAX + +/* criteria of mergeing packets */ +struct tcp4_key { + struct ether_addr eth_saddr; + struct ether_addr eth_daddr; + uint32_t ip_src_addr; + uint32_t ip_dst_addr; + + uint32_t recv_ack; + uint16_t src_port; + uint16_t dst_port; +}; + +struct gro_tcp4_key { + struct tcp4_key key; + /* + * the index of the first packet in the item group. + * If the value is INVALID_ARRAY_INDEX, it means + * the key is empty. + */ + uint32_t start_index; +}; + +struct gro_tcp4_item { + /* + * first segment of the packet. If the value + * is NULL, it means the item is empty. + */ + struct rte_mbuf *firstseg; + /* last segment of the packet */ + struct rte_mbuf *lastseg; + /* + * the time when the first packet is inserted + * into the table. If a packet in the table is + * merged with an incoming packet, this value + * won't be updated. We set this value only + * when the first packet is inserted into the + * table. + */ + uint64_t start_time; + /* + * we use next_pkt_idx to chain the packets that + * have same key value but can't be merged together. + */ + uint32_t next_pkt_idx; + /* the sequence number of the packet */ + uint32_t sent_seq; + /* the IP ID of the packet */ + uint16_t ip_id; + /* the number of merged packets */ + uint16_t nb_merged; +}; + +/* + * TCP/IPv4 reassembly table structure. + */ +struct gro_tcp4_tbl { + /* item array */ + struct gro_tcp4_item *items; + /* key array */ + struct gro_tcp4_key *keys; + /* current item number */ + uint32_t item_num; + /* current key num */ + uint32_t key_num; + /* item array size */ + uint32_t max_item_num; + /* key array size */ + uint32_t max_key_num; +}; + +/** + * This function creates a TCP/IPv4 reassembly table. + * + * @param socket_id + * socket index for allocating TCP/IPv4 reassemblt table + * @param max_flow_num + * the maximum number of flows in the TCP/IPv4 GRO table + * @param max_item_per_flow + * the maximum packet number per flow. + * + * @return + * if create successfully, return a pointer which points to the + * created TCP/IPv4 GRO table. Otherwise, return NULL. + */ +void *gro_tcp4_tbl_create(uint16_t socket_id, + uint16_t max_flow_num, + uint16_t max_item_per_flow); + +/** + * This function destroys a TCP/IPv4 reassembly table. + * + * @param tbl + * a pointer points to the TCP/IPv4 reassembly table. + */ +void gro_tcp4_tbl_destroy(void *tbl); + +/** + * This function searches for a packet in the TCP/IPv4 reassembly table + * to merge with the inputted one. To merge two packets is to chain them + * together and update packet headers. Packets, whose SYN, FIN, RST, PSH + * CWR, ECE or URG bit is set, are returned immediately. Packets which + * only have packet headers (i.e. without data) are also returned + * immediately. Otherwise, the packet is either merged, or inserted into + * the table. Besides, if there is no available space to insert the + * packet, this function returns immediately too. + * + * This function assumes the inputted packet is with correct IPv4 and + * TCP checksums. And if two packets are merged, it won't re-calculate + * IPv4 and TCP checksums. Besides, if the inputted packet is IP + * fragmented, it assumes the packet is complete (with TCP header). + * + * @param pkt + * packet to reassemble. + * @param tbl + * a pointer that points to a TCP/IPv4 reassembly table. 
+ * @start_time + * the start time that the packet is inserted into the table + * + * @return + * if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE + * or URG bit is set, or there is no available space in the table to + * insert a new item or a new key, return a negative value. If the + * packet is merged successfully, return an positive value. If the + * packet is inserted into the table, return 0. + */ +int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt, + struct gro_tcp4_tbl *tbl, + uint64_t start_time); + +/** + * This function flushes timeout packets in a TCP/IPv4 reassembly table + * to applications, and without updating checksums for merged packets. + * The max number of flushed timeout packets is the element number of + * the array which is used to keep flushed packets. + * + * @param tbl + * a pointer that points to a TCP GRO table. + * @param flush_timestamp + * this function flushes packets which are inserted into the table + * before or at the flush_timestamp. + * @param out + * pointer array which is used to keep flushed packets. + * @param nb_out + * the element number of out. It's also the max number of timeout + * packets that can be flushed finally. + * + * @return + * the number of packets that are returned. + */ +uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, + uint64_t flush_timestamp, + struct rte_mbuf **out, + uint16_t nb_out); + +/** + * This function returns the number of the packets in a TCP/IPv4 + * reassembly table. + * + * @param tbl + * pointer points to a TCP/IPv4 reassembly table. + * + * @return + * the number of packets in the table + */ +uint32_t gro_tcp4_tbl_pkt_count(void *tbl); +#endif diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c new file mode 100644 index 00000000..7853246a --- /dev/null +++ b/lib/librte_gro/rte_gro.c @@ -0,0 +1,278 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> + +#include "rte_gro.h" +#include "gro_tcp4.h" + +typedef void *(*gro_tbl_create_fn)(uint16_t socket_id, + uint16_t max_flow_num, + uint16_t max_item_per_flow); +typedef void (*gro_tbl_destroy_fn)(void *tbl); +typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl); + +static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = { + gro_tcp4_tbl_create, NULL}; +static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = { + gro_tcp4_tbl_destroy, NULL}; +static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = { + gro_tcp4_tbl_pkt_count, NULL}; + +/* + * GRO context structure, which is used to merge packets. It keeps + * many reassembly tables of desired GRO types. Applications need to + * create GRO context objects before using rte_gro_reassemble to + * perform GRO. + */ +struct gro_ctx { + /* GRO types to perform */ + uint64_t gro_types; + /* reassembly tables */ + void *tbls[RTE_GRO_TYPE_MAX_NUM]; +}; + +void * +rte_gro_ctx_create(const struct rte_gro_param *param) +{ + struct gro_ctx *gro_ctx; + gro_tbl_create_fn create_tbl_fn; + uint64_t gro_type_flag = 0; + uint64_t gro_types = 0; + uint8_t i; + + gro_ctx = rte_zmalloc_socket(__func__, + sizeof(struct gro_ctx), + RTE_CACHE_LINE_SIZE, + param->socket_id); + if (gro_ctx == NULL) + return NULL; + + for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) { + gro_type_flag = 1ULL << i; + if ((param->gro_types & gro_type_flag) == 0) + continue; + + create_tbl_fn = tbl_create_fn[i]; + if (create_tbl_fn == NULL) + continue; + + gro_ctx->tbls[i] = create_tbl_fn(param->socket_id, + param->max_flow_num, + param->max_item_per_flow); + if (gro_ctx->tbls[i] == NULL) { + /* destroy all created tables */ + gro_ctx->gro_types = gro_types; + rte_gro_ctx_destroy(gro_ctx); + return NULL; + } + gro_types |= gro_type_flag; + } + gro_ctx->gro_types = param->gro_types; + + return gro_ctx; +} + +void +rte_gro_ctx_destroy(void *ctx) +{ + gro_tbl_destroy_fn destroy_tbl_fn; + struct gro_ctx *gro_ctx = ctx; + uint64_t gro_type_flag; + uint8_t i; + + if (gro_ctx == NULL) + return; + for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) { + gro_type_flag = 1ULL << i; + if ((gro_ctx->gro_types & gro_type_flag) == 0) + continue; + destroy_tbl_fn = tbl_destroy_fn[i]; + if (destroy_tbl_fn) + destroy_tbl_fn(gro_ctx->tbls[i]); + } + rte_free(gro_ctx); +} + +uint16_t +rte_gro_reassemble_burst(struct rte_mbuf **pkts, + uint16_t nb_pkts, + const struct rte_gro_param *param) +{ + uint16_t i; + uint16_t nb_after_gro = nb_pkts; + uint32_t item_num; + + /* allocate a reassembly table for TCP/IPv4 GRO */ + struct gro_tcp4_tbl tcp_tbl; + struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM]; + struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} }; + + struct rte_mbuf *unprocess_pkts[nb_pkts]; + uint16_t unprocess_num = 0; + int32_t ret; + uint64_t current_time; + + if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0) + return nb_pkts; + + /* get the actual number of packets */ + item_num = RTE_MIN(nb_pkts, (param->max_flow_num * + param->max_item_per_flow)); + item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM); + + for (i = 0; i < item_num; i++) + tcp_keys[i].start_index = INVALID_ARRAY_INDEX; + + tcp_tbl.keys = tcp_keys; + tcp_tbl.items = tcp_items; + tcp_tbl.key_num = 0; + tcp_tbl.item_num = 0; + tcp_tbl.max_key_num = item_num; + tcp_tbl.max_item_num = item_num; + + current_time = rte_rdtsc(); + + for (i = 0; i < nb_pkts; i++) { + if ((pkts[i]->packet_type & 
(RTE_PTYPE_L3_IPV4 | + RTE_PTYPE_L4_TCP)) == + (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) { + ret = gro_tcp4_reassemble(pkts[i], + &tcp_tbl, + current_time); + if (ret > 0) + /* merge successfully */ + nb_after_gro--; + else if (ret < 0) { + unprocess_pkts[unprocess_num++] = + pkts[i]; + } + } else + unprocess_pkts[unprocess_num++] = pkts[i]; + } + + /* re-arrange GROed packets */ + if (nb_after_gro < nb_pkts) { + i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time, + pkts, nb_pkts); + if (unprocess_num > 0) { + memcpy(&pkts[i], unprocess_pkts, + sizeof(struct rte_mbuf *) * + unprocess_num); + } + } + + return nb_after_gro; +} + +uint16_t +rte_gro_reassemble(struct rte_mbuf **pkts, + uint16_t nb_pkts, + void *ctx) +{ + uint16_t i, unprocess_num = 0; + struct rte_mbuf *unprocess_pkts[nb_pkts]; + struct gro_ctx *gro_ctx = ctx; + uint64_t current_time; + + if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0) + return nb_pkts; + + current_time = rte_rdtsc(); + + for (i = 0; i < nb_pkts; i++) { + if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 | + RTE_PTYPE_L4_TCP)) == + (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) { + if (gro_tcp4_reassemble(pkts[i], + gro_ctx->tbls + [RTE_GRO_TCP_IPV4_INDEX], + current_time) < 0) + unprocess_pkts[unprocess_num++] = pkts[i]; + } else + unprocess_pkts[unprocess_num++] = pkts[i]; + } + if (unprocess_num > 0) { + memcpy(pkts, unprocess_pkts, + sizeof(struct rte_mbuf *) * + unprocess_num); + } + + return unprocess_num; +} + +uint16_t +rte_gro_timeout_flush(void *ctx, + uint64_t timeout_cycles, + uint64_t gro_types, + struct rte_mbuf **out, + uint16_t max_nb_out) +{ + struct gro_ctx *gro_ctx = ctx; + uint64_t flush_timestamp; + + gro_types = gro_types & gro_ctx->gro_types; + flush_timestamp = rte_rdtsc() - timeout_cycles; + + if (gro_types & RTE_GRO_TCP_IPV4) { + return gro_tcp4_tbl_timeout_flush( + gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX], + flush_timestamp, + out, max_nb_out); + } + return 0; +} + +uint64_t +rte_gro_get_pkt_count(void *ctx) +{ + struct gro_ctx *gro_ctx = ctx; + gro_tbl_pkt_count_fn pkt_count_fn; + uint64_t item_num = 0; + uint64_t gro_type_flag; + uint8_t i; + + for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) { + gro_type_flag = 1ULL << i; + if ((gro_ctx->gro_types & gro_type_flag) == 0) + continue; + + pkt_count_fn = tbl_pkt_count_fn[i]; + if (pkt_count_fn == NULL) + continue; + item_num += pkt_count_fn(gro_ctx->tbls[i]); + } + return item_num; +} diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h new file mode 100644 index 00000000..d57e0c5f --- /dev/null +++ b/lib/librte_gro/rte_gro.h @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
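
A hedged sketch of how an application might drive this "heavyweight" mode from a polling loop follows. The port and queue numbers, table sizing, 10 ms timeout and quit flag are illustrative, and partial-TX handling is omitted.

    #include <rte_ethdev.h>
    #include <rte_cycles.h>
    #include <rte_lcore.h>
    #include <rte_gro.h>

    #define BURST 32

    extern volatile int quit;                       /* hypothetical stop flag */

    static void
    gro_rx_loop(uint8_t port, uint16_t queue)
    {
        struct rte_gro_param param = {
                .gro_types = RTE_GRO_TCP_IPV4,
                .max_flow_num = 1024,
                .max_item_per_flow = 32,
                .socket_id = rte_socket_id(),
        };
        void *ctx = rte_gro_ctx_create(&param);
        uint64_t timeout = rte_get_tsc_hz() / 100;  /* ~10 ms in TSC cycles */
        struct rte_mbuf *pkts[BURST], *flushed[BURST];

        if (ctx == NULL)
                return;

        while (!quit) {
                uint16_t nb_rx = rte_eth_rx_burst(port, queue, pkts, BURST);
                /* unmergeable packets are left at the front of pkts[] */
                uint16_t nb_left = rte_gro_reassemble(pkts, nb_rx, ctx);

                rte_eth_tx_burst(port, queue, pkts, nb_left);

                /* drain whatever stayed in the tables longer than 'timeout' */
                uint16_t nb_fl = rte_gro_timeout_flush(ctx, timeout,
                                RTE_GRO_TCP_IPV4, flushed, BURST);
                rte_eth_tx_burst(port, queue, flushed, nb_fl);
        }
        rte_gro_ctx_destroy(ctx);
    }
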
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_GRO_H_ +#define _RTE_GRO_H_ + +/** + * @file + * Interface to GRO library + */ + +#include <stdint.h> +#include <rte_mbuf.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_GRO_MAX_BURST_ITEM_NUM 128U +/**< the max number of packets that rte_gro_reassemble_burst() + * can process in each invocation. + */ +#define RTE_GRO_TYPE_MAX_NUM 64 +/**< the max number of supported GRO types */ +#define RTE_GRO_TYPE_SUPPORT_NUM 1 +/**< the number of currently supported GRO types */ + +#define RTE_GRO_TCP_IPV4_INDEX 0 +#define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX) +/**< TCP/IPv4 GRO flag */ + +/** + * A structure which is used to create GRO context objects or tell + * rte_gro_reassemble_burst() what reassembly rules are demanded. + */ +struct rte_gro_param { + uint64_t gro_types; + /**< desired GRO types */ + uint16_t max_flow_num; + /**< max flow number */ + uint16_t max_item_per_flow; + /**< max packet number per flow */ + uint16_t socket_id; + /**< socket index for allocating GRO related data structures, + * like reassembly tables. When use rte_gro_reassemble_burst(), + * applications don't need to set this value. + */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * This function create a GRO context object, which is used to merge + * packets in rte_gro_reassemble(). + * + * @param param + * applications use it to pass needed parameters to create a GRO + * context object. + * + * @return + * if create successfully, return a pointer which points to the GRO + * context object. Otherwise, return NULL. + */ +void *rte_gro_ctx_create(const struct rte_gro_param *param); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * This function destroys a GRO context object. + * + * @param ctx + * pointer points to a GRO context object. + */ +void rte_gro_ctx_destroy(void *ctx); + +/** + * This is one of the main reassembly APIs, which merges numbers of + * packets at a time. It assumes that all inputted packets are with + * correct checksums. That is, applications should guarantee all + * inputted packets are correct. Besides, it doesn't re-calculate + * checksums for merged packets. If inputted packets are IP fragmented, + * this function assumes them are complete (i.e. with L4 header). After + * finishing processing, it returns all GROed packets to applications + * immediately. + * + * @param pkts + * a pointer array which points to the packets to reassemble. Besides, + * it keeps mbuf addresses for the GROed packets. + * @param nb_pkts + * the number of packets to reassemble. + * @param param + * applications use it to tell rte_gro_reassemble_burst() what rules + * are demanded. 
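
For completeness, the lightweight path needs no context object at all: rte_gro_reassemble_burst() builds a temporary table on the stack (as rte_gro.c above shows), merges one burst and returns everything at once. A minimal sketch with illustrative sizing:

    #include <rte_mbuf.h>
    #include <rte_gro.h>

    static uint16_t
    gro_one_burst(struct rte_mbuf **pkts, uint16_t nb_rx)
    {
        const struct rte_gro_param param = {
                .gro_types = RTE_GRO_TCP_IPV4,
                .max_flow_num = 16,
                .max_item_per_flow = 8,
                /* socket_id is not needed in this mode, per the comment above */
        };

        /* on return, pkts[] starts with the merged and unmerged packets;
         * the return value is how many of them are valid */
        return rte_gro_reassemble_burst(pkts, nb_rx, &param);
    }
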
+ * + * @return + * the number of packets after been GROed. If no packets are merged, + * the returned value is nb_pkts. + */ +uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts, + uint16_t nb_pkts, + const struct rte_gro_param *param); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Reassembly function, which tries to merge inputted packets with + * the packets in the reassembly tables of a given GRO context. This + * function assumes all inputted packets are with correct checksums. + * And it won't update checksums if two packets are merged. Besides, + * if inputted packets are IP fragmented, this function assumes they + * are complete packets (i.e. with L4 header). + * + * If the inputted packets don't have data or are with unsupported GRO + * types etc., they won't be processed and are returned to applications. + * Otherwise, the inputted packets are either merged or inserted into + * the table. If applications want get packets in the table, they need + * to call flush API. + * + * @param pkts + * packet to reassemble. Besides, after this function finishes, it + * keeps the unprocessed packets (e.g. without data or unsupported + * GRO types). + * @param nb_pkts + * the number of packets to reassemble. + * @param ctx + * a pointer points to a GRO context object. + * + * @return + * return the number of unprocessed packets (e.g. without data or + * unsupported GRO types). If all packets are processed (merged or + * inserted into the table), return 0. + */ +uint16_t rte_gro_reassemble(struct rte_mbuf **pkts, + uint16_t nb_pkts, + void *ctx); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * This function flushes the timeout packets from reassembly tables of + * desired GRO types. The max number of flushed timeout packets is the + * element number of the array which is used to keep the flushed packets. + * + * Besides, this function won't re-calculate checksums for merged + * packets in the tables. That is, the returned packets may be with + * wrong checksums. + * + * @param ctx + * a pointer points to a GRO context object. + * @param timeout_cycles + * max TTL for packets in reassembly tables, measured in nanosecond. + * @param gro_types + * this function only flushes packets which belong to the GRO types + * specified by gro_types. + * @param out + * a pointer array that is used to keep flushed timeout packets. + * @param max_nb_out + * the element number of out. It's also the max number of timeout + * packets that can be flushed finally. + * + * @return + * the number of flushed packets. If no packets are flushed, return 0. + */ +uint16_t rte_gro_timeout_flush(void *ctx, + uint64_t timeout_cycles, + uint64_t gro_types, + struct rte_mbuf **out, + uint16_t max_nb_out); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * This function returns the number of packets in all reassembly tables + * of a given GRO context. + * + * @param ctx + * pointer points to a GRO context object. + * + * @return + * the number of packets in all reassembly tables. 
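
One practical note on the units of timeout_cycles: rte_gro.c above subtracts it from rte_rdtsc(), so a value in TSC cycles keeps the two sides consistent, and the "nanosecond" wording appears to be a leftover. A small, hedged helper assuming the caller thinks in microseconds:

    #include <rte_cycles.h>

    /* convert a microsecond timeout into TSC cycles for rte_gro_timeout_flush();
     * good enough for the small intervals GRO uses (no overflow handling) */
    static inline uint64_t
    gro_us_to_cycles(uint64_t us)
    {
        return us * rte_get_tsc_hz() / 1000000ULL;
    }

    /* e.g. flush everything older than 100 us:
     * rte_gro_timeout_flush(ctx, gro_us_to_cycles(100),
     *                       RTE_GRO_TCP_IPV4, out, RTE_DIM(out));
     */
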
+ */ +uint64_t rte_gro_get_pkt_count(void *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_GRO_H_ */ diff --git a/lib/librte_gro/rte_gro_version.map b/lib/librte_gro/rte_gro_version.map new file mode 100644 index 00000000..bb40bb41 --- /dev/null +++ b/lib/librte_gro/rte_gro_version.map @@ -0,0 +1,12 @@ +DPDK_17.08 { + global: + + rte_gro_ctrl_create; + rte_gro_ctrl_destroy; + rte_gro_get_pkt_count; + rte_gro_reassemble; + rte_gro_reassemble_burst; + rte_gro_timeout_flush; + + local: *; +}; diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile index d856aa26..9cf13a04 100644 --- a/lib/librte_hash/Makefile +++ b/lib/librte_hash/Makefile @@ -49,8 +49,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_hash_crc.h ifeq ($(CONFIG_RTE_ARCH_ARM64),y) +ifneq ($(findstring RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),) SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_crc_arm64.h endif +endif SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h diff --git a/lib/librte_hash/rte_cmp_arm64.h b/lib/librte_hash/rte_cmp_arm64.h index 6fd937b1..950cef3b 100644 --- a/lib/librte_hash/rte_cmp_arm64.h +++ b/lib/librte_hash/rte_cmp_arm64.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2015 Cavium networks. All rights reserved. + * Copyright(c) 2015 Cavium, Inc. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -14,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * diff --git a/lib/librte_hash/rte_cmp_x86.h b/lib/librte_hash/rte_cmp_x86.h index e8c484d6..704c2dec 100644 --- a/lib/librte_hash/rte_cmp_x86.h +++ b/lib/librte_hash/rte_cmp_x86.h @@ -37,15 +37,9 @@ rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unu { const __m128i k1 = _mm_loadu_si128((const __m128i *) key1); const __m128i k2 = _mm_loadu_si128((const __m128i *) key2); -#ifdef RTE_MACHINE_CPUFLAG_SSE4_1 const __m128i x = _mm_xor_si128(k1, k2); return !_mm_test_all_zeros(x, x); -#else - const __m128i x = _mm_cmpeq_epi32(k1, k2); - - return _mm_movemask_epi8(x) != 0xffff; -#endif } static int diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h index 2abe42ab..774428be 100644 --- a/lib/librte_hash/rte_crc_arm64.h +++ b/lib/librte_hash/rte_crc_arm64.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2015 Cavium networks. All rights reserved. + * Copyright(c) 2015 Cavium, Inc. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -14,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
* @@ -52,7 +52,6 @@ extern "C" { static inline uint32_t crc32c_arm64_u8(uint8_t data, uint32_t init_val) { - asm(".arch armv8-a+crc"); __asm__ volatile( "crc32cb %w[crc], %w[crc], %w[value]" : [crc] "+r" (init_val) @@ -63,7 +62,6 @@ crc32c_arm64_u8(uint8_t data, uint32_t init_val) static inline uint32_t crc32c_arm64_u16(uint16_t data, uint32_t init_val) { - asm(".arch armv8-a+crc"); __asm__ volatile( "crc32ch %w[crc], %w[crc], %w[value]" : [crc] "+r" (init_val) @@ -74,7 +72,6 @@ crc32c_arm64_u16(uint16_t data, uint32_t init_val) static inline uint32_t crc32c_arm64_u32(uint32_t data, uint32_t init_val) { - asm(".arch armv8-a+crc"); __asm__ volatile( "crc32cw %w[crc], %w[crc], %w[value]" : [crc] "+r" (init_val) @@ -85,7 +82,6 @@ crc32c_arm64_u32(uint32_t data, uint32_t init_val) static inline uint32_t crc32c_arm64_u64(uint64_t data, uint32_t init_val) { - asm(".arch armv8-a+crc"); __asm__ volatile( "crc32cx %w[crc], %w[crc], %x[value]" : [crc] "+r" (init_val) diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 645c0cfa..87b25c01 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -52,11 +52,11 @@ #include <rte_errno.h> #include <rte_string_fns.h> #include <rte_cpuflags.h> -#include <rte_log.h> #include <rte_rwlock.h> #include <rte_spinlock.h> #include <rte_ring.h> #include <rte_compat.h> +#include <rte_pause.h> #include "rte_hash.h" #include "rte_cuckoo_hash.h" @@ -538,8 +538,10 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, n_slots = rte_ring_mc_dequeue_burst(h->free_slots, cached_free_slots->objs, LCORE_CACHE_SIZE, NULL); - if (n_slots == 0) - return -ENOSPC; + if (n_slots == 0) { + ret = -ENOSPC; + goto failure; + } cached_free_slots->len += n_slots; } @@ -548,8 +550,10 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, cached_free_slots->len--; slot_id = cached_free_slots->objs[cached_free_slots->len]; } else { - if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) - return -ENOSPC; + if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) { + ret = -ENOSPC; + goto failure; + } } new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size); @@ -569,7 +573,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, k->pdata = data; /* * Return index where key is stored, - * substracting the first dummy index + * subtracting the first dummy index */ return prim_bkt->key_idx[i] - 1; } @@ -589,7 +593,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, k->pdata = data; /* * Return index where key is stored, - * substracting the first dummy index + * subtracting the first dummy index */ return sec_bkt->key_idx[i] - 1; } @@ -659,6 +663,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Error in addition, store new slot back in the ring and return error */ enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx)); +failure: if (h->add_key == ADD_KEY_MULTIWRITER) rte_spinlock_unlock(h->multiwriter_lock); return ret; @@ -730,7 +735,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, *data = k->pdata; /* * Return index where key is stored, - * substracting the first dummy index + * subtracting the first dummy index */ return bkt->key_idx[i] - 1; } @@ -753,7 +758,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, *data = k->pdata; /* * Return index where key is stored, - * substracting the first dummy index + * subtracting the first dummy 
index */ return bkt->key_idx[i] - 1; } @@ -847,7 +852,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, /* * Return index where key is stored, - * substracting the first dummy index + * subtracting the first dummy index */ ret = bkt->key_idx[i] - 1; bkt->key_idx[i] = EMPTY_SLOT; @@ -872,7 +877,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, /* * Return index where key is stored, - * substracting the first dummy index + * subtracting the first dummy index */ ret = bkt->key_idx[i] - 1; bkt->key_idx[i] = EMPTY_SLOT; diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h index 1b8ffed8..f75392d2 100644 --- a/lib/librte_hash/rte_cuckoo_hash.h +++ b/lib/librte_hash/rte_cuckoo_hash.h @@ -58,7 +58,7 @@ #endif /* Hash function used if none is specified */ -#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) #include <rte_hash_crc.h> #define DEFAULT_HASH_FUNC rte_hash_crc #else diff --git a/lib/librte_hash/rte_fbk_hash.h b/lib/librte_hash/rte_fbk_hash.h index bd46048f..c39c0976 100644 --- a/lib/librte_hash/rte_fbk_hash.h +++ b/lib/librte_hash/rte_fbk_hash.h @@ -55,7 +55,7 @@ extern "C" { #include <string.h> #ifndef RTE_FBK_HASH_FUNC_DEFAULT -#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) #include <rte_hash_crc.h> /** Default four-byte key hash function if none is specified. */ #define RTE_FBK_HASH_FUNC_DEFAULT rte_hash_crc_4byte diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h index 0f485b85..ea6be522 100644 --- a/lib/librte_hash/rte_hash_crc.h +++ b/lib/librte_hash/rte_hash_crc.h @@ -387,7 +387,7 @@ crc32c_2words(uint64_t data, uint32_t init_val) return crc; } -#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64) +#if defined(RTE_ARCH_X86) static inline uint32_t crc32c_sse42_u8(uint8_t data, uint32_t init_val) { @@ -453,7 +453,7 @@ crc32c_sse42_u64(uint64_t data, uint64_t init_val) static uint8_t crc32_alg = CRC32_SW; -#if defined(RTE_ARCH_ARM64) +#if defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32) #include "rte_crc_arm64.h" #else @@ -471,26 +471,12 @@ static uint8_t crc32_alg = CRC32_SW; static inline void rte_hash_crc_set_alg(uint8_t alg) { - switch (alg) { -#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64) - case CRC32_SSE42_x64: - if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T)) - alg = CRC32_SSE42; -#if __GNUC__ >= 7 - __attribute__ ((fallthrough)); +#if defined(RTE_ARCH_X86) + if (alg == CRC32_SSE42_x64 && + !rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T)) + alg = CRC32_SSE42; #endif - case CRC32_SSE42: - if (! 
rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2)) - alg = CRC32_SW; -#if __GNUC__ >= 7 - __attribute__ ((fallthrough)); -#endif -#endif - case CRC32_SW: - crc32_alg = alg; - default: - break; - } + crc32_alg = alg; } /* Setting the best available algorithm */ @@ -515,7 +501,7 @@ rte_hash_crc_init_alg(void) static inline uint32_t rte_hash_crc_1byte(uint8_t data, uint32_t init_val) { -#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64 +#if defined RTE_ARCH_X86 if (likely(crc32_alg & CRC32_SSE42)) return crc32c_sse42_u8(data, init_val); #endif @@ -538,7 +524,7 @@ rte_hash_crc_1byte(uint8_t data, uint32_t init_val) static inline uint32_t rte_hash_crc_2byte(uint16_t data, uint32_t init_val) { -#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64 +#if defined RTE_ARCH_X86 if (likely(crc32_alg & CRC32_SSE42)) return crc32c_sse42_u16(data, init_val); #endif @@ -561,7 +547,7 @@ rte_hash_crc_2byte(uint16_t data, uint32_t init_val) static inline uint32_t rte_hash_crc_4byte(uint32_t data, uint32_t init_val) { -#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64 +#if defined RTE_ARCH_X86 if (likely(crc32_alg & CRC32_SSE42)) return crc32c_sse42_u32(data, init_val); #endif @@ -589,7 +575,7 @@ rte_hash_crc_8byte(uint64_t data, uint32_t init_val) return crc32c_sse42_u64(data, init_val); #endif -#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64 +#if defined RTE_ARCH_X86 if (likely(crc32_alg & CRC32_SSE42)) return crc32c_sse42_u64_mimic(data, init_val); #endif diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index a4886a8c..2fffd61d 100644 --- a/lib/librte_hash/rte_thash.h +++ b/lib/librte_hash/rte_thash.h @@ -56,11 +56,11 @@ extern "C" { #include <rte_ip.h> #include <rte_common.h> -#ifdef __SSE3__ +#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON) #include <rte_vect.h> #endif -#ifdef __SSE3__ +#ifdef RTE_ARCH_X86 /* Byte swap mask used for converting IPv6 address * 4-byte chunks to CPU byte order */ @@ -134,7 +134,7 @@ struct rte_ipv6_tuple { union rte_thash_tuple { struct rte_ipv4_tuple v4; struct rte_ipv6_tuple v6; -#ifdef __SSE3__ +#ifdef RTE_ARCH_X86 } __attribute__((aligned(XMM_SIZE))); #else }; @@ -169,13 +169,18 @@ rte_convert_rss_key(const uint32_t *orig, uint32_t *targ, int len) static inline void rte_thash_load_v6_addrs(const struct ipv6_hdr *orig, union rte_thash_tuple *targ) { -#ifdef __SSE3__ +#ifdef RTE_ARCH_X86 __m128i ipv6 = _mm_loadu_si128((const __m128i *)orig->src_addr); *(__m128i *)targ->v6.src_addr = _mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask); ipv6 = _mm_loadu_si128((const __m128i *)orig->dst_addr); *(__m128i *)targ->v6.dst_addr = _mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask); +#elif defined(RTE_MACHINE_CPUFLAG_NEON) + uint8x16_t ipv6 = vld1q_u8((uint8_t const *)orig->src_addr); + vst1q_u8((uint8_t *)targ->v6.src_addr, vrev32q_u8(ipv6)); + ipv6 = vld1q_u8((uint8_t const *)orig->dst_addr); + vst1q_u8((uint8_t *)targ->v6.dst_addr, vrev32q_u8(ipv6)); #else int i; for (i = 0; i < 4; i++) { diff --git a/lib/librte_ip_frag/ip_frag_common.h b/lib/librte_ip_frag/ip_frag_common.h index 835e4f93..9f561965 100644 --- a/lib/librte_ip_frag/ip_frag_common.h +++ b/lib/librte_ip_frag/ip_frag_common.h @@ -130,6 +130,26 @@ ip_frag_free(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr) dr->cnt = k; } +/* delete fragment's mbufs immediately instead of using death row */ +static inline void +ip_frag_free_immediate(struct ip_frag_pkt *fp) +{ + uint32_t i; + + for (i = 0; i < fp->last_idx; i++) { + if (fp->frags[i].mb != NULL) { + IP_FRAG_LOG(DEBUG, 
"%s:%d\n" + "mbuf: %p, tms: %" PRIu64", key: <%" PRIx64 ", %#x>\n", + __func__, __LINE__, fp->frags[i].mb, fp->start, + fp->key.src_dst[0], fp->key.id); + rte_pktmbuf_free(fp->frags[i].mb); + fp->frags[i].mb = NULL; + } + } + + fp->last_idx = 0; +} + /* if key is empty, mark key as in use */ static inline void ip_frag_inuse(struct rte_ip_frag_tbl *tbl, const struct ip_frag_pkt *fp) diff --git a/lib/librte_ip_frag/ip_frag_internal.c b/lib/librte_ip_frag/ip_frag_internal.c index b679ff43..09b755c9 100644 --- a/lib/librte_ip_frag/ip_frag_internal.c +++ b/lib/librte_ip_frag/ip_frag_internal.c @@ -34,9 +34,7 @@ #include <stddef.h> #include <rte_jhash.h> -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 #include <rte_hash_crc.h> -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ #include "ip_frag_common.h" @@ -94,14 +92,14 @@ ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2) p = (const uint32_t *)&key->src_dst; -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 +#ifdef RTE_ARCH_X86 v = rte_hash_crc_4byte(p[0], PRIME_VALUE); v = rte_hash_crc_4byte(p[1], v); v = rte_hash_crc_4byte(key->id, v); #else v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE); -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ +#endif /* RTE_ARCH_X86 */ *v1 = v; *v2 = (v << 7) + (v >> 14); @@ -115,7 +113,7 @@ ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2) p = (const uint32_t *) &key->src_dst; -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 +#ifdef RTE_ARCH_X86 v = rte_hash_crc_4byte(p[0], PRIME_VALUE); v = rte_hash_crc_4byte(p[1], v); v = rte_hash_crc_4byte(p[2], v); @@ -130,7 +128,7 @@ ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2) v = rte_jhash_3words(p[0], p[1], p[2], PRIME_VALUE); v = rte_jhash_3words(p[3], p[4], p[5], v); v = rte_jhash_3words(p[6], p[7], key->id, v); -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ +#endif /* RTE_ARCH_X86 */ *v1 = v; *v2 = (v << 7) + (v >> 14); diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h index 6708906d..35d0ecc3 100644 --- a/lib/librte_ip_frag/rte_ip_frag.h +++ b/lib/librte_ip_frag/rte_ip_frag.h @@ -180,11 +180,8 @@ struct rte_ip_frag_tbl * rte_ip_frag_table_create(uint32_t bucket_num, * @param tbl * Fragmentation table to free. */ -static inline void -rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl) -{ - rte_free(tbl); -} +void +rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl); /** * This function implements the fragmentation of IPv6 packets. @@ -233,7 +230,7 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in, * Pointer to the IPv6 fragment extension header. * @return * Pointer to mbuf for reassembled packet, or NULL if: - * - an error occured. + * - an error occurred. * - not all fragments of the packet are collected yet. */ struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, @@ -307,7 +304,7 @@ int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, * Pointer to the IPV4 header inside the fragment. * @return * Pointer to mbuf for reassebled packet, or NULL if: - * - an error occured. + * - an error occurred. * - not all fragments of the packet are collected yet. 
*/ struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, diff --git a/lib/librte_ip_frag/rte_ip_frag_common.c b/lib/librte_ip_frag/rte_ip_frag_common.c index 6176ff4e..8460f8e8 100644 --- a/lib/librte_ip_frag/rte_ip_frag_common.c +++ b/lib/librte_ip_frag/rte_ip_frag_common.c @@ -109,6 +109,19 @@ rte_ip_frag_table_create(uint32_t bucket_num, uint32_t bucket_entries, return tbl; } +/* delete fragmentation table */ +void +rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl) +{ + struct ip_frag_pkt *fp; + + TAILQ_FOREACH(fp, &tbl->lru, lru) { + ip_frag_free_immediate(fp); + } + + rte_free(tbl); +} + /* dump frag table statistics to file */ void rte_ip_frag_table_statistics_dump(FILE *f, const struct rte_ip_frag_tbl *tbl) diff --git a/lib/librte_ip_frag/rte_ipfrag_version.map b/lib/librte_ip_frag/rte_ipfrag_version.map index 354fa082..d1acf07c 100644 --- a/lib/librte_ip_frag/rte_ipfrag_version.map +++ b/lib/librte_ip_frag/rte_ipfrag_version.map @@ -11,3 +11,10 @@ DPDK_2.0 { local: *; }; + +DPDK_17.08 { + global: + + rte_ip_frag_table_destroy; + +} DPDK_2.0; diff --git a/lib/librte_ip_frag/rte_ipv4_fragmentation.c b/lib/librte_ip_frag/rte_ipv4_fragmentation.c index a2259e80..8c5f5ec4 100644 --- a/lib/librte_ip_frag/rte_ipv4_fragmentation.c +++ b/lib/librte_ip_frag/rte_ipv4_fragmentation.c @@ -48,7 +48,7 @@ #define IPV4_HDR_DF_MASK (1 << IPV4_HDR_DF_SHIFT) #define IPV4_HDR_MF_MASK (1 << IPV4_HDR_MF_SHIFT) -#define IPV4_HDR_FO_MASK ((1 << IPV4_HDR_FO_SHIFT) - 1) +#define IPV4_HDR_FO_ALIGN (1 << IPV4_HDR_FO_SHIFT) static inline void __fill_ipv4hdr_frag(struct ipv4_hdr *dst, const struct ipv4_hdr *src, uint16_t len, uint16_t fofs, @@ -103,11 +103,14 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, uint32_t out_pkt_pos, in_seg_data_pos; uint32_t more_in_segs; uint16_t fragment_offset, flag_offset, frag_size; + uint16_t frag_bytes_remaining; - frag_size = (uint16_t)(mtu_size - sizeof(struct ipv4_hdr)); - - /* Fragment size should be a multiply of 8. */ - RTE_ASSERT((frag_size & IPV4_HDR_FO_MASK) == 0); + /* + * Ensure the IP payload length of all fragments is aligned to a + * multiple of 8 bytes as per RFC791 section 2.3. + */ + frag_size = RTE_ALIGN_FLOOR((mtu_size - sizeof(struct ipv4_hdr)), + IPV4_HDR_FO_ALIGN); in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv4_hdr *); flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset); @@ -142,6 +145,7 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, /* Reserve space for the IP header that will be built later */ out_pkt->data_len = sizeof(struct ipv4_hdr); out_pkt->pkt_len = sizeof(struct ipv4_hdr); + frag_bytes_remaining = frag_size; out_seg_prev = out_pkt; more_out_segs = 1; @@ -161,7 +165,7 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, /* Prepare indirect buffer */ rte_pktmbuf_attach(out_seg, in_seg); - len = mtu_size - out_pkt->pkt_len; + len = frag_bytes_remaining; if (len > (in_seg->data_len - in_seg_data_pos)) { len = in_seg->data_len - in_seg_data_pos; } @@ -171,9 +175,10 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, out_pkt->pkt_len); out_pkt->nb_segs += 1; in_seg_data_pos += len; + frag_bytes_remaining -= len; /* Current output packet (i.e. fragment) done ? */ - if (unlikely(out_pkt->pkt_len >= mtu_size)) + if (unlikely(frag_bytes_remaining == 0)) more_out_segs = 0; /* Current input segment done ? 
*/ diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c index e084ca59..b1330896 100644 --- a/lib/librte_ip_frag/rte_ipv4_reassembly.c +++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c @@ -118,7 +118,7 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp) * Pointer to the IPV4 header inside the fragment. * @return * Pointer to mbuf for reassebled packet, or NULL if: - * - an error occured. + * - an error occurred. * - not all fragments of the packet are collected yet. */ struct rte_mbuf * diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c index 21a5ef5d..dde58cb7 100644 --- a/lib/librte_ip_frag/rte_ipv6_reassembly.c +++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c @@ -155,7 +155,7 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp) * Pointer to the IPV6 fragment extension header. * @return * Pointer to mbuf for reassembled packet, or NULL if: - * - an error occured. + * - an error occurred. * - not all fragments of the packet are collected yet. */ #define MORE_FRAGS(x) (((x) & 0x100) >> 8) diff --git a/lib/librte_jobstats/rte_jobstats.h b/lib/librte_jobstats/rte_jobstats.h index b3686030..7e76fd50 100644 --- a/lib/librte_jobstats/rte_jobstats.h +++ b/lib/librte_jobstats/rte_jobstats.h @@ -98,7 +98,7 @@ struct rte_jobstats { } __rte_cache_aligned; struct rte_jobstats_context { - /** Viariable holding time at different points: + /** Variable holding time at different points: * -# loop start time if loop was started but no job executed yet. * -# job start time if job is currently executing. * -# job finish time if job finished its execution. diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c index c3f9208c..8c483c1f 100644 --- a/lib/librte_kni/rte_kni.c +++ b/lib/librte_kni/rte_kni.c @@ -119,7 +119,7 @@ struct rte_kni_memzone_pool { uint32_t max_ifaces; /**< Max. 
num of KNI ifaces */ struct rte_kni_memzone_slot *slots; /**< Pool slots */ - rte_spinlock_t mutex; /**< alloc/relase mutex */ + rte_spinlock_t mutex; /**< alloc/release mutex */ /* Free memzone slots linked-list */ struct rte_kni_memzone_slot *free; /**< First empty slot */ @@ -624,6 +624,7 @@ kni_allocate_mbufs(struct rte_kni *kni) int i, ret; struct rte_mbuf *pkts[MAX_MBUF_BURST_NUM]; void *phys[MAX_MBUF_BURST_NUM]; + int allocq_free; RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pool) != offsetof(struct rte_kni_mbuf, pool)); @@ -646,7 +647,9 @@ kni_allocate_mbufs(struct rte_kni *kni) return; } - for (i = 0; i < MAX_MBUF_BURST_NUM; i++) { + allocq_free = (kni->alloc_q->read - kni->alloc_q->write - 1) \ + & (MAX_MBUF_BURST_NUM - 1); + for (i = 0; i < allocq_free; i++) { pkts[i] = rte_pktmbuf_alloc(kni->pktmbuf_pool); if (unlikely(pkts[i] == NULL)) { /* Out of memory */ diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c index 978ac601..64c074e9 100644 --- a/lib/librte_lpm/rte_lpm.c +++ b/lib/librte_lpm/rte_lpm.c @@ -36,7 +36,6 @@ #include <errno.h> #include <stdarg.h> #include <stdio.h> -#include <errno.h> #include <sys/queue.h> #include <rte_log.h> diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c index 9cc7be77..b4a7df34 100644 --- a/lib/librte_lpm/rte_lpm6.c +++ b/lib/librte_lpm/rte_lpm6.c @@ -35,7 +35,6 @@ #include <errno.h> #include <stdarg.h> #include <stdio.h> -#include <errno.h> #include <sys/queue.h> #include <rte_log.h> diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h index 7efd9a0d..4fd33f33 100644 --- a/lib/librte_lpm/rte_lpm_neon.h +++ b/lib/librte_lpm/rte_lpm_neon.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2015 Cavium Networks. All rights reserved. + * Copyright(c) 2015 Cavium, Inc. All rights reserved. * All rights reserved. * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. @@ -19,7 +19,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Cavium Networks nor the names of its + * * Neither the name of Cavium, Inc nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
* diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h index ef33c6a1..5f2c4d4a 100644 --- a/lib/librte_lpm/rte_lpm_sse.h +++ b/lib/librte_lpm/rte_lpm_sse.h @@ -78,7 +78,8 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], /* extract values from tbl24[] */ idx = _mm_cvtsi128_si64(i24); - i24 = _mm_srli_si128(i24, sizeof(uint64_t)); + /* With -O0 option, gcc 4.8 - 5.4 fails to fold sizeof() into a constant */ + i24 = _mm_srli_si128(i24, /* sizeof(uint64_t) */ 8); ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)idx]; tbl[0] = *ptbl; diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c index 0e3e36a5..26a62b8e 100644 --- a/lib/librte_mbuf/rte_mbuf.c +++ b/lib/librte_mbuf/rte_mbuf.c @@ -131,8 +131,7 @@ rte_pktmbuf_init(struct rte_mempool *mp, RTE_ASSERT(mp->elt_size >= mbuf_size); RTE_ASSERT(buf_len <= UINT16_MAX); - memset(m, 0, mp->elt_size); - + memset(m, 0, mbuf_size); /* start of buffer is after mbuf structure and priv data */ m->priv_size = priv_size; m->buf_addr = (char *)m + mbuf_size; @@ -409,6 +408,7 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask) case PKT_TX_TUNNEL_GRE: return "PKT_TX_TUNNEL_GRE"; case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP"; case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE"; + case PKT_TX_TUNNEL_MPLSINUDP: return "PKT_TX_TUNNEL_MPLSINUDP"; case PKT_TX_MACSEC: return "PKT_TX_MACSEC"; default: return NULL; } @@ -440,6 +440,8 @@ rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) "PKT_TX_TUNNEL_NONE" }, { PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK, "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_MPLSINUDP, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, { PKT_TX_MACSEC, PKT_TX_MACSEC, NULL }, }; const char *name; diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index 1cb03109..eaed7eee 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -208,6 +208,8 @@ extern "C" { #define PKT_TX_TUNNEL_GRE (0x2ULL << 45) #define PKT_TX_TUNNEL_IPIP (0x3ULL << 45) #define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45) +/**< TX packet with MPLS-in-UDP RFC 7510 header. */ +#define PKT_TX_TUNNEL_MPLSINUDP (0x5ULL << 45) /* add new TX TUNNEL type here */ #define PKT_TX_TUNNEL_MASK (0xFULL << 45) @@ -840,7 +842,7 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp) * @param m * The mbuf to be freed. */ -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_mbuf_raw_free(struct rte_mbuf *m) { RTE_ASSERT(RTE_MBUF_DIRECT(m)); @@ -1136,6 +1138,7 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct rte_mempool *mp) * Array size * @return * - 0: Success + * - -ENOENT: Not enough entries in the mempool; no mbufs are retrieved. */ static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool, struct rte_mbuf **mbufs, unsigned count) @@ -1287,8 +1290,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m) * - (m) if it is the last reference. It can be recycled or freed. * - (NULL) if the mbuf still has remaining references on it. */ -__attribute__((always_inline)) -static inline struct rte_mbuf * +static __rte_always_inline struct rte_mbuf * rte_pktmbuf_prefree_seg(struct rte_mbuf *m) { __rte_mbuf_sanity_check(m, 0); @@ -1339,7 +1341,7 @@ __rte_pktmbuf_prefree_seg(struct rte_mbuf *m) * @param m * The packet mbuf segment to be freed. 
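
The return-value note added to rte_pktmbuf_alloc_bulk() makes the failure mode explicit: on -ENOENT nothing was allocated, so the caller has nothing to clean up. A minimal sketch, assuming an already-created pktmbuf pool:

    #include <rte_mbuf.h>

    static int
    fill_tx_burst(struct rte_mempool *mp, struct rte_mbuf **mbufs, unsigned int n)
    {
        if (rte_pktmbuf_alloc_bulk(mp, mbufs, n) != 0)
                return -1;      /* -ENOENT: pool too low, no mbufs were taken */

        /* all n entries of mbufs[] are valid here */
        return 0;
    }
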
*/ -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_pktmbuf_free_seg(struct rte_mbuf *m) { m = rte_pktmbuf_prefree_seg(m); @@ -1453,7 +1455,7 @@ static inline void rte_pktmbuf_refcnt_update(struct rte_mbuf *m, int16_t v) */ static inline uint16_t rte_pktmbuf_headroom(const struct rte_mbuf *m) { - __rte_mbuf_sanity_check(m, 1); + __rte_mbuf_sanity_check(m, 0); return m->data_off; } @@ -1467,7 +1469,7 @@ static inline uint16_t rte_pktmbuf_headroom(const struct rte_mbuf *m) */ static inline uint16_t rte_pktmbuf_tailroom(const struct rte_mbuf *m) { - __rte_mbuf_sanity_check(m, 1); + __rte_mbuf_sanity_check(m, 0); return (uint16_t)(m->buf_len - rte_pktmbuf_headroom(m) - m->data_len); } diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h index a3269c4c..acd70bb6 100644 --- a/lib/librte_mbuf/rte_mbuf_ptype.h +++ b/lib/librte_mbuf/rte_mbuf_ptype.h @@ -341,11 +341,11 @@ extern "C" { * Packet format: * <'ether type'=0x0800 * | 'version'=4, 'protocol'=17 - * | 'destination port'=4798> + * | 'destination port'=4789> * or, * <'ether type'=0x86DD * | 'version'=6, 'next header'=17 - * | 'destination port'=4798> + * | 'destination port'=4789> */ #define RTE_PTYPE_TUNNEL_VXLAN 0x00003000 /** diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index f65310f6..6fc3c9c7 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -476,7 +476,7 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr, /* required for xen_dom0 to get the machine address */ paddr = rte_mem_phy2mch(-1, paddr); - if (paddr == RTE_BAD_PHYS_ADDR) { + if (paddr == RTE_BAD_PHYS_ADDR && rte_eal_has_hugepages()) { ret = -EINVAL; goto fail; } diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index 48bc8ea3..76b5b3b1 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -993,7 +993,7 @@ rte_mempool_cache_free(struct rte_mempool_cache *cache); * @param mp * A pointer to the mempool. */ -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_mempool_cache_flush(struct rte_mempool_cache *cache, struct rte_mempool *mp) { @@ -1011,7 +1011,7 @@ rte_mempool_cache_flush(struct rte_mempool_cache *cache, * @return * A pointer to the mempool cache or NULL if disabled or non-EAL thread. */ -static inline struct rte_mempool_cache *__attribute__((always_inline)) +static __rte_always_inline struct rte_mempool_cache * rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id) { if (mp->cache_size == 0) @@ -1038,7 +1038,7 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id) * The flags used for the mempool creation. * Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers. */ -static inline void __attribute__((always_inline)) +static __rte_always_inline void __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, unsigned n, struct rte_mempool_cache *cache) { @@ -1100,7 +1100,7 @@ ring_enqueue: * The flags used for the mempool creation. * Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers. 
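
A hedged sketch of the per-lcore cache path these now-__rte_always_inline helpers implement: look up the calling lcore's default cache and move objects through it explicitly. It assumes the pool was created with a non-zero cache_size; the flags argument is ignored, as the __rte_unused markers show.

    #include <rte_mempool.h>
    #include <rte_lcore.h>

    static int
    recycle_objects(struct rte_mempool *mp, void **objs, unsigned int n)
    {
        struct rte_mempool_cache *cache =
                rte_mempool_default_cache(mp, rte_lcore_id());

        if (rte_mempool_generic_get(mp, objs, n, cache, 0) < 0)
                return -1;              /* -ENOENT: nothing was taken */

        /* ... work on the n objects ... */

        rte_mempool_generic_put(mp, objs, n, cache, 0);
        return 0;
    }
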
*/ -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, unsigned n, struct rte_mempool_cache *cache, __rte_unused int flags) @@ -1123,7 +1123,7 @@ rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, * @param n * The number of objects to add in the mempool from obj_table. */ -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table, unsigned n) { @@ -1144,7 +1144,7 @@ rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table, * @param obj * A pointer to the object to be added. */ -static inline void __attribute__((always_inline)) +static __rte_always_inline void rte_mempool_put(struct rte_mempool *mp, void *obj) { rte_mempool_put_bulk(mp, &obj, 1); @@ -1167,7 +1167,7 @@ rte_mempool_put(struct rte_mempool *mp, void *obj) * - >=0: Success; number of objects supplied. * - <0: Error; code of ring dequeue function. */ -static inline int __attribute__((always_inline)) +static __rte_always_inline int __mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n, struct rte_mempool_cache *cache) { @@ -1248,7 +1248,7 @@ ring_dequeue: * - 0: Success; objects taken. * - -ENOENT: Not enough entries in the mempool; no object is retrieved. */ -static inline int __attribute__((always_inline)) +static __rte_always_inline int rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n, struct rte_mempool_cache *cache, __rte_unused int flags) { @@ -1281,7 +1281,7 @@ rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n, * - 0: Success; objects taken * - -ENOENT: Not enough entries in the mempool; no object is retrieved. */ -static inline int __attribute__((always_inline)) +static __rte_always_inline int rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) { struct rte_mempool_cache *cache; @@ -1309,7 +1309,7 @@ rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) * - 0: Success; objects taken. * - -ENOENT: Not enough entries in the mempool; no object is retrieved. 
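
The plain bulk wrappers hide the cache lookup entirely; the usual pattern is to treat -ENOENT as "try again later", since nothing is taken on failure. A short sketch with an illustrative burst size:

    #include <rte_mempool.h>

    #define OBJ_BURST 32

    static void
    process_from_pool(struct rte_mempool *mp)
    {
        void *objs[OBJ_BURST];

        if (rte_mempool_get_bulk(mp, objs, OBJ_BURST) != 0)
                return;                 /* -ENOENT: not enough objects right now */

        /* ... use objs[0..OBJ_BURST-1] ... */

        rte_mempool_put_bulk(mp, objs, OBJ_BURST);
    }
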
*/ -static inline int __attribute__((always_inline)) +static __rte_always_inline int rte_mempool_get(struct rte_mempool *mp, void **obj_p) { return rte_mempool_get_bulk(mp, obj_p, 1); diff --git a/lib/librte_metrics/rte_metrics.c b/lib/librte_metrics/rte_metrics.c index e9a122c1..b66a72bb 100644 --- a/lib/librte_metrics/rte_metrics.c +++ b/lib/librte_metrics/rte_metrics.c @@ -144,6 +144,8 @@ rte_metrics_reg_names(const char * const *names, uint16_t cnt_names) entry = &stats->metadata[idx_name + stats->cnt_stats]; strncpy(entry->name, names[idx_name], RTE_METRICS_MAX_NAME_LEN); + /* Enforce NULL-termination */ + entry->name[RTE_METRICS_MAX_NAME_LEN - 1] = '\0'; memset(entry->value, 0, sizeof(entry->value)); entry->idx_next_stat = idx_name + stats->cnt_stats + 1; } @@ -176,7 +178,7 @@ rte_metrics_update_values(int port_id, uint16_t cnt_setsize; if (port_id != RTE_METRICS_GLOBAL && - (port_id < 0 || port_id > RTE_MAX_ETHPORTS)) + (port_id < 0 || port_id >= RTE_MAX_ETHPORTS)) return -EINVAL; if (values == NULL) @@ -263,7 +265,7 @@ rte_metrics_get_values(int port_id, int return_value; if (port_id != RTE_METRICS_GLOBAL && - (port_id < 0 || port_id > RTE_MAX_ETHPORTS)) + (port_id < 0 || port_id >= RTE_MAX_ETHPORTS)) return -EINVAL; memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME); diff --git a/lib/librte_metrics/rte_metrics.h b/lib/librte_metrics/rte_metrics.h index 0fa3104e..297300ad 100644 --- a/lib/librte_metrics/rte_metrics.h +++ b/lib/librte_metrics/rte_metrics.h @@ -118,7 +118,8 @@ void rte_metrics_init(int socket_id); * is required for updating said metric's value. * * @param name - * Metric name + * Metric name. If this exceeds RTE_METRICS_MAX_NAME_LEN (including + * the NULL terminator), it is truncated. * * @return * - Zero or positive: Success (index key of new metric) diff --git a/lib/librte_net/net_crc_neon.h b/lib/librte_net/net_crc_neon.h new file mode 100644 index 00000000..201b2c88 --- /dev/null +++ b/lib/librte_net/net_crc_neon.h @@ -0,0 +1,297 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2017. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _NET_CRC_NEON_H_ +#define _NET_CRC_NEON_H_ + +#include <rte_branch_prediction.h> +#include <rte_net_crc.h> +#include <rte_vect.h> +#include <rte_cpuflags.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** PMULL CRC computation context structure */ +struct crc_pmull_ctx { + uint64x2_t rk1_rk2; + uint64x2_t rk5_rk6; + uint64x2_t rk7_rk8; +}; + +struct crc_pmull_ctx crc32_eth_pmull __rte_aligned(16); +struct crc_pmull_ctx crc16_ccitt_pmull __rte_aligned(16); + +/** + * @brief Performs one folding round + * + * Logically function operates as follows: + * DATA = READ_NEXT_16BYTES(); + * F1 = LSB8(FOLD) + * F2 = MSB8(FOLD) + * T1 = CLMUL(F1, RK1) + * T2 = CLMUL(F2, RK2) + * FOLD = XOR(T1, T2, DATA) + * + * @param data_block 16 byte data block + * @param precomp precomputed rk1 constanst + * @param fold running 16 byte folded data + * + * @return New 16 byte folded data + */ +static inline uint64x2_t +crcr32_folding_round(uint64x2_t data_block, uint64x2_t precomp, + uint64x2_t fold) +{ + uint64x2_t tmp0 = vreinterpretq_u64_p128(vmull_p64( + vgetq_lane_p64(vreinterpretq_p64_u64(fold), 1), + vgetq_lane_p64(vreinterpretq_p64_u64(precomp), 0))); + + uint64x2_t tmp1 = vreinterpretq_u64_p128(vmull_p64( + vgetq_lane_p64(vreinterpretq_p64_u64(fold), 0), + vgetq_lane_p64(vreinterpretq_p64_u64(precomp), 1))); + + return veorq_u64(tmp1, veorq_u64(data_block, tmp0)); +} + +/** + * Performs reduction from 128 bits to 64 bits + * + * @param data128 128 bits data to be reduced + * @param precomp rk5 and rk6 precomputed constants + * + * @return data reduced to 64 bits + */ +static inline uint64x2_t +crcr32_reduce_128_to_64(uint64x2_t data128, + uint64x2_t precomp) +{ + uint64x2_t tmp0, tmp1, tmp2; + + /* 64b fold */ + tmp0 = vreinterpretq_u64_p128(vmull_p64( + vgetq_lane_p64(vreinterpretq_p64_u64(data128), 0), + vgetq_lane_p64(vreinterpretq_p64_u64(precomp), 0))); + tmp1 = vshift_bytes_right(data128, 8); + tmp0 = veorq_u64(tmp0, tmp1); + + /* 32b fold */ + tmp2 = vshift_bytes_left(tmp0, 4); + tmp1 = vreinterpretq_u64_p128(vmull_p64( + vgetq_lane_p64(vreinterpretq_p64_u64(tmp2), 0), + vgetq_lane_p64(vreinterpretq_p64_u64(precomp), 1))); + + return veorq_u64(tmp1, tmp0); +} + +/** + * Performs Barret's reduction from 64 bits to 32 bits + * + * @param data64 64 bits data to be reduced + * @param precomp rk7 precomputed constant + * + * @return data reduced to 32 bits + */ +static inline uint32_t +crcr32_reduce_64_to_32(uint64x2_t data64, + uint64x2_t precomp) +{ + static uint32_t mask1[4] __rte_aligned(16) = { + 0xffffffff, 0xffffffff, 0x00000000, 0x00000000 + }; + static uint32_t mask2[4] __rte_aligned(16) = { + 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff + }; + uint64x2_t tmp0, tmp1, tmp2; + + tmp0 = vandq_u64(data64, vld1q_u64((uint64_t *)mask2)); + + tmp1 = vreinterpretq_u64_p128(vmull_p64( + vgetq_lane_p64(vreinterpretq_p64_u64(tmp0), 0), + vgetq_lane_p64(vreinterpretq_p64_u64(precomp), 0))); + tmp1 = veorq_u64(tmp1, tmp0); + tmp1 = vandq_u64(tmp1, 
vld1q_u64((uint64_t *)mask1)); + + tmp2 = vreinterpretq_u64_p128(vmull_p64( + vgetq_lane_p64(vreinterpretq_p64_u64(tmp1), 0), + vgetq_lane_p64(vreinterpretq_p64_u64(precomp), 1))); + tmp2 = veorq_u64(tmp2, tmp1); + tmp2 = veorq_u64(tmp2, tmp0); + + return vgetq_lane_u32(vreinterpretq_u32_u64(tmp2), 2); +} + +static inline uint32_t +crc32_eth_calc_pmull( + const uint8_t *data, + uint32_t data_len, + uint32_t crc, + const struct crc_pmull_ctx *params) +{ + uint64x2_t temp, fold, k; + uint32_t n; + + /* Get CRC init value */ + temp = vreinterpretq_u64_u32(vsetq_lane_u32(crc, vmovq_n_u32(0), 0)); + + /** + * Folding all data into single 16 byte data block + * Assumes: fold holds first 16 bytes of data + */ + if (unlikely(data_len < 32)) { + if (unlikely(data_len == 16)) { + /* 16 bytes */ + fold = vld1q_u64((const uint64_t *)data); + fold = veorq_u64(fold, temp); + goto reduction_128_64; + } + + if (unlikely(data_len < 16)) { + /* 0 to 15 bytes */ + uint8_t buffer[16] __rte_aligned(16); + + memset(buffer, 0, sizeof(buffer)); + memcpy(buffer, data, data_len); + + fold = vld1q_u64((uint64_t *)buffer); + fold = veorq_u64(fold, temp); + if (unlikely(data_len < 4)) { + fold = vshift_bytes_left(fold, 8 - data_len); + goto barret_reduction; + } + fold = vshift_bytes_left(fold, 16 - data_len); + goto reduction_128_64; + } + /* 17 to 31 bytes */ + fold = vld1q_u64((const uint64_t *)data); + fold = veorq_u64(fold, temp); + n = 16; + k = params->rk1_rk2; + goto partial_bytes; + } + + /** At least 32 bytes in the buffer */ + /** Apply CRC initial value */ + fold = vld1q_u64((const uint64_t *)data); + fold = veorq_u64(fold, temp); + + /** Main folding loop - the last 16 bytes is processed separately */ + k = params->rk1_rk2; + for (n = 16; (n + 16) <= data_len; n += 16) { + temp = vld1q_u64((const uint64_t *)&data[n]); + fold = crcr32_folding_round(temp, k, fold); + } + +partial_bytes: + if (likely(n < data_len)) { + uint64x2_t last16, a, b, mask; + uint32_t rem = data_len & 15; + + last16 = vld1q_u64((const uint64_t *)&data[data_len - 16]); + a = vshift_bytes_left(fold, 16 - rem); + b = vshift_bytes_right(fold, rem); + mask = vshift_bytes_left(vdupq_n_u64(-1), 16 - rem); + b = vorrq_u64(b, vandq_u64(mask, last16)); + + /* k = rk1 & rk2 */ + temp = vreinterpretq_u64_p128(vmull_p64( + vgetq_lane_p64(vreinterpretq_p64_u64(a), 1), + vgetq_lane_p64(vreinterpretq_p64_u64(k), 0))); + fold = vreinterpretq_u64_p128(vmull_p64( + vgetq_lane_p64(vreinterpretq_p64_u64(a), 0), + vgetq_lane_p64(vreinterpretq_p64_u64(k), 1))); + fold = veorq_u64(fold, temp); + fold = veorq_u64(fold, b); + } + + /** Reduction 128 -> 32 Assumes: fold holds 128bit folded data */ +reduction_128_64: + k = params->rk5_rk6; + fold = crcr32_reduce_128_to_64(fold, k); + +barret_reduction: + k = params->rk7_rk8; + n = crcr32_reduce_64_to_32(fold, k); + + return n; +} + +static inline void +rte_net_crc_neon_init(void) +{ + /* Initialize CRC16 data */ + uint64_t ccitt_k1_k2[2] = {0x189aeLLU, 0x8e10LLU}; + uint64_t ccitt_k5_k6[2] = {0x189aeLLU, 0x114aaLLU}; + uint64_t ccitt_k7_k8[2] = {0x11c581910LLU, 0x10811LLU}; + + /* Initialize CRC32 data */ + uint64_t eth_k1_k2[2] = {0xccaa009eLLU, 0x1751997d0LLU}; + uint64_t eth_k5_k6[2] = {0xccaa009eLLU, 0x163cd6124LLU}; + uint64_t eth_k7_k8[2] = {0x1f7011640LLU, 0x1db710641LLU}; + + /** Save the params in context structure */ + crc16_ccitt_pmull.rk1_rk2 = vld1q_u64(ccitt_k1_k2); + crc16_ccitt_pmull.rk5_rk6 = vld1q_u64(ccitt_k5_k6); + crc16_ccitt_pmull.rk7_rk8 = vld1q_u64(ccitt_k7_k8); + + /** Save the params 
in context structure */ + crc32_eth_pmull.rk1_rk2 = vld1q_u64(eth_k1_k2); + crc32_eth_pmull.rk5_rk6 = vld1q_u64(eth_k5_k6); + crc32_eth_pmull.rk7_rk8 = vld1q_u64(eth_k7_k8); +} + +static inline uint32_t +rte_crc16_ccitt_neon_handler(const uint8_t *data, + uint32_t data_len) +{ + return (uint16_t)~crc32_eth_calc_pmull(data, + data_len, + 0xffff, + &crc16_ccitt_pmull); +} + +static inline uint32_t +rte_crc32_eth_neon_handler(const uint8_t *data, + uint32_t data_len) +{ + return ~crc32_eth_calc_pmull(data, + data_len, + 0xffffffffUL, + &crc32_eth_pmull); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _NET_CRC_NEON_H_ */ diff --git a/lib/librte_net/net_crc_sse.h b/lib/librte_net/net_crc_sse.h index 8bce522a..ac93637b 100644 --- a/lib/librte_net/net_crc_sse.h +++ b/lib/librte_net/net_crc_sse.h @@ -73,7 +73,7 @@ struct crc_pclmulqdq_ctx crc16_ccitt_pclmulqdq __rte_aligned(16); * @return * New 16 byte folded data */ -static inline __attribute__((always_inline)) __m128i +static __rte_always_inline __m128i crcr32_folding_round(__m128i data_block, __m128i precomp, __m128i fold) @@ -96,7 +96,7 @@ crcr32_folding_round(__m128i data_block, * 64 bits reduced data */ -static inline __attribute__((always_inline)) __m128i +static __rte_always_inline __m128i crcr32_reduce_128_to_64(__m128i data128, __m128i precomp) { __m128i tmp0, tmp1, tmp2; @@ -125,7 +125,7 @@ crcr32_reduce_128_to_64(__m128i data128, __m128i precomp) * reduced 32 bits data */ -static inline __attribute__((always_inline)) uint32_t +static __rte_always_inline uint32_t crcr32_reduce_64_to_32(__m128i data64, __m128i precomp) { static const uint32_t mask1[4] __rte_aligned(16) = { @@ -171,7 +171,7 @@ static const uint8_t crc_xmm_shift_tab[48] __rte_aligned(16) = { * reg << (num * 8) */ -static inline __attribute__((always_inline)) __m128i +static __rte_always_inline __m128i xmm_shift_left(__m128i reg, const unsigned int num) { const __m128i *p = (const __m128i *)(crc_xmm_shift_tab + 16 - num); @@ -179,7 +179,7 @@ xmm_shift_left(__m128i reg, const unsigned int num) return _mm_shuffle_epi8(reg, _mm_loadu_si128(p)); } -static inline __attribute__((always_inline)) uint32_t +static __rte_always_inline uint32_t crc32_eth_calc_pclmulqdq( const uint8_t *data, uint32_t data_len, diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index 9d1ee63f..661fe322 100644 --- a/lib/librte_net/rte_net_crc.c +++ b/lib/librte_net/rte_net_crc.c @@ -39,14 +39,16 @@ #include <rte_common.h> #include <rte_net_crc.h> -#if defined(RTE_ARCH_X86_64) \ - && defined(RTE_MACHINE_CPUFLAG_SSE4_2) \ - && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ) +#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ) #define X86_64_SSE42_PCLMULQDQ 1 +#elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_PMULL) +#define ARM64_NEON_PMULL 1 #endif #ifdef X86_64_SSE42_PCLMULQDQ #include <net_crc_sse.h> +#elif defined ARM64_NEON_PMULL +#include <net_crc_neon.h> #endif /* crc tables */ @@ -74,6 +76,11 @@ static rte_net_crc_handler handlers_sse42[] = { [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_sse42_handler, [RTE_NET_CRC32_ETH] = rte_crc32_eth_sse42_handler, }; +#elif defined ARM64_NEON_PMULL +static rte_net_crc_handler handlers_neon[] = { + [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_neon_handler, + [RTE_NET_CRC32_ETH] = rte_crc32_eth_neon_handler, +}; #endif /** @@ -116,7 +123,7 @@ crc32_eth_init_lut(uint32_t poly, } } -static inline __attribute__((always_inline)) uint32_t +static __rte_always_inline uint32_t crc32_eth_calc_lut(const uint8_t *data, uint32_t data_len, 
uint32_t crc, @@ -162,14 +169,21 @@ void rte_net_crc_set_alg(enum rte_net_crc_alg alg) { switch (alg) { - case RTE_NET_CRC_SSE42: #ifdef X86_64_SSE42_PCLMULQDQ + case RTE_NET_CRC_SSE42: handlers = handlers_sse42; -#else - alg = RTE_NET_CRC_SCALAR; -#endif break; +#elif defined ARM64_NEON_PMULL + /* fall-through */ + case RTE_NET_CRC_NEON: + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) { + handlers = handlers_neon; + break; + } +#endif + /* fall-through */ case RTE_NET_CRC_SCALAR: + /* fall-through */ default: handlers = handlers_scalar; break; @@ -199,8 +213,13 @@ rte_net_crc_init(void) rte_net_crc_scalar_init(); #ifdef X86_64_SSE42_PCLMULQDQ - alg = RTE_NET_CRC_SSE42; - rte_net_crc_sse42_init(); + alg = RTE_NET_CRC_SSE42; + rte_net_crc_sse42_init(); +#elif defined ARM64_NEON_PMULL + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) { + alg = RTE_NET_CRC_NEON; + rte_net_crc_neon_init(); + } #endif rte_net_crc_set_alg(alg); diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h index d22286c6..d01cf4b4 100644 --- a/lib/librte_net/rte_net_crc.h +++ b/lib/librte_net/rte_net_crc.h @@ -57,6 +57,7 @@ enum rte_net_crc_type { enum rte_net_crc_alg { RTE_NET_CRC_SCALAR = 0, RTE_NET_CRC_SSE42, + RTE_NET_CRC_NEON, }; /** @@ -68,6 +69,7 @@ enum rte_net_crc_alg { * This parameter is used to select the CRC implementation version. * - RTE_NET_CRC_SCALAR * - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic) + * - RTE_NET_CRC_NEON (Use ARM Neon intrinsic) */ void rte_net_crc_set_alg(enum rte_net_crc_alg alg); diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c index b599d65d..729e79a3 100644 --- a/lib/librte_pdump/rte_pdump.c +++ b/lib/librte_pdump/rte_pdump.c @@ -46,7 +46,6 @@ #include <rte_lcore.h> #include <rte_log.h> #include <rte_errno.h> -#include <rte_pci.h> #include "rte_pdump.h" diff --git a/lib/librte_port/rte_port_ring.c b/lib/librte_port/rte_port_ring.c index 64bd965f..a4e709c9 100644 --- a/lib/librte_port/rte_port_ring.c +++ b/lib/librte_port/rte_port_ring.c @@ -293,7 +293,7 @@ rte_port_ring_multi_writer_tx(void *port, struct rte_mbuf *pkt) return 0; } -static inline int __attribute__((always_inline)) +static __rte_always_inline int rte_port_ring_writer_tx_bulk_internal(void *port, struct rte_mbuf **pkts, uint64_t pkts_mask, @@ -609,7 +609,7 @@ rte_port_ring_multi_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) return 0; } -static inline int __attribute__((always_inline)) +static __rte_always_inline int rte_port_ring_writer_nodrop_tx_bulk_internal(void *port, struct rte_mbuf **pkts, uint64_t pkts_mask, diff --git a/lib/librte_reorder/rte_reorder.h b/lib/librte_reorder/rte_reorder.h index 737e0554..4cd8de76 100644 --- a/lib/librte_reorder/rte_reorder.h +++ b/lib/librte_reorder/rte_reorder.h @@ -147,7 +147,7 @@ rte_reorder_free(struct rte_reorder_buffer *b); * -1 on error * On error case, rte_errno will be set appropriately: * - ENOSPC - Cannot move existing mbufs from reorder buffer to accommodate - * ealry mbuf, but it can be accomodated by performing drain and then insert. + * ealry mbuf, but it can be accommodated by performing drain and then insert. * - ERANGE - Too early or late mbuf which is vastly out of range of expected * window should be ingnored without any handling. 
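/*
 * Illustrative usage sketch, not part of the patch: driving the new NEON
 * CRC selection from an application.  rte_net_crc_set_alg() falls back to
 * the scalar LUT handlers when PMULL is not available, so requesting
 * RTE_NET_CRC_NEON is safe on any CPU; the helper names below are made up
 * for the example.
 */
#include <rte_net_crc.h>

static void
crc_init_example(void)
{
        /* Normally called once at start-up; fallback to scalar is automatic. */
        rte_net_crc_set_alg(RTE_NET_CRC_NEON);
}

static uint32_t
crc_of_frame_example(const void *buf, uint32_t len)
{
        /* Ethernet CRC-32 over the buffer, using the handler chosen above. */
        return rte_net_crc_calc(buf, len, RTE_NET_CRC32_ETH);
}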
*/ diff --git a/lib/librte_ring/rte_ring.c b/lib/librte_ring/rte_ring.c index 5f98c33f..036467f4 100644 --- a/lib/librte_ring/rte_ring.c +++ b/lib/librte_ring/rte_ring.c @@ -140,8 +140,22 @@ rte_ring_init(struct rte_ring *r, const char *name, unsigned count, r->flags = flags; r->prod.single = (flags & RING_F_SP_ENQ) ? __IS_SP : __IS_MP; r->cons.single = (flags & RING_F_SC_DEQ) ? __IS_SC : __IS_MC; - r->size = count; - r->mask = count - 1; + + if (flags & RING_F_EXACT_SZ) { + r->size = rte_align32pow2(count + 1); + r->mask = r->size - 1; + r->capacity = count; + } else { + if ((!POWEROF2(count)) || (count > RTE_RING_SZ_MASK)) { + RTE_LOG(ERR, RING, + "Requested size is invalid, must be power of 2, and not exceed the size limit %u\n", + RTE_RING_SZ_MASK); + return -EINVAL; + } + r->size = count; + r->mask = count - 1; + r->capacity = r->mask; + } r->prod.head = r->cons.head = 0; r->prod.tail = r->cons.tail = 0; @@ -160,10 +174,15 @@ rte_ring_create(const char *name, unsigned count, int socket_id, ssize_t ring_size; int mz_flags = 0; struct rte_ring_list* ring_list = NULL; + const unsigned int requested_count = count; int ret; ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list); + /* for an exact size ring, round up from count to a power of two */ + if (flags & RING_F_EXACT_SZ) + count = rte_align32pow2(count + 1); + ring_size = rte_ring_get_memsize(count); if (ring_size < 0) { rte_errno = ring_size; @@ -189,12 +208,13 @@ rte_ring_create(const char *name, unsigned count, int socket_id, /* reserve a memory zone for this ring. If we can't get rte_config or * we are secondary process, the memzone_reserve function will set * rte_errno for us appropriately - hence no check in this this function */ - mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags); + mz = rte_memzone_reserve_aligned(mz_name, ring_size, socket_id, + mz_flags, __alignof__(*r)); if (mz != NULL) { r = mz->addr; /* no need to check return value here, we already checked the * arguments above */ - rte_ring_init(r, name, count, flags); + rte_ring_init(r, name, requested_count, flags); te->data = (void *) r; r->memzone = mz; @@ -262,6 +282,7 @@ rte_ring_dump(FILE *f, const struct rte_ring *r) fprintf(f, "ring <%s>@%p\n", r->name, r); fprintf(f, " flags=%x\n", r->flags); fprintf(f, " size=%"PRIu32"\n", r->size); + fprintf(f, " capacity=%"PRIu32"\n", r->capacity); fprintf(f, " ct=%"PRIu32"\n", r->cons.tail); fprintf(f, " ch=%"PRIu32"\n", r->cons.head); fprintf(f, " pt=%"PRIu32"\n", r->prod.tail); diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h index 97f025a1..8f5a4937 100644 --- a/lib/librte_ring/rte_ring.h +++ b/lib/librte_ring/rte_ring.h @@ -101,6 +101,7 @@ extern "C" { #include <rte_atomic.h> #include <rte_branch_prediction.h> #include <rte_memzone.h> +#include <rte_pause.h> #define RTE_TAILQ_RING_NAME "RTE_RING" @@ -153,6 +154,7 @@ struct rte_ring { /**< Memzone, if any, containing the rte_ring */ uint32_t size; /**< Size of ring. */ uint32_t mask; /**< Mask (size-1) of ring. */ + uint32_t capacity; /**< Usable size of ring */ /** Ring producer status. */ struct rte_ring_headtail prod __rte_aligned(PROD_ALIGN); @@ -163,6 +165,15 @@ struct rte_ring { #define RING_F_SP_ENQ 0x0001 /**< The default enqueue is "single-producer". */ #define RING_F_SC_DEQ 0x0002 /**< The default dequeue is "single-consumer". */ +/** + * Ring is to hold exactly requested number of entries. + * Without this flag set, the ring size requested must be a power of 2, and the + * usable space will be that size - 1. 
With the flag, the requested size will + * be rounded up to the next power of two, but the usable space will be exactly + * that requested. Worst case, if a power-of-2 size is requested, half the + * ring space will be wasted. + */ +#define RING_F_EXACT_SZ 0x0004 #define RTE_RING_SZ_MASK (unsigned)(0x0fffffff) /**< Ring size mask */ /* @internal defines for passing to the enqueue dequeue worker functions */ @@ -345,7 +356,7 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r); } \ } while (0) -static inline __attribute__((always_inline)) void +static __rte_always_inline void update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val, uint32_t single) { @@ -383,13 +394,13 @@ update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val, * Actual number of objects enqueued. * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. */ -static inline __attribute__((always_inline)) unsigned int +static __rte_always_inline unsigned int __rte_ring_move_prod_head(struct rte_ring *r, int is_sp, unsigned int n, enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *new_head, uint32_t *free_entries) { - const uint32_t mask = r->mask; + const uint32_t capacity = r->capacity; unsigned int max = n; int success; @@ -399,11 +410,13 @@ __rte_ring_move_prod_head(struct rte_ring *r, int is_sp, *old_head = r->prod.head; const uint32_t cons_tail = r->cons.tail; - /* The subtraction is done between two unsigned 32bits value + /* + * The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * *old_head > cons_tail). So 'free_entries' is always between 0 - * and size(ring)-1. */ - *free_entries = (mask + cons_tail - *old_head); + * and capacity (which is < size). + */ + *free_entries = (capacity + cons_tail - *old_head); /* check that we have enough room in ring */ if (unlikely(n > *free_entries)) @@ -443,7 +456,7 @@ __rte_ring_move_prod_head(struct rte_ring *r, int is_sp, * Actual number of objects enqueued. * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. */ -static inline __attribute__((always_inline)) unsigned int +static __rte_always_inline unsigned int __rte_ring_do_enqueue(struct rte_ring *r, void * const *obj_table, unsigned int n, enum rte_ring_queue_behavior behavior, int is_sp, unsigned int *free_space) @@ -489,7 +502,7 @@ end: * - Actual number of objects dequeued. * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. */ -static inline __attribute__((always_inline)) unsigned int +static __rte_always_inline unsigned int __rte_ring_move_cons_head(struct rte_ring *r, int is_sc, unsigned int n, enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *new_head, @@ -548,7 +561,7 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc, * - Actual number of objects dequeued. * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. 
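/*
 * Illustrative sketch, not part of the patch: creating a ring with the new
 * RING_F_EXACT_SZ flag.  A request for 1000 entries allocates a 1024-slot
 * data store internally, but exactly 1000 entries remain usable.  The ring
 * name and flag combination are example values.
 */
#include <rte_ring.h>

static struct rte_ring *
make_exact_ring_example(void)
{
        struct rte_ring *r;

        /* 1000 is not a power of two; without RING_F_EXACT_SZ this fails. */
        r = rte_ring_create("example_ring", 1000, SOCKET_ID_ANY,
                            RING_F_SP_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ);
        if (r == NULL)
                return NULL;

        /*
         * rte_ring_get_size(r) now reports 1024 (power-of-two data store),
         * while rte_ring_get_capacity(r) reports the 1000 usable entries.
         */
        return r;
}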
*/ -static inline __attribute__((always_inline)) unsigned int +static __rte_always_inline unsigned int __rte_ring_do_dequeue(struct rte_ring *r, void **obj_table, unsigned int n, enum rte_ring_queue_behavior behavior, int is_sc, unsigned int *available) @@ -590,7 +603,7 @@ end: * @return * The number of objects enqueued, either 0 or n */ -static inline unsigned int __attribute__((always_inline)) +static __rte_always_inline unsigned int rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table, unsigned int n, unsigned int *free_space) { @@ -613,7 +626,7 @@ rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table, * @return * The number of objects enqueued, either 0 or n */ -static inline unsigned int __attribute__((always_inline)) +static __rte_always_inline unsigned int rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table, unsigned int n, unsigned int *free_space) { @@ -640,7 +653,7 @@ rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table, * @return * The number of objects enqueued, either 0 or n */ -static inline unsigned int __attribute__((always_inline)) +static __rte_always_inline unsigned int rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table, unsigned int n, unsigned int *free_space) { @@ -662,7 +675,7 @@ rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table, * - 0: Success; objects enqueued. * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. */ -static inline int __attribute__((always_inline)) +static __rte_always_inline int rte_ring_mp_enqueue(struct rte_ring *r, void *obj) { return rte_ring_mp_enqueue_bulk(r, &obj, 1, NULL) ? 0 : -ENOBUFS; @@ -679,7 +692,7 @@ rte_ring_mp_enqueue(struct rte_ring *r, void *obj) * - 0: Success; objects enqueued. * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. */ -static inline int __attribute__((always_inline)) +static __rte_always_inline int rte_ring_sp_enqueue(struct rte_ring *r, void *obj) { return rte_ring_sp_enqueue_bulk(r, &obj, 1, NULL) ? 0 : -ENOBUFS; @@ -700,7 +713,7 @@ rte_ring_sp_enqueue(struct rte_ring *r, void *obj) * - 0: Success; objects enqueued. * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. */ -static inline int __attribute__((always_inline)) +static __rte_always_inline int rte_ring_enqueue(struct rte_ring *r, void *obj) { return rte_ring_enqueue_bulk(r, &obj, 1, NULL) ? 
0 : -ENOBUFS; @@ -724,7 +737,7 @@ rte_ring_enqueue(struct rte_ring *r, void *obj) * @return * The number of objects dequeued, either 0 or n */ -static inline unsigned int __attribute__((always_inline)) +static __rte_always_inline unsigned int rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int n, unsigned int *available) { @@ -748,7 +761,7 @@ rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table, * @return * The number of objects dequeued, either 0 or n */ -static inline unsigned int __attribute__((always_inline)) +static __rte_always_inline unsigned int rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int n, unsigned int *available) { @@ -775,7 +788,7 @@ rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table, * @return * The number of objects dequeued, either 0 or n */ -static inline unsigned int __attribute__((always_inline)) +static __rte_always_inline unsigned int rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int n, unsigned int *available) { @@ -798,10 +811,10 @@ rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int n, * - -ENOENT: Not enough entries in the ring to dequeue; no object is * dequeued. */ -static inline int __attribute__((always_inline)) +static __rte_always_inline int rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p) { - return rte_ring_mc_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOBUFS; + return rte_ring_mc_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOENT; } /** @@ -816,10 +829,10 @@ rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p) * - -ENOENT: Not enough entries in the ring to dequeue, no object is * dequeued. */ -static inline int __attribute__((always_inline)) +static __rte_always_inline int rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p) { - return rte_ring_sc_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOBUFS; + return rte_ring_sc_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOENT; } /** @@ -838,76 +851,71 @@ rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p) * - -ENOENT: Not enough entries in the ring to dequeue, no object is * dequeued. */ -static inline int __attribute__((always_inline)) +static __rte_always_inline int rte_ring_dequeue(struct rte_ring *r, void **obj_p) { return rte_ring_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOENT; } /** - * Test if a ring is full. + * Return the number of entries in a ring. * * @param r * A pointer to the ring structure. * @return - * - 1: The ring is full. - * - 0: The ring is not full. + * The number of entries in the ring. */ -static inline int -rte_ring_full(const struct rte_ring *r) +static inline unsigned +rte_ring_count(const struct rte_ring *r) { uint32_t prod_tail = r->prod.tail; uint32_t cons_tail = r->cons.tail; - return ((cons_tail - prod_tail - 1) & r->mask) == 0; + uint32_t count = (prod_tail - cons_tail) & r->mask; + return (count > r->capacity) ? r->capacity : count; } /** - * Test if a ring is empty. + * Return the number of free entries in a ring. * * @param r * A pointer to the ring structure. * @return - * - 1: The ring is empty. - * - 0: The ring is not empty. + * The number of free entries in the ring. */ -static inline int -rte_ring_empty(const struct rte_ring *r) +static inline unsigned +rte_ring_free_count(const struct rte_ring *r) { - uint32_t prod_tail = r->prod.tail; - uint32_t cons_tail = r->cons.tail; - return !!(cons_tail == prod_tail); + return r->capacity - rte_ring_count(r); } /** - * Return the number of entries in a ring. + * Test if a ring is full. 
* * @param r * A pointer to the ring structure. * @return - * The number of entries in the ring. + * - 1: The ring is full. + * - 0: The ring is not full. */ -static inline unsigned -rte_ring_count(const struct rte_ring *r) +static inline int +rte_ring_full(const struct rte_ring *r) { - uint32_t prod_tail = r->prod.tail; - uint32_t cons_tail = r->cons.tail; - return (prod_tail - cons_tail) & r->mask; + return rte_ring_free_count(r) == 0; } /** - * Return the number of free entries in a ring. + * Test if a ring is empty. * * @param r * A pointer to the ring structure. * @return - * The number of free entries in the ring. + * - 1: The ring is empty. + * - 0: The ring is not empty. */ -static inline unsigned -rte_ring_free_count(const struct rte_ring *r) +static inline int +rte_ring_empty(const struct rte_ring *r) { - uint32_t prod_tail = r->prod.tail; - uint32_t cons_tail = r->cons.tail; - return (cons_tail - prod_tail - 1) & r->mask; + return rte_ring_count(r) == 0; } /** @@ -916,7 +924,9 @@ rte_ring_free_count(const struct rte_ring *r) * @param r * A pointer to the ring structure. * @return - * The number of elements which can be stored in the ring. + * The size of the data store used by the ring. + * NOTE: this is not the same as the usable space in the ring. To query that + * use ``rte_ring_get_capacity()``. */ static inline unsigned int rte_ring_get_size(const struct rte_ring *r) @@ -925,6 +935,20 @@ rte_ring_get_size(const struct rte_ring *r) } /** + * Return the number of elements which can be stored in the ring. + * + * @param r + * A pointer to the ring structure. + * @return + * The usable size of the ring. + */ +static inline unsigned int +rte_ring_get_capacity(const struct rte_ring *r) +{ + return r->capacity; +} + +/** * Dump the status of all rings on the console * * @param f @@ -962,7 +986,7 @@ struct rte_ring *rte_ring_lookup(const char *name); * @return * - n: Actual number of objects enqueued. */ -static inline unsigned __attribute__((always_inline)) +static __rte_always_inline unsigned rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table, unsigned int n, unsigned int *free_space) { @@ -985,7 +1009,7 @@ rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table, * @return * - n: Actual number of objects enqueued. */ -static inline unsigned __attribute__((always_inline)) +static __rte_always_inline unsigned rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table, unsigned int n, unsigned int *free_space) { @@ -1012,7 +1036,7 @@ rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table, * @return * - n: Actual number of objects enqueued. 
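/*
 * Illustrative sketch, not part of the patch: the occupancy helpers above
 * are now expressed in terms of the ring capacity rather than the mask, so
 * the quantities below always add up to the capacity, which may be smaller
 * than the power-of-two size of the data store.
 */
#include <rte_ring.h>

static void
ring_occupancy_example(const struct rte_ring *r)
{
        unsigned int used = rte_ring_count(r);      /* clamped to capacity */
        unsigned int room = rte_ring_free_count(r); /* capacity - used */

        /* used + room == rte_ring_get_capacity(r) <= rte_ring_get_size(r);
         * rte_ring_full() and rte_ring_empty() test room == 0 and used == 0. */
        (void)used;
        (void)room;
}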
*/ -static inline unsigned __attribute__((always_inline)) +static __rte_always_inline unsigned rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table, unsigned int n, unsigned int *free_space) { @@ -1040,7 +1064,7 @@ rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table, * @return * - n: Actual number of objects dequeued, 0 if ring is empty */ -static inline unsigned __attribute__((always_inline)) +static __rte_always_inline unsigned rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned int n, unsigned int *available) { @@ -1065,7 +1089,7 @@ rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table, * @return * - n: Actual number of objects dequeued, 0 if ring is empty */ -static inline unsigned __attribute__((always_inline)) +static __rte_always_inline unsigned rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned int n, unsigned int *available) { @@ -1092,7 +1116,7 @@ rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table, * @return * - Number of objects dequeued */ -static inline unsigned __attribute__((always_inline)) +static __rte_always_inline unsigned rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned int n, unsigned int *available) { diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 614705d8..b7cba110 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -56,8 +56,10 @@ #ifdef RTE_SCHED_VECTOR #include <rte_vect.h> -#if defined(__SSE4__) +#ifdef RTE_ARCH_X86 #define SCHED_VECTOR_SSE4 +#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#define SCHED_VECTOR_NEON #endif #endif @@ -1732,6 +1734,26 @@ grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) return 1; } +#elif defined(SCHED_VECTOR_NEON) + +static inline int +grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) +{ + uint32x4_t index, pipes; + uint32_t *pos = (uint32_t *)port->grinder_base_bmp_pos; + + index = vmovq_n_u32(base_pipe); + pipes = vld1q_u32(pos); + if (!vminvq_u32(veorq_u32(pipes, index))) + return 1; + + pipes = vld1q_u32(pos + 4); + if (!vminvq_u32(veorq_u32(pipes, index))) + return 1; + + return 0; +} + #else static inline int diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile index 0d06d36a..8ddc8804 100644 --- a/lib/librte_table/Makefile +++ b/lib/librte_table/Makefile @@ -69,6 +69,12 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_acl.h endif SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash.h SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru.h +ifeq ($(CONFIG_RTE_ARCH_X86),y) +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru_x86.h +endif +ifeq ($(CONFIG_RTE_ARCH_ARM64),y) +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru_arm64.h +endif SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_array.h SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_stub.h diff --git a/lib/librte_table/rte_lru.h b/lib/librte_table/rte_lru.h index e87e062d..93258ef4 100644 --- a/lib/librte_table/rte_lru.h +++ b/lib/librte_table/rte_lru.h @@ -38,31 +38,15 @@ extern "C" { #endif -#include <stdint.h> - -#ifdef __INTEL_COMPILER -#define GCC_VERSION (0) +#ifdef RTE_ARCH_X86_64 +#include "rte_lru_x86.h" +#elif defined(RTE_ARCH_ARM64) +#include "rte_lru_arm64.h" #else -#define GCC_VERSION (__GNUC__ * 10000+__GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) -#endif - -#ifndef RTE_TABLE_HASH_LRU_STRATEGY -#ifdef __SSE4_2__ -#define RTE_TABLE_HASH_LRU_STRATEGY 2 -#else /* if no SSE, use simple scalar version */ -#define 
RTE_TABLE_HASH_LRU_STRATEGY 1 -#endif -#endif - -#ifndef RTE_ARCH_X86_64 #undef RTE_TABLE_HASH_LRU_STRATEGY #define RTE_TABLE_HASH_LRU_STRATEGY 1 #endif -#if (RTE_TABLE_HASH_LRU_STRATEGY < 0) || (RTE_TABLE_HASH_LRU_STRATEGY > 3) -#error Invalid value for RTE_TABLE_HASH_LRU_STRATEGY -#endif - #if RTE_TABLE_HASH_LRU_STRATEGY == 0 #define lru_init(bucket) \ @@ -118,87 +102,11 @@ do { \ bucket->lru_list = x; \ } while (0) -#elif RTE_TABLE_HASH_LRU_STRATEGY == 2 - -#if GCC_VERSION > 40306 -#include <x86intrin.h> -#else -#include <emmintrin.h> -#include <smmintrin.h> -#include <xmmintrin.h> -#endif - -#define lru_init(bucket) \ -do \ - bucket->lru_list = 0x0000000100020003LLU; \ -while (0) - -#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU) - -#define lru_update(bucket, mru_val) \ -do { \ - /* set up the masks for all possible shuffles, depends on pos */\ - static uint64_t masks[10] = { \ - /* Shuffle order; Make Zero (see _mm_shuffle_epi8 manual) */\ - 0x0100070605040302, 0x8080808080808080, \ - 0x0302070605040100, 0x8080808080808080, \ - 0x0504070603020100, 0x8080808080808080, \ - 0x0706050403020100, 0x8080808080808080, \ - 0x0706050403020100, 0x8080808080808080}; \ - /* load up one register with repeats of mru-val */ \ - uint64_t mru2 = mru_val; \ - uint64_t mru3 = mru2 | (mru2 << 16); \ - uint64_t lru = bucket->lru_list; \ - /* XOR to cause the word we're looking for to go to zero */ \ - uint64_t mru = lru ^ ((mru3 << 32) | mru3); \ - __m128i c = _mm_cvtsi64_si128(mru); \ - __m128i b = _mm_cvtsi64_si128(lru); \ - /* Find the minimum value (first zero word, if it's in there) */\ - __m128i d = _mm_minpos_epu16(c); \ - /* Second word is the index to found word (first word is the value) */\ - unsigned pos = _mm_extract_epi16(d, 1); \ - /* move the recently used location to top of list */ \ - __m128i k = _mm_shuffle_epi8(b, *((__m128i *) &masks[2 * pos]));\ - /* Finally, update the original list with the reordered data */ \ - bucket->lru_list = _mm_extract_epi64(k, 0); \ - /* Phwew! */ \ -} while (0) - -#elif RTE_TABLE_HASH_LRU_STRATEGY == 3 +#elif (RTE_TABLE_HASH_LRU_STRATEGY == 2) || (RTE_TABLE_HASH_LRU_STRATEGY == 3) -#if GCC_VERSION > 40306 -#include <x86intrin.h> -#else -#include <emmintrin.h> -#include <smmintrin.h> -#include <xmmintrin.h> -#endif - -#define lru_init(bucket) \ -do \ - bucket->lru_list = ~0LLU; \ -while (0) - - -static inline int -f_lru_pos(uint64_t lru_list) -{ - __m128i lst = _mm_set_epi64x((uint64_t)-1, lru_list); - __m128i min = _mm_minpos_epu16(lst); - return _mm_extract_epi16(min, 1); -} -#define lru_pos(bucket) f_lru_pos(bucket->lru_list) - -#define lru_update(bucket, mru_val) \ -do { \ - const uint64_t orvals[] = {0xFFFFLLU, 0xFFFFLLU << 16, \ - 0xFFFFLLU << 32, 0xFFFFLLU << 48, 0LLU}; \ - const uint64_t decs[] = {0x1000100010001LLU, 0}; \ - __m128i lru = _mm_cvtsi64_si128(bucket->lru_list); \ - __m128i vdec = _mm_cvtsi64_si128(decs[mru_val>>2]); \ - lru = _mm_subs_epu16(lru, vdec); \ - bucket->lru_list = _mm_extract_epi64(lru, 0) | orvals[mru_val]; \ -} while (0) +/** + * These strategies are implemented in architecture specific header files. + */ #else diff --git a/lib/librte_table/rte_lru_arm64.h b/lib/librte_table/rte_lru_arm64.h new file mode 100644 index 00000000..61735238 --- /dev/null +++ b/lib/librte_table/rte_lru_arm64.h @@ -0,0 +1,88 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2017. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __RTE_LRU_ARM64_H__ +#define __RTE_LRU_ARM64_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> +#include <rte_vect.h> + +#ifndef RTE_TABLE_HASH_LRU_STRATEGY +#ifdef RTE_MACHINE_CPUFLAG_NEON +#define RTE_TABLE_HASH_LRU_STRATEGY 3 +#else /* if no NEON, use simple scalar version */ +#define RTE_TABLE_HASH_LRU_STRATEGY 1 +#endif +#endif + +#if RTE_TABLE_HASH_LRU_STRATEGY == 3 + +#define lru_init(bucket) \ + { bucket->lru_list = ~0LLU; } + +static inline int +f_lru_pos(uint64_t lru_list) +{ + /* Compare the vector to zero vector */ + uint16x4_t lru_vec = vld1_u16((uint16_t *)&lru_list); + uint16x4_t min_vec = vmov_n_u16(vminv_u16(lru_vec)); + uint64_t mask = vget_lane_u64(vreinterpret_u64_u16( + vceq_u16(min_vec, lru_vec)), 0); + return __builtin_clzl(mask) >> 4; +} +#define lru_pos(bucket) f_lru_pos(bucket->lru_list) + +#define lru_update(bucket, mru_val) \ +do { \ + const uint64_t orvals[] = {0xFFFFLLU, 0xFFFFLLU << 16, \ + 0xFFFFLLU << 32, 0xFFFFLLU << 48, 0LLU}; \ + const uint64_t decs[] = {0x1000100010001LLU, 0}; \ + uint64x1_t lru = vdup_n_u64(bucket->lru_list); \ + uint64x1_t vdec = vdup_n_u64(decs[mru_val>>2]); \ + bucket->lru_list = vget_lane_u64(vreinterpret_u64_u16( \ + vsub_u16(vreinterpret_u16_u64(lru), \ + vreinterpret_u16_u64(vdec))), \ + 0); \ + bucket->lru_list |= orvals[mru_val]; \ +} while (0) + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_table/rte_lru_x86.h b/lib/librte_table/rte_lru_x86.h new file mode 100644 index 00000000..10f513cd --- /dev/null +++ b/lib/librte_table/rte_lru_x86.h @@ -0,0 +1,130 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
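/*
 * Reference-only scalar sketch, not part of the patch: what the strategy-3
 * lru_pos() helpers compute.  The 64-bit lru_list packs four 16-bit recency
 * counters, and the least recently used entry is the lane holding the
 * smallest counter.  Lane numbering and tie-breaking may differ between the
 * SSE and NEON implementations; this only shows the intent.
 */
#include <stdint.h>

static inline int
f_lru_pos_scalar_example(uint64_t lru_list)
{
        uint16_t min = (uint16_t)lru_list;
        int i, pos = 0;

        for (i = 1; i < 4; i++) {
                uint16_t v = (uint16_t)(lru_list >> (16 * i));

                if (v < min) {
                        min = v;
                        pos = i;
                }
        }
        return pos;
}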
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_LRU_X86_H__ +#define __INCLUDE_RTE_LRU_X86_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +#ifndef RTE_TABLE_HASH_LRU_STRATEGY +#define RTE_TABLE_HASH_LRU_STRATEGY 2 +#endif + +#if RTE_TABLE_HASH_LRU_STRATEGY == 2 + +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION > 40306) +#include <x86intrin.h> +#else +#include <emmintrin.h> +#include <smmintrin.h> +#include <xmmintrin.h> +#endif + +#define lru_init(bucket) \ + { bucket->lru_list = 0x0000000100020003LLU; } + +#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU) + +#define lru_update(bucket, mru_val) \ +do { \ + /* set up the masks for all possible shuffles, depends on pos */\ + static uint64_t masks[10] = { \ + /* Shuffle order; Make Zero (see _mm_shuffle_epi8 manual) */\ + 0x0100070605040302, 0x8080808080808080, \ + 0x0302070605040100, 0x8080808080808080, \ + 0x0504070603020100, 0x8080808080808080, \ + 0x0706050403020100, 0x8080808080808080, \ + 0x0706050403020100, 0x8080808080808080}; \ + /* load up one register with repeats of mru-val */ \ + uint64_t mru2 = mru_val; \ + uint64_t mru3 = mru2 | (mru2 << 16); \ + uint64_t lru = bucket->lru_list; \ + /* XOR to cause the word we're looking for to go to zero */ \ + uint64_t mru = lru ^ ((mru3 << 32) | mru3); \ + __m128i c = _mm_cvtsi64_si128(mru); \ + __m128i b = _mm_cvtsi64_si128(lru); \ + /* Find the minimum value (first zero word, if it's in there) */\ + __m128i d = _mm_minpos_epu16(c); \ + /* Second word is the index to found word (first word is the value) */\ + unsigned int pos = _mm_extract_epi16(d, 1); \ + /* move the recently used location to top of list */ \ + __m128i k = _mm_shuffle_epi8(b, *((__m128i *) &masks[2 * pos]));\ + /* Finally, update the original list with the reordered data */ \ + bucket->lru_list = _mm_extract_epi64(k, 0); \ + /* Phwew! 
*/ \ +} while (0) + +#elif RTE_TABLE_HASH_LRU_STRATEGY == 3 + +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION > 40306) +#include <x86intrin.h> +#else +#include <emmintrin.h> +#include <smmintrin.h> +#include <xmmintrin.h> +#endif + +#define lru_init(bucket) \ + { bucket->lru_list = ~0LLU; } + +static inline int +f_lru_pos(uint64_t lru_list) +{ + __m128i lst = _mm_set_epi64x((uint64_t)-1, lru_list); + __m128i min = _mm_minpos_epu16(lst); + return _mm_extract_epi16(min, 1); +} +#define lru_pos(bucket) f_lru_pos(bucket->lru_list) + +#define lru_update(bucket, mru_val) \ +do { \ + const uint64_t orvals[] = {0xFFFFLLU, 0xFFFFLLU << 16, \ + 0xFFFFLLU << 32, 0xFFFFLLU << 48, 0LLU}; \ + const uint64_t decs[] = {0x1000100010001LLU, 0}; \ + __m128i lru = _mm_cvtsi64_si128(bucket->lru_list); \ + __m128i vdec = _mm_cvtsi64_si128(decs[mru_val>>2]); \ + lru = _mm_subs_epu16(lru, vdec); \ + bucket->lru_list = _mm_extract_epi64(lru, 0) | orvals[mru_val]; \ +} while (0) + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c index 18782fab..5ee08408 100644 --- a/lib/librte_timer/rte_timer.c +++ b/lib/librte_timer/rte_timer.c @@ -46,11 +46,11 @@ #include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_lcore.h> #include <rte_branch_prediction.h> #include <rte_spinlock.h> #include <rte_random.h> +#include <rte_pause.h> #include "rte_timer.h" @@ -183,7 +183,7 @@ timer_set_running_state(struct rte_timer *tim) return -1; /* here, we know that timer is stopped or pending, - * mark it atomically as beeing configured */ + * mark it atomically as being configured */ status.state = RTE_TIMER_RUNNING; status.owner = (int16_t)lcore_id; success = rte_atomic32_cmpset(&tim->status.u32, diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index 605e47cb..8c974eb1 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -120,7 +120,7 @@ struct vhost_device_ops { * @return * the host virtual address on success, 0 on failure */ -static inline uint64_t __attribute__((always_inline)) +static __rte_always_inline uint64_t rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa) { struct rte_vhost_mem_region *reg; @@ -365,7 +365,7 @@ struct rte_mempool; /** * This function adds buffers to the virtio devices RX virtqueue. Buffers can * be received from the physical port or from another virtual device. A packet - * count is returned to indicate the number of packets that were succesfully + * count is returned to indicate the number of packets that were successfully * added to the RX queue. * @param vid * vhost device ID @@ -432,6 +432,18 @@ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, struct rte_vhost_vring *vring); +/** + * Get vhost RX queue avail count. 
+ * + * @param vid + * vhost device ID + * @param qid + * virtio queue index in mq case + * @return + * num of desc available + */ +uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid); + #ifdef __cplusplus } #endif diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index 07858732..1e704953 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -45,3 +45,10 @@ DPDK_17.05 { rte_vhost_log_write; } DPDK_16.07; + +DPDK_17.08 { + global: + + rte_vhost_rx_queue_count; + +} DPDK_17.05; diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index c7f99b08..41aa3f9b 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -445,13 +445,22 @@ vhost_user_reconnect_init(void) { int ret; - pthread_mutex_init(&reconn_list.mutex, NULL); + ret = pthread_mutex_init(&reconn_list.mutex, NULL); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "failed to initialize mutex"); + return ret; + } TAILQ_INIT(&reconn_list.head); ret = pthread_create(&reconn_tid, NULL, vhost_user_client_reconnect, NULL); - if (ret < 0) + if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread"); + if (pthread_mutex_destroy(&reconn_list.mutex)) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to destroy reconnect mutex"); + } + } return ret; } @@ -613,8 +622,19 @@ rte_vhost_driver_register(const char *path, uint64_t flags) goto out; memset(vsocket, 0, sizeof(struct vhost_user_socket)); vsocket->path = strdup(path); + if (vsocket->path == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "error: failed to copy socket path string\n"); + free(vsocket); + goto out; + } TAILQ_INIT(&vsocket->conn_list); - pthread_mutex_init(&vsocket->conn_mutex, NULL); + ret = pthread_mutex_init(&vsocket->conn_mutex, NULL); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, + "error: failed to init connection mutex\n"); + goto out_free; + } vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY; /* @@ -636,9 +656,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags) vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT); if (vsocket->reconnect && reconn_tid == 0) { if (vhost_user_reconnect_init() < 0) { - free(vsocket->path); - free(vsocket); - goto out; + goto out_mutex; } } } else { @@ -646,13 +664,22 @@ rte_vhost_driver_register(const char *path, uint64_t flags) } ret = create_unix_socket(vsocket); if (ret < 0) { - free(vsocket->path); - free(vsocket); - goto out; + goto out_mutex; } vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket; + pthread_mutex_unlock(&vhost_user.mutex); + return ret; + +out_mutex: + if (pthread_mutex_destroy(&vsocket->conn_mutex)) { + RTE_LOG(ERR, VHOST_CONFIG, + "error: failed to destroy connection mutex\n"); + } +out_free: + free(vsocket->path); + free(vsocket); out: pthread_mutex_unlock(&vhost_user.mutex); @@ -724,6 +751,7 @@ rte_vhost_driver_unregister(const char *path) } pthread_mutex_unlock(&vsocket->conn_mutex); + pthread_mutex_destroy(&vsocket->conn_mutex); free(vsocket->path); free(vsocket); diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 1f565fbb..0b6aa1cc 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -40,6 +40,7 @@ #include <numaif.h> #endif +#include <rte_errno.h> #include <rte_ethdev.h> #include <rte_log.h> #include <rte_string_fns.h> @@ -272,7 +273,7 @@ rte_vhost_get_mtu(int vid, uint16_t *mtu) if (!(dev->flags & VIRTIO_DEV_READY)) return -EAGAIN; - if (!(dev->features & VIRTIO_NET_F_MTU)) + if (!(dev->features & (1ULL << 
VIRTIO_NET_F_MTU))) return -ENOTSUP; *mtu = dev->mtu; @@ -295,7 +296,8 @@ rte_vhost_get_numa_node(int vid) MPOL_F_NODE | MPOL_F_ADDR); if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, - "(%d) failed to query numa node: %d\n", vid, ret); + "(%d) failed to query numa node: %s\n", + vid, rte_strerror(errno)); return -1; } @@ -475,3 +477,29 @@ rte_vhost_log_used_vring(int vid, uint16_t vring_idx, vhost_log_used_vring(dev, vq, offset, len); } + +uint32_t +rte_vhost_rx_queue_count(int vid, uint16_t qid) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (dev == NULL) + return 0; + + if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) { + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, qid); + return 0; + } + + vq = dev->virtqueue[qid]; + if (vq == NULL) + return 0; + + if (unlikely(vq->enabled == 0 || vq->avail == NULL)) + return 0; + + return *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx; +} diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index ddd8a9c4..6fe72aeb 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -201,13 +201,22 @@ struct virtio_net { #define VHOST_LOG_PAGE 4096 -static inline void __attribute__((always_inline)) +/* + * Atomically set a bit in memory. + */ +static __rte_always_inline void +vhost_set_bit(unsigned int nr, volatile uint8_t *addr) +{ + __sync_fetch_and_or_8(addr, (1U << nr)); +} + +static __rte_always_inline void vhost_log_page(uint8_t *log_base, uint64_t page) { - log_base[page / 8] |= 1 << (page % 8); + vhost_set_bit(page % 8, &log_base[page / 8]); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) { uint64_t page; @@ -229,7 +238,7 @@ vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) } } -static inline void __attribute__((always_inline)) +static __rte_always_inline void vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, uint64_t offset, uint64_t len) { @@ -272,7 +281,7 @@ extern uint64_t VHOST_FEATURES; extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; /* Convert guest physical address to host physical address */ -static inline phys_addr_t __attribute__((always_inline)) +static __rte_always_inline phys_addr_t gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) { uint32_t i; diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index 28808815..ad2e8d38 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -168,8 +168,12 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) uint64_t vhost_features = 0; rte_vhost_driver_get_features(dev->ifname, &vhost_features); - if (features & ~vhost_features) + if (features & ~vhost_features) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) received invalid negotiated features.\n", + dev->vid); return -1; + } if ((dev->flags & VIRTIO_DEV_RUNNING) && dev->features != features) { if (dev->notify_ops->features_changed) @@ -197,11 +201,11 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) */ static int vhost_user_set_vring_num(struct virtio_net *dev, - struct vhost_vring_state *state) + VhostUserMsg *msg) { - struct vhost_virtqueue *vq = dev->virtqueue[state->index]; + struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; - vq->size = state->num; + vq->size = msg->payload.state.num; if (dev->dequeue_zero_copy) { vq->nr_zmbuf = 0; @@ -332,7 +336,7 @@ 
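/*
 * Illustrative sketch, not part of the patch: polling the new
 * rte_vhost_rx_queue_count() before dequeuing.  Odd virtqueue indices are
 * the guest TX queues (the host's receive side), hence index 1 for the
 * first queue; vid and the mempool are assumed to come from the caller.
 */
#include <rte_vhost.h>
#include <rte_mbuf.h>

#define EX_BURST 32

static uint16_t
poll_guest_tx_example(int vid, struct rte_mempool *mp)
{
        struct rte_mbuf *pkts[EX_BURST];
        uint16_t n, i;

        /* How many descriptors has the guest made available on queue 1? */
        if (rte_vhost_rx_queue_count(vid, 1) == 0)
                return 0;

        n = rte_vhost_dequeue_burst(vid, 1, mp, pkts, EX_BURST);

        /* A real application would forward these; the sketch drops them. */
        for (i = 0; i < n; i++)
                rte_pktmbuf_free(pkts[i]);

        return n;
}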
qva_to_vva(struct virtio_net *dev, uint64_t qva) * This function then converts these to our address space. */ static int -vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr) +vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg) { struct vhost_virtqueue *vq; @@ -340,11 +344,11 @@ vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr) return -1; /* addr->index refers to the queue index. The txq 1, rxq is 0. */ - vq = dev->virtqueue[addr->index]; + vq = dev->virtqueue[msg->payload.addr.index]; /* The addresses are converted from QEMU virtual to Vhost virtual. */ vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev, - addr->desc_user_addr); + msg->payload.addr.desc_user_addr); if (vq->desc == 0) { RTE_LOG(ERR, VHOST_CONFIG, "(%d) failed to find desc ring address.\n", @@ -352,11 +356,11 @@ vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr) return -1; } - dev = numa_realloc(dev, addr->index); - vq = dev->virtqueue[addr->index]; + dev = numa_realloc(dev, msg->payload.addr.index); + vq = dev->virtqueue[msg->payload.addr.index]; vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev, - addr->avail_user_addr); + msg->payload.addr.avail_user_addr); if (vq->avail == 0) { RTE_LOG(ERR, VHOST_CONFIG, "(%d) failed to find avail ring address.\n", @@ -365,7 +369,7 @@ vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr) } vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev, - addr->used_user_addr); + msg->payload.addr.used_user_addr); if (vq->used == 0) { RTE_LOG(ERR, VHOST_CONFIG, "(%d) failed to find used ring address.\n", @@ -382,7 +386,7 @@ vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr) vq->last_avail_idx = vq->used->idx; } - vq->log_guest_addr = addr->log_guest_addr; + vq->log_guest_addr = msg->payload.addr.log_guest_addr; LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", dev->vid, vq->desc); @@ -401,10 +405,12 @@ vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr) */ static int vhost_user_set_vring_base(struct virtio_net *dev, - struct vhost_vring_state *state) + VhostUserMsg *msg) { - dev->virtqueue[state->index]->last_used_idx = state->num; - dev->virtqueue[state->index]->last_avail_idx = state->num; + dev->virtqueue[msg->payload.state.index]->last_used_idx = + msg->payload.state.num; + dev->virtqueue[msg->payload.state.index]->last_avail_idx = + msg->payload.state.num; return 0; } @@ -517,6 +523,13 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) dev->max_guest_pages = 8; dev->guest_pages = malloc(dev->max_guest_pages * sizeof(struct guest_page)); + if (dev->guest_pages == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to allocate memory " + "for dev->guest_pages\n", + dev->vid); + return -1; + } } dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) + @@ -695,9 +708,9 @@ free_zmbufs(struct vhost_virtqueue *vq) */ static int vhost_user_get_vring_base(struct virtio_net *dev, - struct vhost_vring_state *state) + VhostUserMsg *msg) { - struct vhost_virtqueue *vq = dev->virtqueue[state->index]; + struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; /* We have to stop the queue (virtio) if it is running. 
*/ if (dev->flags & VIRTIO_DEV_RUNNING) { @@ -708,10 +721,11 @@ vhost_user_get_vring_base(struct virtio_net *dev, dev->flags &= ~VIRTIO_DEV_READY; /* Here we are safe to get the last used index */ - state->num = vq->last_used_idx; + msg->payload.state.num = vq->last_used_idx; RTE_LOG(INFO, VHOST_CONFIG, - "vring base idx:%d file:%d\n", state->index, state->num); + "vring base idx:%d file:%d\n", msg->payload.state.index, + msg->payload.state.num); /* * Based on current qemu vhost-user implementation, this message is * sent and only sent in vhost_vring_stop. @@ -736,18 +750,19 @@ vhost_user_get_vring_base(struct virtio_net *dev, */ static int vhost_user_set_vring_enable(struct virtio_net *dev, - struct vhost_vring_state *state) + VhostUserMsg *msg) { - int enable = (int)state->num; + int enable = (int)msg->payload.state.num; RTE_LOG(INFO, VHOST_CONFIG, "set queue enable: %d to qp idx: %d\n", - enable, state->index); + enable, msg->payload.state.index); if (dev->notify_ops->vring_state_changed) - dev->notify_ops->vring_state_changed(dev->vid, state->index, enable); + dev->notify_ops->vring_state_changed(dev->vid, + msg->payload.state.index, enable); - dev->virtqueue[state->index]->enabled = enable; + dev->virtqueue[msg->payload.state.index]->enabled = enable; return 0; } @@ -1036,17 +1051,17 @@ vhost_user_msg_handler(int vid, int fd) break; case VHOST_USER_SET_VRING_NUM: - vhost_user_set_vring_num(dev, &msg.payload.state); + vhost_user_set_vring_num(dev, &msg); break; case VHOST_USER_SET_VRING_ADDR: - vhost_user_set_vring_addr(dev, &msg.payload.addr); + vhost_user_set_vring_addr(dev, &msg); break; case VHOST_USER_SET_VRING_BASE: - vhost_user_set_vring_base(dev, &msg.payload.state); + vhost_user_set_vring_base(dev, &msg); break; case VHOST_USER_GET_VRING_BASE: - vhost_user_get_vring_base(dev, &msg.payload.state); + vhost_user_get_vring_base(dev, &msg); msg.size = sizeof(msg.payload.state); send_vhost_message(fd, &msg); break; @@ -1071,7 +1086,7 @@ vhost_user_msg_handler(int vid, int fd) break; case VHOST_USER_SET_VRING_ENABLE: - vhost_user_set_vring_enable(dev, &msg.payload.state); + vhost_user_set_vring_enable(dev, &msg); break; case VHOST_USER_SEND_RARP: vhost_user_send_rarp(dev, &msg); diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 48219e05..a5f0eeba 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -55,7 +55,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; } -static inline void __attribute__((always_inline)) +static __rte_always_inline void do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, uint16_t to, uint16_t from, uint16_t size) { @@ -67,7 +67,7 @@ do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, size * sizeof(struct vring_used_elem)); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq) { uint16_t used_idx = vq->last_used_idx & (vq->size - 1); @@ -95,7 +95,7 @@ flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq) sizeof(vq->used->idx)); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void update_shadow_used_ring(struct vhost_virtqueue *vq, uint16_t desc_idx, uint16_t len) { @@ -114,11 +114,16 @@ update_shadow_used_ring(struct vhost_virtqueue *vq, static void virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr 
*net_hdr) { - if (m_buf->ol_flags & PKT_TX_L4_MASK) { + uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK; + + if (m_buf->ol_flags & PKT_TX_TCP_SEG) + csum_l4 |= PKT_TX_TCP_CKSUM; + + if (csum_l4) { net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; - switch (m_buf->ol_flags & PKT_TX_L4_MASK) { + switch (csum_l4) { case PKT_TX_TCP_CKSUM: net_hdr->csum_offset = (offsetof(struct tcp_hdr, cksum)); @@ -138,6 +143,15 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0); } + /* IP cksum verification cannot be bypassed, then calculate here */ + if (m_buf->ol_flags & PKT_TX_IP_CKSUM) { + struct ipv4_hdr *ipv4_hdr; + + ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct ipv4_hdr *, + m_buf->l2_len); + ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); + } + if (m_buf->ol_flags & PKT_TX_TCP_SEG) { if (m_buf->ol_flags & PKT_TX_IPV4) net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; @@ -153,7 +167,7 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) } } -static inline int __attribute__((always_inline)) +static __rte_always_inline int copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, struct rte_mbuf *m, uint16_t desc_idx, uint32_t size) { @@ -233,11 +247,11 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, /** * This function adds buffers to the virtio devices RX virtqueue. Buffers can * be received from the physical port or from another virtio device. A packet - * count is returned to indicate the number of packets that are succesfully + * count is returned to indicate the number of packets that are successfully * added to the RX queue. This function works when the mbuf is scattered, but * it doesn't support the mergeable feature. 
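/*
 * Illustrative sketch, not part of the patch: how a sender prepares an mbuf
 * so that virtio_enqueue_offload() above takes the TSO path.  With
 * PKT_TX_TCP_SEG set, the TCP checksum request is now implied, and when
 * PKT_TX_IP_CKSUM is set the IPv4 header checksum is computed in software,
 * since the receiver cannot be told to skip verifying it.  Field values are
 * examples only.
 */
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_tcp.h>

static void
request_tso_example(struct rte_mbuf *m, uint16_t mss)
{
        m->l2_len = sizeof(struct ether_hdr);
        m->l3_len = sizeof(struct ipv4_hdr);
        m->l4_len = sizeof(struct tcp_hdr);
        m->tso_segsz = mss;

        /* The TCP checksum flag no longer needs to be set explicitly. */
        m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_SEG;
}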
*/ -static inline uint32_t __attribute__((always_inline)) +static __rte_always_inline uint32_t virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count) { @@ -335,7 +349,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, return count; } -static inline int __attribute__((always_inline)) +static __rte_always_inline int fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq, uint32_t avail_idx, uint32_t *vec_idx, struct buf_vector *buf_vec, uint16_t *desc_chain_head, @@ -424,7 +438,7 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, return 0; } -static inline int __attribute__((always_inline)) +static __rte_always_inline int copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, struct buf_vector *buf_vec, uint16_t num_buffers) { @@ -512,7 +526,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, return 0; } -static inline uint32_t __attribute__((always_inline)) +static __rte_always_inline uint32_t virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count) { @@ -601,9 +615,11 @@ static inline bool virtio_net_with_host_offload(struct virtio_net *dev) { if (dev->features & - (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_ECN | - VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 | - VIRTIO_NET_F_HOST_UFO)) + ((1ULL << VIRTIO_NET_F_CSUM) | + (1ULL << VIRTIO_NET_F_HOST_ECN) | + (1ULL << VIRTIO_NET_F_HOST_TSO4) | + (1ULL << VIRTIO_NET_F_HOST_TSO6) | + (1ULL << VIRTIO_NET_F_HOST_UFO))) return true; return false; @@ -655,7 +671,7 @@ parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr) } } -static inline void __attribute__((always_inline)) +static __rte_always_inline void vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m) { uint16_t l4_proto = 0; @@ -743,13 +759,13 @@ make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac) return 0; } -static inline void __attribute__((always_inline)) +static __rte_always_inline void put_zmbuf(struct zcopy_mbuf *zmbuf) { zmbuf->in_use = 0; } -static inline int __attribute__((always_inline)) +static __rte_always_inline int copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx, struct rte_mempool *mbuf_pool) @@ -899,7 +915,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, return 0; } -static inline void __attribute__((always_inline)) +static __rte_always_inline void update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, uint32_t used_idx, uint32_t desc_idx) { @@ -910,7 +926,7 @@ update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, sizeof(vq->used->ring[used_idx])); } -static inline void __attribute__((always_inline)) +static __rte_always_inline void update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq, uint32_t count) { @@ -930,7 +946,7 @@ update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq, eventfd_write(vq->callfd, (eventfd_t)1); } -static inline struct zcopy_mbuf *__attribute__((always_inline)) +static __rte_always_inline struct zcopy_mbuf * get_zmbuf(struct vhost_virtqueue *vq) { uint16_t i; @@ -961,7 +977,7 @@ again: return NULL; } -static inline bool __attribute__((always_inline)) +static __rte_always_inline bool mbuf_is_consumed(struct rte_mbuf *m) { while (m) { |