diff options
author | 2017-02-05 15:21:19 +0200 | |
---|---|---|
committer | 2017-02-13 12:32:01 +0200 | |
commit | 9ca4a157305e4e23a892ba9bafc9eee0f66954ce (patch) | |
tree | 1a8afcf815fd33e7623e3c16246abe86c01bc8fd /src/dpdk/lib | |
parent | 2dab8f65015e9fa90df395be6ee1a07e9ac71044 (diff) |
dpdk1702-rc2 upstream files unchanged + mlx5 driver rc3
Signed-off-by: Ido Barnea <ibarnea@cisco.com>
Diffstat (limited to 'src/dpdk/lib')
170 files changed, 8781 insertions, 8896 deletions
diff --git a/src/dpdk/lib/librte_acl/acl.h b/src/dpdk/lib/librte_acl/acl.h index 09d67841..6664a55e 100644 --- a/src/dpdk/lib/librte_acl/acl.h +++ b/src/dpdk/lib/librte_acl/acl.h @@ -234,6 +234,10 @@ int rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data, uint32_t *results, uint32_t num, uint32_t categories); +int +rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/src/dpdk/lib/librte_acl/acl_run.h b/src/dpdk/lib/librte_acl/acl_run.h index b2fc42c6..a862ff6e 100644 --- a/src/dpdk/lib/librte_acl/acl_run.h +++ b/src/dpdk/lib/librte_acl/acl_run.h @@ -39,7 +39,9 @@ #define MAX_SEARCHES_AVX16 16 #define MAX_SEARCHES_SSE8 8 +#define MAX_SEARCHES_ALTIVEC8 8 #define MAX_SEARCHES_SSE4 4 +#define MAX_SEARCHES_ALTIVEC4 4 #define MAX_SEARCHES_SCALAR 2 #define GET_NEXT_4BYTES(prm, idx) \ @@ -67,10 +69,10 @@ struct acl_flow_data { uint32_t trie; /* current trie index (0 to N-1) */ uint32_t cmplt_size; + /* maximum number of packets to process */ uint32_t total_packets; - uint32_t categories; /* number of result categories per packet. */ - /* maximum number of packets to process */ + uint32_t categories; const uint64_t *trans; const uint8_t **data; uint32_t *results; diff --git a/src/dpdk/lib/librte_eal/bsdapp/eal/eal_log.c b/src/dpdk/lib/librte_acl/acl_run_altivec.c index a425f7a8..35235260 100644 --- a/src/dpdk/lib/librte_eal/bsdapp/eal/eal_log.c +++ b/src/dpdk/lib/librte_acl/acl_run_altivec.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright (C) IBM Corporation 2016. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,27 +31,17 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include <stdio.h> -#include <rte_common.h> -#include <rte_log.h> - -#include <eal_private.h> - -/* - * set the log to default function, called during eal init process, - * once memzones are available. - */ -int -rte_eal_log_init(const char *id __rte_unused, int facility __rte_unused) -{ - if (rte_eal_common_log_init(stderr) < 0) - return -1; - return 0; -} +#include "acl_run_altivec.h" int -rte_eal_log_early_init(void) +rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories) { - rte_openlog_stream(stderr); - return 0; + if (likely(num >= MAX_SEARCHES_ALTIVEC8)) + return search_altivec_8(ctx, data, results, num, categories); + else if (num >= MAX_SEARCHES_ALTIVEC4) + return search_altivec_4(ctx, data, results, num, categories); + else + return rte_acl_classify_scalar(ctx, data, results, num, + categories); } diff --git a/src/dpdk/lib/librte_acl/acl_run_altivec.h b/src/dpdk/lib/librte_acl/acl_run_altivec.h new file mode 100644 index 00000000..7d329bcf --- /dev/null +++ b/src/dpdk/lib/librte_acl/acl_run_altivec.h @@ -0,0 +1,329 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "acl_run.h" +#include "acl_vect.h" + +struct _altivec_acl_const { + rte_xmm_t xmm_shuffle_input; + rte_xmm_t xmm_index_mask; + rte_xmm_t xmm_ones_16; + rte_xmm_t range_base; +} altivec_acl_const __attribute__((aligned(RTE_CACHE_LINE_SIZE))) = { + { + .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c} + }, + { + .u32 = {RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX} + }, + { + .u16 = {1, 1, 1, 1, 1, 1, 1, 1} + }, + { + .u32 = {0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c} + }, +}; + +/* + * Resolve priority for multiple results (altivec version). + * This consists comparing the priority of the current traversal with the + * running set of results for the packet. + * For each result, keep a running array of the result (rule number) and + * its priority for each category. 
+ */ +static inline void +resolve_priority_altivec(uint64_t transition, int n, + const struct rte_acl_ctx *ctx, struct parms *parms, + const struct rte_acl_match_results *p, uint32_t categories) +{ + uint32_t x; + xmm_t results, priority, results1, priority1; + vector bool int selector; + xmm_t *saved_results, *saved_priority; + + for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) { + + saved_results = (xmm_t *)(&parms[n].cmplt->results[x]); + saved_priority = + (xmm_t *)(&parms[n].cmplt->priority[x]); + + /* get results and priorities for completed trie */ + results = *(const xmm_t *)&p[transition].results[x]; + priority = *(const xmm_t *)&p[transition].priority[x]; + + /* if this is not the first completed trie */ + if (parms[n].cmplt->count != ctx->num_tries) { + + /* get running best results and their priorities */ + results1 = *saved_results; + priority1 = *saved_priority; + + /* select results that are highest priority */ + selector = vec_cmpgt(priority1, priority); + results = vec_sel(results, results1, selector); + priority = vec_sel(priority, priority1, + selector); + } + + /* save running best results and their priorities */ + *saved_results = results; + *saved_priority = priority; + } +} + +/* + * Check for any match in 4 transitions + */ +static inline __attribute__((always_inline)) uint32_t +check_any_match_x4(uint64_t val[]) +{ + return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH; +} + +static inline __attribute__((always_inline)) void +acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, + struct acl_flow_data *flows, uint64_t transitions[]) +{ + while (check_any_match_x4(transitions)) { + transitions[0] = acl_match_check(transitions[0], slot, ctx, + parms, flows, resolve_priority_altivec); + transitions[1] = acl_match_check(transitions[1], slot + 1, ctx, + parms, flows, resolve_priority_altivec); + transitions[2] = acl_match_check(transitions[2], slot + 2, ctx, + parms, flows, 
resolve_priority_altivec); + transitions[3] = acl_match_check(transitions[3], slot + 3, ctx, + parms, flows, resolve_priority_altivec); + } +} + +/* + * Process 4 transitions (in 2 XMM registers) in parallel + */ +static inline __attribute__((optimize("O2"))) xmm_t +transition4(xmm_t next_input, const uint64_t *trans, + xmm_t *indices1, xmm_t *indices2) +{ + xmm_t addr, tr_lo, tr_hi; + xmm_t in, node_type, r, t; + xmm_t dfa_ofs, quad_ofs; + xmm_t *index_mask, *tp; + vector bool int dfa_msk; + vector signed char zeroes = {}; + union { + uint64_t d64[2]; + uint32_t d32[4]; + } v; + + /* Move low 32 into tr_lo and high 32 into tr_hi */ + tr_lo = (xmm_t){(*indices1)[0], (*indices1)[2], + (*indices2)[0], (*indices2)[2]}; + tr_hi = (xmm_t){(*indices1)[1], (*indices1)[3], + (*indices2)[1], (*indices2)[3]}; + + /* Calculate the address (array index) for all 4 transitions. */ + index_mask = (xmm_t *)&altivec_acl_const.xmm_index_mask.u32; + t = vec_xor(*index_mask, *index_mask); + in = vec_perm(next_input, (xmm_t){}, + *(vector unsigned char *)&altivec_acl_const.xmm_shuffle_input); + + /* Calc node type and node addr */ + node_type = vec_and(vec_nor(*index_mask, *index_mask), tr_lo); + addr = vec_and(tr_lo, *index_mask); + + /* mask for DFA type(0) nodes */ + dfa_msk = vec_cmpeq(node_type, t); + + /* DFA calculations. */ + r = vec_sr(in, (vector unsigned int){30, 30, 30, 30}); + tp = (xmm_t *)&altivec_acl_const.range_base.u32; + r = vec_add(r, *tp); + t = vec_sr(in, (vector unsigned int){24, 24, 24, 24}); + r = vec_perm(tr_hi, (xmm_t){(uint16_t)0 << 16}, + (vector unsigned char)r); + + dfa_ofs = vec_sub(t, r); + + /* QUAD/SINGLE caluclations. 
*/ + t = (xmm_t)vec_cmpgt((vector signed char)in, (vector signed char)tr_hi); + t = (xmm_t)vec_sel( + vec_sel( + (vector signed char)vec_sub( + zeroes, (vector signed char)t), + (vector signed char)t, + vec_cmpgt((vector signed char)t, zeroes)), + zeroes, + vec_cmpeq((vector signed char)t, zeroes)); + + t = (xmm_t)vec_msum((vector signed char)t, + (vector unsigned char)t, (xmm_t){}); + quad_ofs = (xmm_t)vec_msum((vector signed short)t, + *(vector signed short *)&altivec_acl_const.xmm_ones_16.u16, + (xmm_t){}); + + /* blend DFA and QUAD/SINGLE. */ + t = vec_sel(quad_ofs, dfa_ofs, dfa_msk); + + /* calculate address for next transitions. */ + addr = vec_add(addr, t); + + v.d64[0] = (uint64_t)trans[addr[0]]; + v.d64[1] = (uint64_t)trans[addr[1]]; + *indices1 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]}; + v.d64[0] = (uint64_t)trans[addr[2]]; + v.d64[1] = (uint64_t)trans[addr[3]]; + *indices2 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]}; + + return vec_sr(next_input, + (vector unsigned int){CHAR_BIT, CHAR_BIT, CHAR_BIT, CHAR_BIT}); +} + +/* + * Execute trie traversal with 8 traversals in parallel + */ +static inline int +search_altivec_8(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_ALTIVEC8]; + struct completion cmplt[MAX_SEARCHES_ALTIVEC8]; + struct parms parms[MAX_SEARCHES_ALTIVEC8]; + xmm_t input0, input1; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < MAX_SEARCHES_ALTIVEC8; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + /* Check for any matches. 
*/ + acl_match_check_x4(0, ctx, parms, &flows, (uint64_t *)&index_array[0]); + acl_match_check_x4(4, ctx, parms, &flows, (uint64_t *)&index_array[4]); + + while (flows.started > 0) { + + /* Gather 4 bytes of input data for each stream. */ + input0 = (xmm_t){GET_NEXT_4BYTES(parms, 0), + GET_NEXT_4BYTES(parms, 1), + GET_NEXT_4BYTES(parms, 2), + GET_NEXT_4BYTES(parms, 3)}; + + input1 = (xmm_t){GET_NEXT_4BYTES(parms, 4), + GET_NEXT_4BYTES(parms, 5), + GET_NEXT_4BYTES(parms, 6), + GET_NEXT_4BYTES(parms, 7)}; + + /* Process the 4 bytes of input on each stream. */ + + input0 = transition4(input0, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input1 = transition4(input1, flows.trans, + (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]); + + input0 = transition4(input0, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input1 = transition4(input1, flows.trans, + (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]); + + input0 = transition4(input0, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input1 = transition4(input1, flows.trans, + (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]); + + input0 = transition4(input0, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input1 = transition4(input1, flows.trans, + (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]); + + /* Check for any matches. 
*/ + acl_match_check_x4(0, ctx, parms, &flows, + (uint64_t *)&index_array[0]); + acl_match_check_x4(4, ctx, parms, &flows, + (uint64_t *)&index_array[4]); + } + + return 0; +} + +/* + * Execute trie traversal with 4 traversals in parallel + */ +static inline int +search_altivec_4(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, int total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_ALTIVEC4]; + struct completion cmplt[MAX_SEARCHES_ALTIVEC4]; + struct parms parms[MAX_SEARCHES_ALTIVEC4]; + xmm_t input; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < MAX_SEARCHES_ALTIVEC4; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, index_array); + + while (flows.started > 0) { + + /* Gather 4 bytes of input data for each stream. */ + input = (xmm_t){GET_NEXT_4BYTES(parms, 0), + GET_NEXT_4BYTES(parms, 1), + GET_NEXT_4BYTES(parms, 2), + GET_NEXT_4BYTES(parms, 3)}; + + /* Process the 4 bytes of input on each stream. */ + input = transition4(input, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input = transition4(input, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input = transition4(input, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input = transition4(input, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + + /* Check for any matches. 
*/ + acl_match_check_x4(0, ctx, parms, &flows, index_array); + } + + return 0; +} diff --git a/src/dpdk/lib/librte_acl/rte_acl.c b/src/dpdk/lib/librte_acl/rte_acl.c index 4ba9786b..d1f40bef 100644 --- a/src/dpdk/lib/librte_acl/rte_acl.c +++ b/src/dpdk/lib/librte_acl/rte_acl.c @@ -75,12 +75,23 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx, return -ENOTSUP; } +int __attribute__ ((weak)) +rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx, + __rte_unused const uint8_t **data, + __rte_unused uint32_t *results, + __rte_unused uint32_t num, + __rte_unused uint32_t categories) +{ + return -ENOTSUP; +} + static const rte_acl_classify_t classify_fns[] = { [RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar, [RTE_ACL_CLASSIFY_SCALAR] = rte_acl_classify_scalar, [RTE_ACL_CLASSIFY_SSE] = rte_acl_classify_sse, [RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2, [RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon, + [RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec, }; /* by default, use always available scalar code path. 
*/ @@ -119,6 +130,8 @@ rte_acl_init(void) #elif defined(RTE_ARCH_ARM) if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) alg = RTE_ACL_CLASSIFY_NEON; +#elif defined(RTE_ARCH_PPC_64) + alg = RTE_ACL_CLASSIFY_ALTIVEC; #else #ifdef CC_AVX2_SUPPORT if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) @@ -300,8 +313,7 @@ acl_check_rule(const struct rte_acl_rule_data *rd) if ((RTE_LEN2MASK(RTE_ACL_MAX_CATEGORIES, typeof(rd->category_mask)) & rd->category_mask) == 0 || rd->priority > RTE_ACL_MAX_PRIORITY || - rd->priority < RTE_ACL_MIN_PRIORITY || - rd->userdata == RTE_ACL_INVALID_USERDATA) + rd->priority < RTE_ACL_MIN_PRIORITY) return -EINVAL; return 0; } diff --git a/src/dpdk/lib/librte_acl/rte_acl.h b/src/dpdk/lib/librte_acl/rte_acl.h index 0979a098..b53179a8 100644 --- a/src/dpdk/lib/librte_acl/rte_acl.h +++ b/src/dpdk/lib/librte_acl/rte_acl.h @@ -120,8 +120,6 @@ enum { RTE_ACL_MIN_PRIORITY = 0, }; -#define RTE_ACL_INVALID_USERDATA 0 - #define RTE_ACL_MASKLEN_TO_BITMASK(v, s) \ ((v) == 0 ? (v) : (typeof(v))((uint64_t)-1 << ((s) * CHAR_BIT - (v)))) @@ -144,7 +142,7 @@ struct rte_acl_rule_data { struct rte_acl_field field[fld_num]; \ } -RTE_ACL_RULE_DEF(rte_acl_rule, 0); +RTE_ACL_RULE_DEF(rte_acl_rule,); #define RTE_ACL_RULE_SZ(fld_num) \ (sizeof(struct rte_acl_rule) + sizeof(struct rte_acl_field) * (fld_num)) @@ -271,6 +269,7 @@ enum rte_acl_classify_alg { RTE_ACL_CLASSIFY_SSE = 2, /**< requires SSE4.1 support. */ RTE_ACL_CLASSIFY_AVX2 = 3, /**< requires AVX2 support. */ RTE_ACL_CLASSIFY_NEON = 4, /**< requires NEON support. */ + RTE_ACL_CLASSIFY_ALTIVEC = 5, /**< requires ALTIVEC support. */ RTE_ACL_CLASSIFY_NUM /* should always be the last one. 
*/ }; diff --git a/src/dpdk/lib/librte_cfgfile/rte_cfgfile.h b/src/dpdk/lib/librte_cfgfile/rte_cfgfile.h index f649836c..b40e6a13 100644 --- a/src/dpdk/lib/librte_cfgfile/rte_cfgfile.h +++ b/src/dpdk/lib/librte_cfgfile/rte_cfgfile.h @@ -34,6 +34,8 @@ #ifndef __INCLUDE_RTE_CFGFILE_H__ #define __INCLUDE_RTE_CFGFILE_H__ +#include <stddef.h> + #ifdef __cplusplus extern "C" { #endif @@ -86,7 +88,7 @@ struct rte_cfgfile *rte_cfgfile_load(const char *filename, int flags); * @param length * Maximum section name length * @return -* 0 on success, error code otherwise +* Number of sections */ int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name, size_t length); @@ -100,13 +102,13 @@ int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name, * @param cfg * Config file * @param sections -* Array containing section names after successful invocation. Each elemen +* Array containing section names after successful invocation. Each element * of this array should be preallocated by the user with at least * CFG_NAME_LEN characters. * @param max_sections * Maximum number of section names to be stored in sections array * @return -* 0 on success, error code otherwise +* Number of populated sections names */ int rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[], int max_sections); @@ -134,12 +136,13 @@ int rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname); * @param sectionname * Section name * @return -* Number of entries in section +* Number of entries in section on success, -1 otherwise */ int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg, const char *sectionname); -/** Get section entries as key-value pairs +/** +* Get section entries as key-value pairs * * If multiple sections have the given name this function operates on the * first one. 
@@ -154,14 +157,15 @@ int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg, * @param max_entries * Maximum number of section entries to be stored in entries array * @return -* 0 on success, error code otherwise +* Number of entries populated on success, -1 otherwise */ int rte_cfgfile_section_entries(struct rte_cfgfile *cfg, const char *sectionname, struct rte_cfgfile_entry *entries, int max_entries); -/** Get section entries as key-value pairs +/** +* Get section entries as key-value pairs * * The index of a section is the same as the index of its name in the * result of rte_cfgfile_sections. This API can be used when there are @@ -180,7 +184,7 @@ int rte_cfgfile_section_entries(struct rte_cfgfile *cfg, * @param max_entries * Maximum number of section entries to be stored in entries array * @return -* Number of entries populated on success, negative error code otherwise +* Number of entries populated on success, -1 otherwise */ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, int index, @@ -188,7 +192,8 @@ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, struct rte_cfgfile_entry *entries, int max_entries); -/** Get value of the named entry in named config file section +/** +* Get value of the named entry in named config file section * * If multiple sections have the given name this function operates on the * first one. @@ -200,13 +205,14 @@ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, * @param entryname * Entry name * @return -* Entry value +* Entry value on success, NULL otherwise */ const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg, const char *sectionname, const char *entryname); -/** Check if given entry exists in named config file section +/** +* Check if given entry exists in named config file section * * If multiple sections have the given name this function operates on the * first one. 
@@ -223,12 +229,13 @@ const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg, int rte_cfgfile_has_entry(struct rte_cfgfile *cfg, const char *sectionname, const char *entryname); -/** Close config file +/** +* Close config file * * @param cfg * Config file * @return -* 0 on success, error code otherwise +* 0 on success, -1 otherwise */ int rte_cfgfile_close(struct rte_cfgfile *cfg); diff --git a/src/dpdk/lib/librte_eal/bsdapp/contigmem/contigmem.c b/src/dpdk/lib/librte_eal/bsdapp/contigmem/contigmem.c index c6ca3b9c..da971deb 100644 --- a/src/dpdk/lib/librte_eal/bsdapp/contigmem/contigmem.c +++ b/src/dpdk/lib/librte_eal/bsdapp/contigmem/contigmem.c @@ -216,15 +216,19 @@ static int contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, struct vm_object **obj, int nprot) { + uint64_t buffer_index; + /* * The buffer index is encoded in the offset. Divide the offset by * PAGE_SIZE to get the index of the buffer requested by the user * app. */ - if ((*offset/PAGE_SIZE) >= contigmem_num_buffers) + buffer_index = *offset / PAGE_SIZE; + if (buffer_index >= contigmem_num_buffers) return EINVAL; - *offset = (vm_ooffset_t)vtophys(contigmem_buffers[*offset/PAGE_SIZE]); + memset(contigmem_buffers[buffer_index], 0, contigmem_buffer_size); + *offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index]); *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset, curthread->td_ucred); diff --git a/src/dpdk/lib/librte_eal/bsdapp/eal/eal.c b/src/dpdk/lib/librte_eal/bsdapp/eal/eal.c index a0c8f8c8..ee7c9de7 100644 --- a/src/dpdk/lib/librte_eal/bsdapp/eal/eal.c +++ b/src/dpdk/lib/librte_eal/bsdapp/eal/eal.c @@ -64,6 +64,7 @@ #include <rte_string_fns.h> #include <rte_cpuflags.h> #include <rte_interrupts.h> +#include <rte_bus.h> #include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> @@ -496,14 +497,14 @@ rte_eal_init(int argc, char **argv) char cpuset[RTE_CPU_AFFINITY_STR_LEN]; char thread_name[RTE_MAX_THREAD_NAME_LEN]; + /* checks if the 
machine is adequate */ + rte_cpu_check_supported(); + if (!rte_atomic32_test_and_set(&run_once)) return -1; thread_id = pthread_self(); - if (rte_eal_log_early_init() < 0) - rte_panic("Cannot init early logs\n"); - eal_log_level_parse(argc, argv); /* set log level as early as possible */ @@ -552,9 +553,6 @@ rte_eal_init(int argc, char **argv) if (rte_eal_tailqs_init() < 0) rte_panic("Cannot init tail queues for objects\n"); -/* if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0) - rte_panic("Cannot init logs\n");*/ - if (rte_eal_alarm_init() < 0) rte_panic("Cannot init interrupt-handling thread\n"); @@ -580,8 +578,8 @@ rte_eal_init(int argc, char **argv) rte_config.master_lcore, thread_id, cpuset, ret == 0 ? "" : "..."); - if (rte_eal_dev_init() < 0) - rte_panic("Cannot init pmd devices\n"); + if (rte_bus_scan()) + rte_panic("Cannot scan the buses for devices\n"); RTE_LCORE_FOREACH_SLAVE(i) { @@ -615,10 +613,17 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* Probe all the buses and devices/drivers on them */ + if (rte_bus_probe()) + rte_panic("Cannot probe devices\n"); + /* Probe & Initialize PCI devices */ if (rte_eal_pci_probe()) rte_panic("Cannot probe PCI\n"); + if (rte_eal_dev_init() < 0) + rte_panic("Cannot init pmd devices\n"); + rte_eal_mcfg_complete(); return fctret; diff --git a/src/dpdk/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/src/dpdk/lib/librte_eal/bsdapp/eal/eal_interrupts.c index 836e4836..ea2afff4 100644 --- a/src/dpdk/lib/librte_eal/bsdapp/eal/eal_interrupts.c +++ b/src/dpdk/lib/librte_eal/bsdapp/eal/eal_interrupts.c @@ -36,29 +36,37 @@ #include "eal_private.h" int -rte_intr_callback_register(struct rte_intr_handle *intr_handle __rte_unused, - rte_intr_callback_fn cb __rte_unused, - void *cb_arg __rte_unused) +rte_intr_callback_register(const struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, + void *cb_arg) { + RTE_SET_USED(intr_handle); + 
RTE_SET_USED(cb); + RTE_SET_USED(cb_arg); + return -ENOTSUP; } int -rte_intr_callback_unregister(struct rte_intr_handle *intr_handle __rte_unused, - rte_intr_callback_fn cb_fn __rte_unused, - void *cb_arg __rte_unused) +rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, + void *cb_arg) { + RTE_SET_USED(intr_handle); + RTE_SET_USED(cb); + RTE_SET_USED(cb_arg); + return -ENOTSUP; } int -rte_intr_enable(struct rte_intr_handle *intr_handle __rte_unused) +rte_intr_enable(const struct rte_intr_handle *intr_handle __rte_unused) { return -ENOTSUP; } int -rte_intr_disable(struct rte_intr_handle *intr_handle __rte_unused) +rte_intr_disable(const struct rte_intr_handle *intr_handle __rte_unused) { return -ENOTSUP; } diff --git a/src/dpdk/lib/librte_eal/bsdapp/eal/eal_pci.c b/src/dpdk/lib/librte_eal/bsdapp/eal/eal_pci.c index 374b68f2..3a5c3159 100644 --- a/src/dpdk/lib/librte_eal/bsdapp/eal/eal_pci.c +++ b/src/dpdk/lib/librte_eal/bsdapp/eal/eal_pci.c @@ -87,15 +87,6 @@ * enabling bus master. 
*/ -/* unbind kernel driver for this device */ -int -pci_unbind_kernel_driver(struct rte_pci_device *dev __rte_unused) -{ - RTE_LOG(ERR, EAL, "RTE_PCI_DRV_FORCE_UNBIND flag is not implemented " - "for BSD\n"); - return -ENOTSUP; -} - /* Map pci device */ int rte_eal_pci_map_device(struct rte_pci_device *dev) @@ -287,7 +278,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) dev->max_vfs = 0; /* FreeBSD has no NUMA support (yet) */ - dev->numa_node = 0; + dev->device.numa_node = 0; /* FreeBSD has only one pass through driver */ dev->kdrv = RTE_KDRV_NIC_UIO; @@ -406,6 +397,55 @@ error: return -1; } +int +pci_update_device(const struct rte_pci_addr *addr) +{ + int fd; + struct pci_conf matches[2]; + struct pci_match_conf match = { + .pc_sel = { + .pc_domain = addr->domain, + .pc_bus = addr->bus, + .pc_dev = addr->devid, + .pc_func = addr->function, + }, + }; + struct pci_conf_io conf_io = { + .pat_buf_len = 0, + .num_patterns = 1, + .patterns = &match, + .match_buf_len = sizeof(matches), + .matches = &matches[0], + }; + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { + RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", + __func__, strerror(errno)); + goto error; + } + + if (conf_io.num_matches != 1) + goto error; + + if (pci_scan_one(fd, &matches[0]) < 0) + goto error; + + close(fd); + + return 0; + +error: + if (fd >= 0) + close(fd); + return -1; +} + /* Read PCI config space. 
*/ int rte_eal_pci_read_config(const struct rte_pci_device *dev, void *buf, size_t len, off_t offset) @@ -623,9 +663,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) int rte_eal_pci_init(void) { - TAILQ_INIT(&pci_driver_list); - TAILQ_INIT(&pci_device_list); - /* for debug purposes, PCI can be disabled */ if (internal_config.no_pci) return 0; diff --git a/src/dpdk/lib/librte_eal/common/eal_common_bus.c b/src/dpdk/lib/librte_eal/common/eal_common_bus.c new file mode 100644 index 00000000..4638e78d --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/eal_common_bus.c @@ -0,0 +1,133 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 NXP + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of NXP nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <string.h> +#include <sys/queue.h> + +#include <rte_bus.h> + +#include "eal_private.h" + +struct rte_bus_list rte_bus_list = + TAILQ_HEAD_INITIALIZER(rte_bus_list); + +void +rte_bus_register(struct rte_bus *bus) +{ + RTE_VERIFY(bus); + RTE_VERIFY(bus->name && strlen(bus->name)); + /* A bus should mandatorily have the scan implemented */ + RTE_VERIFY(bus->scan); + RTE_VERIFY(bus->probe); + + TAILQ_INSERT_TAIL(&rte_bus_list, bus, next); + RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name); +} + +void +rte_bus_unregister(struct rte_bus *bus) +{ + TAILQ_REMOVE(&rte_bus_list, bus, next); + RTE_LOG(DEBUG, EAL, "Unregistered [%s] bus.\n", bus->name); +} + +/* Scan all the buses for registered devices */ +int +rte_bus_scan(void) +{ + int ret; + struct rte_bus *bus = NULL; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { + ret = bus->scan(); + if (ret) { + RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n", + bus->name); + return ret; + } + } + + return 0; +} + +/* Probe all devices of all buses */ +int +rte_bus_probe(void) +{ + int ret; + struct rte_bus *bus; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { + ret = bus->probe(); + if (ret) { + RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n", + bus->name); + return ret; + } + } + + return 0; +} + +/* Dump information of a single bus */ +static int +bus_dump_one(FILE *f, struct rte_bus *bus) +{ + int ret; + + /* For now, dump only the bus name */ + ret = 
fprintf(f, " %s\n", bus->name); + + /* Error in case of inability in writing to stream */ + if (ret < 0) + return ret; + + return 0; +} + +void +rte_bus_dump(FILE *f) +{ + int ret; + struct rte_bus *bus; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { + ret = bus_dump_one(f, bus); + if (ret) { + RTE_LOG(ERR, EAL, "Unable to write to stream (%d)\n", + ret); + break; + } + } +} diff --git a/src/dpdk/lib/librte_eal/common/eal_common_cpuflags.c b/src/dpdk/lib/librte_eal/common/eal_common_cpuflags.c index ecb12409..b5f76f7f 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_cpuflags.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_cpuflags.c @@ -39,14 +39,8 @@ /** * Checks if the machine is adequate for running the binary. If it is not, the * program exits with status 1. - * The function attribute forces this function to be called before main(). But - * with ICC, the check is generated by the compiler. */ -#ifndef __INTEL_COMPILER -void __attribute__ ((__constructor__)) -#else void -#endif rte_cpu_check_supported(void) { /* This is generated at compile-time by the build system */ diff --git a/src/dpdk/lib/librte_eal/common/eal_common_dev.c b/src/dpdk/lib/librte_eal/common/eal_common_dev.c index a8a4146c..4f3b4934 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_dev.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_dev.c @@ -48,6 +48,9 @@ /** Global list of device drivers. */ static struct rte_driver_list dev_driver_list = TAILQ_HEAD_INITIALIZER(dev_driver_list); +/** Global list of device drivers. 
*/ +static struct rte_device_list dev_device_list = + TAILQ_HEAD_INITIALIZER(dev_device_list); /* register a driver */ void @@ -63,42 +66,25 @@ rte_eal_driver_unregister(struct rte_driver *driver) TAILQ_REMOVE(&dev_driver_list, driver, next); } -int -rte_eal_vdev_init(const char *name, const char *args) +void rte_eal_device_insert(struct rte_device *dev) { - struct rte_driver *driver; - - if (name == NULL) - return -EINVAL; - - TAILQ_FOREACH(driver, &dev_driver_list, next) { - if (driver->type != PMD_VDEV) - continue; - - /* - * search a driver prefix in virtual device name. - * For example, if the driver is pcap PMD, driver->name - * will be "eth_pcap", but "name" will be "eth_pcapN". - * So use strncmp to compare. - */ - if (!strncmp(driver->name, name, strlen(driver->name))) - return driver->init(name, args); - } + TAILQ_INSERT_TAIL(&dev_device_list, dev, next); +} - RTE_LOG(ERR, EAL, "no driver found for %s\n", name); - return -EINVAL; +void rte_eal_device_remove(struct rte_device *dev) +{ + TAILQ_REMOVE(&dev_device_list, dev, next); } int rte_eal_dev_init(void) { struct rte_devargs *devargs; - struct rte_driver *driver; /* * Note that the dev_driver_list is populated here * from calls made to rte_eal_driver_register from constructor functions - * embedded into PMD modules via the PMD_REGISTER_DRIVER macro + * embedded into PMD modules via the RTE_PMD_REGISTER_VDEV macro */ /* call the init function for each virtual device */ @@ -115,38 +101,53 @@ rte_eal_dev_init(void) } } - /* Once the vdevs are initalized, start calling all the pdev drivers */ - TAILQ_FOREACH(driver, &dev_driver_list, next) { - if (driver->type != PMD_PDEV) - continue; - /* PDEV drivers don't get passed any parameters */ - driver->init(NULL, NULL); - } return 0; } -int -rte_eal_vdev_uninit(const char *name) +int rte_eal_dev_attach(const char *name, const char *devargs) { - struct rte_driver *driver; + struct rte_pci_addr addr; - if (name == NULL) + if (name == NULL || devargs == NULL) { + 
RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n"); return -EINVAL; + } - TAILQ_FOREACH(driver, &dev_driver_list, next) { - if (driver->type != PMD_VDEV) - continue; + if (eal_parse_pci_DomBDF(name, &addr) == 0) { + if (rte_eal_pci_probe_one(&addr) < 0) + goto err; + + } else { + if (rte_eal_vdev_init(name, devargs)) + goto err; + } + + return 0; + +err: + RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", name); + return -EINVAL; +} + +int rte_eal_dev_detach(const char *name) +{ + struct rte_pci_addr addr; - /* - * search a driver prefix in virtual device name. - * For example, if the driver is pcap PMD, driver->name - * will be "eth_pcap", but "name" will be "eth_pcapN". - * So use strncmp to compare. - */ - if (!strncmp(driver->name, name, strlen(driver->name))) - return driver->uninit(name); + if (name == NULL) { + RTE_LOG(ERR, EAL, "Invalid device provided.\n"); + return -EINVAL; } - RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + if (eal_parse_pci_DomBDF(name, &addr) == 0) { + if (rte_eal_pci_detach(&addr) < 0) + goto err; + } else { + if (rte_eal_vdev_uninit(name)) + goto err; + } + return 0; + +err: + RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", name); return -EINVAL; } diff --git a/src/dpdk/lib/librte_eal/common/eal_common_devargs.c b/src/dpdk/lib/librte_eal/common/eal_common_devargs.c index e403717b..ffa8ad96 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_devargs.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_devargs.c @@ -72,6 +72,7 @@ rte_eal_parse_devargs_str(const char *devargs_str, if (*drvargs == NULL) { free(*drvname); + *drvname = NULL; return -1; } return 0; diff --git a/src/dpdk/lib/librte_eal/common/eal_common_log.c b/src/dpdk/lib/librte_eal/common/eal_common_log.c index 7916c781..21975583 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_log.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_log.c @@ -48,11 +48,12 @@ struct rte_logs rte_logs = { .file = NULL, }; +/* Stream to use for 
logging if rte_logs.file is NULL */ static FILE *default_log_stream; /** * This global structure stores some informations about the message - * that is currently beeing processed by one lcore + * that is currently being processed by one lcore */ struct log_cur_msg { uint32_t loglevel; /**< log level - see rte_log.h */ @@ -64,27 +65,11 @@ static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg); /* default logs */ -int -rte_log_add_in_history(const char *buf __rte_unused, size_t size __rte_unused) -{ - return 0; -} - -void -rte_log_set_history(int enable) -{ - if (enable) - RTE_LOG(WARNING, EAL, "The log history is deprecated.\n"); -} - /* Change the stream that will be used by logging system */ int rte_openlog_stream(FILE *f) { - if (f == NULL) - rte_logs.file = default_log_stream; - else - rte_logs.file = f; + rte_logs.file = f; return 0; } @@ -131,12 +116,6 @@ int rte_log_cur_msg_logtype(void) return RTE_PER_LCORE(log_cur_msg).logtype; } -/* Dump log history to file */ -void -rte_log_dump_history(FILE *out __rte_unused) -{ -} - /* * Generates a log message The message will be sent in the stream * defined by the previous call to rte_openlog_stream(). @@ -146,6 +125,19 @@ rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) { int ret; FILE *f = rte_logs.file; + if (f == NULL) { + f = default_log_stream; + if (f == NULL) { + /* + * Grab the current value of stderr here, rather than + * just initializing default_log_stream to stderr. This + * ensures that we will always use the current value + * of stderr, even if the application closes and + * reopens it. + */ + f = stderr; + } + } if ((level > rte_logs.level) || !(logtype & rte_logs.type)) return 0; @@ -177,17 +169,15 @@ rte_log(uint32_t level, uint32_t logtype, const char *format, ...) } /* - * called by environment-specific log init function + * Called by environment-specific initialization functions. 
*/ -int -rte_eal_common_log_init(FILE *default_log) +void +eal_log_set_default(FILE *default_log) { default_log_stream = default_log; - rte_openlog_stream(default_log); -#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG - RTE_LOG(NOTICE, EAL, "Debug logs available - lower performance\n"); +#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG + RTE_LOG(NOTICE, EAL, + "Debug dataplane logs available - lower performance\n"); #endif - - return 0; } diff --git a/src/dpdk/lib/librte_eal/common/eal_common_memzone.c b/src/dpdk/lib/librte_eal/common/eal_common_memzone.c index 1bd0a33d..64f4e0ad 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_memzone.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_memzone.c @@ -337,19 +337,7 @@ rte_memzone_free(const struct rte_memzone *mz) idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); idx = idx / sizeof(struct rte_memzone); -#ifdef RTE_LIBRTE_IVSHMEM - /* - * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot - * free it. - */ - if (mcfg->memzone[idx].ioremap_addr != 0) { - rte_rwlock_write_unlock(&mcfg->mlock); - return -EINVAL; - } -#endif - addr = mcfg->memzone[idx].addr; - if (addr == NULL) ret = -EINVAL; else if (mcfg->memzone_cnt == 0) { diff --git a/src/dpdk/lib/librte_eal/common/eal_common_options.c b/src/dpdk/lib/librte_eal/common/eal_common_options.c index 1a1bab36..f36bc556 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_options.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_options.c @@ -118,7 +118,7 @@ static const char *default_solib_dir = RTE_EAL_PMD_PATH; /* * Stringified version of solib path used by dpdk-pmdinfo.py * Note: PLEASE DO NOT ALTER THIS without making a corresponding - * change to tools/dpdk-pmdinfo.py + * change to usertools/dpdk-pmdinfo.py */ static const char dpdk_solib_path[] __attribute__((used)) = "DPDK_PLUGIN_PATH=" RTE_EAL_PMD_PATH; @@ -126,6 +126,7 @@ static const char dpdk_solib_path[] __attribute__((used)) = static int master_lcore_parsed; static int mem_parsed; +static int core_parsed; void 
eal_reset_internal_config(struct internal_config *internal_cfg) @@ -797,6 +798,7 @@ eal_parse_common_option(int opt, const char *optarg, RTE_LOG(ERR, EAL, "invalid coremask\n"); return -1; } + core_parsed = 1; break; /* corelist */ case 'l': @@ -804,6 +806,7 @@ eal_parse_common_option(int opt, const char *optarg, RTE_LOG(ERR, EAL, "invalid core list\n"); return -1; } + core_parsed = 1; break; /* size of memory */ case 'm': @@ -912,6 +915,7 @@ eal_parse_common_option(int opt, const char *optarg, OPT_LCORES "\n"); return -1; } + core_parsed = 1; break; /* don't know what to do, leave this to caller */ @@ -923,12 +927,38 @@ eal_parse_common_option(int opt, const char *optarg, return 0; } +static void +eal_auto_detect_cores(struct rte_config *cfg) +{ + unsigned int lcore_id; + unsigned int removed = 0; + rte_cpuset_t affinity_set; + pthread_t tid = pthread_self(); + + if (pthread_getaffinity_np(tid, sizeof(rte_cpuset_t), + &affinity_set) < 0) + CPU_ZERO(&affinity_set); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (cfg->lcore_role[lcore_id] == ROLE_RTE && + !CPU_ISSET(lcore_id, &affinity_set)) { + cfg->lcore_role[lcore_id] = ROLE_OFF; + removed++; + } + } + + cfg->lcore_count -= removed; +} + int eal_adjust_config(struct internal_config *internal_cfg) { int i; struct rte_config *cfg = rte_eal_get_configuration(); + if (!core_parsed) + eal_auto_detect_cores(cfg); + if (internal_config.process_type == RTE_PROC_AUTO) internal_config.process_type = eal_proc_type_detect(); @@ -1021,7 +1051,7 @@ eal_common_usage(void) " [NOTE: PCI whitelist cannot be used with -b option]\n" " --"OPT_VDEV" Add a virtual device.\n" " The argument format is <driver><id>[,key=val,...]\n" - " (ex: --vdev=eth_pcap0,iface=eth2).\n" + " (ex: --vdev=net_pcap0,iface=eth2).\n" " -d LIB.so|DIR Add a driver or driver directory\n" " (can be used multiple times)\n" " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n" diff --git 
a/src/dpdk/lib/librte_eal/common/eal_common_pci.c b/src/dpdk/lib/librte_eal/common/eal_common_pci.c index 7248c38b..72547bd2 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_pci.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_pci.c @@ -82,8 +82,10 @@ #include "eal_private.h" -struct pci_driver_list pci_driver_list; -struct pci_device_list pci_device_list; +struct pci_driver_list pci_driver_list = + TAILQ_HEAD_INITIALIZER(pci_driver_list); +struct pci_device_list pci_device_list = + TAILQ_HEAD_INITIALIZER(pci_device_list); #define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" @@ -151,7 +153,7 @@ pci_unmap_resource(void *requested_addr, size_t size) } /* - * If vendor/device ID match, call the devinit() function of the + * If vendor/device ID match, call the probe() function of the * driver. */ static int @@ -183,42 +185,45 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", loc->domain, loc->bus, loc->devid, loc->function, - dev->numa_node); + dev->device.numa_node); /* no initialization when blacklisted, return without error */ - if (dev->devargs != NULL && - dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) { + if (dev->device.devargs != NULL && + dev->device.devargs->type == + RTE_DEVTYPE_BLACKLISTED_PCI) { RTE_LOG(INFO, EAL, " Device is blacklisted, not initializing\n"); return 1; } RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->name); + dev->id.device_id, dr->driver.name); if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { /* map resources for devices that use igb_uio */ ret = rte_eal_pci_map_device(dev); if (ret != 0) return ret; - } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND && - rte_eal_process_type() == RTE_PROC_PRIMARY) { - /* unbind current driver */ - if (pci_unbind_kernel_driver(dev) < 0) - return -1; } /* reference driver structure */ dev->driver = dr; - /* call the driver devinit() function */ - return 
dr->devinit(dr, dev); + /* call the driver probe() function */ + ret = dr->probe(dr, dev); + if (ret) { + dev->driver = NULL; + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) + rte_eal_pci_unmap_device(dev); + } + + return ret; } /* return positive value if driver doesn't support this device */ return 1; } /* - * If vendor/device ID match, call the devuninit() function of the + * If vendor/device ID match, call the remove() function of the * driver. */ static int @@ -250,12 +255,12 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr, RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", loc->domain, loc->bus, loc->devid, - loc->function, dev->numa_node); + loc->function, dev->device.numa_node); RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->name); + dev->id.device_id, dr->driver.name); - if (dr->devuninit && (dr->devuninit(dev) < 0)) + if (dr->remove && (dr->remove(dev) < 0)) return -1; /* negative value is an error */ /* clear driver structure */ @@ -273,7 +278,7 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr, } /* - * If vendor/device ID match, call the devinit() function of all + * If vendor/device ID match, call the probe() function of all * registered driver for the given device. Return -1 if initialization * failed, return 1 if no driver is found for this device. */ @@ -286,6 +291,10 @@ pci_probe_all_drivers(struct rte_pci_device *dev) if (dev == NULL) return -1; + /* Check if a driver is already loaded */ + if (dev->driver != NULL) + return 0; + TAILQ_FOREACH(dr, &pci_driver_list, next) { rc = rte_eal_pci_probe_one_driver(dr, dev); if (rc < 0) @@ -300,7 +309,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev) } /* - * If vendor/device ID match, call the devuninit() function of all + * If vendor/device ID match, call the remove() function of all * registered driver for the given device. Return -1 if initialization * failed, return 1 if no driver is found for this device. 
*/ @@ -339,21 +348,27 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr) if (addr == NULL) return -1; + /* update current pci device in global list, kernel bindings might have + * changed since last time we looked at it. + */ + if (pci_update_device(addr) < 0) + goto err_return; + TAILQ_FOREACH(dev, &pci_device_list, next) { if (rte_eal_compare_pci_addr(&dev->addr, addr)) continue; ret = pci_probe_all_drivers(dev); - if (ret < 0) + if (ret) goto err_return; return 0; } return -1; err_return: - RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT - " cannot be used\n", dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); + RTE_LOG(WARNING, EAL, + "Requested device " PCI_PRI_FMT " cannot be used\n", + addr->domain, addr->bus, addr->devid, addr->function); return -1; } @@ -378,6 +393,7 @@ rte_eal_pci_detach(const struct rte_pci_addr *addr) goto err_return; TAILQ_REMOVE(&pci_device_list, dev, next); + free(dev); return 0; } return -1; @@ -390,7 +406,7 @@ err_return: } /* - * Scan the content of the PCI bus, and call the devinit() function for + * Scan the content of the PCI bus, and call the probe() function for * all registered drivers that have a matching entry in its id_table * for discovered devices. 
*/ @@ -410,7 +426,7 @@ rte_eal_pci_probe(void) /* set devargs in PCI structure */ devargs = pci_devargs_lookup(dev); if (devargs != NULL) - dev->devargs = devargs; + dev->device.devargs = devargs; /* probe all or only whitelisted devices */ if (probe_all) @@ -463,11 +479,13 @@ void rte_eal_pci_register(struct rte_pci_driver *driver) { TAILQ_INSERT_TAIL(&pci_driver_list, driver, next); + rte_eal_driver_register(&driver->driver); } /* unregister a driver */ void rte_eal_pci_unregister(struct rte_pci_driver *driver) { + rte_eal_driver_unregister(&driver->driver); TAILQ_REMOVE(&pci_driver_list, driver, next); } diff --git a/src/dpdk/lib/librte_eal/common/eal_common_timer.c b/src/dpdk/lib/librte_eal/common/eal_common_timer.c index c4227cd8..72656176 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_timer.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_timer.c @@ -47,8 +47,11 @@ /* The frequency of the RDTSC timer resolution */ static uint64_t eal_tsc_resolution_hz; +/* Pointer to user delay function */ +void (*rte_delay_us)(unsigned int) = NULL; + void -rte_delay_us(unsigned us) +rte_delay_us_block(unsigned int us) { const uint64_t start = rte_get_timer_cycles(); const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6; @@ -84,3 +87,15 @@ set_tsc_freq(void) RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000); eal_tsc_resolution_hz = freq; } + +void rte_delay_us_callback_register(void (*userfunc)(unsigned int)) +{ + rte_delay_us = userfunc; +} + +static void __attribute__((constructor)) +rte_timer_init(void) +{ + /* set rte_delay_us_block as a delay function */ + rte_delay_us_callback_register(rte_delay_us_block); +} diff --git a/src/dpdk/lib/librte_eal/common/eal_common_vdev.c b/src/dpdk/lib/librte_eal/common/eal_common_vdev.c new file mode 100644 index 00000000..7d6e54f4 --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/eal_common_vdev.c @@ -0,0 +1,124 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 RehiveTech. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <string.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <sys/queue.h> + +#include <rte_vdev.h> +#include <rte_common.h> + +struct vdev_driver_list vdev_driver_list = + TAILQ_HEAD_INITIALIZER(vdev_driver_list); + +/* register a driver */ +void +rte_eal_vdrv_register(struct rte_vdev_driver *driver) +{ + TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next); + rte_eal_driver_register(&driver->driver); +} + +/* unregister a driver */ +void +rte_eal_vdrv_unregister(struct rte_vdev_driver *driver) +{ + rte_eal_driver_unregister(&driver->driver); + TAILQ_REMOVE(&vdev_driver_list, driver, next); +} + +int +rte_eal_vdev_init(const char *name, const char *args) +{ + struct rte_vdev_driver *driver; + + if (name == NULL) + return -EINVAL; + + TAILQ_FOREACH(driver, &vdev_driver_list, next) { + /* + * search a driver prefix in virtual device name. + * For example, if the driver is pcap PMD, driver->name + * will be "net_pcap", but "name" will be "net_pcapN". + * So use strncmp to compare. + */ + if (!strncmp(driver->driver.name, name, + strlen(driver->driver.name))) + return driver->probe(name, args); + } + + /* Give new names precedence over aliases. */ + TAILQ_FOREACH(driver, &vdev_driver_list, next) { + if (driver->driver.alias && + !strncmp(driver->driver.alias, name, + strlen(driver->driver.alias))) + return driver->probe(name, args); + } + + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + return -EINVAL; +} + +int +rte_eal_vdev_uninit(const char *name) +{ + struct rte_vdev_driver *driver; + + if (name == NULL) + return -EINVAL; + + TAILQ_FOREACH(driver, &vdev_driver_list, next) { + /* + * search a driver prefix in virtual device name. + * For example, if the driver is pcap PMD, driver->name + * will be "net_pcap", but "name" will be "net_pcapN". + * So use strncmp to compare. 
+ */ + if (!strncmp(driver->driver.name, name, + strlen(driver->driver.name))) + return driver->remove(name); + } + + /* Give new names precedence over aliases. */ + TAILQ_FOREACH(driver, &vdev_driver_list, next) { + if (driver->driver.alias && + !strncmp(driver->driver.alias, name, + strlen(driver->driver.alias))) + return driver->remove(name); + } + + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + return -EINVAL; +} diff --git a/src/dpdk/lib/librte_eal/common/eal_filesystem.h b/src/dpdk/lib/librte_eal/common/eal_filesystem.h index fdb4a70b..8acbd996 100644 --- a/src/dpdk/lib/librte_eal/common/eal_filesystem.h +++ b/src/dpdk/lib/librte_eal/common/eal_filesystem.h @@ -97,17 +97,6 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id return buffer; } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS -static inline const char * -eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id) -{ - snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir, - internal_config.hugefile_prefix, f_id); - buffer[buflen - 1] = '\0'; - return buffer; -} -#endif - /** define the default filename prefix for the %s values above */ #define HUGEFILE_PREFIX_DEFAULT "rte" diff --git a/src/dpdk/lib/librte_eal/common/eal_hugepages.h b/src/dpdk/lib/librte_eal/common/eal_hugepages.h index 38edac03..68369f26 100644 --- a/src/dpdk/lib/librte_eal/common/eal_hugepages.h +++ b/src/dpdk/lib/librte_eal/common/eal_hugepages.h @@ -52,9 +52,6 @@ struct hugepage_file { int socket_id; /**< NUMA socket ID */ int file_id; /**< the '%d' in HUGEFILE_FMT */ int memseg_id; /**< the memory segment to which page belongs */ -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - int repeated; /**< number of times the page size is repeated */ -#endif char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */ }; diff --git a/src/dpdk/lib/librte_eal/common/eal_private.h b/src/dpdk/lib/librte_eal/common/eal_private.h index 857dc3ea..9e7d8f6b 100644 --- 
a/src/dpdk/lib/librte_eal/common/eal_private.h +++ b/src/dpdk/lib/librte_eal/common/eal_private.h @@ -47,7 +47,9 @@ int rte_eal_memzone_init(void); /** - * Common log initialization function (private to eal). + * Common log initialization function (private to eal). Determines + * where log data is written when no call to rte_openlog_stream is + * in effect. * * @param default_log * The default log stream to be used. @@ -55,7 +57,7 @@ int rte_eal_memzone_init(void); * - 0 on success * - Negative on error */ -int rte_eal_common_log_init(FILE *default_log); +void eal_log_set_default(FILE *default_log); /** * Fill configuration with number of physical and logical processors @@ -97,16 +99,6 @@ int rte_eal_memory_init(void); int rte_eal_timer_init(void); /** - * Init early logs - * - * This function is private to EAL. - * - * @return - * 0 on success, negative on error - */ -int rte_eal_log_early_init(void); - -/** * Init the default log stream * * This function is private to EAL. @@ -117,7 +109,7 @@ int rte_eal_log_early_init(void); int rte_eal_log_init(const char *id, int facility); /** - * Init the default log stream + * Init the PCI infrastructure * * This function is private to EAL. * @@ -126,30 +118,21 @@ int rte_eal_log_init(const char *id, int facility); */ int rte_eal_pci_init(void); -#ifdef RTE_LIBRTE_IVSHMEM -/** - * Init the memory from IVSHMEM devices - * - * This function is private to EAL. - * - * @return - * 0 on success, negative on error - */ -int rte_eal_ivshmem_init(void); +struct rte_pci_driver; +struct rte_pci_device; /** - * Init objects in IVSHMEM devices + * Update a pci device object by asking the kernel for the latest information. * * This function is private to EAL. * + * @param addr + * The PCI Bus-Device-Function address to look for * @return - * 0 on success, negative on error + * - 0 on success. + * - negative on error. 
*/ -int rte_eal_ivshmem_obj_init(void); -#endif - -struct rte_pci_driver; -struct rte_pci_device; +int pci_update_device(const struct rte_pci_addr *addr); /** * Unbind kernel driver for this device @@ -259,13 +242,6 @@ int rte_eal_intr_init(void); int rte_eal_alarm_init(void); /** - * This function initialises any virtual devices - * - * This function is private to the EAL. - */ -int rte_eal_dev_init(void); - -/** * Function is to check if the kernel module(like, vfio, vfio_iommu_type1, * etc.) loaded. * diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h index b20056b8..4eac6663 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h @@ -38,6 +38,8 @@ extern "C" { #endif +#include <stdint.h> +#include <rte_common.h> #include <emmintrin.h> #include "generic/rte_atomic.h" @@ -59,6 +61,12 @@ extern "C" { #define rte_smp_rmb() rte_compiler_barrier() +#define rte_io_mb() rte_mb() + +#define rte_io_wmb() rte_compiler_barrier() + +#define rte_io_rmb() rte_compiler_barrier() + /*------------------------- 16 bit atomic operations -------------------------*/ #ifndef RTE_FORCE_INTRINSICS diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h index 400d8a96..2e04c759 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h @@ -37,9 +37,17 @@ * All rights reserved. 
*/ +#ifndef _RTE_ATOMIC_X86_H_ +#error do not include this file directly, use <rte_atomic.h> instead +#endif + #ifndef _RTE_ATOMIC_I686_H_ #define _RTE_ATOMIC_I686_H_ +#include <stdint.h> +#include <rte_common.h> +#include <rte_atomic.h> + /*------------------------- 64 bit atomic operations -------------------------*/ #ifndef RTE_FORCE_INTRINSICS @@ -47,6 +55,7 @@ static inline int rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) { uint8_t res; + RTE_STD_C11 union { struct { uint32_t l32; diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h index 4de66000..1a53a766 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h @@ -37,9 +37,17 @@ * All rights reserved. */ +#ifndef _RTE_ATOMIC_X86_H_ +#error do not include this file directly, use <rte_atomic.h> instead +#endif + #ifndef _RTE_ATOMIC_X86_64_H_ #define _RTE_ATOMIC_X86_64_H_ +#include <stdint.h> +#include <rte_common.h> +#include <rte_atomic.h> + /*------------------------- 64 bit atomic operations -------------------------*/ #ifndef RTE_FORCE_INTRINSICS diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder.h index ffdb6ef5..251f11b4 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder.h @@ -38,6 +38,8 @@ extern "C" { #endif +#include <stdint.h> +#include <rte_common.h> #include "generic/rte_byteorder.h" #ifndef RTE_BYTE_ORDER diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h index 51c306f8..14d64834 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h +++ 
b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h @@ -31,9 +31,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _RTE_BYTEORDER_X86_H_ +#error do not include this file directly, use <rte_byteorder.h> instead +#endif + #ifndef _RTE_BYTEORDER_I686_H_ #define _RTE_BYTEORDER_I686_H_ +#include <stdint.h> +#include <rte_byteorder.h> + /* * An architecture-optimized byte swap for a 64-bit value. * diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h index dda572bd..516ac052 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h @@ -31,9 +31,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _RTE_BYTEORDER_X86_H_ +#error do not include this file directly, use <rte_byteorder.h> instead +#endif + #ifndef _RTE_BYTEORDER_X86_64_H_ #define _RTE_BYTEORDER_X86_64_H_ +#include <stdint.h> +#include <rte_common.h> + /* * An architecture-optimized byte swap for a 64-bit value. 
* diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_cycles.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_cycles.h index 6e3c7d89..5eb6ce96 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_cycles.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_cycles.h @@ -75,12 +75,14 @@ extern "C" { extern int rte_cycles_vmware_tsc_map; #include <rte_branch_prediction.h> #endif +#include <rte_common.h> static inline uint64_t rte_rdtsc(void) { union { uint64_t tsc_64; + RTE_STD_C11 struct { uint32_t lo_32; uint32_t hi_32; diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_io.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_io.h new file mode 100644 index 00000000..c8d14043 --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_io.h @@ -0,0 +1,47 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2016 Cavium networks. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_IO_X86_H_ +#define _RTE_IO_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_io.h" + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_IO_X86_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h index 413035e7..b9785e85 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h @@ -69,6 +69,8 @@ rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline)); #ifdef RTE_MACHINE_CPUFLAG_AVX512F +#define ALIGNMENT_MASK 0x3F + /** * AVX512 implementation below */ @@ -189,7 +191,7 @@ rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n) } static inline void * -rte_memcpy(void *dst, const void *src, size_t n) +rte_memcpy_generic(void *dst, const void *src, size_t n) { uintptr_t dstu = (uintptr_t)dst; uintptr_t srcu = (uintptr_t)src; @@ -308,6 +310,8 @@ COPY_BLOCK_128_BACK63: #elif defined RTE_MACHINE_CPUFLAG_AVX2 +#define ALIGNMENT_MASK 0x1F + /** * AVX2 implementation below */ @@ -387,7 +391,7 @@ rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n) } static inline void * -rte_memcpy(void *dst, const void *src, size_t n) +rte_memcpy_generic(void *dst, const void *src, size_t n) { uintptr_t dstu = (uintptr_t)dst; uintptr_t srcu = (uintptr_t)src; @@ -499,6 +503,8 @@ COPY_BLOCK_128_BACK31: #else 
/* RTE_MACHINE_CPUFLAG */ +#define ALIGNMENT_MASK 0x0F + /** * SSE & AVX implementation below */ @@ -594,7 +600,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src) * - __m128i <xmm0> ~ <xmm8> must be pre-defined */ #define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \ -({ \ +__extension__ ({ \ int tmp; \ while (len >= 128 + 16 - offset) { \ xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ @@ -655,7 +661,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src) * - __m128i <xmm0> ~ <xmm8> used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined */ #define MOVEUNALIGNED_LEFT47(dst, src, len, offset) \ -({ \ +__extension__ ({ \ switch (offset) { \ case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break; \ case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break; \ @@ -677,7 +683,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src) }) static inline void * -rte_memcpy(void *dst, const void *src, size_t n) +rte_memcpy_generic(void *dst, const void *src, size_t n) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8; uintptr_t dstu = (uintptr_t)dst; @@ -821,6 +827,75 @@ COPY_BLOCK_64_BACK15: #endif /* RTE_MACHINE_CPUFLAG */ +static inline void * +rte_memcpy_aligned(void *dst, const void *src, size_t n) +{ + void *ret = dst; + + /* Copy size <= 16 bytes */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dst = *(const uint8_t *)src; + src = (const uint8_t *)src + 1; + dst = (uint8_t *)dst + 1; + } + if (n & 0x02) { + *(uint16_t *)dst = *(const uint16_t *)src; + src = (const uint16_t *)src + 1; + dst = (uint16_t *)dst + 1; + } + if (n & 0x04) { + *(uint32_t *)dst = *(const uint32_t *)src; + src = (const uint32_t *)src + 1; + dst = (uint32_t *)dst + 1; + } + if (n & 0x08) + *(uint64_t *)dst = *(const uint64_t *)src; + + return ret; + } + + /* Copy 16 <= size <= 32 bytes */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + + return ret; + } + + 
/* Copy 32 < size <= 64 bytes */ + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); + + return ret; + } + + /* Copy 64 bytes blocks */ + for (; n >= 64; n -= 64) { + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; + } + + /* Copy whatever left */ + rte_mov64((uint8_t *)dst - 64 + n, + (const uint8_t *)src - 64 + n); + + return ret; +} + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + if (!(((uintptr_t)dst | (uintptr_t)src) & ALIGNMENT_MASK)) + return rte_memcpy_aligned(dst, src, n); + else + return rte_memcpy_generic(dst, src, n); +} + #ifdef __cplusplus } #endif diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_prefetch.h index 5dac47eb..f464398f 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_prefetch.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_prefetch.h @@ -38,6 +38,7 @@ extern "C" { #endif +#include <rte_common.h> #include "generic/rte_prefetch.h" static inline void rte_prefetch0(const volatile void *p) diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_rtm.h index 0649f794..ab099952 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_rtm.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_rtm.h @@ -20,6 +20,7 @@ /* Official RTM intrinsics interface matching gcc/icc, but works on older gcc compatible compilers and binutils. 
*/ +#include <rte_common.h> #ifdef __cplusplus extern "C" { diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_vect.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_vect.h index b698797c..1b4b85dd 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_vect.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_vect.h @@ -31,8 +31,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _RTE_VECT_H_ -#define _RTE_VECT_H_ +#ifndef _RTE_VECT_X86_H_ +#define _RTE_VECT_X86_H_ /** * @file @@ -40,6 +40,9 @@ * RTE SSE/AVX related header. */ +#include <stdint.h> +#include "generic/rte_vect.h" + #if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) #ifdef __SSE__ @@ -106,7 +109,8 @@ typedef union rte_ymm { #endif /* __AVX__ */ #ifdef RTE_ARCH_I686 -#define _mm_cvtsi128_si64(a) ({ \ +#define _mm_cvtsi128_si64(a) \ +__extension__ ({ \ rte_xmm_t m; \ m.x = (a); \ (m.u64[0]); \ @@ -117,7 +121,8 @@ typedef union rte_ymm { * Prior to version 12.1 icc doesn't support _mm_set_epi64x. 
*/ #if (defined(__ICC) && __ICC < 1210) -#define _mm_set_epi64x(a, b) ({ \ +#define _mm_set_epi64x(a, b) \ +__extension__ ({ \ rte_xmm_t m; \ m.u64[0] = b; \ m.u64[1] = a; \ @@ -129,4 +134,4 @@ typedef union rte_ymm { } #endif -#endif /* _RTE_VECT_H_ */ +#endif /* _RTE_VECT_X86_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_atomic.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_atomic.h index bfb4fe44..7b81705b 100644 --- a/src/dpdk/lib/librte_eal/common/include/generic/rte_atomic.h +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_atomic.h @@ -42,6 +42,7 @@ */ #include <stdint.h> +#include <rte_common.h> #ifdef __DOXYGEN__ @@ -99,6 +100,33 @@ static inline void rte_smp_wmb(void); */ static inline void rte_smp_rmb(void); +/** + * General memory barrier for I/O device + * + * Guarantees that the LOAD and STORE operations that precede the + * rte_io_mb() call are visible to I/O device or CPU before the + * LOAD and STORE operations that follow it. + */ +static inline void rte_io_mb(void); + +/** + * Write memory barrier for I/O device + * + * Guarantees that the STORE operations that precede the + * rte_io_wmb() call are visible to I/O device before the STORE + * operations that follow it. + */ +static inline void rte_io_wmb(void); + +/** + * Read memory barrier for IO device + * + * Guarantees that the LOAD operations on I/O device that precede the + * rte_io_rmb() call are visible to CPU before the LOAD + * operations that follow it. 
+ */ +static inline void rte_io_rmb(void); + #endif /* __DOXYGEN__ */ /** diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h index c46fdcf2..e00bccbc 100644 --- a/src/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h @@ -50,6 +50,8 @@ #include <endian.h> #endif +#include <rte_common.h> + /* * Compile-time endianness detection */ diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_cpuflags.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_cpuflags.h index c1da357c..71321f32 100644 --- a/src/dpdk/lib/librte_eal/common/include/generic/rte_cpuflags.h +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_cpuflags.h @@ -44,6 +44,7 @@ /** * Enumeration of all CPU features supported */ +__extension__ enum rte_cpu_flag_t; /** @@ -55,6 +56,7 @@ enum rte_cpu_flag_t; * flag name * NULL if flag ID is invalid */ +__extension__ const char * rte_cpu_get_flag_name(enum rte_cpu_flag_t feature); @@ -68,6 +70,7 @@ rte_cpu_get_flag_name(enum rte_cpu_flag_t feature); * 0 if flag is not available * -ENOENT if flag is invalid */ +__extension__ int rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature); diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_cycles.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_cycles.h index 8cc21f20..00103ca9 100644 --- a/src/dpdk/lib/librte_eal/common/include/generic/rte_cycles.h +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_cycles.h @@ -180,15 +180,16 @@ rte_get_timer_hz(void) default: rte_panic("Invalid timer source specified\n"); } } - /** * Wait at least us microseconds. + * This function can be replaced with user-defined function. + * @see rte_delay_us_callback_register * * @param us * The number of microseconds to wait. 
*/ -void -rte_delay_us(unsigned us); +extern void +(*rte_delay_us)(unsigned int us); /** * Wait at least ms milliseconds. @@ -202,4 +203,21 @@ rte_delay_ms(unsigned ms) rte_delay_us(ms * 1000); } +/** + * Blocking delay function. + * + * @param us + * Number of microseconds to wait. + */ +void rte_delay_us_block(unsigned int us); + +/** + * Replace rte_delay_us with user defined function. + * + * @param userfunc + * User function which replaces rte_delay_us. rte_delay_us_block restores + * built-in block delay function. + */ +void rte_delay_us_callback_register(void(*userfunc)(unsigned int)); + #endif /* _RTE_CYCLES_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_io.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_io.h new file mode 100644 index 00000000..d82ee695 --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_io.h @@ -0,0 +1,381 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2016 Cavium networks. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_IO_H_ +#define _RTE_IO_H_ + +#include <rte_atomic.h> + +/** + * @file + * I/O device memory operations + * + * This file defines the generic API for I/O device memory read/write operations + */ + +#include <stdint.h> +#include <rte_common.h> +#include <rte_atomic.h> + +#ifdef __DOXYGEN__ + +/** + * Read a 8-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint8_t +rte_read8_relaxed(const volatile void *addr); + +/** + * Read a 16-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint16_t +rte_read16_relaxed(const volatile void *addr); + +/** + * Read a 32-bit value from I/O device memory address *addr*. 
+ * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint32_t +rte_read32_relaxed(const volatile void *addr); + +/** + * Read a 64-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint64_t +rte_read64_relaxed(const volatile void *addr); + +/** + * Write a 8-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ + +static inline void +rte_write8_relaxed(uint8_t value, volatile void *addr); + +/** + * Write a 16-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write16_relaxed(uint16_t value, volatile void *addr); + +/** + * Write a 32-bit value to I/O device memory address *addr*. 
+ * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write32_relaxed(uint32_t value, volatile void *addr); + +/** + * Write a 64-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write64_relaxed(uint64_t value, volatile void *addr); + +/** + * Read a 8-bit value from I/O device memory address *addr*. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint8_t +rte_read8(const volatile void *addr); + +/** + * Read a 16-bit value from I/O device memory address *addr*. + * + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint16_t +rte_read16(const volatile void *addr); + +/** + * Read a 32-bit value from I/O device memory address *addr*. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint32_t +rte_read32(const volatile void *addr); + +/** + * Read a 64-bit value from I/O device memory address *addr*. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint64_t +rte_read64(const volatile void *addr); + +/** + * Write a 8-bit value to I/O device memory address *addr*. 
+ * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ + +static inline void +rte_write8(uint8_t value, volatile void *addr); + +/** + * Write a 16-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write16(uint16_t value, volatile void *addr); + +/** + * Write a 32-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write32(uint32_t value, volatile void *addr); + +/** + * Write a 64-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write64(uint64_t value, volatile void *addr); + +#endif /* __DOXYGEN__ */ + +#ifndef RTE_OVERRIDE_IO_H + +static inline uint8_t __attribute__((always_inline)) +rte_read8_relaxed(const volatile void *addr) +{ + return *(const volatile uint8_t *)addr; +} + +static inline uint16_t __attribute__((always_inline)) +rte_read16_relaxed(const volatile void *addr) +{ + return *(const volatile uint16_t *)addr; +} + +static inline uint32_t __attribute__((always_inline)) +rte_read32_relaxed(const volatile void *addr) +{ + return *(const volatile uint32_t *)addr; +} + +static inline uint64_t __attribute__((always_inline)) +rte_read64_relaxed(const volatile void *addr) +{ + return *(const volatile uint64_t *)addr; +} + +static inline void __attribute__((always_inline)) +rte_write8_relaxed(uint8_t value, volatile void *addr) +{ + *(volatile uint8_t *)addr = value; +} + +static inline void __attribute__((always_inline)) +rte_write16_relaxed(uint16_t value, volatile void *addr) +{ + *(volatile uint16_t *)addr = value; +} + +static inline void __attribute__((always_inline)) +rte_write32_relaxed(uint32_t value, volatile void *addr) 
+{ + *(volatile uint32_t *)addr = value; +} + +static inline void __attribute__((always_inline)) +rte_write64_relaxed(uint64_t value, volatile void *addr) +{ + *(volatile uint64_t *)addr = value; +} + +static inline uint8_t __attribute__((always_inline)) +rte_read8(const volatile void *addr) +{ + uint8_t val; + val = rte_read8_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint16_t __attribute__((always_inline)) +rte_read16(const volatile void *addr) +{ + uint16_t val; + val = rte_read16_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint32_t __attribute__((always_inline)) +rte_read32(const volatile void *addr) +{ + uint32_t val; + val = rte_read32_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint64_t __attribute__((always_inline)) +rte_read64(const volatile void *addr) +{ + uint64_t val; + val = rte_read64_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline void __attribute__((always_inline)) +rte_write8(uint8_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write8_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write16(uint16_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write16_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write32(uint32_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write32_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write64(uint64_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write64_relaxed(value, addr); +} + +#endif /* RTE_OVERRIDE_IO_H */ + +#endif /* _RTE_IO_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_memcpy.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_memcpy.h index afb0afe4..4e9d8794 100644 --- a/src/dpdk/lib/librte_eal/common/include/generic/rte_memcpy.h +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_memcpy.h @@ -64,6 +64,8 @@ rte_mov16(uint8_t *dst, const uint8_t *src); static 
inline void rte_mov32(uint8_t *dst, const uint8_t *src); +#ifdef __DOXYGEN__ + /** * Copy 48 bytes from one location to another using optimised * instructions. The locations should not overlap. @@ -76,6 +78,8 @@ rte_mov32(uint8_t *dst, const uint8_t *src); static inline void rte_mov48(uint8_t *dst, const uint8_t *src); +#endif /* __DOXYGEN__ */ + /** * Copy 64 bytes from one location to another using optimised * instructions. The locations should not overlap. diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_vect.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_vect.h new file mode 100644 index 00000000..600ee9f3 --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_vect.h @@ -0,0 +1,214 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_VECT_H_ +#define _RTE_VECT_H_ + +/** + * @file + * SIMD vector types + * + * This file defines types to use vector instructions with generic C code. + */ + +#include <stdint.h> + +/* Unsigned vector types */ + +/** + * 64 bits vector size to use with unsigned 8 bits elements. + * + * a = (rte_v64u8_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef uint8_t rte_v64u8_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with unsigned 16 bits elements. + * + * a = (rte_v64u16_t){ a0, a1, a2, a3 } + */ +typedef uint16_t rte_v64u16_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with unsigned 32 bits elements. + * + * a = (rte_v64u32_t){ a0, a1 } + */ +typedef uint32_t rte_v64u32_t __attribute__((vector_size(8), aligned(8))); + +/** + * 128 bits vector size to use with unsigned 8 bits elements. + * + * a = (rte_v128u8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef uint8_t rte_v128u8_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with unsigned 16 bits elements. + * + * a = (rte_v128u16_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef uint16_t rte_v128u16_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with unsigned 32 bits elements. 
+ * + * a = (rte_v128u32_t){ a0, a1, a2, a3 } + */ +typedef uint32_t rte_v128u32_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with unsigned 64 bits elements. + * + * a = (rte_v128u64_t){ a0, a1 } + */ +typedef uint64_t rte_v128u64_t __attribute__((vector_size(16), aligned(16))); + +/** + * 256 bits vector size to use with unsigned 8 bits elements. + * + * a = (rte_v256u8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15, + * a16, a17, a18, a19, a20, a21, a22, a23, + * a24, a25, a26, a27, a28, a29, a30, a31 } + */ +typedef uint8_t rte_v256u8_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with unsigned 16 bits elements. + * + * a = (rte_v256u16_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef uint16_t rte_v256u16_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with unsigned 32 bits elements. + * + * a = (rte_v256u32_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef uint32_t rte_v256u32_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with unsigned 64 bits elements. + * + * a = (rte_v256u64_t){ a0, a1, a2, a3 } + */ +typedef uint64_t rte_v256u64_t __attribute__((vector_size(32), aligned(32))); + + +/* Signed vector types */ + +/** + * 64 bits vector size to use with 8 bits elements. + * + * a = (rte_v64s8_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef int8_t rte_v64s8_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with 16 bits elements. + * + * a = (rte_v64s16_t){ a0, a1, a2, a3 } + */ +typedef int16_t rte_v64s16_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with 32 bits elements.
+ * + * a = (rte_v64s32_t){ a0, a1 } + */ +typedef int32_t rte_v64s32_t __attribute__((vector_size(8), aligned(8))); + +/** + * 128 bits vector size to use with 8 bits elements. + * + * a = (rte_v128s8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef int8_t rte_v128s8_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with 16 bits elements. + * + * a = (rte_v128s16_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef int16_t rte_v128s16_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with 32 bits elements. + * + * a = (rte_v128s32_t){ a0, a1, a2, a3 } + */ +typedef int32_t rte_v128s32_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with 64 bits elements. + * + * a = (rte_v128s64_t){ a0, a1 } + */ +typedef int64_t rte_v128s64_t __attribute__((vector_size(16), aligned(16))); + +/** + * 256 bits vector size to use with 8 bits elements. + * + * a = (rte_v256s8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15, + * a16, a17, a18, a19, a20, a21, a22, a23, + * a24, a25, a26, a27, a28, a29, a30, a31 } + */ +typedef int8_t rte_v256s8_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with 16 bits elements. + * + * a = (rte_v256s16_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef int16_t rte_v256s16_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with 32 bits elements. + * + * a = (rte_v256s32_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef int32_t rte_v256s32_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with 64 bits elements.
+ * + * a = (rte_v256s64_t){ a0, a1, a2, a3 } + */ +typedef int64_t rte_v256s64_t __attribute__((vector_size(32), aligned(32))); + +#endif /* _RTE_VECT_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/rte_bus.h b/src/dpdk/lib/librte_eal/common/include/rte_bus.h new file mode 100644 index 00000000..7c369692 --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/include/rte_bus.h @@ -0,0 +1,158 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 NXP + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of NXP nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_BUS_H_ +#define _RTE_BUS_H_ + +/** + * @file + * + * DPDK device bus interface + * + * This file exposes API and interfaces for bus abstraction + * over the devices and drivers in EAL. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <sys/queue.h> + +#include <rte_log.h> +#include <rte_dev.h> + +/** Double linked list of buses */ +TAILQ_HEAD(rte_bus_list, rte_bus); + +/** + * Bus specific scan for devices attached on the bus. + * For each bus object, the scan would be responsible for finding devices and + * adding them to its private device list. + * + * A bus should mandatorily implement this method. + * + * @return + * 0 for successful scan + * <0 for unsuccessful scan with error value + */ +typedef int (*rte_bus_scan_t)(void); + +/** + * Implementation specific probe function which is responsible for linking + * devices on that bus with applicable drivers. + * + * This is called while iterating over each registered bus. + * + * @return + * 0 for successful probe + * !0 for any error while probing + */ +typedef int (*rte_bus_probe_t)(void); + +/** + * A structure describing a generic bus. + */ +struct rte_bus { + TAILQ_ENTRY(rte_bus) next; /**< Next bus object in linked list */ + const char *name; /**< Name of the bus */ + rte_bus_scan_t scan; /**< Scan for devices attached to bus */ + rte_bus_probe_t probe; /**< Probe devices on bus */ +}; + +/** + * Register a Bus handler. + * + * @param bus + * A pointer to a rte_bus structure describing the bus + * to be registered. + */ +void rte_bus_register(struct rte_bus *bus); + +/** + * Unregister a Bus handler. + * + * @param bus + * A pointer to a rte_bus structure describing the bus + * to be unregistered. + */ +void rte_bus_unregister(struct rte_bus *bus); + +/** + * Scan all the buses.
+ * + * @return + * 0 in case of success in scanning all buses + * !0 in case of failure to scan + */ +int rte_bus_scan(void); + +/** + * For each device on the buses, perform a driver 'match' and call the + * driver-specific probe for device initialization. + * + * @return + * 0 for successful match/probe + * !0 otherwise + */ +int rte_bus_probe(void); + +/** + * Dump information of all the buses registered with EAL. + * + * @param f + * A valid and open output stream handle + * + * @return + * 0 in case of success + * !0 in case there is error in opening the output stream + */ +void rte_bus_dump(FILE *f); + +/** + * Helper for Bus registration. + * The constructor has higher priority than PMD constructors. + */ +#define RTE_REGISTER_BUS(nm, bus) \ +static void __attribute__((constructor(101), used)) businitfn_ ##nm(void) \ +{\ + (bus).name = RTE_STR(nm);\ + rte_bus_register(&bus); \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BUS_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/rte_common.h b/src/dpdk/lib/librte_eal/common/include/rte_common.h index 332f2a43..8dda3e29 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_common.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_common.h @@ -59,6 +59,13 @@ extern "C" { #define asm __asm__ #endif +/** C extension macro for environments lacking C11 features. */ +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L +#define RTE_STD_C11 __extension__ +#else +#define RTE_STD_C11 +#endif + #ifdef RTE_ARCH_STRICT_ALIGN typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1))); typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1))); @@ -268,7 +275,8 @@ rte_align64pow2(uint64_t v) /** * Macro to return the minimum of two numbers */ -#define RTE_MIN(a, b) ({ \ +#define RTE_MIN(a, b) \ + __extension__ ({ \ typeof (a) _a = (a); \ typeof (b) _b = (b); \ _a < _b ?
_a : _b; \ @@ -277,7 +285,8 @@ rte_align64pow2(uint64_t v) /** * Macro to return the maximum of two numbers */ -#define RTE_MAX(a, b) ({ \ +#define RTE_MAX(a, b) \ + __extension__ ({ \ typeof (a) _a = (a); \ typeof (b) _b = (b); \ _a > _b ? _a : _b; \ @@ -322,10 +331,39 @@ rte_bsf32(uint32_t v) #define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) #endif +/** + * Return pointer to the wrapping struct instance. + * + * Example: + * + * struct wrapper { + * ... + * struct child c; + * ... + * }; + * + * struct child *x = obtain(...); + * struct wrapper *w = container_of(x, struct wrapper, c); + */ +#ifndef container_of +#define container_of(ptr, type, member) __extension__ ({ \ + typeof(((type *)0)->member) *_ptr = (ptr); \ + (type *)(((char *)_ptr) - offsetof(type, member)); }) +#endif + #define _RTE_STR(x) #x /** Take a macro value and get a string version of it */ #define RTE_STR(x) _RTE_STR(x) +/** + * ISO C helpers to modify format strings using variadic macros. + * This is a replacement for the ", ## __VA_ARGS__" GNU extension. + * An empty %s argument is appended to avoid a dangling comma. + */ +#define RTE_FMT(fmt, ...) fmt "%.0s", __VA_ARGS__ "" +#define RTE_FMT_HEAD(fmt, ...) fmt +#define RTE_FMT_TAIL(fmt, ...) __VA_ARGS__ + /** Mask value of type "tp" for the first "ln" bit set. */ #define RTE_LEN2MASK(ln, tp) \ ((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln)))) diff --git a/src/dpdk/lib/librte_eal/common/include/rte_dev.h b/src/dpdk/lib/librte_eal/common/include/rte_dev.h index 95789f9d..b17791f5 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_dev.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_dev.h @@ -100,37 +100,56 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) } \ } while (0) +/** + * A generic memory resource representation. + */ +struct rte_mem_resource { + uint64_t phys_addr; /**< Physical address, 0 if no resource. */ + uint64_t len; /**< Length of the resource. 
*/ + void *addr; /**< Virtual address, NULL when not mapped. */ +}; /** Double linked list of device drivers. */ TAILQ_HEAD(rte_driver_list, rte_driver); +/** Double linked list of devices. */ +TAILQ_HEAD(rte_device_list, rte_device); + +/* Forward declaration */ +struct rte_driver; /** - * Initialization function called for each device driver once. + * A structure describing a generic device. */ -typedef int (rte_dev_init_t)(const char *name, const char *args); +struct rte_device { + TAILQ_ENTRY(rte_device) next; /**< Next device */ + const struct rte_driver *driver;/**< Associated driver */ + int numa_node; /**< NUMA node connection */ + struct rte_devargs *devargs; /**< Device user arguments */ +}; /** - * Uninitilization function called for each device driver once. + * Insert a device detected by a bus scanning. + * + * @param dev + * A pointer to a rte_device structure describing the detected device. */ -typedef int (rte_dev_uninit_t)(const char *name); +void rte_eal_device_insert(struct rte_device *dev); /** - * Driver type enumeration + * Remove a device (e.g. when being unplugged). + * + * @param dev + * A pointer to a rte_device structure describing the device to be removed. */ -enum pmd_type { - PMD_VDEV = 0, - PMD_PDEV = 1, -}; +void rte_eal_device_remove(struct rte_device *dev); /** * A structure describing a device driver. */ struct rte_driver { TAILQ_ENTRY(rte_driver) next; /**< Next in list. */ - enum pmd_type type; /**< PMD Driver type */ const char *name; /**< Driver name. */ - rte_dev_init_t *init; /**< Device init. function. */ - rte_dev_uninit_t *uninit; /**< Device uninit. function. */ + const char *alias; /**< Driver alias. */ }; /** @@ -178,31 +197,73 @@ int rte_eal_vdev_init(const char *name, const char *args); */ int rte_eal_vdev_uninit(const char *name); -#define DRIVER_EXPORT_NAME_ARRAY(n, idx) n##idx[] +/** + * Attach a device to a registered driver. 
+ * + * @param name + * The device name, that refers to a pci device (or some private + * way of designating a vdev device). Based on this device name, eal + * will identify a driver capable of handling it and pass it to the + * driver probing function. + * @param devargs + * Device arguments to be passed to the driver. + * @return + * 0 on success, negative on error. + */ +int rte_eal_dev_attach(const char *name, const char *devargs); -#define DRIVER_EXPORT_NAME(name, idx) \ -static const char DRIVER_EXPORT_NAME_ARRAY(this_pmd_name, idx) \ -__attribute__((used)) = RTE_STR(name) +/** + * Detach a device from its driver. + * + * @param name + * Same description as for rte_eal_dev_attach(). + * Here, eal will call the driver detaching function. + * @return + * 0 on success, negative on error. + */ +int rte_eal_dev_detach(const char *name); -#define PMD_REGISTER_DRIVER(drv, nm)\ -void devinitfn_ ##drv(void);\ -void __attribute__((constructor, used)) devinitfn_ ##drv(void)\ -{\ - (drv).name = RTE_STR(nm);\ - rte_eal_driver_register(&drv);\ -} \ -DRIVER_EXPORT_NAME(nm, __COUNTER__) +#define RTE_PMD_EXPORT_NAME_ARRAY(n, idx) n##idx[] + +#define RTE_PMD_EXPORT_NAME(name, idx) \ +static const char RTE_PMD_EXPORT_NAME_ARRAY(this_pmd_name, idx) \ +__attribute__((used)) = RTE_STR(name) #define DRV_EXP_TAG(name, tag) __##name##_##tag -#define DRIVER_REGISTER_PCI_TABLE(name, table) \ +#define RTE_PMD_REGISTER_PCI_TABLE(name, table) \ static const char DRV_EXP_TAG(name, pci_tbl_export)[] __attribute__((used)) = \ RTE_STR(table) -#define DRIVER_REGISTER_PARAM_STRING(name, str) \ +#define RTE_PMD_REGISTER_PARAM_STRING(name, str) \ static const char DRV_EXP_TAG(name, param_string_export)[] \ __attribute__((used)) = str +/** + * Advertise the list of kernel modules required to run this driver + * + * This string lists the kernel modules required for the devices + * associated to a PMD. The format of each line of the string is: + * "<device-pattern> <kmod-expression>". 
+ * + * The possible formats for the device pattern are: + * "*" all devices supported by this driver + * "pci:*" all PCI devices supported by this driver + * "pci:v8086:d*:sv*:sd*" all PCI devices supported by this driver + * whose vendor id is 0x8086. + * + * The format of the kernel modules list is a parenthesized expression + * containing logical-and (&) and logical-or (|). + * + * The device pattern and the kmod expression are separated by a space. + * + * Example: + * - "* igb_uio | uio_pci_generic | vfio" + */ +#define RTE_PMD_REGISTER_KMOD_DEP(name, str) \ +static const char DRV_EXP_TAG(name, kmod_dep_export)[] \ +__attribute__((used)) = str + #ifdef __cplusplus } #endif diff --git a/src/dpdk/lib/librte_eal/common/include/rte_devargs.h b/src/dpdk/lib/librte_eal/common/include/rte_devargs.h index 53c59f56..88120a1c 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_devargs.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_devargs.h @@ -76,6 +76,7 @@ struct rte_devargs { TAILQ_ENTRY(rte_devargs) next; /** Type of device. */ enum rte_devtype type; + RTE_STD_C11 union { /** Used if type is RTE_DEVTYPE_*_PCI. */ struct { @@ -106,8 +107,8 @@ extern struct rte_devargs_list devargs_list; * "04:00.0,arg=val". * * For virtual devices, the format of arguments string is "DRIVER_NAME*" - * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring", - * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". + * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring", + * "net_ring0", "net_pmdAnything,arg=0:arg2=1". * * The function parses the arguments string to get driver name and driver * arguments. @@ -134,8 +135,8 @@ int rte_eal_parse_devargs_str(const char *devargs_str, * "04:00.0,arg=val". * * For virtual devices, the format of arguments string is "DRIVER_NAME*" - * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring", - * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". The validity of the + * or "DRIVER_NAME*,key=val,key2=val2,...". 
Examples: "net_ring", + * "net_ring0", "net_pmdAnything,arg=0:arg2=1". The validity of the * driver name is not checked by this function, it is done when probing * the drivers. * diff --git a/src/dpdk/lib/librte_eal/common/include/rte_eal.h b/src/dpdk/lib/librte_eal/common/include/rte_eal.h index a71d6f57..03fee500 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_eal.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_eal.h @@ -44,6 +44,7 @@ #include <sched.h> #include <rte_per_lcore.h> +#include <rte_config.h> #ifdef __cplusplus extern "C" { @@ -145,14 +146,19 @@ int rte_eal_iopl_init(void); * This behavior may change in the future. * * @param argc - * The argc argument that was given to the main() function. + * A non-negative value. If it is greater than 0, the array members + * for argv[0] through argv[argc] (non-inclusive) shall contain pointers + * to strings. * @param argv - * The argv argument that was given to the main() function. + * An array of strings. The contents of the array, as well as the strings + * which are pointed to by the array, may be modified by this function. * @return * - On success, the number of parsed arguments, which is greater or * equal to zero. After the call to rte_eal_init(), - * all arguments argv[x] with x < ret may be modified and should - * not be accessed by the application. + * all arguments argv[x] with x < ret may have been modified by this + * function call and should not be further interpreted by the + * application. The EAL does not take any ownership of the memory used + * for either the argv array, or its members. * - On failure, a negative error value. 
*/ int rte_eal_init(int argc, char **argv); @@ -252,6 +258,9 @@ static inline int rte_gettid(void) return RTE_PER_LCORE(_thread_id); } +#define RTE_INIT(func) \ +static void __attribute__((constructor, used)) func(void) + #ifdef __cplusplus } #endif diff --git a/src/dpdk/lib/librte_eal/common/include/rte_interrupts.h b/src/dpdk/lib/librte_eal/common/include/rte_interrupts.h index ff11ef3a..6cade018 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_interrupts.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_interrupts.h @@ -34,6 +34,8 @@ #ifndef _RTE_INTERRUPTS_H_ #define _RTE_INTERRUPTS_H_ +#include <rte_common.h> + /** * @file * @@ -68,7 +70,7 @@ typedef void (*rte_intr_callback_fn)(struct rte_intr_handle *intr_handle, * - On success, zero. * - On failure, a negative value. */ -int rte_intr_callback_register(struct rte_intr_handle *intr_handle, +int rte_intr_callback_register(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg); /** @@ -86,7 +88,7 @@ int rte_intr_callback_register(struct rte_intr_handle *intr_handle, * - On success, return the number of callback entities removed. * - On failure, a negative value. */ -int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, +int rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg); /** @@ -99,7 +101,7 @@ int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, * - On success, zero. * - On failure, a negative value. */ -int rte_intr_enable(struct rte_intr_handle *intr_handle); +int rte_intr_enable(const struct rte_intr_handle *intr_handle); /** * It disables the interrupt for the specified handle. @@ -111,7 +113,7 @@ int rte_intr_enable(struct rte_intr_handle *intr_handle); * - On success, zero. * - On failure, a negative value. 
*/ -int rte_intr_disable(struct rte_intr_handle *intr_handle); +int rte_intr_disable(const struct rte_intr_handle *intr_handle); #ifdef __cplusplus } diff --git a/src/dpdk/lib/librte_eal/common/include/rte_log.h b/src/dpdk/lib/librte_eal/common/include/rte_log.h index b1add04c..954b96cf 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_log.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_log.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,8 +42,6 @@ * This file provides a log API to RTE applications. */ -#include "rte_common.h" /* for __rte_deprecated macro */ - #ifdef __cplusplus extern "C" { #endif @@ -56,7 +54,7 @@ extern "C" { struct rte_logs { uint32_t type; /**< Bitfield with enabled logs. */ uint32_t level; /**< Log level. */ - FILE *file; /**< Pointer to current FILE* for logs. */ + FILE *file; /**< Output file set by rte_openlog_stream, or NULL. */ }; /** Global log informations */ @@ -81,6 +79,7 @@ extern struct rte_logs rte_logs; #define RTE_LOGTYPE_PIPELINE 0x00008000 /**< Log related to pipeline. */ #define RTE_LOGTYPE_MBUF 0x00010000 /**< Log related to mbuf. */ #define RTE_LOGTYPE_CRYPTODEV 0x00020000 /**< Log related to cryptodev. */ +#define RTE_LOGTYPE_EFD 0x00040000 /**< Log related to EFD. */ /* these log types can be used in an application */ #define RTE_LOGTYPE_USER1 0x01000000 /**< User-defined log type 1. */ @@ -102,9 +101,6 @@ extern struct rte_logs rte_logs; #define RTE_LOG_INFO 7U /**< Informational. */ #define RTE_LOG_DEBUG 8U /**< Debug-level messages. */ -/** The default log stream. */ -extern FILE *eal_default_log_stream; - /** * Change the stream that will be used by the logging system. * @@ -123,9 +119,8 @@ int rte_openlog_stream(FILE *f); /** * Set the global log level. 
* - * After this call, all logs that are lower or equal than level and - * lower or equal than the RTE_LOG_LEVEL configuration option will be - * displayed. + * After this call, logs with a level lower or equal than the level + * passed as argument will be displayed. * * @param level * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). @@ -181,45 +176,6 @@ int rte_log_cur_msg_loglevel(void); int rte_log_cur_msg_logtype(void); /** - * @deprecated - * Enable or disable the history (enabled by default) - * - * @param enable - * true to enable, or 0 to disable history. - */ -__rte_deprecated -void rte_log_set_history(int enable); - -/** - * @deprecated - * Dump the log history to a file - * - * @param f - * A pointer to a file for output - */ -__rte_deprecated -void rte_log_dump_history(FILE *f); - -/** - * @deprecated - * Add a log message to the history. - * - * This function can be called from a user-defined log stream. It adds - * the given message in the history that can be dumped using - * rte_log_dump_history(). - * - * @param buf - * A data buffer containing the message to be saved in the history. - * @param size - * The length of the data buffer. - * @return - * - 0: Success. - * - (-ENOBUFS) if there is no room to store the message. - */ -__rte_deprecated -int rte_log_add_in_history(const char *buf, size_t size); - -/** * Generates a log message. * * The message will be sent in the stream defined by the previous call @@ -228,9 +184,8 @@ int rte_log_add_in_history(const char *buf, size_t size); * The level argument determines if the log should be displayed or * not, depending on the global rte_logs variable. * - * The preferred alternative is the RTE_LOG() function because debug logs may - * be removed at compilation time if optimization is enabled. Moreover, - * logs are automatically prefixed by type when using the macro. + * The preferred alternative is the RTE_LOG() because it adds the + * level and type in the logged string. 
* * @param level * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). @@ -261,8 +216,8 @@ int rte_log(uint32_t level, uint32_t logtype, const char *format, ...) * not, depending on the global rte_logs variable. A trailing * newline may be added if needed. * - * The preferred alternative is the RTE_LOG() because debug logs may be - * removed at compilation time. + * The preferred alternative is the RTE_LOG() because it adds the + * level and type in the logged string. * * @param level * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). @@ -283,15 +238,8 @@ int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) /** * Generates a log message. * - * The RTE_LOG() is equivalent to rte_log() with two differences: - - * - RTE_LOG() can be used to remove debug logs at compilation time, - * depending on RTE_LOG_LEVEL configuration option, and compilation - * optimization level. If optimization is enabled, the tests - * involving constants only are pre-computed. If compilation is done - * with -O0, these tests will be done at run time. - * - The log level and log type names are smaller, for example: - * RTE_LOG(INFO, EAL, "this is a %s", "log"); + * The RTE_LOG() is a helper that prefixes the string with the log level + * and type, and call rte_log(). * * @param l * Log level. A value between EMERG (1) and DEBUG (8). The short name is @@ -307,7 +255,31 @@ int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) * - Negative on error. */ #define RTE_LOG(l, t, ...) \ - (void)((RTE_LOG_ ## l <= RTE_LOG_LEVEL) ? \ + rte_log(RTE_LOG_ ## l, \ + RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) + +/** + * Generates a log message for data path. + * + * Similar to RTE_LOG(), except that it is removed at compilation time + * if the RTE_LOG_DP_LEVEL configuration option is lower than the log + * level argument. + * + * @param l + * Log level. A value between EMERG (1) and DEBUG (8). 
The short name is + * expanded by the macro, so it cannot be an integer value. + * @param t + * The log type, for example, EAL. The short name is expanded by the + * macro, so it cannot be an integer value. + * @param ... + * The fmt string, as in printf(3), followed by the variable arguments + * required by the format. + * @return + * - 0: Success. + * - Negative on error. + */ +#define RTE_LOG_DP(l, t, ...) \ + (void)((RTE_LOG_ ## l <= RTE_LOG_DP_LEVEL) ? \ rte_log(RTE_LOG_ ## l, \ RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) : \ 0) diff --git a/src/dpdk/lib/librte_eal/common/include/rte_malloc.h b/src/dpdk/lib/librte_eal/common/include/rte_malloc.h index 74bb78c7..008ce134 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_malloc.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_malloc.h @@ -294,7 +294,7 @@ rte_malloc_get_socket_stats(int socket, /** * Dump statistics. * - * Dump for the specified type to the console. If the type argument is + * Dump for the specified type to a file. If the type argument is * NULL, all memory types will be dumped. * * @param f diff --git a/src/dpdk/lib/librte_eal/common/include/rte_memory.h b/src/dpdk/lib/librte_eal/common/include/rte_memory.h index 06611093..4aa5d1f7 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_memory.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_memory.h @@ -44,6 +44,8 @@ #include <stddef.h> #include <stdio.h> +#include <rte_config.h> + #ifdef RTE_EXEC_ENV_LINUXAPP #include <exec-env/rte_dom0_common.h> #endif @@ -54,6 +56,7 @@ extern "C" { #include <rte_common.h> +__extension__ enum rte_page_sizes { RTE_PGSIZE_4K = 1ULL << 12, RTE_PGSIZE_64K = 1ULL << 16, @@ -103,13 +106,11 @@ typedef uint64_t phys_addr_t; /**< Physical address definition. */ */ struct rte_memseg { phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 union { void *addr; /**< Start virtual address. 
*/ uint64_t addr_64; /**< Makes sure addr is always 64 bits */ }; -#ifdef RTE_LIBRTE_IVSHMEM - phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ -#endif size_t len; /**< Length of the segment. */ uint64_t hugepage_sz; /**< The pagesize of underlying memory */ int32_t socket_id; /**< NUMA socket ID. */ @@ -161,7 +162,7 @@ phys_addr_t rte_mem_virt2phy(const void *virt); const struct rte_memseg *rte_eal_get_physmem_layout(void); /** - * Dump the physical memory layout to the console. + * Dump the physical memory layout to a file. * * @param f * A pointer to a file for output diff --git a/src/dpdk/lib/librte_eal/common/include/rte_memzone.h b/src/dpdk/lib/librte_eal/common/include/rte_memzone.h index f69b5a87..1d0827f4 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_memzone.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_memzone.h @@ -53,6 +53,7 @@ #include <stdio.h> #include <rte_memory.h> +#include <rte_common.h> #ifdef __cplusplus extern "C" { @@ -78,13 +79,11 @@ struct rte_memzone { char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */ phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 union { void *addr; /**< Start virtual address. */ uint64_t addr_64; /**< Makes sure addr is always 64-bits */ }; -#ifdef RTE_LIBRTE_IVSHMEM - phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ -#endif size_t len; /**< Length of the memzone. */ uint64_t hugepage_sz; /**< The page size of underlying memory */ @@ -256,12 +255,10 @@ const struct rte_memzone *rte_memzone_reserve_bounded(const char *name, /** * Free a memzone. * - * Note: an IVSHMEM zone cannot be freed. - * * @param mz * A pointer to the memzone * @return - * -EINVAL - invalid parameter, IVSHMEM memzone. + * -EINVAL - invalid parameter. 
* 0 - success */ int rte_memzone_free(const struct rte_memzone *mz); @@ -280,7 +277,7 @@ int rte_memzone_free(const struct rte_memzone *mz); const struct rte_memzone *rte_memzone_lookup(const char *name); /** - * Dump all reserved memzones to the console. + * Dump all reserved memzones to a file. * * @param f * A pointer to a file for output diff --git a/src/dpdk/lib/librte_eal/common/include/rte_pci.h b/src/dpdk/lib/librte_eal/common/include/rte_pci.h index fa749626..8557e477 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_pci.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_pci.h @@ -82,7 +82,9 @@ extern "C" { #include <stdint.h> #include <inttypes.h> +#include <rte_debug.h> #include <rte_interrupts.h> +#include <rte_dev.h> TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */ TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */ @@ -95,6 +97,7 @@ const char *pci_get_sysfs_path(void); /** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ #define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 +#define PCI_PRI_STR_SIZE sizeof("XXXX:XX:XX.X") /** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */ #define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 @@ -105,15 +108,6 @@ const char *pci_get_sysfs_path(void); /** Nb. of values in PCI resource format. */ #define PCI_RESOURCE_FMT_NVAL 3 -/** - * A structure describing a PCI resource. - */ -struct rte_pci_resource { - uint64_t phys_addr; /**< Physical address, 0 if no resource. */ - uint64_t len; /**< Length of the resource. */ - void *addr; /**< Virtual address, NULL when not mapped. */ -}; - /** Maximum number of PCI resources. */ #define PCI_MAX_RESOURCE 6 @@ -155,17 +149,23 @@ enum rte_kernel_driver { */ struct rte_pci_device { TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */ + struct rte_device device; /**< Inherit core device */ struct rte_pci_addr addr; /**< PCI location. 
*/ struct rte_pci_id id; /**< PCI ID. */ - struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */ + struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE]; + /**< PCI Memory Resource */ struct rte_intr_handle intr_handle; /**< Interrupt handle */ struct rte_pci_driver *driver; /**< Associated driver */ uint16_t max_vfs; /**< sriov enable if not zero */ - int numa_node; /**< NUMA node connection */ - struct rte_devargs *devargs; /**< Device user arguments */ enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ }; +/** + * @internal + * Helper macro for drivers that need to convert to struct rte_pci_device. + */ +#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device) + /** Any PCI device identifier (vendor, device, ...) */ #define PCI_ANY_ID (0xffff) #define RTE_CLASS_ANY_ID (0xffffff) @@ -193,33 +193,29 @@ struct rte_pci_driver; /** * Initialisation function for the driver called during PCI probing. */ -typedef int (pci_devinit_t)(struct rte_pci_driver *, struct rte_pci_device *); +typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *); /** * Uninitialisation function for the driver called during hotplugging. */ -typedef int (pci_devuninit_t)(struct rte_pci_device *); +typedef int (pci_remove_t)(struct rte_pci_device *); /** * A structure describing a PCI driver. */ struct rte_pci_driver { TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ - const char *name; /**< Driver name. */ - pci_devinit_t *devinit; /**< Device init. function. */ - pci_devuninit_t *devuninit; /**< Device uninit function. */ + struct rte_driver driver; /**< Inherit core driver. */ + pci_probe_t *probe; /**< Device Probe function. */ + pci_remove_t *remove; /**< Device Remove function. */ const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ uint32_t drv_flags; /**< Flags contolling handling of device. 
*/ }; /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ #define RTE_PCI_DRV_NEED_MAPPING 0x0001 -/** Device needs to be unbound even if no module is provided */ -#define RTE_PCI_DRV_FORCE_UNBIND 0x0004 /** Device driver supports link state interrupt */ #define RTE_PCI_DRV_INTR_LSC 0x0008 -/** Device driver supports detaching capability */ -#define RTE_PCI_DRV_DETACHABLE 0x0010 /** * A structure describing a PCI mapping. @@ -308,6 +304,28 @@ eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr) } #undef GET_PCIADDR_FIELD +/** + * Utility function to write a pci device name, this device name can later be + * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_* + * BDF helpers. + * + * @param addr + * The PCI Bus-Device-Function address + * @param output + * The output buffer string + * @param size + * The output buffer size + */ +static inline void +rte_eal_pci_device_name(const struct rte_pci_addr *addr, + char *output, size_t size) +{ + RTE_VERIFY(size >= PCI_PRI_STR_SIZE); + RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT, + addr->domain, addr->bus, + addr->devid, addr->function) >= 0); +} + /* Compare two PCI device addresses. */ /** * Utility function to compare two PCI device addresses. @@ -442,7 +460,7 @@ int rte_eal_pci_probe_one(const struct rte_pci_addr *addr); * Close the single PCI device. * * Scan the content of the PCI bus, and find the pci device specified by pci - * address, then call the devuninit() function for registered driver that has a + * address, then call the remove() function for registered driver that has a * matching entry in its id_table for discovered device. 
* * @param addr @@ -470,6 +488,16 @@ void rte_eal_pci_dump(FILE *f); */ void rte_eal_pci_register(struct rte_pci_driver *driver); +/** Helper for PCI device registration from driver (eth, crypto) instance */ +#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \ +RTE_INIT(pciinitfn_ ##nm); \ +static void pciinitfn_ ##nm(void) \ +{\ + (pci_drv).driver.name = RTE_STR(nm);\ + rte_eal_pci_register(&pci_drv); \ +} \ +RTE_PMD_EXPORT_NAME(nm, __COUNTER__) + /** * Unregister a PCI driver. * diff --git a/src/dpdk/lib/librte_eal/common/include/rte_pci_dev_ids.h b/src/dpdk/lib/librte_eal/common/include/rte_pci_dev_ids.h deleted file mode 100644 index 6ec8ae8c..00000000 --- a/src/dpdk/lib/librte_eal/common/include/rte_pci_dev_ids.h +++ /dev/null @@ -1,326 +0,0 @@ -/*- - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. - * - * Contact Information: - * Intel Corporation - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - */ - -#ifndef RTE_PCI_DEV_ID_DECL_IGB -#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) -#endif - -#ifndef RTE_PCI_DEV_ID_DECL_IGBVF -#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev) -#endif - -#ifndef RTE_PCI_DEV_ID_DECL_IXGBE -#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) -#endif - -#ifndef RTE_PCI_DEV_ID_DECL_IXGBEVF -#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) -#endif - -#ifndef PCI_VENDOR_ID_INTEL -/** Vendor ID used by Intel devices */ -#define PCI_VENDOR_ID_INTEL 0x8086 -#endif - -/******************** Physical IGB devices from e1000_hw.h ********************/ - -#define E1000_DEV_ID_82576 0x10C9 -#define E1000_DEV_ID_82576_FIBER 0x10E6 -#define E1000_DEV_ID_82576_SERDES 0x10E7 -#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 -#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 -#define E1000_DEV_ID_82576_NS 0x150A -#define E1000_DEV_ID_82576_NS_SERDES 0x1518 -#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D -#define E1000_DEV_ID_82575EB_COPPER 0x10A7 -#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 -#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 -#define E1000_DEV_ID_82580_COPPER 0x150E -#define E1000_DEV_ID_82580_FIBER 0x150F -#define E1000_DEV_ID_82580_SERDES 0x1510 -#define E1000_DEV_ID_82580_SGMII 0x1511 -#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 -#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 -#define E1000_DEV_ID_I350_COPPER 0x1521 -#define E1000_DEV_ID_I350_FIBER 0x1522 -#define E1000_DEV_ID_I350_SERDES 0x1523 -#define E1000_DEV_ID_I350_SGMII 0x1524 -#define E1000_DEV_ID_I350_DA4 0x1546 -#define E1000_DEV_ID_I210_COPPER 0x1533 -#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 -#define E1000_DEV_ID_I210_COPPER_IT 0x1535 -#define E1000_DEV_ID_I210_FIBER 0x1536 -#define E1000_DEV_ID_I210_SERDES 0x1537 -#define E1000_DEV_ID_I210_SGMII 0x1538 -#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B -#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C -#define E1000_DEV_ID_I211_COPPER 0x1539 -#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 -#define E1000_DEV_ID_I354_SGMII 
0x1F41 -#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 -#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 -#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A -#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C -#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 - -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_FIBER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES_QUAD) - -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) - -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_FIBER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER_DUAL) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_QUAD_FIBER) - -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_FIBER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_DA4) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, 
E1000_DEV_ID_I210_COPPER_OEM1) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_IT) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_FIBER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I211_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP) - -/****************** Physical IXGBE devices from ixgbe_type.h ******************/ - -#define IXGBE_DEV_ID_82598 0x10B6 -#define IXGBE_DEV_ID_82598_BX 0x1508 -#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6 -#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7 -#define IXGBE_DEV_ID_82598AT 0x10C8 -#define IXGBE_DEV_ID_82598AT2 0x150B -#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB -#define IXGBE_DEV_ID_82598EB_CX4 0x10DD -#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC -#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1 -#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1 -#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4 -#define IXGBE_DEV_ID_82599_KX4 0x10F7 -#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514 -#define IXGBE_DEV_ID_82599_KR 0x1517 -#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8 -#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C -#define IXGBE_DEV_ID_82599_CX4 0x10F9 -#define IXGBE_DEV_ID_82599_SFP 0x10FB -#define IXGBE_SUBDEV_ID_82599_SFP 0x11A9 -#define IXGBE_SUBDEV_ID_82599_RNDC 0x1F72 -#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 -#define 
IXGBE_SUBDEV_ID_82599_ECNA_DP 0x0470 -#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A -#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 -#define IXGBE_DEV_ID_82599_SFP_EM 0x1507 -#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D -#define IXGBE_DEV_ID_82599_SFP_SF_QP 0x154A -#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558 -#define IXGBE_DEV_ID_82599EN_SFP 0x1557 -#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC -#define IXGBE_DEV_ID_82599_T3_LOM 0x151C -#define IXGBE_DEV_ID_82599_LS 0x154F -#define IXGBE_DEV_ID_X540T 0x1528 -#define IXGBE_DEV_ID_X540T1 0x1560 -#define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC -#define IXGBE_DEV_ID_X550EM_X_10G_T 0x15AD -#define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE -#define IXGBE_DEV_ID_X550T 0x1563 -#define IXGBE_DEV_ID_X550T1 0x15D1 -#define IXGBE_DEV_ID_X550EM_A_KR 0x15C2 -#define IXGBE_DEV_ID_X550EM_A_KR_L 0x15C3 -#define IXGBE_DEV_ID_X550EM_A_SFP_N 0x15C4 -#define IXGBE_DEV_ID_X550EM_A_SGMII 0x15C6 -#define IXGBE_DEV_ID_X550EM_A_SGMII_L 0x15C7 -#define IXGBE_DEV_ID_X550EM_A_10G_T 0x15C8 -#define IXGBE_DEV_ID_X550EM_A_QSFP 0x15CA -#define IXGBE_DEV_ID_X550EM_A_QSFP_N 0x15CC -#define IXGBE_DEV_ID_X550EM_A_SFP 0x15CE -#define IXGBE_DEV_ID_X550EM_A_1G_T 0x15E4 -#define IXGBE_DEV_ID_X550EM_A_1G_T_L 0x15E5 -#define IXGBE_DEV_ID_X550EM_X_KX4 0x15AA -#define IXGBE_DEV_ID_X550EM_X_KR 0x15AB - -#ifdef RTE_NIC_BYPASS -#define IXGBE_DEV_ID_82599_BYPASS 0x155D -#endif - -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_BX) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ - IXGBE_DEV_ID_82598AF_SINGLE_PORT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT2) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_CX4) 
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ - IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_XF_LR) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KR) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ - IXGBE_DEV_ID_82599_COMBO_BACKPLANE) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ - IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_CX4) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_RNDC) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_560FLR) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_ECNA_DP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_FCOE) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_EM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF2) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599EN_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_XAUI_LOM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_T3_LOM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_LS) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, 
IXGBE_DEV_ID_X540T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T1) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_10G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_1G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T1) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR_L) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_10G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP_N) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR) - -#ifdef RTE_NIC_BYPASS -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS) -#endif - -/****************** Virtual IGB devices from e1000_hw.h ******************/ - -#define E1000_DEV_ID_82576_VF 0x10CA -#define E1000_DEV_ID_82576_VF_HV 0x152D -#define E1000_DEV_ID_I350_VF 0x1520 -#define E1000_DEV_ID_I350_VF_HV 0x152F - -RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF) -RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF_HV) -RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, 
E1000_DEV_ID_I350_VF) -RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF_HV) - -/****************** Virtual IXGBE devices from ixgbe_type.h ******************/ - -#define IXGBE_DEV_ID_82599_VF 0x10ED -#define IXGBE_DEV_ID_82599_VF_HV 0x152E -#define IXGBE_DEV_ID_X540_VF 0x1515 -#define IXGBE_DEV_ID_X540_VF_HV 0x1530 -#define IXGBE_DEV_ID_X550_VF_HV 0x1564 -#define IXGBE_DEV_ID_X550_VF 0x1565 -#define IXGBE_DEV_ID_X550EM_A_VF 0x15C5 -#define IXGBE_DEV_ID_X550EM_A_VF_HV 0x15B4 -#define IXGBE_DEV_ID_X550EM_X_VF 0x15A8 -#define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9 - -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF_HV) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF_HV) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF_HV) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV) - -/* - * Undef all RTE_PCI_DEV_ID_DECL_* here. - */ -#undef RTE_PCI_DEV_ID_DECL_IGB -#undef RTE_PCI_DEV_ID_DECL_IGBVF -#undef RTE_PCI_DEV_ID_DECL_IXGBE -#undef RTE_PCI_DEV_ID_DECL_IXGBEVF diff --git a/src/dpdk/lib/librte_eal/common/include/rte_tailq.h b/src/dpdk/lib/librte_eal/common/include/rte_tailq.h index cc3c0f1d..3aae098a 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_tailq.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_tailq.h @@ -107,7 +107,7 @@ struct rte_tailq_elem { RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name) /** - * Dump tail queues to the console. + * Dump tail queues to a file. 
* * @param f * A pointer to a file for output @@ -148,8 +148,8 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name); int rte_eal_tailq_register(struct rte_tailq_elem *t); #define EAL_REGISTER_TAILQ(t) \ -void tailqinitfn_ ##t(void); \ -void __attribute__((constructor, used)) tailqinitfn_ ##t(void) \ +RTE_INIT(tailqinitfn_ ##t); \ +static void tailqinitfn_ ##t(void) \ { \ if (rte_eal_tailq_register(&t) < 0) \ rte_panic("Cannot initialize tailq: %s\n", t.name); \ diff --git a/src/dpdk/lib/librte_eal/common/include/rte_time.h b/src/dpdk/lib/librte_eal/common/include/rte_time.h index 4b13b9c1..28c6274c 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_time.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_time.h @@ -31,6 +31,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _RTE_TIME_H_ +#define _RTE_TIME_H_ + +#include <stdint.h> +#include <time.h> + #define NSEC_PER_SEC 1000000000L /** @@ -120,3 +126,5 @@ rte_ns_to_timespec(uint64_t nsec) return ts; } + +#endif /* _RTE_TIME_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/rte_vdev.h b/src/dpdk/lib/librte_eal/common/include/rte_vdev.h new file mode 100644 index 00000000..784e837d --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/include/rte_vdev.h @@ -0,0 +1,102 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_VDEV_H +#define RTE_VDEV_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/queue.h> +#include <rte_dev.h> + +/** Double linked list of virtual device drivers. */ +TAILQ_HEAD(vdev_driver_list, rte_vdev_driver); + +/** + * Probe function called for each virtual device driver once. + */ +typedef int (rte_vdev_probe_t)(const char *name, const char *args); + +/** + * Remove function called for each virtual device driver once. + */ +typedef int (rte_vdev_remove_t)(const char *name); + +/** + * A virtual device driver abstraction. + */ +struct rte_vdev_driver { + TAILQ_ENTRY(rte_vdev_driver) next; /**< Next in list. */ + struct rte_driver driver; /**< Inherited general driver. */ + rte_vdev_probe_t *probe; /**< Virtual device probe function. */ + rte_vdev_remove_t *remove; /**< Virtual device remove function. */ +}; + +/** + * Register a virtual device driver. 
+ * + * @param driver + * A pointer to a rte_vdev_driver structure describing the driver + * to be registered. + */ +void rte_eal_vdrv_register(struct rte_vdev_driver *driver); + +/** + * Unregister a virtual device driver. + * + * @param driver + * A pointer to a rte_vdev_driver structure describing the driver + * to be unregistered. + */ +void rte_eal_vdrv_unregister(struct rte_vdev_driver *driver); + +#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\ +RTE_INIT(vdrvinitfn_ ##vdrv);\ +static const char *vdrvinit_ ## nm ## _alias;\ +static void vdrvinitfn_ ##vdrv(void)\ +{\ + (vdrv).driver.name = RTE_STR(nm);\ + (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\ + rte_eal_vdrv_register(&vdrv);\ +} \ +RTE_PMD_EXPORT_NAME(nm, __COUNTER__) + +#define RTE_PMD_REGISTER_ALIAS(nm, alias)\ +static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk/lib/librte_eal/common/include/rte_version.h b/src/dpdk/lib/librte_eal/common/include/rte_version.h index 615deb7f..76bfe601 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_version.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_version.h @@ -45,6 +45,7 @@ extern "C" { #include <stdint.h> #include <string.h> +#include <stdio.h> #include <rte_common.h> /** @@ -55,12 +56,12 @@ extern "C" { /** * Major version/year number i.e. the yy in yy.mm.z */ -#define RTE_VER_YEAR 16 +#define RTE_VER_YEAR 17 /** * Minor version/month number i.e. the mm in yy.mm.z */ -#define RTE_VER_MONTH 7 +#define RTE_VER_MONTH 2 /** * Patch level number i.e. 
the z in yy.mm.z @@ -70,14 +71,14 @@ extern "C" { /** * Extra string to be appended to version number */ -#define RTE_VER_SUFFIX "" +#define RTE_VER_SUFFIX "-rc" /** * Patch release number * 0-15 = release candidates * 16 = release */ -#define RTE_VER_RELEASE 16 +#define RTE_VER_RELEASE 2 /** * Macro to compute a version number usable for comparisons diff --git a/src/dpdk/lib/librte_eal/common/malloc_heap.c b/src/dpdk/lib/librte_eal/common/malloc_heap.c index 763fa324..267a4c6c 100644 --- a/src/dpdk/lib/librte_eal/common/malloc_heap.c +++ b/src/dpdk/lib/librte_eal/common/malloc_heap.c @@ -221,14 +221,6 @@ rte_eal_malloc_heap_init(void) for (ms = &mcfg->memseg[0], ms_cnt = 0; (ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0); ms_cnt++, ms++) { -#ifdef RTE_LIBRTE_IVSHMEM - /* - * if segment has ioremap address set, it's an IVSHMEM segment and - * it is not memory to allocate from. - */ - if (ms->ioremap_addr != 0) - continue; -#endif malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms); } diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal.c index 3fb2188f..bf6b818c 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal.c +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/eal.c @@ -69,7 +69,9 @@ #include <rte_string_fns.h> #include <rte_cpuflags.h> #include <rte_interrupts.h> +#include <rte_bus.h> #include <rte_pci.h> +#include <rte_dev.h> #include <rte_devargs.h> #include <rte_common.h> #include <rte_version.h> @@ -238,7 +240,8 @@ rte_eal_config_attach(void) mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), PROT_READ, MAP_SHARED, mem_cfg_fd, 0); if (mem_config == MAP_FAILED) - rte_panic("Cannot mmap memory for rte_config\n"); + rte_panic("Cannot mmap memory for rte_config! 
error %i (%s)\n", + errno, strerror(errno)); rte_config.mem_config = mem_config; } @@ -263,9 +266,17 @@ rte_eal_config_reattach(void) mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr, sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); + if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) { + if (mem_config != MAP_FAILED) + /* errno is stale, don't use */ + rte_panic("Cannot mmap memory for rte_config at [%p], got [%p]" + " - please use '--base-virtaddr' option\n", + rte_mem_cfg_addr, mem_config); + else + rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n", + errno, strerror(errno)); + } close(mem_cfg_fd); - if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) - rte_panic("Cannot mmap memory for rte_config\n"); rte_config.mem_config = mem_config; } @@ -740,6 +751,9 @@ rte_eal_init(int argc, char **argv) char cpuset[RTE_CPU_AFFINITY_STR_LEN]; char thread_name[RTE_MAX_THREAD_NAME_LEN]; + /* checks if the machine is adequate */ + rte_cpu_check_supported(); + if (!rte_atomic32_test_and_set(&run_once)) return -1; @@ -748,9 +762,6 @@ rte_eal_init(int argc, char **argv) thread_id = pthread_self(); - if (rte_eal_log_early_init() < 0) - rte_panic("Cannot init early logs\n"); - eal_log_level_parse(argc, argv); /* set log level as early as possible */ @@ -789,6 +800,9 @@ rte_eal_init(int argc, char **argv) rte_config_init(); + if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) + rte_panic("Cannot init logs\n"); + if (rte_eal_pci_init() < 0) rte_panic("Cannot init PCI\n"); @@ -797,11 +811,6 @@ rte_eal_init(int argc, char **argv) rte_panic("Cannot init VFIO\n"); #endif -#ifdef RTE_LIBRTE_IVSHMEM - if (rte_eal_ivshmem_init() < 0) - rte_panic("Cannot init IVSHMEM\n"); -#endif - if (rte_eal_memory_init() < 0) rte_panic("Cannot init memory\n"); @@ -814,14 +823,6 @@ rte_eal_init(int argc, char **argv) if (rte_eal_tailqs_init() < 0) rte_panic("Cannot init tail queues for objects\n"); -#ifdef 
RTE_LIBRTE_IVSHMEM - if (rte_eal_ivshmem_obj_init() < 0) - rte_panic("Cannot init IVSHMEM objects\n"); -#endif - - if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) - rte_panic("Cannot init logs\n"); - if (rte_eal_alarm_init() < 0) rte_panic("Cannot init interrupt-handling thread\n"); @@ -841,12 +842,12 @@ rte_eal_init(int argc, char **argv) rte_config.master_lcore, (int)thread_id, cpuset, ret == 0 ? "" : "..."); - if (rte_eal_dev_init() < 0) - rte_panic("Cannot init pmd devices\n"); - if (rte_eal_intr_init() < 0) rte_panic("Cannot init interrupt-handling thread\n"); + if (rte_bus_scan()) + rte_panic("Cannot scan the buses for devices\n"); + RTE_LCORE_FOREACH_SLAVE(i) { /* @@ -883,10 +884,17 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* Probe all the buses and devices/drivers on them */ + if (rte_bus_probe()) + rte_panic("Cannot probe devices\n"); + /* Probe & Initialize PCI devices */ if (rte_eal_pci_probe()) rte_panic("Cannot probe PCI\n"); + if (rte_eal_dev_init() < 0) + rte_panic("Cannot init pmd devices\n"); + rte_eal_mcfg_complete(); return fctret; diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 54ab6253..b5b3f2bd 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -73,9 +73,6 @@ static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */ -// TREX_PATCH -int eal_err_read_from_file_is_error = 1; - /** * union for pipe fds. 
*/ @@ -139,7 +136,7 @@ static pthread_t intr_thread; /* enable legacy (INTx) interrupts */ static int -vfio_enable_intx(struct rte_intr_handle *intr_handle) { +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -186,7 +183,7 @@ vfio_enable_intx(struct rte_intr_handle *intr_handle) { /* disable legacy (INTx) interrupts */ static int -vfio_disable_intx(struct rte_intr_handle *intr_handle) { +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -229,7 +226,7 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) { /* enable MSI interrupts */ static int -vfio_enable_msi(struct rte_intr_handle *intr_handle) { +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { int len, ret; char irq_set_buf[IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; @@ -258,7 +255,7 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) { /* disable MSI interrupts */ static int -vfio_disable_msi(struct rte_intr_handle *intr_handle) { +vfio_disable_msi(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -281,9 +278,30 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) { return ret; } +static int +get_max_intr(const struct rte_intr_handle *intr_handle) +{ + struct rte_intr_source *src; + + TAILQ_FOREACH(src, &intr_sources, next) { + if (src->intr_handle.fd != intr_handle->fd) + continue; + + if (!src->intr_handle.max_intr) + src->intr_handle.max_intr = 1; + else if (src->intr_handle.max_intr > RTE_MAX_RXTX_INTR_VEC_ID) + src->intr_handle.max_intr + = RTE_MAX_RXTX_INTR_VEC_ID + 1; + + return src->intr_handle.max_intr; + } + + return -1; +} + /* enable MSI-X interrupts */ static int -vfio_enable_msix(struct rte_intr_handle *intr_handle) { +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { int len, ret; char 
irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; @@ -293,12 +311,15 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) { irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; - if (!intr_handle->max_intr) - intr_handle->max_intr = 1; - else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID) - intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1; - irq_set->count = intr_handle->max_intr; + ret = get_max_intr(intr_handle); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Invalid number of MSI-X irqs for fd %d\n", + intr_handle->fd); + return -1; + } + + irq_set->count = ret; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; irq_set->start = 0; @@ -321,7 +342,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) { /* disable MSI-X interrupts */ static int -vfio_disable_msix(struct rte_intr_handle *intr_handle) { +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; int len, ret; @@ -346,7 +367,7 @@ vfio_disable_msix(struct rte_intr_handle *intr_handle) { #endif static int -uio_intx_intr_disable(struct rte_intr_handle *intr_handle) +uio_intx_intr_disable(const struct rte_intr_handle *intr_handle) { unsigned char command_high; @@ -370,7 +391,7 @@ uio_intx_intr_disable(struct rte_intr_handle *intr_handle) } static int -uio_intx_intr_enable(struct rte_intr_handle *intr_handle) +uio_intx_intr_enable(const struct rte_intr_handle *intr_handle) { unsigned char command_high; @@ -394,7 +415,7 @@ uio_intx_intr_enable(struct rte_intr_handle *intr_handle) } static int -uio_intr_disable(struct rte_intr_handle *intr_handle) +uio_intr_disable(const struct rte_intr_handle *intr_handle) { const int value = 0; @@ -408,7 +429,7 @@ uio_intr_disable(struct rte_intr_handle *intr_handle) } static int -uio_intr_enable(struct rte_intr_handle *intr_handle) +uio_intr_enable(const struct rte_intr_handle *intr_handle) { 
const int value = 1; @@ -422,7 +443,7 @@ uio_intr_enable(struct rte_intr_handle *intr_handle) } int -rte_intr_callback_register(struct rte_intr_handle *intr_handle, +rte_intr_callback_register(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg) { int ret, wake_thread; @@ -494,7 +515,7 @@ rte_intr_callback_register(struct rte_intr_handle *intr_handle, } int -rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, +rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb_fn, void *cb_arg) { int ret; @@ -558,7 +579,7 @@ rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, } int -rte_intr_enable(struct rte_intr_handle *intr_handle) +rte_intr_enable(const struct rte_intr_handle *intr_handle) { if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) return -1; @@ -602,7 +623,7 @@ rte_intr_enable(struct rte_intr_handle *intr_handle) } int -rte_intr_disable(struct rte_intr_handle *intr_handle) +rte_intr_disable(const struct rte_intr_handle *intr_handle) { if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) return -1; @@ -712,19 +733,10 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) if (errno == EINTR || errno == EWOULDBLOCK) continue; - // TREX_PATCH. 
Because of issues with e1000, we want this message to - // have lower priority only if running on e1000 card - if (eal_err_read_from_file_is_error) { - RTE_LOG(ERR, EAL, "Error reading from file " - "descriptor %d: %s\n", - events[n].data.fd, - strerror(errno)); - } else { - RTE_LOG(INFO, EAL, "Error reading from file " - "descriptor %d: %s\n", - events[n].data.fd, - strerror(errno)); - } + RTE_LOG(ERR, EAL, "Error reading from file " + "descriptor %d: %s\n", + events[n].data.fd, + strerror(errno)); } else if (bytes_read == 0) RTE_LOG(ERR, EAL, "Read nothing from file " "descriptor %d\n", events[n].data.fd); @@ -1169,7 +1181,7 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) RTE_LOG(ERR, EAL, "can't setup eventfd, error %i (%s)\n", errno, strerror(errno)); - return -1; + return -errno; } intr_handle->efds[i] = fd; } diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_ivshmem.c deleted file mode 100644 index 67b3caf2..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_ivshmem.c +++ /dev/null @@ -1,954 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */ - -#include <stdint.h> -#include <unistd.h> -#include <inttypes.h> -#include <sys/mman.h> -#include <sys/file.h> -#include <string.h> -#include <sys/queue.h> - -#include <rte_log.h> -#include <rte_pci.h> -#include <rte_memory.h> -#include <rte_eal.h> -#include <rte_eal_memconfig.h> -#include <rte_string_fns.h> -#include <rte_errno.h> -#include <rte_ring.h> -#include <rte_malloc.h> -#include <rte_common.h> -#include <rte_ivshmem.h> - -#include "eal_internal_cfg.h" -#include "eal_private.h" - -#define PCI_VENDOR_ID_IVSHMEM 0x1Af4 -#define PCI_DEVICE_ID_IVSHMEM 0x1110 - -#define IVSHMEM_MAGIC 0x0BADC0DE - -#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2" -#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config" - -#define PHYS 0x1 -#define VIRT 0x2 -#define IOREMAP 0x4 -#define FULL (PHYS|VIRT|IOREMAP) - -#define METADATA_SIZE_ALIGNED \ - (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz)) - -#define CONTAINS(x,y)\ - (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len)) - -#define DIM(x) (sizeof(x)/sizeof(x[0])) - -struct ivshmem_pci_device { - char 
path[PATH_MAX]; - phys_addr_t ioremap_addr; -}; - -/* data type to store in config */ -struct ivshmem_segment { - struct rte_ivshmem_metadata_entry entry; - uint64_t align; - char path[PATH_MAX]; -}; -struct ivshmem_shared_config { - struct ivshmem_segment segment[RTE_MAX_MEMSEG]; - uint32_t segment_idx; - struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS]; - uint32_t pci_devs_idx; -}; -static struct ivshmem_shared_config * ivshmem_config; -static int memseg_idx; -static int pagesz; - -/* Tailq heads to add rings to */ -TAILQ_HEAD(rte_ring_list, rte_tailq_entry); - -/* - * Utility functions - */ - -static int -is_ivshmem_device(struct rte_pci_device * dev) -{ - return dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM - && dev->id.device_id == PCI_DEVICE_ID_IVSHMEM; -} - -static void * -map_metadata(int fd, uint64_t len) -{ - size_t metadata_len = sizeof(struct rte_ivshmem_metadata); - size_t aligned_len = METADATA_SIZE_ALIGNED; - - return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, len - aligned_len); -} - -static void -unmap_metadata(void * ptr) -{ - munmap(ptr, sizeof(struct rte_ivshmem_metadata)); -} - -static int -has_ivshmem_metadata(int fd, uint64_t len) -{ - struct rte_ivshmem_metadata metadata; - void * ptr; - - ptr = map_metadata(fd, len); - - if (ptr == MAP_FAILED) - return -1; - - metadata = *(struct rte_ivshmem_metadata*) (ptr); - - unmap_metadata(ptr); - - return metadata.magic_number == IVSHMEM_MAGIC; -} - -static void -remove_segment(struct ivshmem_segment * ms, int len, int idx) -{ - int i; - - for (i = idx; i < len - 1; i++) - memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment)); - memset(&ms[len-1], 0, sizeof(struct ivshmem_segment)); -} - -static int -overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2) -{ - uint64_t start1, end1, start2, end2; - uint64_t p_start1, p_end1, p_start2, p_end2; - uint64_t i_start1, i_end1, i_start2, i_end2; - int result = 0; - - /* gather virtual addresses */ - 
start1 = mz1->addr_64; - end1 = mz1->addr_64 + mz1->len; - start2 = mz2->addr_64; - end2 = mz2->addr_64 + mz2->len; - - /* gather physical addresses */ - p_start1 = mz1->phys_addr; - p_end1 = mz1->phys_addr + mz1->len; - p_start2 = mz2->phys_addr; - p_end2 = mz2->phys_addr + mz2->len; - - /* gather ioremap addresses */ - i_start1 = mz1->ioremap_addr; - i_end1 = mz1->ioremap_addr + mz1->len; - i_start2 = mz2->ioremap_addr; - i_end2 = mz2->ioremap_addr + mz2->len; - - /* check for overlap in virtual addresses */ - if (start1 >= start2 && start1 < end2) - result |= VIRT; - if (start2 >= start1 && start2 < end1) - result |= VIRT; - - /* check for overlap in physical addresses */ - if (p_start1 >= p_start2 && p_start1 < p_end2) - result |= PHYS; - if (p_start2 >= p_start1 && p_start2 < p_end1) - result |= PHYS; - - /* check for overlap in ioremap addresses */ - if (i_start1 >= i_start2 && i_start1 < i_end2) - result |= IOREMAP; - if (i_start2 >= i_start1 && i_start2 < i_end1) - result |= IOREMAP; - - return result; -} - -static int -adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2) -{ - uint64_t start1, end1, start2, end2; - uint64_t p_start1, p_end1, p_start2, p_end2; - uint64_t i_start1, i_end1, i_start2, i_end2; - int result = 0; - - /* gather virtual addresses */ - start1 = mz1->addr_64; - end1 = mz1->addr_64 + mz1->len; - start2 = mz2->addr_64; - end2 = mz2->addr_64 + mz2->len; - - /* gather physical addresses */ - p_start1 = mz1->phys_addr; - p_end1 = mz1->phys_addr + mz1->len; - p_start2 = mz2->phys_addr; - p_end2 = mz2->phys_addr + mz2->len; - - /* gather ioremap addresses */ - i_start1 = mz1->ioremap_addr; - i_end1 = mz1->ioremap_addr + mz1->len; - i_start2 = mz2->ioremap_addr; - i_end2 = mz2->ioremap_addr + mz2->len; - - /* check if segments are virtually adjacent */ - if (start1 == end2) - result |= VIRT; - if (start2 == end1) - result |= VIRT; - - /* check if segments are physically adjacent */ - if (p_start1 == p_end2) - result |= 
PHYS; - if (p_start2 == p_end1) - result |= PHYS; - - /* check if segments are ioremap-adjacent */ - if (i_start1 == i_end2) - result |= IOREMAP; - if (i_start2 == i_end1) - result |= IOREMAP; - - return result; -} - -static int -has_adjacent_segments(struct ivshmem_segment * ms, int len) -{ - int i, j; - - for (i = 0; i < len; i++) - for (j = i + 1; j < len; j++) { - /* we're only interested in fully adjacent segments; partially - * adjacent segments can coexist. - */ - if (adjacent(&ms[i].entry.mz, &ms[j].entry.mz) == FULL) - return 1; - } - return 0; -} - -static int -has_overlapping_segments(struct ivshmem_segment * ms, int len) -{ - int i, j; - - for (i = 0; i < len; i++) - for (j = i + 1; j < len; j++) - if (overlap(&ms[i].entry.mz, &ms[j].entry.mz)) - return 1; - return 0; -} - -static int -seg_compare(const void * a, const void * b) -{ - const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a; - const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b; - - /* move unallocated zones to the end */ - if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL) - return 0; - if (s1->entry.mz.addr == 0) - return 1; - if (s2->entry.mz.addr == 0) - return -1; - - return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr; -} - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG -static void -entry_dump(struct rte_ivshmem_metadata_entry *e) -{ - RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr, - RTE_PTR_ADD(e->mz.addr, e->mz.len)); - RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n", - e->mz.phys_addr, - e->mz.phys_addr + e->mz.len); - RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n", - e->mz.ioremap_addr, - e->mz.ioremap_addr + e->mz.len); - RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len); - RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset); -} -#endif - - - -/* - * Actual useful code - */ - -/* read through metadata mapped from the IVSHMEM device */ -static int -read_metadata(char * path, int path_len, int fd, uint64_t 
flen) -{ - struct rte_ivshmem_metadata metadata; - struct rte_ivshmem_metadata_entry * entry; - int idx, i; - void * ptr; - - ptr = map_metadata(fd, flen); - - if (ptr == MAP_FAILED) - return -1; - - metadata = *(struct rte_ivshmem_metadata*) (ptr); - - unmap_metadata(ptr); - - RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name); - - idx = ivshmem_config->segment_idx; - - for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES && - idx <= RTE_MAX_MEMSEG; i++) { - - if (idx == RTE_MAX_MEMSEG) { - RTE_LOG(ERR, EAL, "Not enough memory segments!\n"); - return -1; - } - - entry = &metadata.entry[i]; - - /* stop on uninitialized memzone */ - if (entry->mz.len == 0) - break; - - /* copy metadata entry */ - memcpy(&ivshmem_config->segment[idx].entry, entry, - sizeof(struct rte_ivshmem_metadata_entry)); - - /* copy path */ - snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path); - - idx++; - } - ivshmem_config->segment_idx = idx; - - return 0; -} - -/* check through each segment and look for adjacent or overlapping ones. 
*/ -static int -cleanup_segments(struct ivshmem_segment * ms, int tbl_len) -{ - struct ivshmem_segment * s, * tmp; - int i, j, concat, seg_adjacent, seg_overlapping; - uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2; - - qsort(ms, tbl_len, sizeof(struct ivshmem_segment), - seg_compare); - - while (has_overlapping_segments(ms, tbl_len) || - has_adjacent_segments(ms, tbl_len)) { - - for (i = 0; i < tbl_len; i++) { - s = &ms[i]; - - concat = 0; - - for (j = i + 1; j < tbl_len; j++) { - tmp = &ms[j]; - - /* check if this segment is overlapping with existing segment, - * or is adjacent to existing segment */ - seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz); - seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz); - - /* check if segments fully overlap or are fully adjacent */ - if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) { - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - RTE_LOG(DEBUG, EAL, "Concatenating segments\n"); - RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); - entry_dump(&s->entry); - RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); - entry_dump(&tmp->entry); -#endif - - start1 = s->entry.mz.addr_64; - start2 = tmp->entry.mz.addr_64; - p_start1 = s->entry.mz.phys_addr; - p_start2 = tmp->entry.mz.phys_addr; - i_start1 = s->entry.mz.ioremap_addr; - i_start2 = tmp->entry.mz.ioremap_addr; - end1 = s->entry.mz.addr_64 + s->entry.mz.len; - end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len; - - /* settle for minimum start address and maximum length */ - s->entry.mz.addr_64 = RTE_MIN(start1, start2); - s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2); - s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2); - s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset); - s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64; - concat = 1; - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - RTE_LOG(DEBUG, EAL, "Resulting segment:\n"); - entry_dump(&s->entry); - -#endif - } - /* if segments not fully overlap, we have an error condition. 
- * adjacent segments can coexist. - */ - else if (seg_overlapping > 0) { - RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j); -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); - entry_dump(&s->entry); - RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); - entry_dump(&tmp->entry); -#endif - return -1; - } - if (concat) - break; - } - /* if we concatenated, remove segment at j */ - if (concat) { - remove_segment(ms, tbl_len, j); - tbl_len--; - break; - } - } - } - - return tbl_len; -} - -static int -create_shared_config(void) -{ - char path[PATH_MAX]; - int fd; - - /* build ivshmem config file path */ - snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, - internal_config.hugefile_prefix); - - fd = open(path, O_CREAT | O_RDWR, 0600); - - if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno)); - return -1; - } - - /* try ex-locking first - if the file is locked, we have a problem */ - if (flock(fd, LOCK_EX | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno)); - close(fd); - return -1; - } - - if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) { - RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno)); - return -1; - } - - ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), - PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - - if (ivshmem_config == MAP_FAILED) - return -1; - - memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config)); - - /* change the exclusive lock we got earlier to a shared lock */ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); - return -1; - } - - close(fd); - - return 0; -} - -/* open shared config file and, if present, map the config. - * having no config file is not an error condition, as we later check if - * ivshmem_config is NULL (if it is, that means nothing was mapped). 
*/ -static int -open_shared_config(void) -{ - char path[PATH_MAX]; - int fd; - - /* build ivshmem config file path */ - snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, - internal_config.hugefile_prefix); - - fd = open(path, O_RDONLY); - - /* if the file doesn't exist, just return success */ - if (fd < 0 && errno == ENOENT) - return 0; - /* else we have an error condition */ - else if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s: %s\n", - path, strerror(errno)); - return -1; - } - - /* try ex-locking first - if the lock *does* succeed, this means it's a - * stray config file, so it should be deleted. - */ - if (flock(fd, LOCK_EX | LOCK_NB) != -1) { - - /* if we can't remove the file, something is wrong */ - if (unlink(path) < 0) { - RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path, - strerror(errno)); - return -1; - } - - /* release the lock */ - flock(fd, LOCK_UN); - close(fd); - - /* return success as having a stray config file is equivalent to not - * having config file at all. - */ - return 0; - } - - ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), - PROT_READ, MAP_SHARED, fd, 0); - - if (ivshmem_config == MAP_FAILED) - return -1; - - /* place a shared lock on config file */ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); - return -1; - } - - close(fd); - - return 0; -} - -/* - * This function does the following: - * - * 1) Builds a table of ivshmem_segments with proper offset alignment - * 2) Cleans up that table so that we don't have any overlapping or adjacent - * memory segments - * 3) Creates memsegs from this table and maps them into memory. 
- */ -static inline int -map_all_segments(void) -{ - struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG]; - struct ivshmem_pci_device * pci_dev; - struct rte_mem_config * mcfg; - struct ivshmem_segment * seg; - int fd, fd_zero; - unsigned i, j; - struct rte_memzone mz; - struct rte_memseg ms; - void * base_addr; - uint64_t align, len; - phys_addr_t ioremap_addr; - - ioremap_addr = 0; - - memset(ms_tbl, 0, sizeof(ms_tbl)); - memset(&mz, 0, sizeof(struct rte_memzone)); - memset(&ms, 0, sizeof(struct rte_memseg)); - - /* first, build a table of memsegs to map, to avoid failed mmaps due to - * overlaps - */ - for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) { - if (i == RTE_MAX_MEMSEG) { - RTE_LOG(ERR, EAL, "Too many segments requested!\n"); - return -1; - } - - seg = &ivshmem_config->segment[i]; - - /* copy segment to table */ - memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment)); - - /* find ioremap addr */ - for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) { - pci_dev = &ivshmem_config->pci_devs[j]; - if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) { - ioremap_addr = pci_dev->ioremap_addr; - break; - } - } - if (ioremap_addr == 0) { - RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n"); - return -1; - } - - /* work out alignments */ - align = seg->entry.mz.addr_64 - - RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000); - len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000); - - /* save original alignments */ - ms_tbl[i].align = align; - - /* create a memory zone */ - mz.addr_64 = seg->entry.mz.addr_64 - align; - mz.len = len; - mz.hugepage_sz = seg->entry.mz.hugepage_sz; - mz.phys_addr = seg->entry.mz.phys_addr - align; - - /* find true physical address */ - mz.ioremap_addr = ioremap_addr + seg->entry.offset - align; - - ms_tbl[i].entry.offset = seg->entry.offset - align; - - memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone)); - } - - /* clean up the segments */ - memseg_idx = cleanup_segments(ms_tbl, 
ivshmem_config->segment_idx); - - if (memseg_idx < 0) - return -1; - - mcfg = rte_eal_get_configuration()->mem_config; - - fd_zero = open("/dev/zero", O_RDWR); - - if (fd_zero < 0) { - RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno)); - return -1; - } - - /* create memsegs and put them into DPDK memory */ - for (i = 0; i < (unsigned) memseg_idx; i++) { - - seg = &ms_tbl[i]; - - ms.addr_64 = seg->entry.mz.addr_64; - ms.hugepage_sz = seg->entry.mz.hugepage_sz; - ms.len = seg->entry.mz.len; - ms.nchannel = rte_memory_get_nchannel(); - ms.nrank = rte_memory_get_nrank(); - ms.phys_addr = seg->entry.mz.phys_addr; - ms.ioremap_addr = seg->entry.mz.ioremap_addr; - ms.socket_id = seg->entry.mz.socket_id; - - base_addr = mmap(ms.addr, ms.len, - PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0); - - if (base_addr == MAP_FAILED || base_addr != ms.addr) { - RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n"); - return -1; - } - - fd = open(seg->path, O_RDWR); - - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path, - strerror(errno)); - return -1; - } - - munmap(ms.addr, ms.len); - - base_addr = mmap(ms.addr, ms.len, - PROT_READ | PROT_WRITE, MAP_SHARED, fd, - seg->entry.offset); - - - if (base_addr == MAP_FAILED || base_addr != ms.addr) { - RTE_LOG(ERR, EAL, "Cannot map segment into memory: " - "expected %p got %p (%s)\n", ms.addr, base_addr, - strerror(errno)); - return -1; - } - - RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at " - "offset 0x%" PRIx64 "\n", - ms.addr, ms.len, seg->entry.offset); - - /* put the pointers back into their real positions using original - * alignment */ - ms.addr_64 += seg->align; - ms.phys_addr += seg->align; - ms.ioremap_addr += seg->align; - ms.len -= seg->align; - - /* at this point, the rest of DPDK memory is not initialized, so we - * expect memsegs to be empty */ - memcpy(&mcfg->memseg[i], &ms, - sizeof(struct rte_memseg)); - - close(fd); - - RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 
0x%lx\n", - ms.len); - } - - return 0; -} - -/* this happens at a later stage, after general EAL memory initialization */ -int -rte_eal_ivshmem_obj_init(void) -{ - struct rte_ring_list* ring_list = NULL; - struct rte_mem_config * mcfg; - struct ivshmem_segment * seg; - struct rte_memzone * mz; - struct rte_ring * r; - struct rte_tailq_entry *te; - unsigned i, ms, idx; - uint64_t offset; - - /* secondary process would not need any object discovery - it'll all - * already be in shared config */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL) - return 0; - - /* check that we have an initialised ring tail queue */ - ring_list = RTE_TAILQ_LOOKUP(RTE_TAILQ_RING_NAME, rte_ring_list); - if (ring_list == NULL) { - RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n"); - return -1; - } - - mcfg = rte_eal_get_configuration()->mem_config; - - /* create memzones */ - for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) { - - seg = &ivshmem_config->segment[i]; - - /* add memzone */ - if (mcfg->memzone_cnt == RTE_MAX_MEMZONE) { - RTE_LOG(ERR, EAL, "No more memory zones available!\n"); - return -1; - } - - idx = mcfg->memzone_cnt; - - RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n", - seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len); - - memcpy(&mcfg->memzone[idx], &seg->entry.mz, - sizeof(struct rte_memzone)); - - /* find ioremap address */ - for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) { - if (ms == RTE_MAX_MEMSEG) { - RTE_LOG(ERR, EAL, "Physical address of segment not found!\n"); - return -1; - } - if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) { - offset = mcfg->memzone[idx].addr_64 - - mcfg->memseg[ms].addr_64; - mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr + - offset; - break; - } - } - - mcfg->memzone_cnt++; - } - - rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); - - /* find rings */ - for (i = 0; i < mcfg->memzone_cnt; i++) { - mz = &mcfg->memzone[i]; - - /* check if memzone has a 
ring prefix */ - if (strncmp(mz->name, RTE_RING_MZ_PREFIX, - sizeof(RTE_RING_MZ_PREFIX) - 1) != 0) - continue; - - r = (struct rte_ring*) (mz->addr_64); - - te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0); - if (te == NULL) { - RTE_LOG(ERR, EAL, "Cannot allocate ring tailq entry!\n"); - return -1; - } - - te->data = (void *) r; - - TAILQ_INSERT_TAIL(ring_list, te, next); - - RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr); - } - rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - rte_memzone_dump(stdout); - rte_ring_list_dump(stdout); -#endif - - return 0; -} - -/* initialize ivshmem structures */ -int rte_eal_ivshmem_init(void) -{ - struct rte_pci_device * dev; - struct rte_pci_resource * res; - int fd, ret; - char path[PATH_MAX]; - - /* initialize everything to 0 */ - memset(path, 0, sizeof(path)); - ivshmem_config = NULL; - - pagesz = getpagesize(); - - RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n"); - - if (rte_eal_process_type() == RTE_PROC_SECONDARY) { - - if (open_shared_config() < 0) { - RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n"); - return -1; - } - } - else { - - TAILQ_FOREACH(dev, &pci_device_list, next) { - - if (is_ivshmem_device(dev)) { - - /* IVSHMEM memory is always on BAR2 */ - res = &dev->mem_resource[2]; - - /* if we don't have a BAR2 */ - if (res->len == 0) - continue; - - /* construct pci device path */ - snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH, - dev->addr.domain, dev->addr.bus, dev->addr.devid, - dev->addr.function); - - /* try to find memseg */ - fd = open(path, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", path); - return -1; - } - - /* check if it's a DPDK IVSHMEM device */ - ret = has_ivshmem_metadata(fd, res->len); - - /* is DPDK device */ - if (ret == 1) { - - /* config file creation is deferred until the first - * DPDK device is found. then, it has to be created - * only once. 
*/ - if (ivshmem_config == NULL && - create_shared_config() < 0) { - RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n"); - close(fd); - return -1; - } - - if (read_metadata(path, sizeof(path), fd, res->len) < 0) { - RTE_LOG(ERR, EAL, "Could not read metadata from" - " device %02x:%02x.%x!\n", dev->addr.bus, - dev->addr.devid, dev->addr.function); - close(fd); - return -1; - } - - if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) { - RTE_LOG(WARNING, EAL, - "IVSHMEM PCI device limit exceeded. Increase " - "CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in " - "your config file.\n"); - break; - } - - RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n", - dev->addr.bus, dev->addr.devid, dev->addr.function); - - ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr; - snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path, - sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path), - "%s", path); - - ivshmem_config->pci_devs_idx++; - } - /* failed to read */ - else if (ret < 0) { - RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n", - strerror(errno)); - close(fd); - return -1; - } - /* not a DPDK device */ - else - RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n"); - - /* close the BAR fd */ - close(fd); - } - } - } - - /* ivshmem_config is not NULL only if config was created and/or mapped */ - if (ivshmem_config) { - if (map_all_segments() < 0) { - RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n"); - return -1; - } - } - else { - RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! 
\n"); - } - - return 0; -} - -#endif diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_log.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_log.c index d3911004..e3a50aa3 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_log.c +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_log.c @@ -97,45 +97,7 @@ rte_eal_log_init(const char *id, int facility) openlog(id, LOG_NDELAY | LOG_PID, facility); - if (rte_eal_common_log_init(log_stream) < 0) - return -1; - - return 0; -} - -/* early logs */ - -/* - * early log function, used before rte_eal_log_init - */ -static ssize_t -early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) -{ - ssize_t ret; - ret = fwrite(buf, size, 1, stdout); - fflush(stdout); - if (ret == 0) - return -1; - return ret; -} - -static cookie_io_functions_t early_log_func = { - .write = early_log_write, -}; -static FILE *early_log_stream; + eal_log_set_default(log_stream); -/* - * init the log library, called by rte_eal_init() to enable early - * logs - */ -int -rte_eal_log_early_init(void) -{ - early_log_stream = fopencookie(NULL, "w+", early_log_func); - if (early_log_stream == NULL) { - printf("Cannot configure early_log_stream\n"); - return -1; - } - rte_openlog_stream(early_log_stream); return 0; } diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c index 41e0a928..a956bb22 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -376,25 +376,15 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, void *vma_addr = NULL; size_t vma_len = 0; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - RTE_SET_USED(vma_len); -#endif - for (i = 0; i < hpi->num_pages[0]; i++) { uint64_t hugepage_sz = hpi->hugepage_sz; if (orig) { hugepg_tbl[i].file_id = i; hugepg_tbl[i].size = hugepage_sz; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - eal_get_hugefile_temp_path(hugepg_tbl[i].filepath, - sizeof(hugepg_tbl[i].filepath), 
hpi->hugedir, - hugepg_tbl[i].file_id); -#else eal_get_hugefile_path(hugepg_tbl[i].filepath, sizeof(hugepg_tbl[i].filepath), hpi->hugedir, hugepg_tbl[i].file_id); -#endif hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0'; } #ifndef RTE_ARCH_64 @@ -408,8 +398,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, continue; } #endif - -#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS else if (vma_len == 0) { unsigned j, num_pages; @@ -439,10 +427,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, if (vma_addr == NULL) vma_len = hugepage_sz; } -#endif /* try to create hugepage file */ - fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755); + fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0600); if (fd < 0) { RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); @@ -505,169 +492,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, return i; } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - -/* - * Remaps all hugepages into single file segments - */ -static int -remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) -{ - int fd; - unsigned i = 0, j, num_pages, page_idx = 0; - void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL; - size_t vma_len = 0; - size_t hugepage_sz = hpi->hugepage_sz; - size_t total_size, offset; - char filepath[MAX_HUGEPAGE_PATH]; - phys_addr_t physaddr; - int socket; - - while (i < hpi->num_pages[0]) { - -#ifndef RTE_ARCH_64 - /* for 32-bit systems, don't remap 1G pages and 16G pages, - * just reuse original map address as final map address. - */ - if ((hugepage_sz == RTE_PGSIZE_1G) - || (hugepage_sz == RTE_PGSIZE_16G)) { - hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; - hugepg_tbl[i].orig_va = NULL; - i++; - continue; - } -#endif - - /* reserve a virtual area for next contiguous - * physical block: count the number of - * contiguous physical pages. 
*/ - for (j = i+1; j < hpi->num_pages[0] ; j++) { -#ifdef RTE_ARCH_PPC_64 - /* The physical addresses are sorted in descending - * order on PPC64 */ - if (hugepg_tbl[j].physaddr != - hugepg_tbl[j-1].physaddr - hugepage_sz) - break; -#else - if (hugepg_tbl[j].physaddr != - hugepg_tbl[j-1].physaddr + hugepage_sz) - break; -#endif - } - num_pages = j - i; - vma_len = num_pages * hugepage_sz; - - socket = hugepg_tbl[i].socket_id; - - /* get the biggest virtual memory area up to - * vma_len. If it fails, vma_addr is NULL, so - * let the kernel provide the address. */ - vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz); - - /* If we can't find a big enough virtual area, work out how many pages - * we are going to get */ - if (vma_addr == NULL) - j = i + 1; - else if (vma_len != num_pages * hugepage_sz) { - num_pages = vma_len / hugepage_sz; - j = i + num_pages; - - } - - hugepg_tbl[page_idx].file_id = page_idx; - eal_get_hugefile_path(filepath, - sizeof(filepath), - hpi->hugedir, - hugepg_tbl[page_idx].file_id); - - /* try to create hugepage file */ - fd = open(filepath, O_CREAT | O_RDWR, 0755); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); - return -1; - } - - total_size = 0; - for (;i < j; i++) { - - /* unmap current segment */ - if (total_size > 0) - munmap(vma_addr, total_size); - - /* unmap original page */ - munmap(hugepg_tbl[i].orig_va, hugepage_sz); - unlink(hugepg_tbl[i].filepath); - - total_size += hugepage_sz; - - old_addr = vma_addr; - - /* map new, bigger segment, and populate page tables, - * the kernel fills this segment with zeros */ - vma_addr = mmap(vma_addr, total_size, - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); - - if (vma_addr == MAP_FAILED || vma_addr != old_addr) { - RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); - close(fd); - return -1; - } - } - - /* set shared flock on the file. 
*/ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n", - __func__, strerror(errno)); - close(fd); - return -1; - } - - snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s", - filepath); - - physaddr = rte_mem_virt2phy(vma_addr); - - if (physaddr == RTE_BAD_PHYS_ADDR) - return -1; - - hugepg_tbl[page_idx].final_va = vma_addr; - - hugepg_tbl[page_idx].physaddr = physaddr; - - hugepg_tbl[page_idx].repeated = num_pages; - - hugepg_tbl[page_idx].socket_id = socket; - - close(fd); - - /* verify the memory segment - that is, check that every VA corresponds - * to the physical address we expect to see - */ - for (offset = 0; offset < vma_len; offset += hugepage_sz) { - uint64_t expected_physaddr; - - expected_physaddr = hugepg_tbl[page_idx].physaddr + offset; - page_addr = RTE_PTR_ADD(vma_addr, offset); - physaddr = rte_mem_virt2phy(page_addr); - - if (physaddr != expected_physaddr) { - RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr " - "at %p (offset 0x%" PRIx64 ": 0x%" PRIx64 - " (expected 0x%" PRIx64 ")\n", - page_addr, offset, physaddr, expected_physaddr); - return -1; - } - } - - page_idx++; - } - - /* zero out the rest */ - memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file)); - return page_idx; -} -#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */ - /* Unmap all hugepages from original mapping */ static int unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) @@ -681,7 +505,6 @@ unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info } return 0; } -#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */ /* * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge @@ -875,12 +698,6 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, for (page = 0; page < nrpages; page++) { struct hugepage_file *hp = &hugepg_tbl[page]; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - /* if this page was already cleared */ - 
if (hp->final_va == NULL) - continue; -#endif - /* find a page that matches the criteria */ if ((hp->size == hpi[size].hugepage_sz) && (hp->socket_id == (int) socket)) { @@ -889,11 +706,7 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, if (pages_found == hpi[size].num_pages[socket]) { uint64_t unmap_len; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - unmap_len = hp->size * hp->repeated; -#else unmap_len = hp->size; -#endif /* get start addr and len of the remaining segment */ munmap(hp->final_va, (size_t) unmap_len); @@ -904,50 +717,10 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, __func__, hp->filepath, strerror(errno)); return -1; } - } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - /* else, check how much do we need to map */ - else { - int nr_pg_left = - hpi[size].num_pages[socket] - pages_found; - - /* if we need enough memory to fit into the segment */ - if (hp->repeated <= nr_pg_left) { - pages_found += hp->repeated; - } - /* truncate the segment */ - else { - uint64_t final_size = nr_pg_left * hp->size; - uint64_t seg_size = hp->repeated * hp->size; - - void * unmap_va = RTE_PTR_ADD(hp->final_va, - final_size); - int fd; - - munmap(unmap_va, seg_size - final_size); - - fd = open(hp->filepath, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - hp->filepath, strerror(errno)); - return -1; - } - if (ftruncate(fd, final_size) < 0) { - RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n", - hp->filepath, strerror(errno)); - return -1; - } - close(fd); - - pages_found += nr_pg_left; - hp->repeated = nr_pg_left; - } - } -#else - /* else, lock the page and skip */ - else + } else { + /* lock the page and skip */ pages_found++; -#endif + } } /* match page */ } /* foreach page */ @@ -1177,9 +950,6 @@ rte_eal_hugepage_init(void) int i, j, new_memseg; int nr_hugefiles, nr_hugepages = 0; void *addr; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - int new_pages_count[MAX_HUGEPAGE_SIZES]; -#endif test_proc_pagemap_readable(); @@ -1260,13 +1030,6 @@ 
rte_eal_hugepage_init(void) pages_old = hpi->num_pages[0]; pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1); if (pages_new < pages_old) { -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - RTE_LOG(ERR, EAL, - "%d not %d hugepages of size %u MB allocated\n", - pages_new, pages_old, - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; -#else RTE_LOG(DEBUG, EAL, "%d not %d hugepages of size %u MB allocated\n", pages_new, pages_old, @@ -1278,7 +1041,6 @@ rte_eal_hugepage_init(void) hpi->num_pages[0] = pages_new; if (pages_new == 0) continue; -#endif } /* find physical addresses and sockets for each hugepage */ @@ -1297,18 +1059,6 @@ rte_eal_hugepage_init(void) qsort(&tmp_hp[hp_offset], hpi->num_pages[0], sizeof(struct hugepage_file), cmp_physaddr); -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - /* remap all hugepages into single file segments */ - new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi); - if (new_pages_count[i] < 0){ - RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; - } - - /* we have processed a num of hugepages of this size, so inc offset */ - hp_offset += new_pages_count[i]; -#else /* remap all hugepages */ if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) != hpi->num_pages[0]) { @@ -1323,7 +1073,6 @@ rte_eal_hugepage_init(void) /* we have processed a num of hugepages of this size, so inc offset */ hp_offset += hpi->num_pages[0]; -#endif } huge_recover_sigbus(); @@ -1331,14 +1080,7 @@ rte_eal_hugepage_init(void) if (internal_config.memory == 0 && internal_config.force_sockets == 0) internal_config.memory = eal_get_hugepage_mem_size(); -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - nr_hugefiles = 0; - for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { - nr_hugefiles += new_pages_count[i]; - } -#else nr_hugefiles = nr_hugepages; -#endif /* clean out the numbers of pages */ @@ -1356,12 +1098,7 @@ rte_eal_hugepage_init(void) for (j = 0; j < nb_hpsizes; j++) { if (tmp_hp[i].size == 
internal_config.hugepage_info[j].hugepage_sz) { -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - internal_config.hugepage_info[j].num_pages[socket] += - tmp_hp[i].repeated; -#else internal_config.hugepage_info[j].num_pages[socket]++; -#endif } } } @@ -1436,15 +1173,8 @@ rte_eal_hugepage_init(void) free(tmp_hp); tmp_hp = NULL; - /* find earliest free memseg - this is needed because in case of IVSHMEM, - * segments might have already been initialized */ - for (j = 0; j < RTE_MAX_MEMSEG; j++) - if (mcfg->memseg[j].addr == NULL) { - /* move to previous segment and exit loop */ - j--; - break; - } - + /* first memseg index shall be 0 after incrementing it below */ + j = -1; for (i = 0; i < nr_hugefiles; i++) { new_memseg = 0; @@ -1482,11 +1212,7 @@ rte_eal_hugepage_init(void) mcfg->memseg[j].phys_addr = hugepage[i].physaddr; mcfg->memseg[j].addr = hugepage[i].final_va; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated; -#else mcfg->memseg[j].len = hugepage[i].size; -#endif mcfg->memseg[j].socket_id = hugepage[i].socket_id; mcfg->memseg[j].hugepage_sz = hugepage[i].size; } @@ -1552,7 +1278,8 @@ rte_eal_hugepage_attach(void) struct hugepage_file *hp = NULL; unsigned num_hp = 0; unsigned i, s = 0; /* s used to track the segment number */ - off_t size; + unsigned max_seg = RTE_MAX_MEMSEG; + off_t size = 0; int fd, fd_zero = -1, fd_hugepage = -1; if (aslr_enabled() > 0) { @@ -1597,15 +1324,6 @@ rte_eal_hugepage_attach(void) if (mcfg->memseg[s].len == 0) break; -#ifdef RTE_LIBRTE_IVSHMEM - /* - * if segment has ioremap address set, it's an IVSHMEM segment and - * doesn't need mapping as it was already mapped earlier - */ - if (mcfg->memseg[s].ioremap_addr != 0) - continue; -#endif - /* * fdzero is mmapped to get a contiguous block of virtual * addresses of the appropriate memseg size. 
@@ -1615,10 +1333,21 @@ rte_eal_hugepage_attach(void) PROT_READ, MAP_PRIVATE, fd_zero, 0); if (base_addr == MAP_FAILED || base_addr != mcfg->memseg[s].addr) { - RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " - "in /dev/zero to requested address [%p]: '%s'\n", - (unsigned long long)mcfg->memseg[s].len, - mcfg->memseg[s].addr, strerror(errno)); + max_seg = s; + if (base_addr != MAP_FAILED) { + /* errno is stale, don't use */ + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " + "in /dev/zero at [%p], got [%p] - " + "please use '--base-virtaddr' option\n", + (unsigned long long)mcfg->memseg[s].len, + mcfg->memseg[s].addr, base_addr); + munmap(base_addr, mcfg->memseg[s].len); + } else { + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " + "in /dev/zero at [%p]: '%s'\n", + (unsigned long long)mcfg->memseg[s].len, + mcfg->memseg[s].addr, strerror(errno)); + } if (aslr_enabled() > 0) { RTE_LOG(ERR, EAL, "It is recommended to " "disable ASLR in the kernel " @@ -1644,16 +1373,6 @@ rte_eal_hugepage_attach(void) void *addr, *base_addr; uintptr_t offset = 0; size_t mapping_size; -#ifdef RTE_LIBRTE_IVSHMEM - /* - * if segment has ioremap address set, it's an IVSHMEM segment and - * doesn't need mapping as it was already mapped earlier - */ - if (mcfg->memseg[s].ioremap_addr != 0) { - s++; - continue; - } -#endif /* * free previously mapped memory so we can map the * hugepages into the space @@ -1672,11 +1391,7 @@ rte_eal_hugepage_attach(void) hp[i].filepath); goto error; } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - mapping_size = hp[i].size * hp[i].repeated; -#else mapping_size = hp[i].size; -#endif addr = mmap(RTE_PTR_ADD(base_addr, offset), mapping_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); @@ -1701,11 +1416,8 @@ rte_eal_hugepage_attach(void) return 0; error: - s = 0; - while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0) { - munmap(mcfg->memseg[s].addr, mcfg->memseg[s].len); - s++; - } + for (i = 0; i < max_seg && mcfg->memseg[i].len > 0; i++) + munmap(mcfg->memseg[i].addr, 
mcfg->memseg[i].len); if (hp != NULL && hp != MAP_FAILED) munmap(hp, size); if (fd_zero >= 0) diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci.c index cd9de7cc..e2fc219b 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -54,45 +54,6 @@ * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). */ -/* unbind kernel driver for this device */ -int -pci_unbind_kernel_driver(struct rte_pci_device *dev) -{ - int n; - FILE *f; - char filename[PATH_MAX]; - char buf[BUFSIZ]; - struct rte_pci_addr *loc = &dev->addr; - - /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */ - snprintf(filename, sizeof(filename), - "%s/" PCI_PRI_FMT "/driver/unbind", pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, loc->function); - - f = fopen(filename, "w"); - if (f == NULL) /* device was not bound */ - return 0; - - n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n", - loc->domain, loc->bus, loc->devid, loc->function); - if ((n < 0) || (n >= (int)sizeof(buf))) { - RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__); - goto error; - } - if (fwrite(buf, n, 1, f) == 0) { - RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__, - filename); - goto error; - } - - fclose(f); - return 0; - -error: - fclose(f); - return -1; -} - static int pci_get_kernel_driver_by_path(const char *filename, char *dri_name) { @@ -267,8 +228,7 @@ error: /* Scan one pci sysfs entry, and fill the devices list from it. 
*/ static int -pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, - uint8_t devid, uint8_t function) +pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) { char filename[PATH_MAX]; unsigned long tmp; @@ -281,10 +241,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, return -1; memset(dev, 0, sizeof(*dev)); - dev->addr.domain = domain; - dev->addr.bus = bus; - dev->addr.devid = devid; - dev->addr.function = function; + dev->addr = *addr; /* get vendor id */ snprintf(filename, sizeof(filename), "%s/vendor", dirname); @@ -350,13 +307,13 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, dirname); if (access(filename, R_OK) != 0) { /* if no NUMA support, set default to 0 */ - dev->numa_node = 0; + dev->device.numa_node = 0; } else { if (eal_parse_sysfs_value(filename, &tmp) < 0) { free(dev); return -1; } - dev->numa_node = tmp; + dev->device.numa_node = tmp; } /* parse resources */ @@ -390,6 +347,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, /* device is valid, add in list (sorted) */ if (TAILQ_EMPTY(&pci_device_list)) { + rte_eal_device_insert(&dev->device); TAILQ_INSERT_TAIL(&pci_device_list, dev, next); } else { struct rte_pci_device *dev2; @@ -402,6 +360,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, if (ret < 0) { TAILQ_INSERT_BEFORE(dev2, dev, next); + rte_eal_device_insert(&dev->device); } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; @@ -411,18 +370,30 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, } return 0; } + rte_eal_device_insert(&dev->device); TAILQ_INSERT_TAIL(&pci_device_list, dev, next); } return 0; } +int +pci_update_device(const struct rte_pci_addr *addr) +{ + char filename[PATH_MAX]; + + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT, + pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, + addr->function); + + return pci_scan_one(filename, addr); +} + /* * split up a 
pci address into its constituent parts. */ static int -parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, - uint8_t *bus, uint8_t *devid, uint8_t *function) +parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) { /* first split on ':' */ union splitaddr { @@ -450,10 +421,10 @@ parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, /* now convert to int values */ errno = 0; - *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); - *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); - *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); - *function = (uint8_t)strtoul(splitaddr.function, NULL, 10); + addr->domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); + addr->bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); + addr->devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); + addr->function = (uint8_t)strtoul(splitaddr.function, NULL, 10); if (errno != 0) goto error; @@ -474,8 +445,7 @@ rte_eal_pci_scan(void) struct dirent *e; DIR *dir; char dirname[PATH_MAX]; - uint16_t domain; - uint8_t bus, devid, function; + struct rte_pci_addr addr; dir = opendir(pci_get_sysfs_path()); if (dir == NULL) { @@ -488,13 +458,12 @@ rte_eal_pci_scan(void) if (e->d_name[0] == '.') continue; - if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain, - &bus, &devid, &function) != 0) + if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0) continue; snprintf(dirname, sizeof(dirname), "%s/%s", pci_get_sysfs_path(), e->d_name); - if (pci_scan_one(dirname, domain, bus, devid, function) < 0) + if (pci_scan_one(dirname, &addr) < 0) goto error; } closedir(dir); @@ -743,9 +712,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) int rte_eal_pci_init(void) { - TAILQ_INIT(&pci_driver_list); - TAILQ_INIT(&pci_device_list); - /* for debug purposes, PCI can be disabled */ if (internal_config.no_pci) return 0; diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_uio.c 
b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index 1786b754..3e4ffb57 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -133,7 +133,7 @@ pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num) snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); dev = makedev(major, minor); ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev); - if (f == NULL) { + if (ret != 0) { RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n", __func__, strerror(errno)); return -1; diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h index 3dacbff8..d459bf48 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h @@ -82,6 +82,7 @@ struct rte_epoll_event { /** Handle for interrupts. */ struct rte_intr_handle { + RTE_STD_C11 union { int vfio_dev_fd; /**< VFIO device file descriptor */ int uio_cfg_fd; /**< UIO config file descriptor diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h index 2acdfd9b..09713b0c 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -61,6 +61,9 @@ #ifdef __KERNEL__ #include <linux/if.h> +#define RTE_STD_C11 +#else +#include <rte_common.h> #endif /** @@ -85,6 +88,7 @@ enum rte_kni_req_id { */ struct rte_kni_request { uint32_t req_id; /**< Request id */ + RTE_STD_C11 union { uint32_t new_mtu; /**< New MTU */ uint8_t if_up; /**< 1: interface up, 0: interface down */ @@ -102,7 +106,7 @@ struct rte_kni_fifo { volatile unsigned read; /**< Next position to be read */ unsigned len; /**< Circular buffer length */ unsigned elem_size; /**< Pointer size - for 32/64 bit 
OS */ - void * volatile buffer[0]; /**< The buffer contains mbuf pointers */ + void *volatile buffer[]; /**< The buffer contains mbuf pointers */ }; /* @@ -111,7 +115,8 @@ struct rte_kni_fifo { */ struct rte_kni_mbuf { void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); - char pad0[10]; + uint64_t buf_physaddr; + char pad0[2]; uint16_t data_off; /**< Start address of data in segment buffer. */ char pad1[2]; uint8_t nb_segs; /**< Number of segments. */ @@ -159,6 +164,7 @@ struct rte_kni_device_info { uint16_t group_id; /**< Group ID */ uint32_t core_id; /**< core ID to bind for kernel thread */ + __extension__ uint8_t force_bind : 1; /**< Flag for kernel thread binding */ /* mbuf size */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/compat.h b/src/dpdk/lib/librte_eal/linuxapp/kni/compat.h index 647ba3ce..78da08e5 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/compat.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/compat.h @@ -19,13 +19,25 @@ #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) -#define sk_sleep(s) (s)->sk_sleep +#define sk_sleep(s) ((s)->sk_sleep) +#else +#define HAVE_SOCKET_WQ +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) +#define HAVE_STATIC_SOCK_MAP_FD +#else +#define kni_sock_map_fd(s) sock_map_fd(s, 0) #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) #define HAVE_CHANGE_CARRIER_CB #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +#define ether_addr_copy(dst, src) memcpy(dst, src, ETH_ALEN) +#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) #define HAVE_IOV_ITER_MSGHDR #endif @@ -35,6 +47,23 @@ #define HAVE_REBUILD_HEADER #endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) +#define HAVE_SK_ALLOC_KERN_PARAM +#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) #define HAVE_TRANS_START_HELPER #endif + +/* + * KNI uses NET_NAME_UNKNOWN macro to select correct version of alloc_netdev() + * For old kernels just backported the commit that enables the macro + * (685343fc3ba6) 
but still uses old API, it is required to undefine macro to + * select correct version of API, this is safe since KNI doesn't use the value. + * This fix is specific to RedHat/CentOS kernels. + */ +#if (defined(RHEL_RELEASE_CODE) && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))) +#undef NET_NAME_UNKNOWN +#endif diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c index b8c9a13f..d558af20 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h index 1aec75ab..185ccdf1 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c index 6095d3b4..220c9a40 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h index b21294ec..55c8a5f4 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h index 63b228c5..d42c7998 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h index 347cef71..35886e93 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c index 1e9f3e6e..7e4c20a9 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h index 57b2eb56..b8fa70d0 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c index 4ee59ba9..74319def 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h index 6a1b0f52..3bcdd88c 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c index a1700398..51dfae5d 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h index c94b2185..0627f271 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c index 3ef0d98b..bd64429f 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h index bbf838c8..64685d9d 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c index 6188d007..1ce59154 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h index fe62785a..17bc53c3 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h index d1cf98e2..c1ab60c4 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c index 140a2a47..d8a77c45 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h index 5387c5e7..db24fb0b 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h index 0e083c54..830ec991 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h index e5554ca3..d077b49e 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c deleted file mode 100644 index c07f9f53..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "igb.h" diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c index af7e68a5..d7a987d5 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c deleted file mode 100644 index 07a1ae07..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c +++ /dev/null @@ -1,260 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "igb.h" -#include "e1000_82575.h" -#include "e1000_hw.h" -#ifdef IGB_HWMON -#include <linux/module.h> -#include <linux/types.h> -#include <linux/sysfs.h> -#include <linux/kobject.h> -#include <linux/device.h> -#include <linux/netdevice.h> -#include <linux/hwmon.h> -#include <linux/pci.h> - -#ifdef HAVE_I2C_SUPPORT -static struct i2c_board_info i350_sensor_info = { - I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)), -}; -#endif /* HAVE_I2C_SUPPORT */ - -/* hwmon callback functions */ -static ssize_t igb_hwmon_show_location(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); - return sprintf(buf, "loc%u\n", - igb_attr->sensor->location); -} - -static ssize_t igb_hwmon_show_temp(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); - unsigned int value; - - /* reset the temp field */ - igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw); - - value = igb_attr->sensor->temp; - - /* display millidegree */ - value *= 1000; - - return sprintf(buf, "%u\n", value); -} - -static ssize_t igb_hwmon_show_cautionthresh(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); - unsigned int value = igb_attr->sensor->caution_thresh; - - /* display millidegree */ - value *= 1000; - - return sprintf(buf, "%u\n", value); -} - -static ssize_t igb_hwmon_show_maxopthresh(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); - unsigned int value = igb_attr->sensor->max_op_thresh; - - /* display millidegree */ - value *= 1000; - - return sprintf(buf, "%u\n", value); -} - 
-/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file. - * @ adapter: pointer to the adapter structure - * @ offset: offset in the eeprom sensor data table - * @ type: type of sensor data to display - * - * For each file we want in hwmon's sysfs interface we need a device_attribute - * This is included in our hwmon_attr struct that contains the references to - * the data structures we need to get the data to display. - */ -static int igb_add_hwmon_attr(struct igb_adapter *adapter, - unsigned int offset, int type) { - int rc; - unsigned int n_attr; - struct hwmon_attr *igb_attr; - - n_attr = adapter->igb_hwmon_buff.n_hwmon; - igb_attr = &adapter->igb_hwmon_buff.hwmon_list[n_attr]; - - switch (type) { - case IGB_HWMON_TYPE_LOC: - igb_attr->dev_attr.show = igb_hwmon_show_location; - snprintf(igb_attr->name, sizeof(igb_attr->name), - "temp%u_label", offset); - break; - case IGB_HWMON_TYPE_TEMP: - igb_attr->dev_attr.show = igb_hwmon_show_temp; - snprintf(igb_attr->name, sizeof(igb_attr->name), - "temp%u_input", offset); - break; - case IGB_HWMON_TYPE_CAUTION: - igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh; - snprintf(igb_attr->name, sizeof(igb_attr->name), - "temp%u_max", offset); - break; - case IGB_HWMON_TYPE_MAX: - igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh; - snprintf(igb_attr->name, sizeof(igb_attr->name), - "temp%u_crit", offset); - break; - default: - rc = -EPERM; - return rc; - } - - /* These always the same regardless of type */ - igb_attr->sensor = - &adapter->hw.mac.thermal_sensor_data.sensor[offset]; - igb_attr->hw = &adapter->hw; - igb_attr->dev_attr.store = NULL; - igb_attr->dev_attr.attr.mode = S_IRUGO; - igb_attr->dev_attr.attr.name = igb_attr->name; - sysfs_attr_init(&igb_attr->dev_attr.attr); - rc = device_create_file(&adapter->pdev->dev, - &igb_attr->dev_attr); - if (rc == 0) - ++adapter->igb_hwmon_buff.n_hwmon; - - return rc; -} - -static void igb_sysfs_del_adapter(struct igb_adapter *adapter) -{ - int i; - - if 
(adapter == NULL) - return; - - for (i = 0; i < adapter->igb_hwmon_buff.n_hwmon; i++) { - device_remove_file(&adapter->pdev->dev, - &adapter->igb_hwmon_buff.hwmon_list[i].dev_attr); - } - - kfree(adapter->igb_hwmon_buff.hwmon_list); - - if (adapter->igb_hwmon_buff.device) - hwmon_device_unregister(adapter->igb_hwmon_buff.device); -} - -/* called from igb_main.c */ -void igb_sysfs_exit(struct igb_adapter *adapter) -{ - igb_sysfs_del_adapter(adapter); -} - -/* called from igb_main.c */ -int igb_sysfs_init(struct igb_adapter *adapter) -{ - struct hwmon_buff *igb_hwmon = &adapter->igb_hwmon_buff; - unsigned int i; - int n_attrs; - int rc = 0; -#ifdef HAVE_I2C_SUPPORT - struct i2c_client *client = NULL; -#endif /* HAVE_I2C_SUPPORT */ - - /* If this method isn't defined we don't support thermals */ - if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL) - goto exit; - - /* Don't create thermal hwmon interface if no sensors present */ - rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw)); - if (rc) - goto exit; -#ifdef HAVE_I2C_SUPPORT - /* init i2c_client */ - client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info); - if (client == NULL) { - dev_info(&adapter->pdev->dev, - "Failed to create new i2c device..\n"); - goto exit; - } - adapter->i2c_client = client; -#endif /* HAVE_I2C_SUPPORT */ - - /* Allocation space for max attributes - * max num sensors * values (loc, temp, max, caution) - */ - n_attrs = E1000_MAX_SENSORS * 4; - igb_hwmon->hwmon_list = kcalloc(n_attrs, sizeof(struct hwmon_attr), - GFP_KERNEL); - if (!igb_hwmon->hwmon_list) { - rc = -ENOMEM; - goto err; - } - - igb_hwmon->device = hwmon_device_register(&adapter->pdev->dev); - if (IS_ERR(igb_hwmon->device)) { - rc = PTR_ERR(igb_hwmon->device); - goto err; - } - - for (i = 0; i < E1000_MAX_SENSORS; i++) { - - /* Only create hwmon sysfs entries for sensors that have - * meaningful data. 
- */ - if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0) - continue; - - /* Bail if any hwmon attr struct fails to initialize */ - rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION); - rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC); - rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP); - rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX); - if (rc) - goto err; - } - - goto exit; - -err: - igb_sysfs_del_adapter(adapter); -exit: - return rc; -} -#endif /* IGB_HWMON */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c index 96acec58..f4dca5a3 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> @@ -76,7 +76,7 @@ static const char igb_driver_string[] = static const char igb_copyright[] = "Copyright (c) 2007-2013 Intel Corporation."; -static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { +const struct pci_device_id igb_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, @@ -195,7 +195,11 @@ static void igb_process_mdd_event(struct igb_adapter *); #ifdef IFLA_VF_MAX static int igb_ndo_set_vf_mac( struct net_device *netdev, int vf, u8 *mac); static int igb_ndo_set_vf_vlan(struct net_device *netdev, +#ifdef HAVE_VF_VLAN_PROTO + int vf, u16 vlan, u8 qos, __be16 vlan_proto); +#else int vf, u16 vlan, u8 qos); +#endif #ifdef HAVE_VF_SPOOFCHK_CONFIGURE static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); @@ -1558,6 +1562,7 @@ static void igb_check_swap_media(struct igb_adapter *adapter) ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); connsw = E1000_READ_REG(hw, E1000_CONNSW); link = igb_has_link(adapter); + (void) link; /* need to live swap if current media is copper and we have fiber/serdes * to go to. 
@@ -6411,7 +6416,11 @@ static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) } static int igb_ndo_set_vf_vlan(struct net_device *netdev, +#ifdef HAVE_VF_VLAN_PROTO + int vf, u16 vlan, u8 qos, __be16 vlan_proto) +#else int vf, u16 vlan, u8 qos) +#endif { int err = 0; struct igb_adapter *adapter = netdev_priv(netdev); @@ -6419,6 +6428,12 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev, /* VLAN IDs accepted range 0-4094 */ if ((vf >= adapter->vfs_allocated_count) || (vlan > VLAN_VID_MASK-1) || (qos > 7)) return -EINVAL; + +#ifdef HAVE_VF_VLAN_PROTO + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; +#endif + if (vlan || qos) { err = igb_vlvf_set(adapter, vlan, !!vlan, vf); if (err) @@ -6579,7 +6594,12 @@ static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) if (adapter->vf_data[vf].pf_vlan) igb_ndo_set_vf_vlan(adapter->netdev, vf, adapter->vf_data[vf].pf_vlan, +#ifdef HAVE_VF_VLAN_PROTO + adapter->vf_data[vf].pf_qos, + htons(ETH_P_8021Q)); +#else adapter->vf_data[vf].pf_qos); +#endif else igb_clear_vf_vfta(adapter, vf); #endif diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c index f79ce7c1..c922ca2f 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c deleted file mode 100644 index 66236d29..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c +++ /dev/null @@ -1,363 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "igb.h" -#include "e1000_82575.h" -#include "e1000_hw.h" - -#ifdef IGB_PROCFS -#ifndef IGB_HWMON - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/proc_fs.h> -#include <linux/device.h> -#include <linux/netdevice.h> - -static struct proc_dir_entry *igb_top_dir = NULL; - - -bool igb_thermal_present(struct igb_adapter *adapter) -{ - s32 status; - struct e1000_hw *hw; - - if (adapter == NULL) - return false; - hw = &adapter->hw; - - /* - * Only set I2C bit-bang mode if an external thermal sensor is - * supported on this device. - */ - if (adapter->ets) { - status = e1000_set_i2c_bb(hw); - if (status != E1000_SUCCESS) - return false; - } - - status = hw->mac.ops.init_thermal_sensor_thresh(hw); - if (status != E1000_SUCCESS) - return false; - - return true; -} - - -static int igb_macburn(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct e1000_hw *hw; - struct igb_adapter *adapter = (struct igb_adapter *)data; - if (adapter == NULL) - return snprintf(page, count, "error: no adapter\n"); - - hw = &adapter->hw; - if (hw == NULL) - return snprintf(page, count, "error: no hw data\n"); - - return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n", - (unsigned int)hw->mac.perm_addr[0], - (unsigned int)hw->mac.perm_addr[1], - (unsigned int)hw->mac.perm_addr[2], - (unsigned int)hw->mac.perm_addr[3], - (unsigned int)hw->mac.perm_addr[4], - (unsigned int)hw->mac.perm_addr[5]); -} - -static int igb_macadmn(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct e1000_hw *hw; - struct igb_adapter *adapter = (struct igb_adapter *)data; - if (adapter == NULL) - return snprintf(page, count, "error: no adapter\n"); - - hw = &adapter->hw; - if (hw == NULL) - return snprintf(page, count, "error: no hw data\n"); - - return snprintf(page, count, 
"0x%02X%02X%02X%02X%02X%02X\n", - (unsigned int)hw->mac.addr[0], - (unsigned int)hw->mac.addr[1], - (unsigned int)hw->mac.addr[2], - (unsigned int)hw->mac.addr[3], - (unsigned int)hw->mac.addr[4], - (unsigned int)hw->mac.addr[5]); -} - -static int igb_numeports(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct e1000_hw *hw; - int ports; - struct igb_adapter *adapter = (struct igb_adapter *)data; - if (adapter == NULL) - return snprintf(page, count, "error: no adapter\n"); - - hw = &adapter->hw; - if (hw == NULL) - return snprintf(page, count, "error: no hw data\n"); - - ports = 4; - - return snprintf(page, count, "%d\n", ports); -} - -static int igb_porttype(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct igb_adapter *adapter = (struct igb_adapter *)data; - if (adapter == NULL) - return snprintf(page, count, "error: no adapter\n"); - - return snprintf(page, count, "%d\n", - test_bit(__IGB_DOWN, &adapter->state)); -} - -static int igb_therm_location(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct igb_therm_proc_data *therm_data = - (struct igb_therm_proc_data *)data; - - if (therm_data == NULL) - return snprintf(page, count, "error: no therm_data\n"); - - return snprintf(page, count, "%d\n", therm_data->sensor_data->location); -} - -static int igb_therm_maxopthresh(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct igb_therm_proc_data *therm_data = - (struct igb_therm_proc_data *)data; - - if (therm_data == NULL) - return snprintf(page, count, "error: no therm_data\n"); - - return snprintf(page, count, "%d\n", - therm_data->sensor_data->max_op_thresh); -} - -static int igb_therm_cautionthresh(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct igb_therm_proc_data *therm_data = - (struct igb_therm_proc_data *)data; - - if (therm_data == NULL) - return snprintf(page, count, "error: no therm_data\n"); 
- - return snprintf(page, count, "%d\n", - therm_data->sensor_data->caution_thresh); -} - -static int igb_therm_temp(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - s32 status; - struct igb_therm_proc_data *therm_data = - (struct igb_therm_proc_data *)data; - - if (therm_data == NULL) - return snprintf(page, count, "error: no therm_data\n"); - - status = e1000_get_thermal_sensor_data(therm_data->hw); - if (status != E1000_SUCCESS) - snprintf(page, count, "error: status %d returned\n", status); - - return snprintf(page, count, "%d\n", therm_data->sensor_data->temp); -} - -struct igb_proc_type{ - char name[32]; - int (*read)(char*, char**, off_t, int, int*, void*); -}; - -struct igb_proc_type igb_proc_entries[] = { - {"numeports", &igb_numeports}, - {"porttype", &igb_porttype}, - {"macburn", &igb_macburn}, - {"macadmn", &igb_macadmn}, - {"", NULL} -}; - -struct igb_proc_type igb_internal_entries[] = { - {"location", &igb_therm_location}, - {"temp", &igb_therm_temp}, - {"cautionthresh", &igb_therm_cautionthresh}, - {"maxopthresh", &igb_therm_maxopthresh}, - {"", NULL} -}; - -void igb_del_proc_entries(struct igb_adapter *adapter) -{ - int index, i; - char buf[16]; /* much larger than the sensor number will ever be */ - - if (igb_top_dir == NULL) - return; - - for (i = 0; i < E1000_MAX_SENSORS; i++) { - if (adapter->therm_dir[i] == NULL) - continue; - - for (index = 0; ; index++) { - if (igb_internal_entries[index].read == NULL) - break; - - remove_proc_entry(igb_internal_entries[index].name, - adapter->therm_dir[i]); - } - snprintf(buf, sizeof(buf), "sensor_%d", i); - remove_proc_entry(buf, adapter->info_dir); - } - - if (adapter->info_dir != NULL) { - for (index = 0; ; index++) { - if (igb_proc_entries[index].read == NULL) - break; - remove_proc_entry(igb_proc_entries[index].name, - adapter->info_dir); - } - remove_proc_entry("info", adapter->eth_dir); - } - - if (adapter->eth_dir != NULL) - remove_proc_entry(pci_name(adapter->pdev), 
igb_top_dir); -} - -/* called from igb_main.c */ -void igb_procfs_exit(struct igb_adapter *adapter) -{ - igb_del_proc_entries(adapter); -} - -int igb_procfs_topdir_init(void) -{ - igb_top_dir = proc_mkdir("driver/igb", NULL); - if (igb_top_dir == NULL) - return -ENOMEM; - - return 0; -} - -void igb_procfs_topdir_exit(void) -{ - remove_proc_entry("driver/igb", NULL); -} - -/* called from igb_main.c */ -int igb_procfs_init(struct igb_adapter *adapter) -{ - int rc = 0; - int i; - int index; - char buf[16]; /* much larger than the sensor number will ever be */ - - adapter->eth_dir = NULL; - adapter->info_dir = NULL; - for (i = 0; i < E1000_MAX_SENSORS; i++) - adapter->therm_dir[i] = NULL; - - if ( igb_top_dir == NULL ) { - rc = -ENOMEM; - goto fail; - } - - adapter->eth_dir = proc_mkdir(pci_name(adapter->pdev), igb_top_dir); - if (adapter->eth_dir == NULL) { - rc = -ENOMEM; - goto fail; - } - - adapter->info_dir = proc_mkdir("info", adapter->eth_dir); - if (adapter->info_dir == NULL) { - rc = -ENOMEM; - goto fail; - } - for (index = 0; ; index++) { - if (igb_proc_entries[index].read == NULL) { - break; - } - if (!(create_proc_read_entry(igb_proc_entries[index].name, - 0444, - adapter->info_dir, - igb_proc_entries[index].read, - adapter))) { - - rc = -ENOMEM; - goto fail; - } - } - if (igb_thermal_present(adapter) == false) - goto exit; - - for (i = 0; i < E1000_MAX_SENSORS; i++) { - - if (adapter->hw.mac.thermal_sensor_data.sensor[i].location== 0) - continue; - - snprintf(buf, sizeof(buf), "sensor_%d", i); - adapter->therm_dir[i] = proc_mkdir(buf, adapter->info_dir); - if (adapter->therm_dir[i] == NULL) { - rc = -ENOMEM; - goto fail; - } - for (index = 0; ; index++) { - if (igb_internal_entries[index].read == NULL) - break; - /* - * therm_data struct contains pointer the read func - * will be needing - */ - adapter->therm_data[i].hw = &adapter->hw; - adapter->therm_data[i].sensor_data = - &adapter->hw.mac.thermal_sensor_data.sensor[i]; - - if (!(create_proc_read_entry( 
- igb_internal_entries[index].name, - 0444, - adapter->therm_dir[i], - igb_internal_entries[index].read, - &adapter->therm_data[i]))) { - rc = -ENOMEM; - goto fail; - } - } - } - goto exit; - -fail: - igb_del_proc_entries(adapter); -exit: - return rc; -} - -#endif /* !IGB_HWMON */ -#endif /* IGB_PROCFS */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c deleted file mode 100644 index 454b70ce..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c +++ /dev/null @@ -1,944 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -/****************************************************************************** - Copyright(c) 2011 Richard Cochran <richardcochran@gmail.com> for some of the - 82576 and 82580 code -******************************************************************************/ - -#include "igb.h" - -#include <linux/module.h> -#include <linux/device.h> -#include <linux/pci.h> -#include <linux/ptp_classify.h> - -#define INCVALUE_MASK 0x7fffffff -#define ISGN 0x80000000 - -/* - * The 82580 timesync updates the system timer every 8ns by 8ns, - * and this update value cannot be reprogrammed. - * - * Neither the 82576 nor the 82580 offer registers wide enough to hold - * nanoseconds time values for very long. For the 82580, SYSTIM always - * counts nanoseconds, but the upper 24 bits are not available. The - * frequency is adjusted by changing the 32 bit fractional nanoseconds - * register, TIMINCA. - * - * For the 82576, the SYSTIM register time unit is affect by the - * choice of the 24 bit TININCA:IV (incvalue) field. Five bits of this - * field are needed to provide the nominal 16 nanosecond period, - * leaving 19 bits for fractional nanoseconds. - * - * We scale the NIC clock cycle by a large factor so that relatively - * small clock corrections can be added or subtracted at each clock - * tick. The drawbacks of a large factor are a) that the clock - * register overflows more quickly (not such a big deal) and b) that - * the increment per tick has to fit into 24 bits. As a result we - * need to use a shift of 19 so we can fit a value of 16 into the - * TIMINCA register. 
- * - * - * SYSTIMH SYSTIML - * +--------------+ +---+---+------+ - * 82576 | 32 | | 8 | 5 | 19 | - * +--------------+ +---+---+------+ - * \________ 45 bits _______/ fract - * - * +----------+---+ +--------------+ - * 82580 | 24 | 8 | | 32 | - * +----------+---+ +--------------+ - * reserved \______ 40 bits _____/ - * - * - * The 45 bit 82576 SYSTIM overflows every - * 2^45 * 10^-9 / 3600 = 9.77 hours. - * - * The 40 bit 82580 SYSTIM overflows every - * 2^40 * 10^-9 / 60 = 18.3 minutes. - */ - -#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) -#define IGB_PTP_TX_TIMEOUT (HZ * 15) -#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT) -#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1) -#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) -#define IGB_NBITS_82580 40 - -/* - * SYSTIM read access for the 82576 - */ - -static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc) -{ - struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); - struct e1000_hw *hw = &igb->hw; - u64 val; - u32 lo, hi; - - lo = E1000_READ_REG(hw, E1000_SYSTIML); - hi = E1000_READ_REG(hw, E1000_SYSTIMH); - - val = ((u64) hi) << 32; - val |= lo; - - return val; -} - -/* - * SYSTIM read access for the 82580 - */ - -static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc) -{ - struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); - struct e1000_hw *hw = &igb->hw; - u64 val; - u32 lo, hi; - - /* The timestamp latches on lowest register read. For the 82580 - * the lowest register is SYSTIMR instead of SYSTIML. However we only - * need to provide nanosecond resolution, so we just ignore it. 
- */ - E1000_READ_REG(hw, E1000_SYSTIMR); - lo = E1000_READ_REG(hw, E1000_SYSTIML); - hi = E1000_READ_REG(hw, E1000_SYSTIMH); - - val = ((u64) hi) << 32; - val |= lo; - - return val; -} - -/* - * SYSTIM read access for I210/I211 - */ - -static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts) -{ - struct e1000_hw *hw = &adapter->hw; - u32 sec, nsec; - - /* The timestamp latches on lowest register read. For I210/I211, the - * lowest register is SYSTIMR. Since we only need to provide nanosecond - * resolution, we can ignore it. - */ - E1000_READ_REG(hw, E1000_SYSTIMR); - nsec = E1000_READ_REG(hw, E1000_SYSTIML); - sec = E1000_READ_REG(hw, E1000_SYSTIMH); - - ts->tv_sec = sec; - ts->tv_nsec = nsec; -} - -static void igb_ptp_write_i210(struct igb_adapter *adapter, - const struct timespec *ts) -{ - struct e1000_hw *hw = &adapter->hw; - - /* - * Writing the SYSTIMR register is not necessary as it only provides - * sub-nanosecond resolution. - */ - E1000_WRITE_REG(hw, E1000_SYSTIML, ts->tv_nsec); - E1000_WRITE_REG(hw, E1000_SYSTIMH, ts->tv_sec); -} - -/** - * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp - * @adapter: board private structure - * @hwtstamps: timestamp structure to update - * @systim: unsigned 64bit system time value. - * - * We need to convert the system time value stored in the RX/TXSTMP registers - * into a hwtstamp which can be used by the upper level timestamping functions. - * - * The 'tmreg_lock' spinlock is used to protect the consistency of the - * system time value. This is needed because reading the 64 bit time - * value involves reading two (or three) 32 bit registers. The first - * read latches the value. Ditto for writing. - * - * In addition, here have extended the system time with an overflow - * counter in software. 
- **/ -static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter, - struct skb_shared_hwtstamps *hwtstamps, - u64 systim) -{ - unsigned long flags; - u64 ns; - - switch (adapter->hw.mac.type) { - case e1000_82576: - case e1000_82580: - case e1000_i350: - case e1000_i354: - spin_lock_irqsave(&adapter->tmreg_lock, flags); - - ns = timecounter_cyc2time(&adapter->tc, systim); - - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - memset(hwtstamps, 0, sizeof(*hwtstamps)); - hwtstamps->hwtstamp = ns_to_ktime(ns); - break; - case e1000_i210: - case e1000_i211: - memset(hwtstamps, 0, sizeof(*hwtstamps)); - /* Upper 32 bits contain s, lower 32 bits contain ns. */ - hwtstamps->hwtstamp = ktime_set(systim >> 32, - systim & 0xFFFFFFFF); - break; - default: - break; - } -} - -/* - * PTP clock operations - */ - -static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - struct e1000_hw *hw = &igb->hw; - int neg_adj = 0; - u64 rate; - u32 incvalue; - - if (ppb < 0) { - neg_adj = 1; - ppb = -ppb; - } - rate = ppb; - rate <<= 14; - rate = div_u64(rate, 1953125); - - incvalue = 16 << IGB_82576_TSYNC_SHIFT; - - if (neg_adj) - incvalue -= rate; - else - incvalue += rate; - - E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | (incvalue & INCVALUE_82576_MASK)); - - return 0; -} - -static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - struct e1000_hw *hw = &igb->hw; - int neg_adj = 0; - u64 rate; - u32 inca; - - if (ppb < 0) { - neg_adj = 1; - ppb = -ppb; - } - rate = ppb; - rate <<= 26; - rate = div_u64(rate, 1953125); - - /* At 2.5G speeds, the TIMINCA register on I354 updates the clock 2.5x - * as quickly. Account for this by dividing the adjustment by 2.5. 
- */ - if (hw->mac.type == e1000_i354) { - u32 status = E1000_READ_REG(hw, E1000_STATUS); - - if ((status & E1000_STATUS_2P5_SKU) && - !(status & E1000_STATUS_2P5_SKU_OVER)) { - rate <<= 1; - rate = div_u64(rate, 5); - } - } - - inca = rate & INCVALUE_MASK; - if (neg_adj) - inca |= ISGN; - - E1000_WRITE_REG(hw, E1000_TIMINCA, inca); - - return 0; -} - -static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - s64 now; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - now = timecounter_read(&igb->tc); - now += delta; - timecounter_init(&igb->tc, &igb->cc, now); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - struct timespec now, then = ns_to_timespec(delta); - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - igb_ptp_read_i210(igb, &now); - now = timespec_add(now, then); - igb_ptp_write_i210(igb, (const struct timespec *)&now); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp, - struct timespec *ts) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - u64 ns; - u32 remainder; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - ns = timecounter_read(&igb->tc); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder); - ts->tv_nsec = remainder; - - return 0; -} - -static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp, - struct timespec *ts) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - igb_ptp_read_i210(igb, ts); - - 
spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_settime_82576(struct ptp_clock_info *ptp, - const struct timespec *ts) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - u64 ns; - - ns = ts->tv_sec * 1000000000ULL; - ns += ts->tv_nsec; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - timecounter_init(&igb->tc, &igb->cc, ns); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_settime_i210(struct ptp_clock_info *ptp, - const struct timespec *ts) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - igb_ptp_write_i210(igb, ts); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) -{ - return -EOPNOTSUPP; -} - -/** - * igb_ptp_tx_work - * @work: pointer to work struct - * - * This work function polls the TSYNCTXCTL valid bit to determine when a - * timestamp has been taken for the current stored skb. 
- */ -void igb_ptp_tx_work(struct work_struct *work) -{ - struct igb_adapter *adapter = container_of(work, struct igb_adapter, - ptp_tx_work); - struct e1000_hw *hw = &adapter->hw; - u32 tsynctxctl; - - if (!adapter->ptp_tx_skb) - return; - - if (time_is_before_jiffies(adapter->ptp_tx_start + - IGB_PTP_TX_TIMEOUT)) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - adapter->tx_hwtstamp_timeouts++; - dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang"); - return; - } - - tsynctxctl = E1000_READ_REG(hw, E1000_TSYNCTXCTL); - if (tsynctxctl & E1000_TSYNCTXCTL_VALID) - igb_ptp_tx_hwtstamp(adapter); - else - /* reschedule to check later */ - schedule_work(&adapter->ptp_tx_work); -} - -static void igb_ptp_overflow_check(struct work_struct *work) -{ - struct igb_adapter *igb = - container_of(work, struct igb_adapter, ptp_overflow_work.work); - struct timespec ts; - - igb->ptp_caps.gettime(&igb->ptp_caps, &ts); - - pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec); - - schedule_delayed_work(&igb->ptp_overflow_work, - IGB_SYSTIM_OVERFLOW_PERIOD); -} - -/** - * igb_ptp_rx_hang - detect error case when Rx timestamp registers latched - * @adapter: private network adapter structure - * - * This watchdog task is scheduled to detect error case where hardware has - * dropped an Rx packet that was timestamped when the ring is full. The - * particular error is rare but leaves the device in a state unable to timestamp - * any future packets. 
- */ -void igb_ptp_rx_hang(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct igb_ring *rx_ring; - u32 tsyncrxctl = E1000_READ_REG(hw, E1000_TSYNCRXCTL); - unsigned long rx_event; - int n; - - if (hw->mac.type != e1000_82576) - return; - - /* If we don't have a valid timestamp in the registers, just update the - * timeout counter and exit - */ - if (!(tsyncrxctl & E1000_TSYNCRXCTL_VALID)) { - adapter->last_rx_ptp_check = jiffies; - return; - } - - /* Determine the most recent watchdog or rx_timestamp event */ - rx_event = adapter->last_rx_ptp_check; - for (n = 0; n < adapter->num_rx_queues; n++) { - rx_ring = adapter->rx_ring[n]; - if (time_after(rx_ring->last_rx_timestamp, rx_event)) - rx_event = rx_ring->last_rx_timestamp; - } - - /* Only need to read the high RXSTMP register to clear the lock */ - if (time_is_before_jiffies(rx_event + 5 * HZ)) { - E1000_READ_REG(hw, E1000_RXSTMPH); - adapter->last_rx_ptp_check = jiffies; - adapter->rx_hwtstamp_cleared++; - dev_warn(&adapter->pdev->dev, "clearing Rx timestamp hang"); - } -} - -/** - * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp - * @adapter: Board private structure. - * - * If we were asked to do hardware stamping and such a time stamp is - * available, then it must have been for this skb here because we only - * allow only one such packet into the queue. 
- */ -void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct skb_shared_hwtstamps shhwtstamps; - u64 regval; - - regval = E1000_READ_REG(hw, E1000_TXSTMPL); - regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32; - - igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); - skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; -} - -/** - * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp - * @q_vector: Pointer to interrupt specific structure - * @va: Pointer to address containing Rx buffer - * @skb: Buffer containing timestamp and packet - * - * This function is meant to retrieve a timestamp from the first buffer of an - * incoming frame. The value is stored in little endian format starting on - * byte 8. - */ -void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, - unsigned char *va, - struct sk_buff *skb) -{ - __le64 *regval = (__le64 *)va; - - /* - * The timestamp is recorded in little endian format. - * DWORD: 0 1 2 3 - * Field: Reserved Reserved SYSTIML SYSTIMH - */ - igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), - le64_to_cpu(regval[1])); -} - -/** - * igb_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register - * @q_vector: Pointer to interrupt specific structure - * @skb: Buffer containing timestamp and packet - * - * This function is meant to retrieve a timestamp from the internal registers - * of the adapter and store it in the skb. - */ -void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, - struct sk_buff *skb) -{ - struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; - u64 regval; - - /* - * If this bit is set, then the RX registers contain the time stamp. No - * other packet will be time stamped until we read these registers, so - * read the registers to make them available again. 
Because only one - * packet can be time stamped at a time, we know that the register - * values must belong to this one here and therefore we don't need to - * compare any of the additional attributes stored for it. - * - * If nothing went wrong, then it should have a shared tx_flags that we - * can turn into a skb_shared_hwtstamps. - */ - if (!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) - return; - - regval = E1000_READ_REG(hw, E1000_RXSTMPL); - regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32; - - igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); -} - -/** - * igb_ptp_hwtstamp_ioctl - control hardware time stamping - * @netdev: - * @ifreq: - * @cmd: - * - * Outgoing time stamping can be enabled and disabled. Play nice and - * disable it when requested, although it shouldn't case any overhead - * when no packet needs it. At most one packet in the queue may be - * marked for time stamping, otherwise it would be impossible to tell - * for sure to which packet the hardware time stamp belongs. - * - * Incoming time stamping has to be configured via the hardware - * filters. Not all combinations are supported, in particular event - * type has to be specified. Matching the kind of event packet is - * not supported, with the exception of "all V2 events regardless of - * level 2 or 4". 
- * - **/ -int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, - struct ifreq *ifr, int cmd) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct hwtstamp_config config; - u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; - u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - u32 tsync_rx_cfg = 0; - bool is_l4 = false; - bool is_l2 = false; - u32 regval; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - tsync_tx_ctl = 0; - case HWTSTAMP_TX_ON: - break; - default: - return -ERANGE; - } - - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - tsync_rx_ctl = 0; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; - config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - is_l2 = true; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_ALL: - /* - * 82576 cannot timestamp all packets, which it needs to do to - * support both V1 Sync and Delay_Req messages - */ - if (hw->mac.type != e1000_82576) { - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - } - /* fall through */ - default: - 
config.rx_filter = HWTSTAMP_FILTER_NONE; - return -ERANGE; - } - - if (hw->mac.type == e1000_82575) { - if (tsync_rx_ctl | tsync_tx_ctl) - return -EINVAL; - return 0; - } - - /* - * Per-packet timestamping only works if all packets are - * timestamped, so enable timestamping in all packets as - * long as one rx filter was configured. - */ - if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { - tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - config.rx_filter = HWTSTAMP_FILTER_ALL; - is_l2 = true; - is_l4 = true; - - if ((hw->mac.type == e1000_i210) || - (hw->mac.type == e1000_i211)) { - regval = E1000_READ_REG(hw, E1000_RXPBS); - regval |= E1000_RXPBS_CFG_TS_EN; - E1000_WRITE_REG(hw, E1000_RXPBS, regval); - } - } - - /* enable/disable TX */ - regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL); - regval &= ~E1000_TSYNCTXCTL_ENABLED; - regval |= tsync_tx_ctl; - E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval); - - /* enable/disable RX */ - regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL); - regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); - regval |= tsync_rx_ctl; - E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval); - - /* define which PTP packets are time stamped */ - E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg); - - /* define ethertype filter for timestamped packets */ - if (is_l2) - E1000_WRITE_REG(hw, E1000_ETQF(3), - (E1000_ETQF_FILTER_ENABLE | /* enable filter */ - E1000_ETQF_1588 | /* enable timestamping */ - ETH_P_1588)); /* 1588 eth protocol type */ - else - E1000_WRITE_REG(hw, E1000_ETQF(3), 0); - - /* L4 Queue Filter[3]: filter by destination port and protocol */ - if (is_l4) { - u32 ftqf = (IPPROTO_UDP /* UDP */ - | E1000_FTQF_VF_BP /* VF not compared */ - | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ - | E1000_FTQF_MASK); /* mask all inputs */ - ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ - - E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_EV_PORT)); - E1000_WRITE_REG(hw, 
E1000_IMIREXT(3), - (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); - if (hw->mac.type == e1000_82576) { - /* enable source port check */ - E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_EV_PORT)); - ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; - } - E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf); - } else { - E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK); - } - E1000_WRITE_FLUSH(hw); - - /* clear TX/RX time stamp registers, just to be sure */ - regval = E1000_READ_REG(hw, E1000_TXSTMPL); - regval = E1000_READ_REG(hw, E1000_TXSTMPH); - regval = E1000_READ_REG(hw, E1000_RXSTMPL); - regval = E1000_READ_REG(hw, E1000_RXSTMPH); - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} - -void igb_ptp_init(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - - switch (hw->mac.type) { - case e1000_82576: - snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); - adapter->ptp_caps.owner = THIS_MODULE; - adapter->ptp_caps.max_adj = 999999881; - adapter->ptp_caps.n_ext_ts = 0; - adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576; - adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; - adapter->ptp_caps.gettime = igb_ptp_gettime_82576; - adapter->ptp_caps.settime = igb_ptp_settime_82576; - adapter->ptp_caps.enable = igb_ptp_enable; - adapter->cc.read = igb_ptp_read_82576; - adapter->cc.mask = CLOCKSOURCE_MASK(64); - adapter->cc.mult = 1; - adapter->cc.shift = IGB_82576_TSYNC_SHIFT; - /* Dial the nominal frequency. 
*/ - E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | - INCVALUE_82576); - break; - case e1000_82580: - case e1000_i350: - case e1000_i354: - snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); - adapter->ptp_caps.owner = THIS_MODULE; - adapter->ptp_caps.max_adj = 62499999; - adapter->ptp_caps.n_ext_ts = 0; - adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; - adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; - adapter->ptp_caps.gettime = igb_ptp_gettime_82576; - adapter->ptp_caps.settime = igb_ptp_settime_82576; - adapter->ptp_caps.enable = igb_ptp_enable; - adapter->cc.read = igb_ptp_read_82580; - adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580); - adapter->cc.mult = 1; - adapter->cc.shift = 0; - /* Enable the timer functions by clearing bit 31. */ - E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); - break; - case e1000_i210: - case e1000_i211: - snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); - adapter->ptp_caps.owner = THIS_MODULE; - adapter->ptp_caps.max_adj = 62499999; - adapter->ptp_caps.n_ext_ts = 0; - adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; - adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; - adapter->ptp_caps.gettime = igb_ptp_gettime_i210; - adapter->ptp_caps.settime = igb_ptp_settime_i210; - adapter->ptp_caps.enable = igb_ptp_enable; - /* Enable the timer functions by clearing bit 31. */ - E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); - break; - default: - adapter->ptp_clock = NULL; - return; - } - - E1000_WRITE_FLUSH(hw); - - spin_lock_init(&adapter->tmreg_lock); - INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); - - /* Initialize the clock and overflow work for devices that need it. 
*/ - if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { - struct timespec ts = ktime_to_timespec(ktime_get_real()); - - igb_ptp_settime_i210(&adapter->ptp_caps, &ts); - } else { - timecounter_init(&adapter->tc, &adapter->cc, - ktime_to_ns(ktime_get_real())); - - INIT_DELAYED_WORK(&adapter->ptp_overflow_work, - igb_ptp_overflow_check); - - schedule_delayed_work(&adapter->ptp_overflow_work, - IGB_SYSTIM_OVERFLOW_PERIOD); - } - - /* Initialize the time sync interrupts for devices that support it. */ - if (hw->mac.type >= e1000_82580) { - E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS); - E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS); - } - - adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, - &adapter->pdev->dev); - if (IS_ERR(adapter->ptp_clock)) { - adapter->ptp_clock = NULL; - dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); - } else { - dev_info(&adapter->pdev->dev, "added PHC on %s\n", - adapter->netdev->name); - adapter->flags |= IGB_FLAG_PTP; - } -} - -/** - * igb_ptp_stop - Disable PTP device and stop the overflow check. - * @adapter: Board private structure. - * - * This function stops the PTP support and cancels the delayed work. - **/ -void igb_ptp_stop(struct igb_adapter *adapter) -{ - switch (adapter->hw.mac.type) { - case e1000_82576: - case e1000_82580: - case e1000_i350: - case e1000_i354: - cancel_delayed_work_sync(&adapter->ptp_overflow_work); - break; - case e1000_i210: - case e1000_i211: - /* No delayed work to cancel. */ - break; - default: - return; - } - - cancel_work_sync(&adapter->ptp_tx_work); - if (adapter->ptp_tx_skb) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - } - - if (adapter->ptp_clock) { - ptp_clock_unregister(adapter->ptp_clock); - dev_info(&adapter->pdev->dev, "removed PHC on %s\n", - adapter->netdev->name); - adapter->flags &= ~IGB_FLAG_PTP; - } -} - -/** - * igb_ptp_reset - Re-enable the adapter for PTP following a reset. - * @adapter: Board private structure. 
- * - * This function handles the reset work required to re-enable the PTP device. - **/ -void igb_ptp_reset(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - - if (!(adapter->flags & IGB_FLAG_PTP)) - return; - - switch (adapter->hw.mac.type) { - case e1000_82576: - /* Dial the nominal frequency. */ - E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | - INCVALUE_82576); - break; - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_i210: - case e1000_i211: - /* Enable the timer functions and interrupts. */ - E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); - E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS); - E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS); - break; - default: - /* No work to do. */ - return; - } - - /* Re-initialize the timer. */ - if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { - struct timespec ts = ktime_to_timespec(ktime_get_real()); - - igb_ptp_settime_i210(&adapter->ptp_caps, &ts); - } else { - timecounter_init(&adapter->tc, &adapter->cc, - ktime_to_ns(ktime_get_real())); - } -} diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h index 18da64a3..9d49b45e 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c index 015c8952..205da562 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h index e51e7c4e..c6d4c568 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c deleted file mode 100644 index bde3a83c..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c +++ /dev/null @@ -1,1482 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. 
- - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "igb.h" -#include "kcompat.h" - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) -/* From lib/vsprintf.c */ -#include <asm/div64.h> - -static int skip_atoi(const char **s) -{ - int i=0; - - while (isdigit(**s)) - i = i*10 + *((*s)++) - '0'; - return i; -} - -#define _kc_ZEROPAD 1 /* pad with zero */ -#define _kc_SIGN 2 /* unsigned/signed long */ -#define _kc_PLUS 4 /* show plus */ -#define _kc_SPACE 8 /* space if plus */ -#define _kc_LEFT 16 /* left justified */ -#define _kc_SPECIAL 32 /* 0x */ -#define _kc_LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ - -static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type) -{ - char c,sign,tmp[66]; - const char *digits; - const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; - const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - int i; - - digits = (type & _kc_LARGE) ? large_digits : small_digits; - if (type & _kc_LEFT) - type &= ~_kc_ZEROPAD; - if (base < 2 || base > 36) - return 0; - c = (type & _kc_ZEROPAD) ? 
'0' : ' '; - sign = 0; - if (type & _kc_SIGN) { - if (num < 0) { - sign = '-'; - num = -num; - size--; - } else if (type & _kc_PLUS) { - sign = '+'; - size--; - } else if (type & _kc_SPACE) { - sign = ' '; - size--; - } - } - if (type & _kc_SPECIAL) { - if (base == 16) - size -= 2; - else if (base == 8) - size--; - } - i = 0; - if (num == 0) - tmp[i++]='0'; - else while (num != 0) - tmp[i++] = digits[do_div(num,base)]; - if (i > precision) - precision = i; - size -= precision; - if (!(type&(_kc_ZEROPAD+_kc_LEFT))) { - while(size-->0) { - if (buf <= end) - *buf = ' '; - ++buf; - } - } - if (sign) { - if (buf <= end) - *buf = sign; - ++buf; - } - if (type & _kc_SPECIAL) { - if (base==8) { - if (buf <= end) - *buf = '0'; - ++buf; - } else if (base==16) { - if (buf <= end) - *buf = '0'; - ++buf; - if (buf <= end) - *buf = digits[33]; - ++buf; - } - } - if (!(type & _kc_LEFT)) { - while (size-- > 0) { - if (buf <= end) - *buf = c; - ++buf; - } - } - while (i < precision--) { - if (buf <= end) - *buf = '0'; - ++buf; - } - while (i-- > 0) { - if (buf <= end) - *buf = tmp[i]; - ++buf; - } - while (size-- > 0) { - if (buf <= end) - *buf = ' '; - ++buf; - } - return buf; -} - -int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) -{ - int len; - unsigned long long num; - int i, base; - char *str, *end, c; - const char *s; - - int flags; /* flags to number() */ - - int field_width; /* width of output field */ - int precision; /* min. # of digits for integers; max - number of chars for from string */ - int qualifier; /* 'h', 'l', or 'L' for integer fields */ - /* 'z' support added 23/7/1999 S.H. 
*/ - /* 'z' changed to 'Z' --davidm 1/25/99 */ - - str = buf; - end = buf + size - 1; - - if (end < buf - 1) { - end = ((void *) -1); - size = end - buf + 1; - } - - for (; *fmt ; ++fmt) { - if (*fmt != '%') { - if (str <= end) - *str = *fmt; - ++str; - continue; - } - - /* process flags */ - flags = 0; - repeat: - ++fmt; /* this also skips first '%' */ - switch (*fmt) { - case '-': flags |= _kc_LEFT; goto repeat; - case '+': flags |= _kc_PLUS; goto repeat; - case ' ': flags |= _kc_SPACE; goto repeat; - case '#': flags |= _kc_SPECIAL; goto repeat; - case '0': flags |= _kc_ZEROPAD; goto repeat; - } - - /* get field width */ - field_width = -1; - if (isdigit(*fmt)) - field_width = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - field_width = va_arg(args, int); - if (field_width < 0) { - field_width = -field_width; - flags |= _kc_LEFT; - } - } - - /* get the precision */ - precision = -1; - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) - precision = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - precision = va_arg(args, int); - } - if (precision < 0) - precision = 0; - } - - /* get the conversion qualifier */ - qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { - qualifier = *fmt; - ++fmt; - } - - /* default base */ - base = 10; - - switch (*fmt) { - case 'c': - if (!(flags & _kc_LEFT)) { - while (--field_width > 0) { - if (str <= end) - *str = ' '; - ++str; - } - } - c = (unsigned char) va_arg(args, int); - if (str <= end) - *str = c; - ++str; - while (--field_width > 0) { - if (str <= end) - *str = ' '; - ++str; - } - continue; - - case 's': - s = va_arg(args, char *); - if (!s) - s = "<NULL>"; - - len = strnlen(s, precision); - - if (!(flags & _kc_LEFT)) { - while (len < field_width--) { - if (str <= end) - *str = ' '; - ++str; - } - } - for (i = 0; i < len; ++i) { - if (str <= end) - *str = *s; - ++str; ++s; - } - while (len < field_width--) { - if (str <= 
end) - *str = ' '; - ++str; - } - continue; - - case 'p': - if (field_width == -1) { - field_width = 2*sizeof(void *); - flags |= _kc_ZEROPAD; - } - str = number(str, end, - (unsigned long) va_arg(args, void *), - 16, field_width, precision, flags); - continue; - - - case 'n': - /* FIXME: - * What does C99 say about the overflow case here? */ - if (qualifier == 'l') { - long * ip = va_arg(args, long *); - *ip = (str - buf); - } else if (qualifier == 'Z') { - size_t * ip = va_arg(args, size_t *); - *ip = (str - buf); - } else { - int * ip = va_arg(args, int *); - *ip = (str - buf); - } - continue; - - case '%': - if (str <= end) - *str = '%'; - ++str; - continue; - - /* integer number formats - set up the flags and "break" */ - case 'o': - base = 8; - break; - - case 'X': - flags |= _kc_LARGE; - case 'x': - base = 16; - break; - - case 'd': - case 'i': - flags |= _kc_SIGN; - case 'u': - break; - - default: - if (str <= end) - *str = '%'; - ++str; - if (*fmt) { - if (str <= end) - *str = *fmt; - ++str; - } else { - --fmt; - } - continue; - } - if (qualifier == 'L') - num = va_arg(args, long long); - else if (qualifier == 'l') { - num = va_arg(args, unsigned long); - if (flags & _kc_SIGN) - num = (signed long) num; - } else if (qualifier == 'Z') { - num = va_arg(args, size_t); - } else if (qualifier == 'h') { - num = (unsigned short) va_arg(args, int); - if (flags & _kc_SIGN) - num = (signed short) num; - } else { - num = va_arg(args, unsigned int); - if (flags & _kc_SIGN) - num = (signed int) num; - } - str = number(str, end, num, base, - field_width, precision, flags); - } - if (str <= end) - *str = '\0'; - else if (size > 0) - /* don't write out a null byte if the buf size is zero */ - *end = '\0'; - /* the trailing null byte doesn't count towards the total - * ++str; - */ - return str-buf; -} - -int _kc_snprintf(char * buf, size_t size, const char *fmt, ...) 
-{ - va_list args; - int i; - - va_start(args, fmt); - i = _kc_vsnprintf(buf,size,fmt,args); - va_end(args); - return i; -} -#endif /* < 2.4.8 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) - -/**************************************/ -/* PCI DMA MAPPING */ - -#if defined(CONFIG_HIGHMEM) - -#ifndef PCI_DRAM_OFFSET -#define PCI_DRAM_OFFSET 0 -#endif - -u64 -_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, - size_t size, int direction) -{ - return (((u64) (page - mem_map) << PAGE_SHIFT) + offset + - PCI_DRAM_OFFSET); -} - -#else /* CONFIG_HIGHMEM */ - -u64 -_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, - size_t size, int direction) -{ - return pci_map_single(dev, (void *)page_address(page) + offset, size, - direction); -} - -#endif /* CONFIG_HIGHMEM */ - -void -_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, - int direction) -{ - return pci_unmap_single(dev, dma_addr, size, direction); -} - -#endif /* 2.4.13 => 2.4.3 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) - -/**************************************/ -/* PCI DRIVER API */ - -int -_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) -{ - if (!pci_dma_supported(dev, mask)) - return -EIO; - dev->dma_mask = mask; - return 0; -} - -int -_kc_pci_request_regions(struct pci_dev *dev, char *res_name) -{ - int i; - - for (i = 0; i < 6; i++) { - if (pci_resource_len(dev, i) == 0) - continue; - - if (pci_resource_flags(dev, i) & IORESOURCE_IO) { - if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { - pci_release_regions(dev); - return -EBUSY; - } - } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) { - if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { - pci_release_regions(dev); 
- return -EBUSY; - } - } - } - return 0; -} - -void -_kc_pci_release_regions(struct pci_dev *dev) -{ - int i; - - for (i = 0; i < 6; i++) { - if (pci_resource_len(dev, i) == 0) - continue; - - if (pci_resource_flags(dev, i) & IORESOURCE_IO) - release_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); - - else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) - release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); - } -} - -/**************************************/ -/* NETWORK DRIVER API */ - -struct net_device * -_kc_alloc_etherdev(int sizeof_priv) -{ - struct net_device *dev; - int alloc_size; - - alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31; - dev = kzalloc(alloc_size, GFP_KERNEL); - if (!dev) - return NULL; - - if (sizeof_priv) - dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31); - dev->name[0] = '\0'; - ether_setup(dev); - - return dev; -} - -int -_kc_is_valid_ether_addr(u8 *addr) -{ - const char zaddr[6] = { 0, }; - - return !(addr[0] & 1) && memcmp(addr, zaddr, 6); -} - -#endif /* 2.4.3 => 2.4.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) - -int -_kc_pci_set_power_state(struct pci_dev *dev, int state) -{ - return 0; -} - -int -_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable) -{ - return 0; -} - -#endif /* 2.4.6 => 2.4.3 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) -void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, - int off, int size) -{ - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - frag->page = page; - frag->page_offset = off; - frag->size = size; - skb_shinfo(skb)->nr_frags = i + 1; -} - -/* - * Original Copyright: - * find_next_bit.c: fallback find next bit implementation - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 
- * Written by David Howells (dhowells@redhat.com) - */ - -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + ffs(tmp); -} - -size_t _kc_strlcpy(char *dest, const char *src, size_t size) -{ - size_t ret = strlen(src); - - if (size) { - size_t len = (ret >= size) ? 
size - 1 : ret; - memcpy(dest, src, len); - dest[len] = '\0'; - } - return ret; -} - -#ifndef do_div -#if BITS_PER_LONG == 32 -uint32_t __attribute__((weak)) _kc__div64_32(uint64_t *n, uint32_t base) -{ - uint64_t rem = *n; - uint64_t b = base; - uint64_t res, d = 1; - uint32_t high = rem >> 32; - - /* Reduce the thing a bit first */ - res = 0; - if (high >= base) { - high /= base; - res = (uint64_t) high << 32; - rem -= (uint64_t) (high*base) << 32; - } - - while ((int64_t)b > 0 && b < rem) { - b = b+b; - d = d+d; - } - - do { - if (rem >= b) { - rem -= b; - res += d; - } - b >>= 1; - d >>= 1; - } while (d); - - *n = res; - return rem; -} -#endif /* BITS_PER_LONG == 32 */ -#endif /* do_div */ -#endif /* 2.6.0 => 2.4.6 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) -int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...) -{ - va_list args; - int i; - - va_start(args, fmt); - i = vsnprintf(buf, size, fmt, args); - va_end(args); - return (i >= size) ? 
(size - 1) : i; -} -#endif /* < 2.6.4 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) -DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1}; -#endif /* < 2.6.10 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) -char *_kc_kstrdup(const char *s, unsigned int gfp) -{ - size_t len; - char *buf; - - if (!s) - return NULL; - - len = strlen(s) + 1; - buf = kmalloc(len, gfp); - if (buf) - memcpy(buf, s, len); - return buf; -} -#endif /* < 2.6.13 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) -void *_kc_kzalloc(size_t size, int flags) -{ - void *ret = kmalloc(size, flags); - if (ret) - memset(ret, 0, size); - return ret; -} -#endif /* <= 2.6.13 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) -int _kc_skb_pad(struct sk_buff *skb, int pad) -{ - int ntail; - - /* If the skbuff is non linear tailroom is always zero.. 
*/ - if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) { - memset(skb->data+skb->len, 0, pad); - return 0; - } - - ntail = skb->data_len + pad - (skb->end - skb->tail); - if (likely(skb_cloned(skb) || ntail > 0)) { - if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC)); - goto free_skb; - } - -#ifdef MAX_SKB_FRAGS - if (skb_is_nonlinear(skb) && - !__pskb_pull_tail(skb, skb->data_len)) - goto free_skb; - -#endif - memset(skb->data + skb->len, 0, pad); - return 0; - -free_skb: - kfree_skb(skb); - return -ENOMEM; -} - -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) -int _kc_pci_save_state(struct pci_dev *pdev) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct adapter_struct *adapter = netdev_priv(netdev); - int size = PCI_CONFIG_SPACE_LEN, i; - u16 pcie_cap_offset, pcie_link_status; - -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) - /* no ->dev for 2.4 kernels */ - WARN_ON(pdev->dev.driver_data == NULL); -#endif - pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); - if (pcie_cap_offset) { - if (!pci_read_config_word(pdev, - pcie_cap_offset + PCIE_LINK_STATUS, - &pcie_link_status)) - size = PCIE_CONFIG_SPACE_LEN; - } - pci_config_space_ich8lan(); -#ifdef HAVE_PCI_ERS - if (adapter->config_space == NULL) -#else - WARN_ON(adapter->config_space != NULL); -#endif - adapter->config_space = kmalloc(size, GFP_KERNEL); - if (!adapter->config_space) { - printk(KERN_ERR "Out of memory in pci_save_state\n"); - return -ENOMEM; - } - for (i = 0; i < (size / 4); i++) - pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]); - return 0; -} - -void _kc_pci_restore_state(struct pci_dev *pdev) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct adapter_struct *adapter = netdev_priv(netdev); - int size = PCI_CONFIG_SPACE_LEN, i; - u16 pcie_cap_offset; - u16 pcie_link_status; - - if (adapter->config_space != NULL) { - pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); - if (pcie_cap_offset && - 
!pci_read_config_word(pdev, - pcie_cap_offset + PCIE_LINK_STATUS, - &pcie_link_status)) - size = PCIE_CONFIG_SPACE_LEN; - - pci_config_space_ich8lan(); - for (i = 0; i < (size / 4); i++) - pci_write_config_dword(pdev, i * 4, adapter->config_space[i]); -#ifndef HAVE_PCI_ERS - kfree(adapter->config_space); - adapter->config_space = NULL; -#endif - } -} -#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */ - -#ifdef HAVE_PCI_ERS -void _kc_free_netdev(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - - if (adapter->config_space != NULL) - kfree(adapter->config_space); -#ifdef CONFIG_SYSFS - if (netdev->reg_state == NETREG_UNINITIALIZED) { - kfree((char *)netdev - netdev->padded); - } else { - BUG_ON(netdev->reg_state != NETREG_UNREGISTERED); - netdev->reg_state = NETREG_RELEASED; - class_device_put(&netdev->class_dev); - } -#else - kfree((char *)netdev - netdev->padded); -#endif -} -#endif - -void *_kc_kmemdup(const void *src, size_t len, unsigned gfp) -{ - void *p; - - p = kzalloc(len, gfp); - if (p) - memcpy(p, src, len); - return p; -} -#endif /* <= 2.6.19 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) -struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev) -{ - return ((struct adapter_struct *)netdev_priv(netdev))->pdev; -} -#endif /* < 2.6.21 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) -/* hexdump code taken from lib/hexdump.c */ -static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize, - int groupsize, unsigned char *linebuf, - size_t linebuflen, bool ascii) -{ - const u8 *ptr = buf; - u8 ch; - int j, lx = 0; - int ascii_column; - - if (rowsize != 16 && rowsize != 32) - rowsize = 16; - - if (!len) - goto nil; - if (len > rowsize) /* limit to one line at a time */ - len = rowsize; - if ((len % groupsize) != 0) /* no mixed 
size output */ - groupsize = 1; - - switch (groupsize) { - case 8: { - const u64 *ptr8 = buf; - int ngroups = len / groupsize; - - for (j = 0; j < ngroups; j++) - lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, - "%s%16.16llx", j ? " " : "", - (unsigned long long)*(ptr8 + j)); - ascii_column = 17 * ngroups + 2; - break; - } - - case 4: { - const u32 *ptr4 = buf; - int ngroups = len / groupsize; - - for (j = 0; j < ngroups; j++) - lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, - "%s%8.8x", j ? " " : "", *(ptr4 + j)); - ascii_column = 9 * ngroups + 2; - break; - } - - case 2: { - const u16 *ptr2 = buf; - int ngroups = len / groupsize; - - for (j = 0; j < ngroups; j++) - lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, - "%s%4.4x", j ? " " : "", *(ptr2 + j)); - ascii_column = 5 * ngroups + 2; - break; - } - - default: - for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) { - ch = ptr[j]; - linebuf[lx++] = hex_asc(ch >> 4); - linebuf[lx++] = hex_asc(ch & 0x0f); - linebuf[lx++] = ' '; - } - if (j) - lx--; - - ascii_column = 3 * rowsize + 2; - break; - } - if (!ascii) - goto nil; - - while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) - linebuf[lx++] = ' '; - for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) - linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? 
ptr[j] - : '.'; -nil: - linebuf[lx++] = '\0'; -} - -void _kc_print_hex_dump(const char *level, - const char *prefix_str, int prefix_type, - int rowsize, int groupsize, - const void *buf, size_t len, bool ascii) -{ - const u8 *ptr = buf; - int i, linelen, remaining = len; - unsigned char linebuf[200]; - - if (rowsize != 16 && rowsize != 32) - rowsize = 16; - - for (i = 0; i < len; i += rowsize) { - linelen = min(remaining, rowsize); - remaining -= rowsize; - _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, - linebuf, sizeof(linebuf), ascii); - - switch (prefix_type) { - case DUMP_PREFIX_ADDRESS: - printk("%s%s%*p: %s\n", level, prefix_str, - (int)(2 * sizeof(void *)), ptr + i, linebuf); - break; - case DUMP_PREFIX_OFFSET: - printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); - break; - default: - printk("%s%s%s\n", level, prefix_str, linebuf); - break; - } - } -} - -#ifdef HAVE_I2C_SUPPORT -struct i2c_client * -_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info) -{ - struct i2c_client *client; - int status; - - client = kzalloc(sizeof *client, GFP_KERNEL); - if (!client) - return NULL; - - client->adapter = adap; - - client->dev.platform_data = info->platform_data; - - client->flags = info->flags; - client->addr = info->addr; - - strlcpy(client->name, info->type, sizeof(client->name)); - - /* Check for address business */ - status = i2c_check_addr(adap, client->addr); - if (status) - goto out_err; - - client->dev.parent = &client->adapter->dev; - client->dev.bus = &i2c_bus_type; - - status = i2c_attach_client(client); - if (status) - goto out_err; - - dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n", - client->name, dev_name(&client->dev)); - - return client; - -out_err: - dev_err(&adap->dev, "Failed to register i2c client %s at 0x%02x " - "(%d)\n", client->name, client->addr, status); - kfree(client); - return NULL; -} -#endif /* HAVE_I2C_SUPPORT */ -#endif /* < 2.6.22 */ - 
-/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) -#ifdef NAPI -struct net_device *napi_to_poll_dev(const struct napi_struct *napi) -{ - struct adapter_q_vector *q_vector = container_of(napi, - struct adapter_q_vector, - napi); - return &q_vector->poll_dev; -} - -int __kc_adapter_clean(struct net_device *netdev, int *budget) -{ - int work_done; - int work_to_do = min(*budget, netdev->quota); - /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */ - struct napi_struct *napi = netdev->priv; - work_done = napi->poll(napi, work_to_do); - *budget -= work_done; - netdev->quota -= work_done; - return (work_done >= work_to_do) ? 1 : 0; -} -#endif /* NAPI */ -#endif /* <= 2.6.24 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) -void _kc_pci_disable_link_state(struct pci_dev *pdev, int state) -{ - struct pci_dev *parent = pdev->bus->self; - u16 link_state; - int pos; - - if (!parent) - return; - - pos = pci_find_capability(parent, PCI_CAP_ID_EXP); - if (pos) { - pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state); - link_state &= ~state; - pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state); - } -} -#endif /* < 2.6.26 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) -#ifdef HAVE_TX_MQ -void _kc_netif_tx_stop_all_queues(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - int i; - - netif_stop_queue(netdev); - if (netif_is_multiqueue(netdev)) - for (i = 0; i < adapter->num_tx_queues; i++) - netif_stop_subqueue(netdev, i); -} -void _kc_netif_tx_wake_all_queues(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - int i; - - netif_wake_queue(netdev); - if (netif_is_multiqueue(netdev)) - for (i = 0; i < 
adapter->num_tx_queues; i++) - netif_wake_subqueue(netdev, i); -} -void _kc_netif_tx_start_all_queues(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - int i; - - netif_start_queue(netdev); - if (netif_is_multiqueue(netdev)) - for (i = 0; i < adapter->num_tx_queues; i++) - netif_start_subqueue(netdev, i); -} -#endif /* HAVE_TX_MQ */ - -#ifndef __WARN_printf -void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...) -{ - va_list args; - - printk(KERN_WARNING "------------[ cut here ]------------\n"); - printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line); - va_start(args, fmt); - vprintk(fmt, args); - va_end(args); - - dump_stack(); -} -#endif /* __WARN_printf */ -#endif /* < 2.6.27 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) - -int -_kc_pci_prepare_to_sleep(struct pci_dev *dev) -{ - pci_power_t target_state; - int error; - - target_state = pci_choose_state(dev, PMSG_SUSPEND); - - pci_enable_wake(dev, target_state, true); - - error = pci_set_power_state(dev, target_state); - - if (error) - pci_enable_wake(dev, target_state, false); - - return error; -} - -int -_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable) -{ - int err; - - err = pci_enable_wake(dev, PCI_D3cold, enable); - if (err) - goto out; - - err = pci_enable_wake(dev, PCI_D3hot, enable); - -out: - return err; -} -#endif /* < 2.6.28 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) ) -static void __kc_pci_set_master(struct pci_dev *pdev, bool enable) -{ - u16 old_cmd, cmd; - - pci_read_config_word(pdev, PCI_COMMAND, &old_cmd); - if (enable) - cmd = old_cmd | PCI_COMMAND_MASTER; - else - cmd = old_cmd & ~PCI_COMMAND_MASTER; - if (cmd != old_cmd) { - dev_dbg(pci_dev_to_dev(pdev), "%s bus mastering\n", - enable ? 
"enabling" : "disabling"); - pci_write_config_word(pdev, PCI_COMMAND, cmd); - } -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7) ) - pdev->is_busmaster = enable; -#endif -} - -void _kc_pci_clear_master(struct pci_dev *dev) -{ - __kc_pci_set_master(dev, false); -} -#endif /* < 2.6.29 */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) ) -#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) -int _kc_pci_num_vf(struct pci_dev *dev) -{ - int num_vf = 0; -#ifdef CONFIG_PCI_IOV - struct pci_dev *vfdev; - - /* loop through all ethernet devices starting at PF dev */ - vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, NULL); - while (vfdev) { - if (vfdev->is_virtfn && vfdev->physfn == dev) - num_vf++; - - vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, vfdev); - } - -#endif - return num_vf; -} -#endif /* RHEL_RELEASE_CODE */ -#endif /* < 2.6.34 */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) -#ifdef HAVE_TX_MQ -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0))) -#ifndef CONFIG_NETDEVICES_MULTIQUEUE -void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) -{ - unsigned int real_num = dev->real_num_tx_queues; - struct Qdisc *qdisc; - int i; - - if (unlikely(txq > dev->num_tx_queues)) - ; - else if (txq > real_num) - dev->real_num_tx_queues = txq; - else if ( txq < real_num) { - dev->real_num_tx_queues = txq; - for (i = txq; i < dev->num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; - if (qdisc) { - spin_lock_bh(qdisc_lock(qdisc)); - qdisc_reset(qdisc); - spin_unlock_bh(qdisc_lock(qdisc)); - } - } - } -} -#endif /* CONFIG_NETDEVICES_MULTIQUEUE */ -#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */ -#endif /* HAVE_TX_MQ */ - -ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos, - const void __user *from, size_t count) -{ - loff_t pos = *ppos; - size_t res; - - if (pos < 0) - return -EINVAL; - if (pos >= available || !count) - return 0; - if (count > 
available - pos) - count = available - pos; - res = copy_from_user(to + pos, from, count); - if (res == count) - return -EFAULT; - count -= res; - *ppos = pos + count; - return count; -} - -#endif /* < 2.6.35 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) -static const u32 _kc_flags_dup_features = - (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); - -u32 _kc_ethtool_op_get_flags(struct net_device *dev) -{ - return dev->features & _kc_flags_dup_features; -} - -int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) -{ - if (data & ~supported) - return -EINVAL; - - dev->features = ((dev->features & ~_kc_flags_dup_features) | - (data & _kc_flags_dup_features)); - return 0; -} -#endif /* < 2.6.36 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0))) - - - -#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */ -#endif /* < 2.6.39 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) -void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, - int off, int size, unsigned int truesize) -{ - skb_fill_page_desc(skb, i, page, off, size); - skb->len += size; - skb->data_len += size; - skb->truesize += truesize; -} - -int _kc_simple_open(struct inode *inode, struct file *file) -{ - if (inode->i_private) - file->private_data = inode->i_private; - - return 0; -} - -#endif /* < 3.4.0 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) ) -#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \ - !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) -static inline int 
__kc_pcie_cap_version(struct pci_dev *dev) -{ - int pos; - u16 reg16; - - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (!pos) - return 0; - pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &reg16); - return reg16 & PCI_EXP_FLAGS_VERS; -} - -static inline bool __kc_pcie_cap_has_devctl(const struct pci_dev __always_unused *dev) -{ - return true; -} - -static inline bool __kc_pcie_cap_has_lnkctl(struct pci_dev *dev) -{ - int type = pci_pcie_type(dev); - - return __kc_pcie_cap_version(dev) > 1 || - type == PCI_EXP_TYPE_ROOT_PORT || - type == PCI_EXP_TYPE_ENDPOINT || - type == PCI_EXP_TYPE_LEG_END; -} - -static inline bool __kc_pcie_cap_has_sltctl(struct pci_dev *dev) -{ - int type = pci_pcie_type(dev); - int pos; - u16 pcie_flags_reg; - - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (!pos) - return 0; - pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &pcie_flags_reg); - - return __kc_pcie_cap_version(dev) > 1 || - type == PCI_EXP_TYPE_ROOT_PORT || - (type == PCI_EXP_TYPE_DOWNSTREAM && - pcie_flags_reg & PCI_EXP_FLAGS_SLOT); -} - -static inline bool __kc_pcie_cap_has_rtctl(struct pci_dev *dev) -{ - int type = pci_pcie_type(dev); - - return __kc_pcie_cap_version(dev) > 1 || - type == PCI_EXP_TYPE_ROOT_PORT || - type == PCI_EXP_TYPE_RC_EC; -} - -static bool __kc_pcie_capability_reg_implemented(struct pci_dev *dev, int pos) -{ - if (!pci_is_pcie(dev)) - return false; - - switch (pos) { - case PCI_EXP_FLAGS_TYPE: - return true; - case PCI_EXP_DEVCAP: - case PCI_EXP_DEVCTL: - case PCI_EXP_DEVSTA: - return __kc_pcie_cap_has_devctl(dev); - case PCI_EXP_LNKCAP: - case PCI_EXP_LNKCTL: - case PCI_EXP_LNKSTA: - return __kc_pcie_cap_has_lnkctl(dev); - case PCI_EXP_SLTCAP: - case PCI_EXP_SLTCTL: - case PCI_EXP_SLTSTA: - return __kc_pcie_cap_has_sltctl(dev); - case PCI_EXP_RTCTL: - case PCI_EXP_RTCAP: - case PCI_EXP_RTSTA: - return __kc_pcie_cap_has_rtctl(dev); - case PCI_EXP_DEVCAP2: - case PCI_EXP_DEVCTL2: - case PCI_EXP_LNKCAP2: - case PCI_EXP_LNKCTL2: - case 
PCI_EXP_LNKSTA2: - return __kc_pcie_cap_version(dev) > 1; - default: - return false; - } -} - -/* - * Note that these accessor functions are only for the "PCI Express - * Capability" (see PCIe spec r3.0, sec 7.8). They do not apply to the - * other "PCI Express Extended Capabilities" (AER, VC, ACS, MFVC, etc.) - */ -int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val) -{ - int ret; - - *val = 0; - if (pos & 1) - return -EINVAL; - - if (__kc_pcie_capability_reg_implemented(dev, pos)) { - ret = pci_read_config_word(dev, pci_pcie_cap(dev) + pos, val); - /* - * Reset *val to 0 if pci_read_config_word() fails, it may - * have been written as 0xFFFF if hardware error happens - * during pci_read_config_word(). - */ - if (ret) - *val = 0; - return ret; - } - - /* - * For Functions that do not implement the Slot Capabilities, - * Slot Status, and Slot Control registers, these spaces must - * be hardwired to 0b, with the exception of the Presence Detect - * State bit in the Slot Status register of Downstream Ports, - * which must be hardwired to 1b. 
(PCIe Base Spec 3.0, sec 7.8) - */ - if (pci_is_pcie(dev) && pos == PCI_EXP_SLTSTA && - pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) { - *val = PCI_EXP_SLTSTA_PDS; - } - - return 0; -} - -int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val) -{ - if (pos & 1) - return -EINVAL; - - if (!__kc_pcie_capability_reg_implemented(dev, pos)) - return 0; - - return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val); -} - -int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos, - u16 clear, u16 set) -{ - int ret; - u16 val; - - ret = __kc_pcie_capability_read_word(dev, pos, &val); - if (!ret) { - val &= ~clear; - val |= set; - ret = __kc_pcie_capability_write_word(dev, pos, val); - } - - return ret; -} -#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \ - !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) */ -#endif /* < 3.7.0 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) ) -#endif /* 3.9.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) -#ifdef CONFIG_PCI_IOV -int __kc_pci_vfs_assigned(struct pci_dev *dev) -{ - unsigned int vfs_assigned = 0; -#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED - int pos; - struct pci_dev *vfdev; - unsigned short dev_id; - - /* only search if we are a PF */ - if (!dev->is_physfn) - return 0; - - /* find SR-IOV capability */ - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); - if (!pos) - return 0; - - /* - * determine the device ID for the VFs, the vendor ID will be the - * same as the PF so there is no need to check for that one - */ - pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id); - - /* loop through all the VFs to see if we own any that are assigned */ - vfdev = pci_get_device(dev->vendor, dev_id, NULL); - while (vfdev) { - /* - * It is considered assigned if it 
is a virtual function with - * our dev as the physical function and the assigned bit is set - */ - if (vfdev->is_virtfn && (vfdev->physfn == dev) && - (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)) - vfs_assigned++; - - vfdev = pci_get_device(dev->vendor, dev_id, vfdev); - } - -#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */ - return vfs_assigned; -} - -#endif /* CONFIG_PCI_IOV */ -#endif /* 3.10.0 */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h index e2cf71e0..84826b26 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> @@ -3891,7 +3891,7 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) #if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) ) \ || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) #define HAVE_NDO_DFLT_BRIDGE_ADD_MASK -#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) ) #define HAVE_NDO_FDB_ADD_VID #endif /* !RHEL 7.2 */ #endif /* >= 3.19.0 */ @@ -3901,12 +3901,13 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) /* vlan_tx_xx functions got renamed to skb_vlan */ #define vlan_tx_tag_get skb_vlan_tag_get #define vlan_tx_tag_present skb_vlan_tag_present -#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) ) #define HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS #endif /* !RHEL 7.2 */ #endif /* 4.0.0 */ -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) ) +#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) ) \ + || ( RHEL_RELEASE_CODE >= 
RHEL_RELEASE_VERSION(7,3) )) /* ndo_bridge_getlink adds new nlflags parameter */ #define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS #endif /* >= 4.1.0 */ @@ -3915,4 +3916,21 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) /* ndo_bridge_getlink adds new filter_mask and vlan_fill parameters */ #define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL #endif /* >= 4.2.0 */ + +/* + * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4) + * For older kernels backported this commit, need to use renamed functions. + * This fix is specific to RedHat/CentOS kernels. + */ +#if (defined(RHEL_RELEASE_CODE) && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))) +#define vlan_tx_tag_get skb_vlan_tag_get +#define vlan_tx_tag_present skb_vlan_tag_present +#endif + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) ) +#define HAVE_VF_VLAN_PROTO +#endif /* >= 4.9.0 */ + #endif /* _KCOMPAT_H_ */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c deleted file mode 100644 index e1a89388..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c +++ /dev/null @@ -1,1171 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. 
- - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -/* - * net/core/ethtool.c - Ethtool ioctl handler - * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx> - * - * This file is where we call all the ethtool_ops commands to get - * the information ethtool needs. We fall back to calling do_ioctl() - * for drivers which haven't been converted to ethtool_ops yet. - * - * It's GPL, stupid. - * - * Modification by sfeldma@pobox.com to work as backward compat - * solution for pre-ethtool_ops kernels. - * - copied struct ethtool_ops from ethtool.h - * - defined SET_ETHTOOL_OPS - * - put in some #ifndef NETIF_F_xxx wrappers - * - changes refs to dev->ethtool_ops to ethtool_ops - * - changed dev_ethtool to ethtool_ioctl - * - remove EXPORT_SYMBOL()s - * - added _kc_ prefix in built-in ethtool_op_xxx ops. 
- */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/mii.h> -#include <linux/ethtool.h> -#include <linux/netdevice.h> -#include <asm/uaccess.h> - -#include "kcompat.h" - -#undef SUPPORTED_10000baseT_Full -#define SUPPORTED_10000baseT_Full (1 << 12) -#undef ADVERTISED_10000baseT_Full -#define ADVERTISED_10000baseT_Full (1 << 12) -#undef SPEED_10000 -#define SPEED_10000 10000 - -#undef ethtool_ops -#define ethtool_ops _kc_ethtool_ops - -struct _kc_ethtool_ops { - int (*get_settings)(struct net_device *, struct ethtool_cmd *); - int (*set_settings)(struct net_device *, struct ethtool_cmd *); - void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); - int (*get_regs_len)(struct net_device *); - void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); - void (*get_wol)(struct net_device *, struct ethtool_wolinfo *); - int (*set_wol)(struct net_device *, struct ethtool_wolinfo *); - u32 (*get_msglevel)(struct net_device *); - void (*set_msglevel)(struct net_device *, u32); - int (*nway_reset)(struct net_device *); - u32 (*get_link)(struct net_device *); - int (*get_eeprom_len)(struct net_device *); - int (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); - int (*set_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); - int (*get_coalesce)(struct net_device *, struct ethtool_coalesce *); - int (*set_coalesce)(struct net_device *, struct ethtool_coalesce *); - void (*get_ringparam)(struct net_device *, struct ethtool_ringparam *); - int (*set_ringparam)(struct net_device *, struct ethtool_ringparam *); - void (*get_pauseparam)(struct net_device *, - struct ethtool_pauseparam*); - int (*set_pauseparam)(struct net_device *, - struct ethtool_pauseparam*); - u32 (*get_rx_csum)(struct net_device *); - int (*set_rx_csum)(struct net_device *, u32); - u32 (*get_tx_csum)(struct net_device *); - int (*set_tx_csum)(struct net_device *, u32); - u32 (*get_sg)(struct net_device *); - int 
(*set_sg)(struct net_device *, u32); - u32 (*get_tso)(struct net_device *); - int (*set_tso)(struct net_device *, u32); - int (*self_test_count)(struct net_device *); - void (*self_test)(struct net_device *, struct ethtool_test *, u64 *); - void (*get_strings)(struct net_device *, u32 stringset, u8 *); - int (*phys_id)(struct net_device *, u32); - int (*get_stats_count)(struct net_device *); - void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, - u64 *); -} *ethtool_ops = NULL; - -#undef SET_ETHTOOL_OPS -#define SET_ETHTOOL_OPS(netdev, ops) (ethtool_ops = (ops)) - -/* - * Some useful ethtool_ops methods that are device independent. If we find that - * all drivers want to do the same thing here, we can turn these into dev_() - * function calls. - */ - -#undef ethtool_op_get_link -#define ethtool_op_get_link _kc_ethtool_op_get_link -u32 _kc_ethtool_op_get_link(struct net_device *dev) -{ - return netif_carrier_ok(dev) ? 1 : 0; -} - -#undef ethtool_op_get_tx_csum -#define ethtool_op_get_tx_csum _kc_ethtool_op_get_tx_csum -u32 _kc_ethtool_op_get_tx_csum(struct net_device *dev) -{ -#ifdef NETIF_F_IP_CSUM - return (dev->features & NETIF_F_IP_CSUM) != 0; -#else - return 0; -#endif -} - -#undef ethtool_op_set_tx_csum -#define ethtool_op_set_tx_csum _kc_ethtool_op_set_tx_csum -int _kc_ethtool_op_set_tx_csum(struct net_device *dev, u32 data) -{ -#ifdef NETIF_F_IP_CSUM - if (data) -#ifdef NETIF_F_IPV6_CSUM - dev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - else - dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); -#else - dev->features |= NETIF_F_IP_CSUM; - else - dev->features &= ~NETIF_F_IP_CSUM; -#endif -#endif - - return 0; -} - -#undef ethtool_op_get_sg -#define ethtool_op_get_sg _kc_ethtool_op_get_sg -u32 _kc_ethtool_op_get_sg(struct net_device *dev) -{ -#ifdef NETIF_F_SG - return (dev->features & NETIF_F_SG) != 0; -#else - return 0; -#endif -} - -#undef ethtool_op_set_sg -#define ethtool_op_set_sg _kc_ethtool_op_set_sg -int 
_kc_ethtool_op_set_sg(struct net_device *dev, u32 data) -{ -#ifdef NETIF_F_SG - if (data) - dev->features |= NETIF_F_SG; - else - dev->features &= ~NETIF_F_SG; -#endif - - return 0; -} - -#undef ethtool_op_get_tso -#define ethtool_op_get_tso _kc_ethtool_op_get_tso -u32 _kc_ethtool_op_get_tso(struct net_device *dev) -{ -#ifdef NETIF_F_TSO - return (dev->features & NETIF_F_TSO) != 0; -#else - return 0; -#endif -} - -#undef ethtool_op_set_tso -#define ethtool_op_set_tso _kc_ethtool_op_set_tso -int _kc_ethtool_op_set_tso(struct net_device *dev, u32 data) -{ -#ifdef NETIF_F_TSO - if (data) - dev->features |= NETIF_F_TSO; - else - dev->features &= ~NETIF_F_TSO; -#endif - - return 0; -} - -/* Handlers for each ethtool command */ - -static int ethtool_get_settings(struct net_device *dev, void *useraddr) -{ - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - int err; - - if (!ethtool_ops->get_settings) - return -EOPNOTSUPP; - - err = ethtool_ops->get_settings(dev, &cmd); - if (err < 0) - return err; - - if (copy_to_user(useraddr, &cmd, sizeof(cmd))) - return -EFAULT; - return 0; -} - -static int ethtool_set_settings(struct net_device *dev, void *useraddr) -{ - struct ethtool_cmd cmd; - - if (!ethtool_ops->set_settings) - return -EOPNOTSUPP; - - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) - return -EFAULT; - - return ethtool_ops->set_settings(dev, &cmd); -} - -static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr) -{ - struct ethtool_drvinfo info; - struct ethtool_ops *ops = ethtool_ops; - - if (!ops->get_drvinfo) - return -EOPNOTSUPP; - - memset(&info, 0, sizeof(info)); - info.cmd = ETHTOOL_GDRVINFO; - ops->get_drvinfo(dev, &info); - - if (ops->self_test_count) - info.testinfo_len = ops->self_test_count(dev); - if (ops->get_stats_count) - info.n_stats = ops->get_stats_count(dev); - if (ops->get_regs_len) - info.regdump_len = ops->get_regs_len(dev); - if (ops->get_eeprom_len) - info.eedump_len = ops->get_eeprom_len(dev); - - if (copy_to_user(useraddr, &info, 
sizeof(info))) - return -EFAULT; - return 0; -} - -static int ethtool_get_regs(struct net_device *dev, char *useraddr) -{ - struct ethtool_regs regs; - struct ethtool_ops *ops = ethtool_ops; - void *regbuf; - int reglen, ret; - - if (!ops->get_regs || !ops->get_regs_len) - return -EOPNOTSUPP; - - if (copy_from_user(&regs, useraddr, sizeof(regs))) - return -EFAULT; - - reglen = ops->get_regs_len(dev); - if (regs.len > reglen) - regs.len = reglen; - - regbuf = kmalloc(reglen, GFP_USER); - if (!regbuf) - return -ENOMEM; - - ops->get_regs(dev, &regs, regbuf); - - ret = -EFAULT; - if (copy_to_user(useraddr, &regs, sizeof(regs))) - goto out; - useraddr += offsetof(struct ethtool_regs, data); - if (copy_to_user(useraddr, regbuf, reglen)) - goto out; - ret = 0; - -out: - kfree(regbuf); - return ret; -} - -static int ethtool_get_wol(struct net_device *dev, char *useraddr) -{ - struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; - - if (!ethtool_ops->get_wol) - return -EOPNOTSUPP; - - ethtool_ops->get_wol(dev, &wol); - - if (copy_to_user(useraddr, &wol, sizeof(wol))) - return -EFAULT; - return 0; -} - -static int ethtool_set_wol(struct net_device *dev, char *useraddr) -{ - struct ethtool_wolinfo wol; - - if (!ethtool_ops->set_wol) - return -EOPNOTSUPP; - - if (copy_from_user(&wol, useraddr, sizeof(wol))) - return -EFAULT; - - return ethtool_ops->set_wol(dev, &wol); -} - -static int ethtool_get_msglevel(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GMSGLVL }; - - if (!ethtool_ops->get_msglevel) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_msglevel(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_msglevel(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata; - - if (!ethtool_ops->set_msglevel) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - ethtool_ops->set_msglevel(dev, edata.data); - return 0; -} - 
-static int ethtool_nway_reset(struct net_device *dev) -{ - if (!ethtool_ops->nway_reset) - return -EOPNOTSUPP; - - return ethtool_ops->nway_reset(dev); -} - -static int ethtool_get_link(struct net_device *dev, void *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GLINK }; - - if (!ethtool_ops->get_link) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_link(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_get_eeprom(struct net_device *dev, void *useraddr) -{ - struct ethtool_eeprom eeprom; - struct ethtool_ops *ops = ethtool_ops; - u8 *data; - int ret; - - if (!ops->get_eeprom || !ops->get_eeprom_len) - return -EOPNOTSUPP; - - if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) - return -EFAULT; - - /* Check for wrap and zero */ - if (eeprom.offset + eeprom.len <= eeprom.offset) - return -EINVAL; - - /* Check for exceeding total eeprom len */ - if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) - return -EINVAL; - - data = kmalloc(eeprom.len, GFP_USER); - if (!data) - return -ENOMEM; - - ret = -EFAULT; - if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) - goto out; - - ret = ops->get_eeprom(dev, &eeprom, data); - if (ret) - goto out; - - ret = -EFAULT; - if (copy_to_user(useraddr, &eeprom, sizeof(eeprom))) - goto out; - if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -static int ethtool_set_eeprom(struct net_device *dev, void *useraddr) -{ - struct ethtool_eeprom eeprom; - struct ethtool_ops *ops = ethtool_ops; - u8 *data; - int ret; - - if (!ops->set_eeprom || !ops->get_eeprom_len) - return -EOPNOTSUPP; - - if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) - return -EFAULT; - - /* Check for wrap and zero */ - if (eeprom.offset + eeprom.len <= eeprom.offset) - return -EINVAL; - - /* Check for exceeding total eeprom len */ - if (eeprom.offset + eeprom.len > 
ops->get_eeprom_len(dev)) - return -EINVAL; - - data = kmalloc(eeprom.len, GFP_USER); - if (!data) - return -ENOMEM; - - ret = -EFAULT; - if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) - goto out; - - ret = ops->set_eeprom(dev, &eeprom, data); - if (ret) - goto out; - - if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) - ret = -EFAULT; - -out: - kfree(data); - return ret; -} - -static int ethtool_get_coalesce(struct net_device *dev, void *useraddr) -{ - struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE }; - - if (!ethtool_ops->get_coalesce) - return -EOPNOTSUPP; - - ethtool_ops->get_coalesce(dev, &coalesce); - - if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) - return -EFAULT; - return 0; -} - -static int ethtool_set_coalesce(struct net_device *dev, void *useraddr) -{ - struct ethtool_coalesce coalesce; - - if (!ethtool_ops->get_coalesce) - return -EOPNOTSUPP; - - if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) - return -EFAULT; - - return ethtool_ops->set_coalesce(dev, &coalesce); -} - -static int ethtool_get_ringparam(struct net_device *dev, void *useraddr) -{ - struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM }; - - if (!ethtool_ops->get_ringparam) - return -EOPNOTSUPP; - - ethtool_ops->get_ringparam(dev, &ringparam); - - if (copy_to_user(useraddr, &ringparam, sizeof(ringparam))) - return -EFAULT; - return 0; -} - -static int ethtool_set_ringparam(struct net_device *dev, void *useraddr) -{ - struct ethtool_ringparam ringparam; - - if (!ethtool_ops->get_ringparam) - return -EOPNOTSUPP; - - if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) - return -EFAULT; - - return ethtool_ops->set_ringparam(dev, &ringparam); -} - -static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr) -{ - struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; - - if (!ethtool_ops->get_pauseparam) - return -EOPNOTSUPP; - - ethtool_ops->get_pauseparam(dev, &pauseparam); - - if 
(copy_to_user(useraddr, &pauseparam, sizeof(pauseparam))) - return -EFAULT; - return 0; -} - -static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr) -{ - struct ethtool_pauseparam pauseparam; - - if (!ethtool_ops->get_pauseparam) - return -EOPNOTSUPP; - - if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam))) - return -EFAULT; - - return ethtool_ops->set_pauseparam(dev, &pauseparam); -} - -static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GRXCSUM }; - - if (!ethtool_ops->get_rx_csum) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_rx_csum(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata; - - if (!ethtool_ops->set_rx_csum) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - ethtool_ops->set_rx_csum(dev, edata.data); - return 0; -} - -static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTXCSUM }; - - if (!ethtool_ops->get_tx_csum) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_tx_csum(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata; - - if (!ethtool_ops->set_tx_csum) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - return ethtool_ops->set_tx_csum(dev, edata.data); -} - -static int ethtool_get_sg(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GSG }; - - if (!ethtool_ops->get_sg) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_sg(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_sg(struct 
net_device *dev, char *useraddr) -{ - struct ethtool_value edata; - - if (!ethtool_ops->set_sg) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - return ethtool_ops->set_sg(dev, edata.data); -} - -static int ethtool_get_tso(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTSO }; - - if (!ethtool_ops->get_tso) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_tso(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_tso(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata; - - if (!ethtool_ops->set_tso) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - return ethtool_ops->set_tso(dev, edata.data); -} - -static int ethtool_self_test(struct net_device *dev, char *useraddr) -{ - struct ethtool_test test; - struct ethtool_ops *ops = ethtool_ops; - u64 *data; - int ret; - - if (!ops->self_test || !ops->self_test_count) - return -EOPNOTSUPP; - - if (copy_from_user(&test, useraddr, sizeof(test))) - return -EFAULT; - - test.len = ops->self_test_count(dev); - data = kmalloc(test.len * sizeof(u64), GFP_USER); - if (!data) - return -ENOMEM; - - ops->self_test(dev, &test, data); - - ret = -EFAULT; - if (copy_to_user(useraddr, &test, sizeof(test))) - goto out; - useraddr += sizeof(test); - if (copy_to_user(useraddr, data, test.len * sizeof(u64))) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -static int ethtool_get_strings(struct net_device *dev, void *useraddr) -{ - struct ethtool_gstrings gstrings; - struct ethtool_ops *ops = ethtool_ops; - u8 *data; - int ret; - - if (!ops->get_strings) - return -EOPNOTSUPP; - - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) - return -EFAULT; - - switch (gstrings.string_set) { - case ETH_SS_TEST: - if (!ops->self_test_count) - return -EOPNOTSUPP; - gstrings.len = 
ops->self_test_count(dev); - break; - case ETH_SS_STATS: - if (!ops->get_stats_count) - return -EOPNOTSUPP; - gstrings.len = ops->get_stats_count(dev); - break; - default: - return -EINVAL; - } - - data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); - if (!data) - return -ENOMEM; - - ops->get_strings(dev, gstrings.string_set, data); - - ret = -EFAULT; - if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) - goto out; - useraddr += sizeof(gstrings); - if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -static int ethtool_phys_id(struct net_device *dev, void *useraddr) -{ - struct ethtool_value id; - - if (!ethtool_ops->phys_id) - return -EOPNOTSUPP; - - if (copy_from_user(&id, useraddr, sizeof(id))) - return -EFAULT; - - return ethtool_ops->phys_id(dev, id.data); -} - -static int ethtool_get_stats(struct net_device *dev, void *useraddr) -{ - struct ethtool_stats stats; - struct ethtool_ops *ops = ethtool_ops; - u64 *data; - int ret; - - if (!ops->get_ethtool_stats || !ops->get_stats_count) - return -EOPNOTSUPP; - - if (copy_from_user(&stats, useraddr, sizeof(stats))) - return -EFAULT; - - stats.n_stats = ops->get_stats_count(dev); - data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER); - if (!data) - return -ENOMEM; - - ops->get_ethtool_stats(dev, &stats, data); - - ret = -EFAULT; - if (copy_to_user(useraddr, &stats, sizeof(stats))) - goto out; - useraddr += sizeof(stats); - if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64))) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -/* The main entry point in this file. Called from net/core/dev.c */ - -#define ETHTOOL_OPS_COMPAT -int ethtool_ioctl(struct ifreq *ifr) -{ - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); - void *useraddr = (void *) ifr->ifr_data; - u32 ethcmd; - - /* - * XXX: This can be pushed down into the ethtool_* handlers that - * need it. Keep existing behavior for the moment. 
- */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (!dev || !netif_device_present(dev)) - return -ENODEV; - - if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd))) - return -EFAULT; - - switch (ethcmd) { - case ETHTOOL_GSET: - return ethtool_get_settings(dev, useraddr); - case ETHTOOL_SSET: - return ethtool_set_settings(dev, useraddr); - case ETHTOOL_GDRVINFO: - return ethtool_get_drvinfo(dev, useraddr); - case ETHTOOL_GREGS: - return ethtool_get_regs(dev, useraddr); - case ETHTOOL_GWOL: - return ethtool_get_wol(dev, useraddr); - case ETHTOOL_SWOL: - return ethtool_set_wol(dev, useraddr); - case ETHTOOL_GMSGLVL: - return ethtool_get_msglevel(dev, useraddr); - case ETHTOOL_SMSGLVL: - return ethtool_set_msglevel(dev, useraddr); - case ETHTOOL_NWAY_RST: - return ethtool_nway_reset(dev); - case ETHTOOL_GLINK: - return ethtool_get_link(dev, useraddr); - case ETHTOOL_GEEPROM: - return ethtool_get_eeprom(dev, useraddr); - case ETHTOOL_SEEPROM: - return ethtool_set_eeprom(dev, useraddr); - case ETHTOOL_GCOALESCE: - return ethtool_get_coalesce(dev, useraddr); - case ETHTOOL_SCOALESCE: - return ethtool_set_coalesce(dev, useraddr); - case ETHTOOL_GRINGPARAM: - return ethtool_get_ringparam(dev, useraddr); - case ETHTOOL_SRINGPARAM: - return ethtool_set_ringparam(dev, useraddr); - case ETHTOOL_GPAUSEPARAM: - return ethtool_get_pauseparam(dev, useraddr); - case ETHTOOL_SPAUSEPARAM: - return ethtool_set_pauseparam(dev, useraddr); - case ETHTOOL_GRXCSUM: - return ethtool_get_rx_csum(dev, useraddr); - case ETHTOOL_SRXCSUM: - return ethtool_set_rx_csum(dev, useraddr); - case ETHTOOL_GTXCSUM: - return ethtool_get_tx_csum(dev, useraddr); - case ETHTOOL_STXCSUM: - return ethtool_set_tx_csum(dev, useraddr); - case ETHTOOL_GSG: - return ethtool_get_sg(dev, useraddr); - case ETHTOOL_SSG: - return ethtool_set_sg(dev, useraddr); - case ETHTOOL_GTSO: - return ethtool_get_tso(dev, useraddr); - case ETHTOOL_STSO: - return ethtool_set_tso(dev, useraddr); - case ETHTOOL_TEST: - return
ethtool_self_test(dev, useraddr); - case ETHTOOL_GSTRINGS: - return ethtool_get_strings(dev, useraddr); - case ETHTOOL_PHYS_ID: - return ethtool_phys_id(dev, useraddr); - case ETHTOOL_GSTATS: - return ethtool_get_stats(dev, useraddr); - default: - return -EOPNOTSUPP; - } - - return -EOPNOTSUPP; -} - -#define mii_if_info _kc_mii_if_info -struct _kc_mii_if_info { - int phy_id; - int advertising; - int phy_id_mask; - int reg_num_mask; - - unsigned int full_duplex : 1; /* is full duplex? */ - unsigned int force_media : 1; /* is autoneg. disabled? */ - - struct net_device *dev; - int (*mdio_read) (struct net_device *dev, int phy_id, int location); - void (*mdio_write) (struct net_device *dev, int phy_id, int location, int val); -}; - -struct ethtool_cmd; -struct mii_ioctl_data; - -#undef mii_link_ok -#define mii_link_ok _kc_mii_link_ok -#undef mii_nway_restart -#define mii_nway_restart _kc_mii_nway_restart -#undef mii_ethtool_gset -#define mii_ethtool_gset _kc_mii_ethtool_gset -#undef mii_ethtool_sset -#define mii_ethtool_sset _kc_mii_ethtool_sset -#undef mii_check_link -#define mii_check_link _kc_mii_check_link -extern int _kc_mii_link_ok (struct mii_if_info *mii); -extern int _kc_mii_nway_restart (struct mii_if_info *mii); -extern int _kc_mii_ethtool_gset(struct mii_if_info *mii, - struct ethtool_cmd *ecmd); -extern int _kc_mii_ethtool_sset(struct mii_if_info *mii, - struct ethtool_cmd *ecmd); -extern void _kc_mii_check_link (struct mii_if_info *mii); -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) ) -#undef generic_mii_ioctl -#define generic_mii_ioctl _kc_generic_mii_ioctl -extern int _kc_generic_mii_ioctl(struct mii_if_info *mii_if, - struct mii_ioctl_data *mii_data, int cmd, - unsigned int *duplex_changed); -#endif /* > 2.4.6 */ - - -struct _kc_pci_dev_ext { - struct pci_dev *dev; - void *pci_drvdata; - struct pci_driver *driver; -}; - -struct _kc_net_dev_ext { - struct net_device *dev; - unsigned int carrier; -}; - - -/**************************************/ -/* 
mii support */ - -int _kc_mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) -{ - struct net_device *dev = mii->dev; - u32 advert, bmcr, lpa, nego; - - ecmd->supported = - (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | - SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII); - - /* only supports twisted-pair */ - ecmd->port = PORT_MII; - - /* only supports internal transceiver */ - ecmd->transceiver = XCVR_INTERNAL; - - /* this isn't fully supported at higher layers */ - ecmd->phy_address = mii->phy_id; - - ecmd->advertising = ADVERTISED_TP | ADVERTISED_MII; - advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE); - if (advert & ADVERTISE_10HALF) - ecmd->advertising |= ADVERTISED_10baseT_Half; - if (advert & ADVERTISE_10FULL) - ecmd->advertising |= ADVERTISED_10baseT_Full; - if (advert & ADVERTISE_100HALF) - ecmd->advertising |= ADVERTISED_100baseT_Half; - if (advert & ADVERTISE_100FULL) - ecmd->advertising |= ADVERTISED_100baseT_Full; - - bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); - lpa = mii->mdio_read(dev, mii->phy_id, MII_LPA); - if (bmcr & BMCR_ANENABLE) { - ecmd->advertising |= ADVERTISED_Autoneg; - ecmd->autoneg = AUTONEG_ENABLE; - - nego = mii_nway_result(advert & lpa); - if (nego == LPA_100FULL || nego == LPA_100HALF) - ecmd->speed = SPEED_100; - else - ecmd->speed = SPEED_10; - if (nego == LPA_100FULL || nego == LPA_10FULL) { - ecmd->duplex = DUPLEX_FULL; - mii->full_duplex = 1; - } else { - ecmd->duplex = DUPLEX_HALF; - mii->full_duplex = 0; - } - } else { - ecmd->autoneg = AUTONEG_DISABLE; - - ecmd->speed = (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10; - ecmd->duplex = (bmcr & BMCR_FULLDPLX) ? 
DUPLEX_FULL : DUPLEX_HALF; - } - - /* ignore maxtxpkt, maxrxpkt for now */ - - return 0; -} - -int _kc_mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) -{ - struct net_device *dev = mii->dev; - - if (ecmd->speed != SPEED_10 && ecmd->speed != SPEED_100) - return -EINVAL; - if (ecmd->duplex != DUPLEX_HALF && ecmd->duplex != DUPLEX_FULL) - return -EINVAL; - if (ecmd->port != PORT_MII) - return -EINVAL; - if (ecmd->transceiver != XCVR_INTERNAL) - return -EINVAL; - if (ecmd->phy_address != mii->phy_id) - return -EINVAL; - if (ecmd->autoneg != AUTONEG_DISABLE && ecmd->autoneg != AUTONEG_ENABLE) - return -EINVAL; - - /* ignore supported, maxtxpkt, maxrxpkt */ - - if (ecmd->autoneg == AUTONEG_ENABLE) { - u32 bmcr, advert, tmp; - - if ((ecmd->advertising & (ADVERTISED_10baseT_Half | - ADVERTISED_10baseT_Full | - ADVERTISED_100baseT_Half | - ADVERTISED_100baseT_Full)) == 0) - return -EINVAL; - - /* advertise only what has been requested */ - advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE); - tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4); - if (ADVERTISED_10baseT_Half) - tmp |= ADVERTISE_10HALF; - if (ADVERTISED_10baseT_Full) - tmp |= ADVERTISE_10FULL; - if (ADVERTISED_100baseT_Half) - tmp |= ADVERTISE_100HALF; - if (ADVERTISED_100baseT_Full) - tmp |= ADVERTISE_100FULL; - if (advert != tmp) { - mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp); - mii->advertising = tmp; - } - - /* turn on autonegotiation, and force a renegotiate */ - bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); - bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART); - mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr); - - mii->force_media = 0; - } else { - u32 bmcr, tmp; - - /* turn off auto negotiation, set speed and duplexity */ - bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); - tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 | BMCR_FULLDPLX); - if (ecmd->speed == SPEED_100) - tmp |= BMCR_SPEED100; - if (ecmd->duplex == DUPLEX_FULL) { - tmp |= BMCR_FULLDPLX; - 
mii->full_duplex = 1; - } else - mii->full_duplex = 0; - if (bmcr != tmp) - mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp); - - mii->force_media = 1; - } - return 0; -} - -int _kc_mii_link_ok (struct mii_if_info *mii) -{ - /* first, a dummy read, needed to latch some MII phys */ - mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR); - if (mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR) & BMSR_LSTATUS) - return 1; - return 0; -} - -int _kc_mii_nway_restart (struct mii_if_info *mii) -{ - int bmcr; - int r = -EINVAL; - - /* if autoneg is off, it's an error */ - bmcr = mii->mdio_read(mii->dev, mii->phy_id, MII_BMCR); - - if (bmcr & BMCR_ANENABLE) { - bmcr |= BMCR_ANRESTART; - mii->mdio_write(mii->dev, mii->phy_id, MII_BMCR, bmcr); - r = 0; - } - - return r; -} - -void _kc_mii_check_link (struct mii_if_info *mii) -{ - int cur_link = mii_link_ok(mii); - int prev_link = netif_carrier_ok(mii->dev); - - if (cur_link && !prev_link) - netif_carrier_on(mii->dev); - else if (prev_link && !cur_link) - netif_carrier_off(mii->dev); -} - -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) ) -int _kc_generic_mii_ioctl(struct mii_if_info *mii_if, - struct mii_ioctl_data *mii_data, int cmd, - unsigned int *duplex_chg_out) -{ - int rc = 0; - unsigned int duplex_changed = 0; - - if (duplex_chg_out) - *duplex_chg_out = 0; - - mii_data->phy_id &= mii_if->phy_id_mask; - mii_data->reg_num &= mii_if->reg_num_mask; - - switch(cmd) { - case SIOCDEVPRIVATE: /* binary compat, remove in 2.5 */ - case SIOCGMIIPHY: - mii_data->phy_id = mii_if->phy_id; - /* fall through */ - - case SIOCDEVPRIVATE + 1:/* binary compat, remove in 2.5 */ - case SIOCGMIIREG: - mii_data->val_out = - mii_if->mdio_read(mii_if->dev, mii_data->phy_id, - mii_data->reg_num); - break; - - case SIOCDEVPRIVATE + 2:/* binary compat, remove in 2.5 */ - case SIOCSMIIREG: { - u16 val = mii_data->val_in; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (mii_data->phy_id == mii_if->phy_id) { - switch(mii_data->reg_num) { - case 
MII_BMCR: { - unsigned int new_duplex = 0; - if (val & (BMCR_RESET|BMCR_ANENABLE)) - mii_if->force_media = 0; - else - mii_if->force_media = 1; - if (mii_if->force_media && - (val & BMCR_FULLDPLX)) - new_duplex = 1; - if (mii_if->full_duplex != new_duplex) { - duplex_changed = 1; - mii_if->full_duplex = new_duplex; - } - break; - } - case MII_ADVERTISE: - mii_if->advertising = val; - break; - default: - /* do nothing */ - break; - } - } - - mii_if->mdio_write(mii_if->dev, mii_data->phy_id, - mii_data->reg_num, val); - break; - } - - default: - rc = -EOPNOTSUPP; - break; - } - - if ((rc == 0) && (duplex_chg_out) && (duplex_changed)) - *duplex_chg_out = 1; - - return rc; -} -#endif /* > 2.4.6 */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h index 222c2c71..59415469 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c index 24015844..e17b7f18 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h index c6abb020..00a584f4 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c index c6f4130d..30de47eb 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h index 02be92ab..41024400 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c index ef7ce629..f00fe796 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h index a6ab30d2..98b74000 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c index 93659ca0..88b33fa0 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h index 9bd6f534..6ae5926f 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h index a6690451..5e6f9ac9 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c index 11472bd3..bc3cb2f4 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h index cad28622..48f7dcfc 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c index 92fc9fc7..d26016c9 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> @@ -86,7 +86,7 @@ const char ixgbe_driver_version[] = DRV_VERSION; * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, * Class, Class Mask, private data (not used) } */ -DEFINE_PCI_DEVICE_TABLE(ixgbe_pci_tbl) = { +const struct pci_device_id ixgbe_pci_tbl[] = { {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598)}, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)}, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT)}, diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h index 124f00de..5ced84f8 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h index d161600b..c6f8e21f 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c index e3f5275e..234fa632 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h index bbe5a9e3..5ae171ac 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h deleted file mode 100644 index 5e3559fd..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h +++ /dev/null @@ -1,73 +0,0 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. 
- - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - - -#ifndef _IXGBE_SRIOV_H_ -#define _IXGBE_SRIOV_H_ - -int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, - int entries, u16 *hash_list, u32 vf); -void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter); -int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf); -void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe); -void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf); -void ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf); -void ixgbe_msg_task(struct ixgbe_adapter *adapter); -int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter, - int vf, unsigned char *mac_addr); -void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); -void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); -#ifdef IFLA_VF_MAX -int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); -int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, - u8 qos); -int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); -#ifdef HAVE_VF_SPOOFCHK_CONFIGURE -int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); -#endif -int ixgbe_ndo_get_vf_config(struct net_device *netdev, - int vf, 
struct ifla_vf_info *ivi); -#endif -void ixgbe_disable_sriov(struct ixgbe_adapter *adapter); -#ifdef CONFIG_PCI_IOV -int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask); -void ixgbe_enable_sriov(struct ixgbe_adapter *adapter); -#endif -int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter); -#ifdef IFLA_VF_MAX -void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter); -#endif /* IFLA_VF_MAX */ -void ixgbe_dump_registers(struct ixgbe_adapter *adapter); - -/* - * These are defined in ixgbe_type.h on behalf of the VF driver - * but we need them here unwrapped for the PF driver. - */ -#define IXGBE_DEV_ID_82599_VF 0x10ED -#define IXGBE_DEV_ID_X540_VF 0x1515 - -#endif /* _IXGBE_SRIOV_H_ */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h index 6b21c879..bda61fa4 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c index b99d9e84..2affe242 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h index 77e8952d..38bcc87b 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c index 5f2523ed..d84c7ccb 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h index bf27579b..4c7a6408 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> @@ -3140,4 +3140,16 @@ static inline int __kc_pci_vfs_assigned(struct pci_dev *dev) #define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops)) #endif /* >= 3.16.0 */ +/* + * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4) + * For older kernels backported this commit, need to use renamed functions. + * This fix is specific to RedHat/CentOS kernels. + */ +#if (defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)) +#define vlan_tx_tag_get skb_vlan_tag_get +#define vlan_tx_tag_present skb_vlan_tag_present +#endif + #endif /* _KCOMPAT_H_ */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_dev.h b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_dev.h index a0e5cb6b..58cbadd3 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_dev.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_dev.h @@ -25,6 +25,11 @@ #ifndef _KNI_DEV_H_ #define _KNI_DEV_H_ +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/if.h> #include <linux/wait.h> #include <linux/sched.h> @@ -39,10 +44,11 @@ #include <exec-env/rte_kni_common.h> #define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */ +#define MBUF_BURST_SZ 32 + /** * A structure describing the private information for a kni device. 
*/ - struct kni_dev { /* kni list */ struct list_head list; @@ -50,7 +56,7 @@ struct kni_dev { struct net_device_stats stats; int status; uint16_t group_id; /* Group ID of a group of KNI devices */ - unsigned core_id; /* Core ID to bind */ + uint32_t core_id; /* Core ID to bind */ char name[RTE_KNI_NAMESIZE]; /* Network device name */ struct task_struct *pthread; @@ -84,38 +90,36 @@ struct kni_dev { /* response queue */ void *resp_q; - void * sync_kva; + void *sync_kva; void *sync_va; void *mbuf_kva; void *mbuf_va; /* mbuf size */ - unsigned mbuf_size; + uint32_t mbuf_size; /* synchro for request processing */ unsigned long synchro; #ifdef RTE_KNI_VHOST - struct kni_vhost_queue* vhost_queue; + struct kni_vhost_queue *vhost_queue; + volatile enum { BE_STOP = 0x1, BE_START = 0x2, BE_FINISH = 0x4, - }vq_status; + } vq_status; #endif + /* buffers */ + void *pa[MBUF_BURST_SZ]; + void *va[MBUF_BURST_SZ]; + void *alloc_pa[MBUF_BURST_SZ]; + void *alloc_va[MBUF_BURST_SZ]; }; -#define KNI_ERR(args...) printk(KERN_DEBUG "KNI: Error: " args) -#define KNI_PRINT(args...) printk(KERN_DEBUG "KNI: " args) -#ifdef RTE_KNI_KO_DEBUG - #define KNI_DBG(args...) printk(KERN_DEBUG "KNI: " args) -#else - #define KNI_DBG(args...) -#endif - #ifdef RTE_KNI_VHOST -unsigned int +uint32_t kni_poll(struct file *file, struct socket *sock, poll_table * wait); int kni_chk_vhost_rx(struct kni_dev *kni); int kni_vhost_init(struct kni_dev *kni); @@ -127,23 +131,22 @@ struct kni_vhost_queue { int vnet_hdr_sz; struct kni_dev *kni; int sockfd; - unsigned int flags; - struct sk_buff* cache; - struct rte_kni_fifo* fifo; + uint32_t flags; + struct sk_buff *cache; + struct rte_kni_fifo *fifo; }; #endif -#ifdef RTE_KNI_VHOST_DEBUG_RX - #define KNI_DBG_RX(args...) printk(KERN_DEBUG "KNI RX: " args) -#else - #define KNI_DBG_RX(args...) 
-#endif +void kni_net_rx(struct kni_dev *kni); +void kni_net_init(struct net_device *dev); +void kni_net_config_lo_mode(char *lo_str); +void kni_net_poll_resp(struct kni_dev *kni); +void kni_set_ethtool_ops(struct net_device *netdev); -#ifdef RTE_KNI_VHOST_DEBUG_TX - #define KNI_DBG_TX(args...) printk(KERN_DEBUG "KNI TX: " args) -#else - #define KNI_DBG_TX(args...) -#endif +int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); +void ixgbe_kni_remove(struct pci_dev *pdev); +int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); +void igb_kni_remove(struct pci_dev *pdev); #endif diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_ethtool.c b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_ethtool.c index 06b6d463..0c88589c 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_ethtool.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_ethtool.c @@ -31,6 +31,7 @@ static int kni_check_if_running(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + if (priv->lad_dev) return 0; else @@ -41,6 +42,7 @@ static void kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info); } @@ -48,6 +50,7 @@ static int kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd); } @@ -55,6 +58,7 @@ static int kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd); } @@ -62,6 +66,7 @@ static void kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_wol(priv->lad_dev, wol); } @@ -69,6 +74,7 @@ static int kni_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct kni_dev *priv = 
netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_wol(priv->lad_dev, wol); } @@ -76,6 +82,7 @@ static int kni_nway_reset(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->nway_reset(priv->lad_dev); } @@ -83,6 +90,7 @@ static int kni_get_eeprom_len(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_eeprom_len(priv->lad_dev); } @@ -91,6 +99,7 @@ kni_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *bytes) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_eeprom(priv->lad_dev, eeprom, bytes); } @@ -100,6 +109,7 @@ kni_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *bytes) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_eeprom(priv->lad_dev, eeprom, bytes); } @@ -108,6 +118,7 @@ static void kni_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_ringparam(priv->lad_dev, ring); } @@ -115,6 +126,7 @@ static int kni_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_ringparam(priv->lad_dev, ring); } @@ -122,6 +134,7 @@ static void kni_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_pauseparam(priv->lad_dev, pause); } @@ -129,6 +142,7 @@ static int kni_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_pauseparam(priv->lad_dev, pause); } @@ -137,6 +151,7 @@ static u32 kni_get_msglevel(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_msglevel(priv->lad_dev); } @@ -144,6 +159,7 @@ static 
void kni_set_msglevel(struct net_device *dev, u32 data) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->set_msglevel(priv->lad_dev, data); } @@ -151,6 +167,7 @@ static int kni_get_regs_len(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_regs_len(priv->lad_dev); } @@ -158,6 +175,7 @@ static void kni_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_regs(priv->lad_dev, regs, p); } @@ -165,6 +183,7 @@ static void kni_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_strings(priv->lad_dev, stringset, data); } @@ -173,6 +192,7 @@ static int kni_get_sset_count(struct net_device *dev, int sset) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_sset_count(priv->lad_dev, sset); } @@ -181,24 +201,25 @@ kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_ethtool_stats(priv->lad_dev, stats, data); } struct ethtool_ops kni_ethtool_ops = { - .begin = kni_check_if_running, + .begin = kni_check_if_running, .get_drvinfo = kni_get_drvinfo, .get_settings = kni_get_settings, .set_settings = kni_set_settings, .get_regs_len = kni_get_regs_len, - .get_regs = kni_get_regs, - .get_wol = kni_get_wol, - .set_wol = kni_set_wol, - .nway_reset = kni_nway_reset, - .get_link = ethtool_op_get_link, + .get_regs = kni_get_regs, + .get_wol = kni_get_wol, + .set_wol = kni_set_wol, + .nway_reset = kni_nway_reset, + .get_link = ethtool_op_get_link, .get_eeprom_len = kni_get_eeprom_len, - .get_eeprom = kni_get_eeprom, - .set_eeprom = kni_set_eeprom, + .get_eeprom = kni_get_eeprom, + .set_eeprom = kni_set_eeprom, .get_ringparam = kni_get_ringparam, .set_ringparam = kni_set_ringparam, .get_pauseparam 
= kni_get_pauseparam, @@ -207,7 +228,7 @@ struct ethtool_ops kni_ethtool_ops = { .set_msglevel = kni_set_msglevel, .get_strings = kni_get_strings, .get_sset_count = kni_get_sset_count, - .get_ethtool_stats = kni_get_ethtool_stats, + .get_ethtool_stats = kni_get_ethtool_stats, }; void diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_fifo.h b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_fifo.h index 3ea750e2..025ec1c9 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_fifo.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_fifo.h @@ -30,13 +30,13 @@ /** * Adds num elements into the fifo. Return the number actually written */ -static inline unsigned -kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num) +static inline uint32_t +kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num) { - unsigned i = 0; - unsigned fifo_write = fifo->write; - unsigned fifo_read = fifo->read; - unsigned new_write = fifo_write; + uint32_t i = 0; + uint32_t fifo_write = fifo->write; + uint32_t fifo_read = fifo->read; + uint32_t new_write = fifo_write; for (i = 0; i < num; i++) { new_write = (new_write + 1) & (fifo->len - 1); @@ -54,12 +54,12 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num) /** * Get up to num elements from the fifo. 
Return the number actully read */ -static inline unsigned -kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) +static inline uint32_t +kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num) { - unsigned i = 0; - unsigned new_read = fifo->read; - unsigned fifo_write = fifo->write; + uint32_t i = 0; + uint32_t new_read = fifo->read; + uint32_t fifo_write = fifo->write; for (i = 0; i < num; i++) { if (new_read == fifo_write) @@ -76,16 +76,16 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) /** * Get the num of elements in the fifo */ -static inline unsigned +static inline uint32_t kni_fifo_count(struct rte_kni_fifo *fifo) { - return (fifo->len + fifo->write - fifo->read) & ( fifo->len - 1); + return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1); } /** * Get the num of available elements in the fifo */ -static inline unsigned +static inline uint32_t kni_fifo_free_count(struct rte_kni_fifo *fifo) { return (fifo->read - fifo->write - 1) & (fifo->len - 1); @@ -96,7 +96,7 @@ kni_fifo_free_count(struct rte_kni_fifo *fifo) * Initializes the kni fifo structure */ static inline void -kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size) +kni_fifo_init(struct rte_kni_fifo *fifo, uint32_t size) { fifo->write = 0; fifo->read = 0; diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_misc.c b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_misc.c index 59d15ca6..33b61f2a 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_misc.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_misc.c @@ -30,6 +30,7 @@ #include <linux/pci.h> #include <linux/kthread.h> #include <linux/rwsem.h> +#include <linux/mutex.h> #include <linux/nsproxy.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -47,52 +48,15 @@ MODULE_DESCRIPTION("Kernel Module for managing kni devices"); #define KNI_MAX_DEVICES 32 -extern void kni_net_rx(struct kni_dev *kni); -extern void kni_net_init(struct net_device *dev); -extern void 
kni_net_config_lo_mode(char *lo_str); -extern void kni_net_poll_resp(struct kni_dev *kni); -extern void kni_set_ethtool_ops(struct net_device *netdev); - -extern int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); -extern void ixgbe_kni_remove(struct pci_dev *pdev); -extern int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); -extern void igb_kni_remove(struct pci_dev *pdev); - -static int kni_open(struct inode *inode, struct file *file); -static int kni_release(struct inode *inode, struct file *file); -static int kni_ioctl(struct inode *inode, unsigned int ioctl_num, - unsigned long ioctl_param); -static int kni_compat_ioctl(struct inode *inode, unsigned int ioctl_num, - unsigned long ioctl_param); -static int kni_dev_remove(struct kni_dev *dev); - -static int __init kni_parse_kthread_mode(void); - -/* KNI processing for single kernel thread mode */ -static int kni_thread_single(void *unused); -/* KNI processing for multiple kernel thread mode */ -static int kni_thread_multiple(void *param); - -static struct file_operations kni_fops = { - .owner = THIS_MODULE, - .open = kni_open, - .release = kni_release, - .unlocked_ioctl = (void *)kni_ioctl, - .compat_ioctl = (void *)kni_compat_ioctl, -}; - -static struct miscdevice kni_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = KNI_DEVICE, - .fops = &kni_fops, -}; +extern const struct pci_device_id ixgbe_pci_tbl[]; +extern const struct pci_device_id igb_pci_tbl[]; /* loopback mode */ -static char *lo_mode = NULL; +static char *lo_mode; /* Kernel thread mode */ -static char *kthread_mode = NULL; -static unsigned multiple_kthread_on = 0; +static char *kthread_mode; +static uint32_t multiple_kthread_on; #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */ @@ -100,20 +64,24 @@ static int kni_net_id; struct kni_net { unsigned long device_in_use; /* device in use flag */ + struct mutex kni_kthread_lock; struct task_struct *kni_kthread; struct rw_semaphore kni_list_lock; 
struct list_head kni_list_head; }; -static int __net_init kni_init_net(struct net *net) +static int __net_init +kni_init_net(struct net *net) { #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS struct kni_net *knet = net_generic(net, kni_net_id); + + memset(knet, 0, sizeof(*knet)); #else struct kni_net *knet; int ret; - knet = kmalloc(sizeof(struct kni_net), GFP_KERNEL); + knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL); if (!knet) { ret = -ENOMEM; return ret; @@ -123,6 +91,8 @@ static int __net_init kni_init_net(struct net *net) /* Clear the bit of device in use */ clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); + mutex_init(&knet->kni_kthread_lock); + init_rwsem(&knet->kni_list_lock); INIT_LIST_HEAD(&knet->kni_list_head); @@ -137,11 +107,15 @@ static int __net_init kni_init_net(struct net *net) #endif } -static void __net_exit kni_exit_net(struct net *net) +static void __net_exit +kni_exit_net(struct net *net) { -#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS - struct kni_net *knet = net_generic(net, kni_net_id); + struct kni_net *knet __maybe_unused; + + knet = net_generic(net, kni_net_id); + mutex_destroy(&knet->kni_kthread_lock); +#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS kfree(knet); #endif } @@ -155,72 +129,56 @@ static struct pernet_operations kni_net_ops = { #endif }; -static int __init -kni_init(void) +static int +kni_thread_single(void *data) { - int rc; - - KNI_PRINT("######## DPDK kni module loading ########\n"); - - if (kni_parse_kthread_mode() < 0) { - KNI_ERR("Invalid parameter for kthread_mode\n"); - return -EINVAL; - } + struct kni_net *knet = data; + int j; + struct kni_dev *dev; -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - rc = register_pernet_subsys(&kni_net_ops); + while (!kthread_should_stop()) { + down_read(&knet->kni_list_lock); + for (j = 0; j < KNI_RX_LOOP_NUM; j++) { + list_for_each_entry(dev, &knet->kni_list_head, list) { +#ifdef RTE_KNI_VHOST + kni_chk_vhost_rx(dev); #else - rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); + 
kni_net_rx(dev); +#endif + kni_net_poll_resp(dev); + } + } + up_read(&knet->kni_list_lock); +#ifdef RTE_KNI_PREEMPT_DEFAULT + /* reschedule out for a while */ + schedule_timeout_interruptible( + usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL)); #endif - if (rc) - return -EPERM; - - rc = misc_register(&kni_misc); - if (rc != 0) { - KNI_ERR("Misc registration failed\n"); - goto out; } - /* Configure the lo mode according to the input parameter */ - kni_net_config_lo_mode(lo_mode); - - KNI_PRINT("######## DPDK kni module loaded ########\n"); - return 0; - -out: -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - unregister_pernet_subsys(&kni_net_ops); -#else - register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); -#endif - return rc; } -static void __exit -kni_exit(void) +static int +kni_thread_multiple(void *param) { - misc_deregister(&kni_misc); -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - unregister_pernet_subsys(&kni_net_ops); + int j; + struct kni_dev *dev = (struct kni_dev *)param; + + while (!kthread_should_stop()) { + for (j = 0; j < KNI_RX_LOOP_NUM; j++) { +#ifdef RTE_KNI_VHOST + kni_chk_vhost_rx(dev); #else - register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); + kni_net_rx(dev); #endif - KNI_PRINT("####### DPDK kni module unloaded #######\n"); -} - -static int __init -kni_parse_kthread_mode(void) -{ - if (!kthread_mode) - return 0; - - if (strcmp(kthread_mode, "single") == 0) - return 0; - else if (strcmp(kthread_mode, "multiple") == 0) - multiple_kthread_on = 1; - else - return -1; + kni_net_poll_resp(dev); + } +#ifdef RTE_KNI_PREEMPT_DEFAULT + schedule_timeout_interruptible( + usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL)); +#endif + } return 0; } @@ -235,21 +193,31 @@ kni_open(struct inode *inode, struct file *file) if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use)) return -EBUSY; - /* Create kernel thread for single mode */ - if (multiple_kthread_on == 0) { - KNI_PRINT("Single kernel thread for all KNI devices\n"); - /* Create kernel 
thread for RX */ - knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet, - "kni_single"); - if (IS_ERR(knet->kni_kthread)) { - KNI_ERR("Unable to create kernel threaed\n"); - return PTR_ERR(knet->kni_kthread); - } - } else - KNI_PRINT("Multiple kernel thread mode enabled\n"); - file->private_data = get_net(net); - KNI_PRINT("/dev/kni opened\n"); + pr_debug("/dev/kni opened\n"); + + return 0; +} + +static int +kni_dev_remove(struct kni_dev *dev) +{ + if (!dev) + return -ENODEV; + +#ifdef CONFIG_RTE_KNI_KMOD_ETHTOOL + if (dev->pci_dev) { + if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev)) + ixgbe_kni_remove(dev->pci_dev); + else if (pci_match_id(igb_pci_tbl, dev->pci_dev)) + igb_kni_remove(dev->pci_dev); + } +#endif + + if (dev->net_dev) { + unregister_netdev(dev->net_dev); + free_netdev(dev->net_dev); + } return 0; } @@ -263,9 +231,13 @@ kni_release(struct inode *inode, struct file *file) /* Stop kernel thread for single mode */ if (multiple_kthread_on == 0) { + mutex_lock(&knet->kni_kthread_lock); /* Stop kernel thread */ - kthread_stop(knet->kni_kthread); - knet->kni_kthread = NULL; + if (knet->kni_kthread != NULL) { + kthread_stop(knet->kni_kthread); + knet->kni_kthread = NULL; + } + mutex_unlock(&knet->kni_kthread_lock); } down_write(&knet->kni_list_lock); @@ -288,121 +260,78 @@ kni_release(struct inode *inode, struct file *file) clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); put_net(net); - KNI_PRINT("/dev/kni closed\n"); + pr_debug("/dev/kni closed\n"); return 0; } static int -kni_thread_single(void *data) +kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev) { - struct kni_net *knet = data; - int j; - struct kni_dev *dev; + if (!kni || !dev) + return -1; - while (!kthread_should_stop()) { - down_read(&knet->kni_list_lock); - for (j = 0; j < KNI_RX_LOOP_NUM; j++) { - list_for_each_entry(dev, &knet->kni_list_head, list) { -#ifdef RTE_KNI_VHOST - kni_chk_vhost_rx(dev); -#else - kni_net_rx(dev); -#endif - kni_net_poll_resp(dev); 
- } - } - up_read(&knet->kni_list_lock); -#ifdef RTE_KNI_PREEMPT_DEFAULT - /* reschedule out for a while */ - schedule_timeout_interruptible(usecs_to_jiffies( \ - KNI_KTHREAD_RESCHEDULE_INTERVAL)); -#endif + /* Check if network name has been used */ + if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) { + pr_err("KNI name %s duplicated\n", dev->name); + return -1; } return 0; } static int -kni_thread_multiple(void *param) +kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind) { - int j; - struct kni_dev *dev = (struct kni_dev *)param; - - while (!kthread_should_stop()) { - for (j = 0; j < KNI_RX_LOOP_NUM; j++) { -#ifdef RTE_KNI_VHOST - kni_chk_vhost_rx(dev); -#else - kni_net_rx(dev); -#endif - kni_net_poll_resp(dev); + /** + * Create a new kernel thread for multiple mode, set its core affinity, + * and finally wake it up. + */ + if (multiple_kthread_on) { + kni->pthread = kthread_create(kni_thread_multiple, + (void *)kni, "kni_%s", kni->name); + if (IS_ERR(kni->pthread)) { + kni_dev_remove(kni); + return -ECANCELED; } -#ifdef RTE_KNI_PREEMPT_DEFAULT - schedule_timeout_interruptible(usecs_to_jiffies( \ - KNI_KTHREAD_RESCHEDULE_INTERVAL)); -#endif - } - - return 0; -} - -static int -kni_dev_remove(struct kni_dev *dev) -{ - if (!dev) - return -ENODEV; - - switch (dev->device_id) { - #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev): - #include <rte_pci_dev_ids.h> - igb_kni_remove(dev->pci_dev); - break; - #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) case (dev): - #include <rte_pci_dev_ids.h> - ixgbe_kni_remove(dev->pci_dev); - break; - default: - break; - } - - if (dev->net_dev) { - unregister_netdev(dev->net_dev); - free_netdev(dev->net_dev); - } - return 0; -} + if (force_bind) + kthread_bind(kni->pthread, kni->core_id); + wake_up_process(kni->pthread); + } else { + mutex_lock(&knet->kni_kthread_lock); + + if (knet->kni_kthread == NULL) { + knet->kni_kthread = kthread_create(kni_thread_single, + (void *)knet, "kni_single"); + if 
(IS_ERR(knet->kni_kthread)) { + mutex_unlock(&knet->kni_kthread_lock); + kni_dev_remove(kni); + return -ECANCELED; + } -static int -kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev) -{ - if (!kni || !dev) - return -1; + if (force_bind) + kthread_bind(knet->kni_kthread, kni->core_id); + wake_up_process(knet->kni_kthread); + } - /* Check if network name has been used */ - if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) { - KNI_ERR("KNI name %s duplicated\n", dev->name); - return -1; + mutex_unlock(&knet->kni_kthread_lock); } return 0; } static int -kni_ioctl_create(struct net *net, - unsigned int ioctl_num, unsigned long ioctl_param) +kni_ioctl_create(struct net *net, uint32_t ioctl_num, + unsigned long ioctl_param) { struct kni_net *knet = net_generic(net, kni_net_id); int ret; struct rte_kni_device_info dev_info; - struct pci_dev *pci = NULL; - struct pci_dev *found_pci = NULL; struct net_device *net_dev = NULL; - struct net_device *lad_dev = NULL; struct kni_dev *kni, *dev, *n; - printk(KERN_INFO "KNI: Creating kni...\n"); + pr_info("Creating kni...\n"); /* Check the buffer size, to avoid warning */ if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) return -EINVAL; @@ -410,17 +339,21 @@ kni_ioctl_create(struct net *net, /* Copy kni info from user space */ ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); if (ret) { - KNI_ERR("copy_from_user in kni_ioctl_create"); + pr_err("copy_from_user in kni_ioctl_create"); return -EIO; } + /* Check if name is zero-ended */ + if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) { + pr_err("kni.name not zero-terminated"); + return -EINVAL; + } + /** - * Check if the cpu core id is valid for binding, - * for multiple kernel thread mode. + * Check if the cpu core id is valid for binding. 
*/ - if (multiple_kthread_on && dev_info.force_bind && - !cpu_online(dev_info.core_id)) { - KNI_ERR("cpu %u is not online\n", dev_info.core_id); + if (dev_info.force_bind && !cpu_online(dev_info.core_id)) { + pr_err("cpu %u is not online\n", dev_info.core_id); return -EINVAL; } @@ -435,12 +368,12 @@ kni_ioctl_create(struct net *net, up_read(&knet->kni_list_lock); net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name, -#ifdef NET_NAME_UNKNOWN - NET_NAME_UNKNOWN, +#ifdef NET_NAME_USER + NET_NAME_USER, #endif kni_net_init); if (net_dev == NULL) { - KNI_ERR("error allocating device \"%s\"\n", dev_info.name); + pr_err("error allocating device \"%s\"\n", dev_info.name); return -EBUSY; } @@ -464,44 +397,43 @@ kni_ioctl_create(struct net *net, kni->sync_va = dev_info.sync_va; kni->sync_kva = phys_to_virt(dev_info.sync_phys); - kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys); - kni->mbuf_va = dev_info.mbuf_va; - #ifdef RTE_KNI_VHOST kni->vhost_queue = NULL; kni->vq_status = BE_STOP; #endif kni->mbuf_size = dev_info.mbuf_size; - KNI_PRINT("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", + pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", (unsigned long long) dev_info.tx_phys, kni->tx_q); - KNI_PRINT("rx_phys: 0x%016llx, rx_q addr: 0x%p\n", + pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n", (unsigned long long) dev_info.rx_phys, kni->rx_q); - KNI_PRINT("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n", + pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n", (unsigned long long) dev_info.alloc_phys, kni->alloc_q); - KNI_PRINT("free_phys: 0x%016llx, free_q addr: 0x%p\n", + pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n", (unsigned long long) dev_info.free_phys, kni->free_q); - KNI_PRINT("req_phys: 0x%016llx, req_q addr: 0x%p\n", + pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n", (unsigned long long) dev_info.req_phys, kni->req_q); - KNI_PRINT("resp_phys: 0x%016llx, resp_q addr: 0x%p\n", + pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n", (unsigned long long) 
dev_info.resp_phys, kni->resp_q); - KNI_PRINT("mbuf_phys: 0x%016llx, mbuf_kva: 0x%p\n", - (unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva); - KNI_PRINT("mbuf_va: 0x%p\n", dev_info.mbuf_va); - KNI_PRINT("mbuf_size: %u\n", kni->mbuf_size); + pr_debug("mbuf_size: %u\n", kni->mbuf_size); - KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n", + pr_debug("PCI: %02x:%02x.%02x %04x:%04x\n", dev_info.bus, dev_info.devid, dev_info.function, dev_info.vendor_id, dev_info.device_id); +#ifdef CONFIG_RTE_KNI_KMOD_ETHTOOL + struct pci_dev *found_pci = NULL; + struct net_device *lad_dev = NULL; + struct pci_dev *pci = NULL; + pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL); /* Support Ethtool */ while (pci) { - KNI_PRINT("pci_bus: %02x:%02x:%02x \n", + pr_debug("pci_bus: %02x:%02x:%02x\n", pci->bus->number, PCI_SLOT(pci->devfn), PCI_FUNC(pci->devfn)); @@ -510,28 +442,21 @@ kni_ioctl_create(struct net *net, (PCI_SLOT(pci->devfn) == dev_info.devid) && (PCI_FUNC(pci->devfn) == dev_info.function)) { found_pci = pci; - switch (dev_info.device_id) { - #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev): - #include <rte_pci_dev_ids.h> - ret = igb_kni_probe(found_pci, &lad_dev); - break; - #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \ - case (dev): - #include <rte_pci_dev_ids.h> + + if (pci_match_id(ixgbe_pci_tbl, found_pci)) ret = ixgbe_kni_probe(found_pci, &lad_dev); - break; - default: + else if (pci_match_id(igb_pci_tbl, found_pci)) + ret = igb_kni_probe(found_pci, &lad_dev); + else ret = -1; - break; - } - KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n", + pr_debug("PCI found: pci=0x%p, lad_dev=0x%p\n", pci, lad_dev); if (ret == 0) { kni->lad_dev = lad_dev; kni_set_ethtool_ops(kni->net_dev); } else { - KNI_ERR("Device not supported by ethtool"); + pr_err("Device not supported by ethtool"); kni->lad_dev = NULL; } @@ -544,9 +469,10 @@ kni_ioctl_create(struct net *net, } if (pci) pci_dev_put(pci); +#endif if (kni->lad_dev) - memcpy(net_dev->dev_addr, 
kni->lad_dev->dev_addr, ETH_ALEN); + ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr); else /* * Generate random mac address. eth_random_addr() is the newer @@ -556,9 +482,11 @@ kni_ioctl_create(struct net *net, ret = register_netdev(net_dev); if (ret) { - KNI_ERR("error %i registering device \"%s\"\n", + pr_err("error %i registering device \"%s\"\n", ret, dev_info.name); + kni->net_dev = NULL; kni_dev_remove(kni); + free_netdev(net_dev); return -ENODEV; } @@ -566,22 +494,9 @@ kni_ioctl_create(struct net *net, kni_vhost_init(kni); #endif - /** - * Create a new kernel thread for multiple mode, set its core affinity, - * and finally wake it up. - */ - if (multiple_kthread_on) { - kni->pthread = kthread_create(kni_thread_multiple, - (void *)kni, - "kni_%s", kni->name); - if (IS_ERR(kni->pthread)) { - kni_dev_remove(kni); - return -ECANCELED; - } - if (dev_info.force_bind) - kthread_bind(kni->pthread, kni->core_id); - wake_up_process(kni->pthread); - } + ret = kni_run_thread(knet, kni, dev_info.force_bind); + if (ret != 0) + return ret; down_write(&knet->kni_list_lock); list_add(&kni->list, &knet->kni_list_head); @@ -591,8 +506,8 @@ kni_ioctl_create(struct net *net, } static int -kni_ioctl_release(struct net *net, - unsigned int ioctl_num, unsigned long ioctl_param) +kni_ioctl_release(struct net *net, uint32_t ioctl_num, + unsigned long ioctl_param) { struct kni_net *knet = net_generic(net, kni_net_id); int ret = -EINVAL; @@ -600,11 +515,11 @@ kni_ioctl_release(struct net *net, struct rte_kni_device_info dev_info; if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) - return -EINVAL; + return -EINVAL; ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); if (ret) { - KNI_ERR("copy_from_user in kni_ioctl_release"); + pr_err("copy_from_user in kni_ioctl_release"); return -EIO; } @@ -631,21 +546,19 @@ kni_ioctl_release(struct net *net, break; } up_write(&knet->kni_list_lock); - printk(KERN_INFO "KNI: %s release kni named %s\n", + pr_info("%s release 
kni named %s\n", (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name); return ret; } static int -kni_ioctl(struct inode *inode, - unsigned int ioctl_num, - unsigned long ioctl_param) +kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param) { int ret = -EINVAL; struct net *net = current->nsproxy->net_ns; - KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); + pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); /* * Switch according to the ioctl called @@ -661,7 +574,7 @@ kni_ioctl(struct inode *inode, ret = kni_ioctl_release(net, ioctl_num, ioctl_param); break; default: - KNI_DBG("IOCTL default\n"); + pr_debug("IOCTL default\n"); break; } @@ -669,16 +582,99 @@ kni_ioctl(struct inode *inode, } static int -kni_compat_ioctl(struct inode *inode, - unsigned int ioctl_num, +kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param) { /* 32 bits app on 64 bits OS to be supported later */ - KNI_PRINT("Not implemented.\n"); + pr_debug("Not implemented.\n"); return -EINVAL; } +static const struct file_operations kni_fops = { + .owner = THIS_MODULE, + .open = kni_open, + .release = kni_release, + .unlocked_ioctl = (void *)kni_ioctl, + .compat_ioctl = (void *)kni_compat_ioctl, +}; + +static struct miscdevice kni_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = KNI_DEVICE, + .fops = &kni_fops, +}; + +static int __init +kni_parse_kthread_mode(void) +{ + if (!kthread_mode) + return 0; + + if (strcmp(kthread_mode, "single") == 0) + return 0; + else if (strcmp(kthread_mode, "multiple") == 0) + multiple_kthread_on = 1; + else + return -1; + + return 0; +} + +static int __init +kni_init(void) +{ + int rc; + + if (kni_parse_kthread_mode() < 0) { + pr_err("Invalid parameter for kthread_mode\n"); + return -EINVAL; + } + + if (multiple_kthread_on == 0) + pr_debug("Single kernel thread for all KNI devices\n"); + else + pr_debug("Multiple kernel thread mode enabled\n"); + +#ifdef 
HAVE_SIMPLIFIED_PERNET_OPERATIONS + rc = register_pernet_subsys(&kni_net_ops); +#else + rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); +#endif + if (rc) + return -EPERM; + + rc = misc_register(&kni_misc); + if (rc != 0) { + pr_err("Misc registration failed\n"); + goto out; + } + + /* Configure the lo mode according to the input parameter */ + kni_net_config_lo_mode(lo_mode); + + return 0; + +out: +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS + unregister_pernet_subsys(&kni_net_ops); +#else + unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); +#endif + return rc; +} + +static void __exit +kni_exit(void) +{ + misc_deregister(&kni_misc); +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS + unregister_pernet_subsys(&kni_net_ops); +#else + unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); +#endif +} + module_init(kni_init); module_exit(kni_exit); diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_net.c b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_net.c index fc82193a..4ac99cfe 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_net.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_net.c @@ -44,23 +44,103 @@ #define WD_TIMEOUT 5 /*jiffies */ -#define MBUF_BURST_SZ 32 - #define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */ /* typedef for rx function */ typedef void (*kni_net_rx_t)(struct kni_dev *kni); -static int kni_net_tx(struct sk_buff *skb, struct net_device *dev); static void kni_net_rx_normal(struct kni_dev *kni); -static void kni_net_rx_lo_fifo(struct kni_dev *kni); -static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni); -static int kni_net_process_request(struct kni_dev *kni, - struct rte_kni_request *req); /* kni rx function pointer, with default to normal rx */ static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal; +/* physical address to kernel virtual address */ +static void * +pa2kva(void *pa) +{ + return phys_to_virt((unsigned long)pa); +} + +/* physical address to virtual address */ +static void * +pa2va(void *pa, struct rte_kni_mbuf *m) 
+{ + void *va; + + va = (void *)((unsigned long)pa + + (unsigned long)m->buf_addr - + (unsigned long)m->buf_physaddr); + return va; +} + +/* mbuf data kernel virtual address from mbuf kernel virtual address */ +static void * +kva2data_kva(struct rte_kni_mbuf *m) +{ + return phys_to_virt(m->buf_physaddr + m->data_off); +} + +/* virtual address to physical address */ +static void * +va2pa(void *va, struct rte_kni_mbuf *m) +{ + void *pa; + + pa = (void *)((unsigned long)va - + ((unsigned long)m->buf_addr - + (unsigned long)m->buf_physaddr)); + return pa; +} + +/* + * It can be called to process the request. + */ +static int +kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) +{ + int ret = -1; + void *resp_va; + uint32_t num; + int ret_val; + + if (!kni || !req) { + pr_err("No kni instance or request\n"); + return -EINVAL; + } + + mutex_lock(&kni->sync_lock); + + /* Construct data */ + memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request)); + num = kni_fifo_put(kni->req_q, &kni->sync_va, 1); + if (num < 1) { + pr_err("Cannot send to req_q\n"); + ret = -EBUSY; + goto fail; + } + + ret_val = wait_event_interruptible_timeout(kni->wq, + kni_fifo_count(kni->resp_q), 3 * HZ); + if (signal_pending(current) || ret_val <= 0) { + ret = -ETIME; + goto fail; + } + num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1); + if (num != 1 || resp_va != kni->sync_va) { + /* This should never happen */ + pr_err("No data in resp_q\n"); + ret = -ENODATA; + goto fail; + } + + memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request)); + ret = 0; + +fail: + mutex_unlock(&kni->sync_lock); + return ret; +} + /* * Open and close */ @@ -116,18 +196,112 @@ kni_net_config(struct net_device *dev, struct ifmap *map) } /* + * Transmit a packet (called by the kernel) + */ +#ifdef RTE_KNI_VHOST +static int +kni_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct kni_dev *kni = netdev_priv(dev); + + dev_kfree_skb(skb); + kni->stats.tx_dropped++; + + return 
NETDEV_TX_OK; +} +#else +static int +kni_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + int len = 0; + uint32_t ret; + struct kni_dev *kni = netdev_priv(dev); + struct rte_kni_mbuf *pkt_kva = NULL; + void *pkt_pa = NULL; + void *pkt_va = NULL; + + /* save the timestamp */ +#ifdef HAVE_TRANS_START_HELPER + netif_trans_update(dev); +#else + dev->trans_start = jiffies; +#endif + + /* Check if the length of skb is less than mbuf size */ + if (skb->len > kni->mbuf_size) + goto drop; + + /** + * Check if it has at least one free entry in tx_q and + * one entry in alloc_q. + */ + if (kni_fifo_free_count(kni->tx_q) == 0 || + kni_fifo_count(kni->alloc_q) == 0) { + /** + * If no free entry in tx_q or no entry in alloc_q, + * drops skb and goes out. + */ + goto drop; + } + + /* dequeue a mbuf from alloc_q */ + ret = kni_fifo_get(kni->alloc_q, &pkt_pa, 1); + if (likely(ret == 1)) { + void *data_kva; + + pkt_kva = pa2kva(pkt_pa); + data_kva = kva2data_kva(pkt_kva); + pkt_va = pa2va(pkt_pa, pkt_kva); + + len = skb->len; + memcpy(data_kva, skb->data, len); + if (unlikely(len < ETH_ZLEN)) { + memset(data_kva + len, 0, ETH_ZLEN - len); + len = ETH_ZLEN; + } + pkt_kva->pkt_len = len; + pkt_kva->data_len = len; + + /* enqueue mbuf into tx_q */ + ret = kni_fifo_put(kni->tx_q, &pkt_va, 1); + if (unlikely(ret != 1)) { + /* Failing should not happen */ + pr_err("Fail to enqueue mbuf into tx_q\n"); + goto drop; + } + } else { + /* Failing should not happen */ + pr_err("Fail to dequeue mbuf from alloc_q\n"); + goto drop; + } + + /* Free skb and update statistics */ + dev_kfree_skb(skb); + kni->stats.tx_bytes += len; + kni->stats.tx_packets++; + + return NETDEV_TX_OK; + +drop: + /* Free skb and update statistics */ + dev_kfree_skb(skb); + kni->stats.tx_dropped++; + + return NETDEV_TX_OK; +} +#endif + +/* * RX: normal working mode */ static void kni_net_rx_normal(struct kni_dev *kni) { - unsigned ret; + uint32_t ret; uint32_t len; - unsigned i, num_rx, num_fq; + uint32_t i, num_rx, 
num_fq; struct rte_kni_mbuf *kva; - struct rte_kni_mbuf *va[MBUF_BURST_SZ]; - void * data_kva; - + void *data_kva; struct sk_buff *skb; struct net_device *dev = kni->net_dev; @@ -139,24 +313,22 @@ kni_net_rx_normal(struct kni_dev *kni) } /* Calculate the number of entries to dequeue from rx_q */ - num_rx = min(num_fq, (unsigned)MBUF_BURST_SZ); + num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ); /* Burst dequeue from rx_q */ - num_rx = kni_fifo_get(kni->rx_q, (void **)va, num_rx); + num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx); if (num_rx == 0) return; /* Transfer received packets to netif */ for (i = 0; i < num_rx; i++) { - kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(kni->pa[i]); len = kva->pkt_len; - - data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va - + kni->mbuf_kva; + data_kva = kva2data_kva(kva); + kni->va[i] = pa2va(kni->pa[i], kva); skb = dev_alloc_skb(len + 2); if (!skb) { - KNI_ERR("Out of mem, dropping pkts\n"); /* Update statistics */ kni->stats.rx_dropped++; continue; @@ -178,9 +350,8 @@ kni_net_rx_normal(struct kni_dev *kni) if (!kva->next) break; - kva = kva->next - kni->mbuf_va + kni->mbuf_kva; - data_kva = kva->buf_addr + kva->data_off - - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(va2pa(kva->next, kva)); + data_kva = kva2data_kva(kva); } } @@ -197,10 +368,10 @@ kni_net_rx_normal(struct kni_dev *kni) } /* Burst enqueue mbufs into free_q */ - ret = kni_fifo_put(kni->free_q, (void **)va, num_rx); + ret = kni_fifo_put(kni->free_q, kni->va, num_rx); if (ret != num_rx) /* Failing should not happen */ - KNI_ERR("Fail to enqueue entries into free_q\n"); + pr_err("Fail to enqueue entries into free_q\n"); } /* @@ -209,15 +380,12 @@ kni_net_rx_normal(struct kni_dev *kni) static void kni_net_rx_lo_fifo(struct kni_dev *kni) { - unsigned ret; + uint32_t ret; uint32_t len; - unsigned i, num, num_rq, num_tq, num_aq, num_fq; + uint32_t i, num, num_rq, num_tq, num_aq, num_fq; struct rte_kni_mbuf *kva; - struct rte_kni_mbuf 
*va[MBUF_BURST_SZ]; - void * data_kva; - + void *data_kva; struct rte_kni_mbuf *alloc_kva; - struct rte_kni_mbuf *alloc_va[MBUF_BURST_SZ]; void *alloc_data_kva; /* Get the number of entries in rx_q */ @@ -236,33 +404,32 @@ kni_net_rx_lo_fifo(struct kni_dev *kni) num = min(num_rq, num_tq); num = min(num, num_aq); num = min(num, num_fq); - num = min(num, (unsigned)MBUF_BURST_SZ); + num = min_t(uint32_t, num, MBUF_BURST_SZ); /* Return if no entry to dequeue from rx_q */ if (num == 0) return; /* Burst dequeue from rx_q */ - ret = kni_fifo_get(kni->rx_q, (void **)va, num); + ret = kni_fifo_get(kni->rx_q, kni->pa, num); if (ret == 0) return; /* Failing should not happen */ /* Dequeue entries from alloc_q */ - ret = kni_fifo_get(kni->alloc_q, (void **)alloc_va, num); + ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num); if (ret) { num = ret; /* Copy mbufs */ for (i = 0; i < num; i++) { - kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(kni->pa[i]); len = kva->pkt_len; - data_kva = kva->buf_addr + kva->data_off - - kni->mbuf_va + kni->mbuf_kva; - - alloc_kva = (void *)alloc_va[i] - kni->mbuf_va + - kni->mbuf_kva; - alloc_data_kva = alloc_kva->buf_addr + - alloc_kva->data_off - kni->mbuf_va + - kni->mbuf_kva; + data_kva = kva2data_kva(kva); + kni->va[i] = pa2va(kni->pa[i], kva); + + alloc_kva = pa2kva(kni->alloc_pa[i]); + alloc_data_kva = kva2data_kva(alloc_kva); + kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva); + memcpy(alloc_data_kva, data_kva, len); alloc_kva->pkt_len = len; alloc_kva->data_len = len; @@ -272,17 +439,17 @@ kni_net_rx_lo_fifo(struct kni_dev *kni) } /* Burst enqueue mbufs into tx_q */ - ret = kni_fifo_put(kni->tx_q, (void **)alloc_va, num); + ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num); if (ret != num) /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbufs into tx_q\n"); + pr_err("Fail to enqueue mbufs into tx_q\n"); } /* Burst enqueue mbufs into free_q */ - ret = kni_fifo_put(kni->free_q, (void **)va, num); + ret 
= kni_fifo_put(kni->free_q, kni->va, num); if (ret != num) /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbufs into free_q\n"); + pr_err("Fail to enqueue mbufs into free_q\n"); /** * Update statistic, and enqueue/dequeue failure is impossible, @@ -298,13 +465,11 @@ kni_net_rx_lo_fifo(struct kni_dev *kni) static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni) { - unsigned ret; + uint32_t ret; uint32_t len; - unsigned i, num_rq, num_fq, num; + uint32_t i, num_rq, num_fq, num; struct rte_kni_mbuf *kva; - struct rte_kni_mbuf *va[MBUF_BURST_SZ]; - void * data_kva; - + void *data_kva; struct sk_buff *skb; struct net_device *dev = kni->net_dev; @@ -316,28 +481,26 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) /* Calculate the number of entries to dequeue from rx_q */ num = min(num_rq, num_fq); - num = min(num, (unsigned)MBUF_BURST_SZ); + num = min_t(uint32_t, num, MBUF_BURST_SZ); /* Return if no entry to dequeue from rx_q */ if (num == 0) return; /* Burst dequeue mbufs from rx_q */ - ret = kni_fifo_get(kni->rx_q, (void **)va, num); + ret = kni_fifo_get(kni->rx_q, kni->pa, num); if (ret == 0) return; /* Copy mbufs to sk buffer and then call tx interface */ for (i = 0; i < num; i++) { - kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(kni->pa[i]); len = kva->pkt_len; - data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + - kni->mbuf_kva; + data_kva = kva2data_kva(kva); + kni->va[i] = pa2va(kni->pa[i], kva); skb = dev_alloc_skb(len + 2); - if (skb == NULL) - KNI_ERR("Out of mem, dropping pkts\n"); - else { + if (skb) { /* Align IP on 16B boundary */ skb_reserve(skb, 2); memcpy(skb_put(skb, len), data_kva, len); @@ -349,7 +512,6 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) /* Simulate real usage, allocate/copy skb twice */ skb = dev_alloc_skb(len + 2); if (skb == NULL) { - KNI_ERR("Out of mem, dropping pkts\n"); kni->stats.rx_dropped++; continue; } @@ -370,9 +532,8 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) if (!kva->next) break; - 
kva = kva->next - kni->mbuf_va + kni->mbuf_kva; - data_kva = kva->buf_addr + kva->data_off - - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(va2pa(kva->next, kva)); + data_kva = kva2data_kva(kva); } } @@ -387,10 +548,10 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) } /* enqueue all the mbufs from rx_q into free_q */ - ret = kni_fifo_put(kni->free_q, (void **)&va, num); + ret = kni_fifo_put(kni->free_q, kni->va, num); if (ret != num) /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbufs into free_q\n"); + pr_err("Fail to enqueue mbufs into free_q\n"); } /* rx interface */ @@ -405,114 +566,18 @@ kni_net_rx(struct kni_dev *kni) } /* - * Transmit a packet (called by the kernel) - */ -#ifdef RTE_KNI_VHOST -static int -kni_net_tx(struct sk_buff *skb, struct net_device *dev) -{ - struct kni_dev *kni = netdev_priv(dev); - - dev_kfree_skb(skb); - kni->stats.tx_dropped++; - - return NETDEV_TX_OK; -} -#else -static int -kni_net_tx(struct sk_buff *skb, struct net_device *dev) -{ - int len = 0; - unsigned ret; - struct kni_dev *kni = netdev_priv(dev); - struct rte_kni_mbuf *pkt_kva = NULL; - struct rte_kni_mbuf *pkt_va = NULL; - - /* save the timestamp */ -#ifdef HAVE_TRANS_START_HELPER - netif_trans_update(dev); -#else - dev->trans_start = jiffies; -#endif - - /* Check if the length of skb is less than mbuf size */ - if (skb->len > kni->mbuf_size) - goto drop; - - /** - * Check if it has at least one free entry in tx_q and - * one entry in alloc_q. - */ - if (kni_fifo_free_count(kni->tx_q) == 0 || - kni_fifo_count(kni->alloc_q) == 0) { - /** - * If no free entry in tx_q or no entry in alloc_q, - * drops skb and goes out. 
- */ - goto drop; - } - - /* dequeue a mbuf from alloc_q */ - ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1); - if (likely(ret == 1)) { - void *data_kva; - - pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva; - data_kva = pkt_kva->buf_addr + pkt_kva->data_off - kni->mbuf_va - + kni->mbuf_kva; - - len = skb->len; - memcpy(data_kva, skb->data, len); - if (unlikely(len < ETH_ZLEN)) { - memset(data_kva + len, 0, ETH_ZLEN - len); - len = ETH_ZLEN; - } - pkt_kva->pkt_len = len; - pkt_kva->data_len = len; - - /* enqueue mbuf into tx_q */ - ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1); - if (unlikely(ret != 1)) { - /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbuf into tx_q\n"); - goto drop; - } - } else { - /* Failing should not happen */ - KNI_ERR("Fail to dequeue mbuf from alloc_q\n"); - goto drop; - } - - /* Free skb and update statistics */ - dev_kfree_skb(skb); - kni->stats.tx_bytes += len; - kni->stats.tx_packets++; - - return NETDEV_TX_OK; - -drop: - /* Free skb and update statistics */ - dev_kfree_skb(skb); - kni->stats.tx_dropped++; - - return NETDEV_TX_OK; -} -#endif - -/* * Deal with a transmit timeout. 
*/ static void -kni_net_tx_timeout (struct net_device *dev) +kni_net_tx_timeout(struct net_device *dev) { struct kni_dev *kni = netdev_priv(dev); - KNI_DBG("Transmit timeout at %ld, latency %ld\n", jiffies, - jiffies - dev->trans_start); + pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies, + jiffies - dev_trans_start(dev)); kni->stats.tx_errors++; netif_wake_queue(dev); - return; } /* @@ -521,8 +586,8 @@ kni_net_tx_timeout (struct net_device *dev) static int kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - KNI_DBG("kni_net_ioctl %d\n", - ((struct kni_dev *)netdev_priv(dev))->group_id); + pr_debug("kni_net_ioctl group:%d cmd:%d\n", + ((struct kni_dev *)netdev_priv(dev))->group_id, cmd); return 0; } @@ -539,7 +604,7 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu) struct rte_kni_request req; struct kni_dev *kni = netdev_priv(dev); - KNI_DBG("kni_net_change_mtu new mtu %d to be set\n", new_mtu); + pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu); memset(&req, 0, sizeof(req)); req.req_id = RTE_KNI_REQ_CHANGE_MTU; @@ -562,61 +627,13 @@ kni_net_poll_resp(struct kni_dev *kni) } /* - * It can be called to process the request. 
- */ -static int -kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) -{ - int ret = -1; - void *resp_va; - unsigned num; - int ret_val; - - if (!kni || !req) { - KNI_ERR("No kni instance or request\n"); - return -EINVAL; - } - - mutex_lock(&kni->sync_lock); - - /* Construct data */ - memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request)); - num = kni_fifo_put(kni->req_q, &kni->sync_va, 1); - if (num < 1) { - KNI_ERR("Cannot send to req_q\n"); - ret = -EBUSY; - goto fail; - } - - ret_val = wait_event_interruptible_timeout(kni->wq, - kni_fifo_count(kni->resp_q), 3 * HZ); - if (signal_pending(current) || ret_val <= 0) { - ret = -ETIME; - goto fail; - } - num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1); - if (num != 1 || resp_va != kni->sync_va) { - /* This should never happen */ - KNI_ERR("No data in resp_q\n"); - ret = -ENODATA; - goto fail; - } - - memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request)); - ret = 0; - -fail: - mutex_unlock(&kni->sync_lock); - return ret; -} - -/* * Return statistics to the caller */ static struct net_device_stats * kni_net_stats(struct net_device *dev) { struct kni_dev *kni = netdev_priv(dev); + return &kni->stats; } @@ -626,7 +643,7 @@ kni_net_stats(struct net_device *dev) static int kni_net_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, - const void *saddr, unsigned int len) + const void *saddr, uint32_t len) { struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); @@ -637,7 +654,6 @@ kni_net_header(struct sk_buff *skb, struct net_device *dev, return dev->hard_header_len; } - /* * Re-fill the eth header */ @@ -662,9 +678,11 @@ kni_net_rebuild_header(struct sk_buff *skb) * * Returns 0 on success, negative on failure **/ -static int kni_net_set_mac(struct net_device *netdev, void *p) +static int +kni_net_set_mac(struct net_device *netdev, void *p) { struct sockaddr *addr = p; + if (!is_valid_ether_addr((unsigned char *)(addr->sa_data))) return 
-EADDRNOTAVAIL; memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); @@ -672,7 +690,8 @@ static int kni_net_set_mac(struct net_device *netdev, void *p) } #ifdef HAVE_CHANGE_CARRIER_CB -static int kni_net_change_carrier(struct net_device *dev, bool new_carrier) +static int +kni_net_change_carrier(struct net_device *dev, bool new_carrier) { if (new_carrier) netif_carrier_on(dev); @@ -711,8 +730,6 @@ kni_net_init(struct net_device *dev) { struct kni_dev *kni = netdev_priv(dev); - KNI_DBG("kni_net_init\n"); - init_waitqueue_head(&kni->wq); mutex_init(&kni->sync_lock); @@ -726,18 +743,18 @@ void kni_net_config_lo_mode(char *lo_str) { if (!lo_str) { - KNI_PRINT("loopback disabled"); + pr_debug("loopback disabled"); return; } if (!strcmp(lo_str, "lo_mode_none")) - KNI_PRINT("loopback disabled"); + pr_debug("loopback disabled"); else if (!strcmp(lo_str, "lo_mode_fifo")) { - KNI_PRINT("loopback mode=lo_mode_fifo enabled"); + pr_debug("loopback mode=lo_mode_fifo enabled"); kni_net_rx_func = kni_net_rx_lo_fifo; } else if (!strcmp(lo_str, "lo_mode_fifo_skb")) { - KNI_PRINT("loopback mode=lo_mode_fifo_skb enabled"); + pr_debug("loopback mode=lo_mode_fifo_skb enabled"); kni_net_rx_func = kni_net_rx_lo_fifo_skb; } else - KNI_PRINT("Incognizant parameter, loopback disabled"); + pr_debug("Incognizant parameter, loopback disabled"); } diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_vhost.c b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_vhost.c index a3ca8499..f54c34b1 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_vhost.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_vhost.c @@ -32,6 +32,7 @@ #include <linux/sched.h> #include <linux/if_tun.h> #include <linux/version.h> +#include <linux/file.h> #include "compat.h" #include "kni_dev.h" @@ -39,21 +40,12 @@ #define RX_BURST_SZ 4 -extern void put_unused_fd(unsigned int fd); - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0) -extern struct file* -sock_alloc_file(struct socket *sock, - int flags, const char *dname); - 
-extern int get_unused_fd_flags(unsigned flags); - -extern void fd_install(unsigned int fd, struct file *file); - +#ifdef HAVE_STATIC_SOCK_MAP_FD static int kni_sock_map_fd(struct socket *sock) { struct file *file; int fd = get_unused_fd_flags(0); + if (fd < 0) return fd; @@ -65,8 +57,6 @@ static int kni_sock_map_fd(struct socket *sock) fd_install(fd, file); return fd; } -#else -#define kni_sock_map_fd(s) sock_map_fd(s, 0) #endif static struct proto kni_raw_proto = { @@ -77,13 +67,13 @@ static struct proto kni_raw_proto = { static inline int kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, - unsigned offset, unsigned len) + uint32_t offset, uint32_t len) { struct rte_kni_mbuf *pkt_kva = NULL; struct rte_kni_mbuf *pkt_va = NULL; int ret; - KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n", + pr_debug("tx offset=%d, len=%d, iovlen=%d\n", #ifdef HAVE_IOV_ITER_MSGHDR offset, len, (int)m->msg_iter.iov->iov_len); #else @@ -110,7 +100,7 @@ kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva; data_kva = pkt_kva->buf_addr + pkt_kva->data_off - - kni->mbuf_va + kni->mbuf_kva; + - kni->mbuf_va + kni->mbuf_kva; #ifdef HAVE_IOV_ITER_MSGHDR copy_from_iter(data_kva, len, &m->msg_iter); @@ -129,12 +119,12 @@ kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1); if (unlikely(ret != 1)) { /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbuf into tx_q\n"); + pr_err("Fail to enqueue mbuf into tx_q\n"); goto drop; } } else { /* Failing should not happen */ - KNI_ERR("Fail to dequeue mbuf from alloc_q\n"); + pr_err("Fail to dequeue mbuf from alloc_q\n"); goto drop; } @@ -153,12 +143,12 @@ drop: static inline int kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, - unsigned offset, unsigned len) + uint32_t offset, uint32_t len) { uint32_t pkt_len; struct rte_kni_mbuf *kva; struct rte_kni_mbuf *va; - void * data_kva; + void *data_kva; struct sk_buff *skb; 
struct kni_vhost_queue *q = kni->vhost_queue; @@ -173,19 +163,19 @@ kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, if (unlikely(skb == NULL)) return 0; - kva = (struct rte_kni_mbuf*)skb->data; + kva = (struct rte_kni_mbuf *)skb->data; /* free skb to cache */ skb->data = NULL; - if (unlikely(1 != kni_fifo_put(q->fifo, (void **)&skb, 1))) + if (unlikely(kni_fifo_put(q->fifo, (void **)&skb, 1) != 1)) /* Failing should not happen */ - KNI_ERR("Fail to enqueue entries into rx cache fifo\n"); + pr_err("Fail to enqueue entries into rx cache fifo\n"); pkt_len = kva->data_len; if (unlikely(pkt_len > len)) goto drop; - KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n", + pr_debug("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n", #ifdef HAVE_IOV_ITER_MSGHDR offset, len, pkt_len, (int)m->msg_iter.iov->iov_len); #else @@ -205,12 +195,12 @@ kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, kni->stats.rx_packets++; /* enqueue mbufs into free_q */ - va = (void*)kva - kni->mbuf_kva + kni->mbuf_va; - if (unlikely(1 != kni_fifo_put(kni->free_q, (void **)&va, 1))) + va = (void *)kva - kni->mbuf_kva + kni->mbuf_va; + if (unlikely(kni_fifo_put(kni->free_q, (void **)&va, 1) != 1)) /* Failing should not happen */ - KNI_ERR("Fail to enqueue entries into free_q\n"); + pr_err("Fail to enqueue entries into free_q\n"); - KNI_DBG_RX("receive done %d\n", pkt_len); + pr_debug("receive done %d\n", pkt_len); return pkt_len; @@ -221,29 +211,25 @@ drop: return 0; } -static unsigned int -kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait) +static uint32_t +kni_sock_poll(struct file *file, struct socket *sock, poll_table *wait) { struct kni_vhost_queue *q = container_of(sock->sk, struct kni_vhost_queue, sk); struct kni_dev *kni; - unsigned int mask = 0; + uint32_t mask = 0; if (unlikely(q == NULL || q->kni == NULL)) return POLLERR; kni = q->kni; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) - KNI_DBG("start kni_poll on group %d, wq 0x%16llx\n", +#ifdef 
HAVE_SOCKET_WQ + pr_debug("start kni_poll on group %d, wq 0x%16llx\n", kni->group_id, (uint64_t)sock->wq); -#else - KNI_DBG("start kni_poll on group %d, wait at 0x%16llx\n", - kni->group_id, (uint64_t)&sock->wait); -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) poll_wait(file, &sock->wq->wait, wait); #else + pr_debug("start kni_poll on group %d, wait at 0x%16llx\n", + kni->group_id, (uint64_t)&sock->wait); poll_wait(file, &sock->wait, wait); #endif @@ -252,11 +238,12 @@ kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait) if (sock_writeable(&q->sk) || #ifdef SOCKWQ_ASYNC_NOSPACE - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) && + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) && + sock_writeable(&q->sk))) #else - (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) && + (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) && + sock_writeable(&q->sk))) #endif - sock_writeable(&q->sk))) mask |= POLLOUT | POLLWRNORM; return mask; @@ -269,7 +256,7 @@ kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q, struct rte_kni_mbuf *kva; kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva; - (skb)->data = (unsigned char*)kva; + (skb)->data = (unsigned char *)kva; (skb)->len = kva->data_len; skb_queue_tail(&q->sk.sk_receive_queue, skb); } @@ -279,6 +266,7 @@ kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q, struct sk_buff **skb, struct rte_kni_mbuf **va) { int i; + for (i = 0; i < RX_BURST_SZ; skb++, va++, i++) kni_vhost_enqueue(kni, q, *skb, *va); } @@ -287,9 +275,9 @@ int kni_chk_vhost_rx(struct kni_dev *kni) { struct kni_vhost_queue *q = kni->vhost_queue; - unsigned nb_in, nb_mbuf, nb_skb; - const unsigned BURST_MASK = RX_BURST_SZ - 1; - unsigned nb_burst, nb_backlog, i; + uint32_t nb_in, nb_mbuf, nb_skb; + const uint32_t BURST_MASK = RX_BURST_SZ - 1; + uint32_t nb_burst, nb_backlog, i; struct sk_buff *skb[RX_BURST_SZ]; struct rte_kni_mbuf *va[RX_BURST_SZ]; @@ -305,20 +293,18 @@ 
kni_chk_vhost_rx(struct kni_dev *kni) nb_mbuf = kni_fifo_count(kni->rx_q); nb_in = min(nb_mbuf, nb_skb); - nb_in = min(nb_in, (unsigned)RX_BURST_SZ); + nb_in = min_t(uint32_t, nb_in, RX_BURST_SZ); nb_burst = (nb_in & ~BURST_MASK); nb_backlog = (nb_in & BURST_MASK); /* enqueue skb_queue per BURST_SIZE bulk */ - if (0 != nb_burst) { - if (unlikely(RX_BURST_SZ != kni_fifo_get( - kni->rx_q, (void **)&va, - RX_BURST_SZ))) + if (nb_burst != 0) { + if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, RX_BURST_SZ) + != RX_BURST_SZ)) goto except; - if (unlikely(RX_BURST_SZ != kni_fifo_get( - q->fifo, (void **)&skb, - RX_BURST_SZ))) + if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, RX_BURST_SZ) + != RX_BURST_SZ)) goto except; kni_vhost_enqueue_burst(kni, q, skb, va); @@ -326,12 +312,10 @@ kni_chk_vhost_rx(struct kni_dev *kni) /* all leftover, do one by one */ for (i = 0; i < nb_backlog; ++i) { - if (unlikely(1 != kni_fifo_get( - kni->rx_q,(void **)&va, 1))) + if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, 1) != 1)) goto except; - if (unlikely(1 != kni_fifo_get( - q->fifo, (void **)&skb, 1))) + if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, 1) != 1)) goto except; kni_vhost_enqueue(kni, q, *skb, *va); @@ -342,7 +326,7 @@ kni_chk_vhost_rx(struct kni_dev *kni) ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) { wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND); - KNI_DBG_RX("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n", + pr_debug("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n", nb_mbuf, nb_skb, nb_in); } @@ -350,7 +334,7 @@ kni_chk_vhost_rx(struct kni_dev *kni) except: /* Failing should not happen */ - KNI_ERR("Fail to enqueue fifo, it shouldn't happen \n"); + pr_err("Fail to enqueue fifo, it shouldn't happen\n"); BUG_ON(1); return 0; @@ -373,7 +357,7 @@ kni_sock_sndmsg(struct socket *sock, if (unlikely(q == NULL || q->kni == NULL)) return 0; - KNI_DBG_TX("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n", + pr_debug("kni_sndmsg len %ld, flags 
0x%08x, nb_iov %d\n", #ifdef HAVE_IOV_ITER_MSGHDR len, q->flags, (int)m->msg_iter.iov->iov_len); #else @@ -420,13 +404,14 @@ kni_sock_rcvmsg(struct socket *sock, #ifdef RTE_KNI_VHOST_VNET_HDR_EN if (likely(q->flags & IFF_VNET_HDR)) { vnet_hdr_len = q->vnet_hdr_sz; - if ((len -= vnet_hdr_len) < 0) + len -= vnet_hdr_len; + if (len < 0) return -EINVAL; } #endif - if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni, - m, vnet_hdr_len, len)))) + pkt_len = kni_vhost_net_rx(q->kni, m, vnet_hdr_len, len); + if (unlikely(pkt_len == 0)) return 0; #ifdef RTE_KNI_VHOST_VNET_HDR_EN @@ -440,7 +425,7 @@ kni_sock_rcvmsg(struct socket *sock, #endif /* HAVE_IOV_ITER_MSGHDR */ return -EFAULT; #endif /* RTE_KNI_VHOST_VNET_HDR_EN */ - KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n", + pr_debug("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n", (unsigned long)len, q->flags, pkt_len); return pkt_len + vnet_hdr_len; @@ -448,25 +433,24 @@ kni_sock_rcvmsg(struct socket *sock, /* dummy tap like ioctl */ static int -kni_sock_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) +kni_sock_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct ifreq __user *ifr = argp; - unsigned int __user *up = argp; + uint32_t __user *up = argp; struct kni_vhost_queue *q = container_of(sock->sk, struct kni_vhost_queue, sk); struct kni_dev *kni; - unsigned int u; + uint32_t u; int __user *sp = argp; int s; int ret; - KNI_DBG("tap ioctl cmd 0x%08x\n", cmd); + pr_debug("tap ioctl cmd 0x%08x\n", cmd); switch (cmd) { case TUNSETIFF: - KNI_DBG("TUNSETIFF\n"); + pr_debug("TUNSETIFF\n"); /* ignore the name, just look at flags */ if (get_user(u, &ifr->ifr_flags)) return -EFAULT; @@ -480,7 +464,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, return ret; case TUNGETIFF: - KNI_DBG("TUNGETIFF\n"); + pr_debug("TUNGETIFF\n"); rcu_read_lock_bh(); kni = rcu_dereference_bh(q->kni); if (kni) @@ -491,14 +475,14 @@ 
kni_sock_ioctl(struct socket *sock, unsigned int cmd, return -ENOLINK; ret = 0; - if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) || - put_user(q->flags, &ifr->ifr_flags)) + if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) + || put_user(q->flags, &ifr->ifr_flags)) ret = -EFAULT; dev_put(kni->net_dev); return ret; case TUNGETFEATURES: - KNI_DBG("TUNGETFEATURES\n"); + pr_debug("TUNGETFEATURES\n"); u = IFF_TAP | IFF_NO_PI; #ifdef RTE_KNI_VHOST_VNET_HDR_EN u |= IFF_VNET_HDR; @@ -508,7 +492,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, return 0; case TUNSETSNDBUF: - KNI_DBG("TUNSETSNDBUF\n"); + pr_debug("TUNSETSNDBUF\n"); if (get_user(u, up)) return -EFAULT; @@ -519,7 +503,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, s = q->vnet_hdr_sz; if (put_user(s, sp)) return -EFAULT; - KNI_DBG("TUNGETVNETHDRSZ %d\n", s); + pr_debug("TUNGETVNETHDRSZ %d\n", s); return 0; case TUNSETVNETHDRSZ: @@ -528,12 +512,12 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, if (s < (int)sizeof(struct virtio_net_hdr)) return -EINVAL; - KNI_DBG("TUNSETVNETHDRSZ %d\n", s); + pr_debug("TUNSETVNETHDRSZ %d\n", s); q->vnet_hdr_sz = s; return 0; case TUNSETOFFLOAD: - KNI_DBG("TUNSETOFFLOAD %lx\n", arg); + pr_debug("TUNSETOFFLOAD %lx\n", arg); #ifdef RTE_KNI_VHOST_VNET_HDR_EN /* not support any offload yet */ if (!(q->flags & IFF_VNET_HDR)) @@ -545,26 +529,26 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, #endif default: - KNI_DBG("NOT SUPPORT\n"); + pr_debug("NOT SUPPORT\n"); return -EINVAL; } } static int -kni_sock_compat_ioctl(struct socket *sock, unsigned int cmd, +kni_sock_compat_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg) { /* 32 bits app on 64 bits OS to be supported later */ - KNI_PRINT("Not implemented.\n"); + pr_debug("Not implemented.\n"); return -EINVAL; } #define KNI_VHOST_WAIT_WQ_SAFE() \ -do { \ +do { \ while ((BE_FINISH | BE_STOP) == kni->vq_status) \ - msleep(1); \ -}while(0) \ + msleep(1); \ +} while 
(0) \ static int @@ -577,7 +561,8 @@ kni_sock_release(struct socket *sock) if (q == NULL) return 0; - if (NULL != (kni = q->kni)) { + kni = q->kni; + if (kni != NULL) { kni->vq_status = BE_STOP; KNI_VHOST_WAIT_WQ_SAFE(); kni->vhost_queue = NULL; @@ -592,18 +577,17 @@ kni_sock_release(struct socket *sock) sock_put(&q->sk); - KNI_DBG("dummy sock release done\n"); + pr_debug("dummy sock release done\n"); return 0; } int -kni_sock_getname (struct socket *sock, - struct sockaddr *addr, - int *sockaddr_len, int peer) +kni_sock_getname(struct socket *sock, struct sockaddr *addr, + int *sockaddr_len, int peer) { - KNI_DBG("dummy sock getname\n"); - ((struct sockaddr_ll*)addr)->sll_family = AF_PACKET; + pr_debug("dummy sock getname\n"); + ((struct sockaddr_ll *)addr)->sll_family = AF_PACKET; return 0; } @@ -646,7 +630,7 @@ kni_sk_destruct(struct sock *sk) /* make sure there's no packet in buffer */ while (skb_dequeue(&sk->sk_receive_queue) != NULL) - ; + ; mb(); @@ -673,7 +657,7 @@ kni_vhost_backend_init(struct kni_dev *kni) if (kni->vhost_queue != NULL) return -1; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) +#ifdef HAVE_SK_ALLOC_KERN_PARAM q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &kni_raw_proto, 0); #else @@ -694,8 +678,9 @@ kni_vhost_backend_init(struct kni_dev *kni) } /* cache init */ - q->cache = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff), - GFP_KERNEL); + q->cache = kzalloc( + RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff), + GFP_KERNEL); if (!q->cache) goto free_fd; @@ -708,7 +693,7 @@ kni_vhost_backend_init(struct kni_dev *kni) for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) { elem = &q->cache[i]; - kni_fifo_put(fifo, (void**)&elem, 1); + kni_fifo_put(fifo, (void **)&elem, 1); } q->fifo = fifo; @@ -738,14 +723,12 @@ kni_vhost_backend_init(struct kni_dev *kni) kni->vq_status = BE_START; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) - KNI_DBG("backend init sockfd=%d, sock->wq=0x%16llx," - 
"sk->sk_wq=0x%16llx", +#ifdef HAVE_SOCKET_WQ + pr_debug("backend init sockfd=%d, sock->wq=0x%16llx,sk->sk_wq=0x%16llx", q->sockfd, (uint64_t)q->sock->wq, (uint64_t)q->sk.sk_wq); #else - KNI_DBG("backend init sockfd=%d, sock->wait at 0x%16llx," - "sk->sk_sleep=0x%16llx", + pr_debug("backend init sockfd=%d, sock->wait at 0x%16llx,sk->sk_sleep=0x%16llx", q->sockfd, (uint64_t)&q->sock->wait, (uint64_t)q->sk.sk_sleep); #endif @@ -768,7 +751,7 @@ free_sock: q->sock = NULL; free_sk: - sk_free((struct sock*)q); + sk_free((struct sock *)q); return err; } @@ -781,6 +764,7 @@ show_sock_fd(struct device *dev, struct device_attribute *attr, struct net_device *net_dev = container_of(dev, struct net_device, dev); struct kni_dev *kni = netdev_priv(net_dev); int sockfd = -1; + if (kni->vhost_queue != NULL) sockfd = kni->vhost_queue->sockfd; return snprintf(buf, 10, "%d\n", sockfd); @@ -792,6 +776,7 @@ show_sock_en(struct device *dev, struct device_attribute *attr, { struct net_device *net_dev = container_of(dev, struct net_device, dev); struct kni_dev *kni = netdev_priv(net_dev); + return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 
0 : 1)); } @@ -804,7 +789,7 @@ set_sock_en(struct device *dev, struct device_attribute *attr, unsigned long en; int err = 0; - if (0 != kstrtoul(buf, 0, &en)) + if (kstrtoul(buf, 0, &en) != 0) return -EINVAL; if (en) @@ -818,7 +803,7 @@ static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en); static struct attribute *dev_attrs[] = { &dev_attr_sock_fd.attr, &dev_attr_sock_en.attr, - NULL, + NULL, }; static const struct attribute_group dev_attr_grp = { @@ -836,7 +821,7 @@ kni_vhost_backend_release(struct kni_dev *kni) /* dettach from kni */ q->kni = NULL; - KNI_DBG("release backend done\n"); + pr_debug("release backend done\n"); return 0; } @@ -851,7 +836,7 @@ kni_vhost_init(struct kni_dev *kni) kni->vq_status = BE_STOP; - KNI_DBG("kni_vhost_init done\n"); + pr_debug("kni_vhost_init done\n"); return 0; } diff --git a/src/dpdk/lib/librte_ether/rte_dev_info.h b/src/dpdk/lib/librte_ether/rte_dev_info.h index 574683d3..aab6d1a6 100644 --- a/src/dpdk/lib/librte_ether/rte_dev_info.h +++ b/src/dpdk/lib/librte_ether/rte_dev_info.h @@ -34,6 +34,8 @@ #ifndef _RTE_DEV_INFO_H_ #define _RTE_DEV_INFO_H_ +#include <stdint.h> + /* * Placeholder for accessing device registers */ diff --git a/src/dpdk/lib/librte_ether/rte_eth_ctrl.h b/src/dpdk/lib/librte_ether/rte_eth_ctrl.h index 563e80f8..83869042 100644 --- a/src/dpdk/lib/librte_ether/rte_eth_ctrl.h +++ b/src/dpdk/lib/librte_ether/rte_eth_ctrl.h @@ -34,6 +34,10 @@ #ifndef _RTE_ETH_CTRL_H_ #define _RTE_ETH_CTRL_H_ +#include <stdint.h> +#include <rte_common.h> +#include "rte_ether.h" + /** * @file * @@ -95,6 +99,7 @@ enum rte_filter_type { RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_HASH, RTE_ETH_FILTER_L2_TUNNEL, + RTE_ETH_FILTER_GENERIC, RTE_ETH_FILTER_MAX }; @@ -420,8 +425,6 @@ struct rte_eth_l2_flow { struct rte_eth_ipv4_flow { uint32_t src_ip; /**< IPv4 source address in big endian. */ uint32_t dst_ip; /**< IPv4 destination address in big endian. 
*/ - // TREX_PATCH (ip_id) - uint16_t ip_id; /**< IPv4 IP ID to match */ uint8_t tos; /**< Type of service to match. */ uint8_t ttl; /**< Time to live to match. */ uint8_t proto; /**< Protocol, next header in big endian. */ @@ -464,8 +467,6 @@ struct rte_eth_ipv6_flow { uint8_t tc; /**< Traffic class to match. */ uint8_t proto; /**< Protocol, next header to match. */ uint8_t hop_limits; /**< Hop limits to match. */ - // TREX_PATCH (flow_label) - uint32_t flow_label; /**<flow label to match. */ }; /** @@ -594,9 +595,6 @@ struct rte_eth_fdir_action { /**< If report_status is RTE_ETH_FDIR_REPORT_ID_FLEX_4 or RTE_ETH_FDIR_REPORT_FLEX_8, flex_off specifies where the reported flex bytes start from in flexible payload. */ - // TREX_PATCH - // Index for statistics counter that will count FDIR matches. - uint16_t stat_count_index; }; /** diff --git a/src/dpdk/lib/librte_ether/rte_ethdev.c b/src/dpdk/lib/librte_ether/rte_ethdev.c index e7bc9d6d..eb0a94a9 100644 --- a/src/dpdk/lib/librte_ether/rte_ethdev.c +++ b/src/dpdk/lib/librte_ether/rte_ethdev.c @@ -58,7 +58,6 @@ #include <rte_atomic.h> #include <rte_branch_prediction.h> #include <rte_common.h> -#include <rte_ring.h> #include <rte_mempool.h> #include <rte_malloc.h> #include <rte_mbuf.h> @@ -72,6 +71,7 @@ static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data"; struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS]; static struct rte_eth_dev_data *rte_eth_dev_data; +static uint8_t eth_dev_last_created_port; static uint8_t nb_ports; /* spinlock for eth device callbacks */ @@ -189,8 +189,23 @@ rte_eth_dev_find_free_port(void) return RTE_MAX_ETHPORTS; } +static struct rte_eth_dev * +eth_dev_get(uint8_t port_id) +{ + struct rte_eth_dev *eth_dev = &rte_eth_devices[port_id]; + + eth_dev->data = &rte_eth_dev_data[port_id]; + eth_dev->attached = DEV_ATTACHED; + TAILQ_INIT(&(eth_dev->link_intr_cbs)); + + eth_dev_last_created_port = port_id; + nb_ports++; + + return eth_dev; +} + struct rte_eth_dev * -rte_eth_dev_allocate(const 
char *name, enum rte_eth_dev_type type) +rte_eth_dev_allocate(const char *name) { uint8_t port_id; struct rte_eth_dev *eth_dev; @@ -210,28 +225,44 @@ rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type) return NULL; } - eth_dev = &rte_eth_devices[port_id]; - eth_dev->data = &rte_eth_dev_data[port_id]; + memset(&rte_eth_dev_data[port_id], 0, sizeof(struct rte_eth_dev_data)); + eth_dev = eth_dev_get(port_id); snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name); eth_dev->data->port_id = port_id; - eth_dev->attached = DEV_ATTACHED; - eth_dev->dev_type = type; - nb_ports++; + eth_dev->data->mtu = ETHER_MTU; + return eth_dev; } -static int -rte_eth_dev_create_unique_device_name(char *name, size_t size, - struct rte_pci_device *pci_dev) +/* + * Attach to a port already registered by the primary process, which + * makes sure that the same device would have the same port id both + * in the primary and secondary process. + */ +static struct rte_eth_dev * +eth_dev_attach_secondary(const char *name) { - int ret; + uint8_t i; + struct rte_eth_dev *eth_dev; - ret = snprintf(name, size, "%d:%d.%d", - pci_dev->addr.bus, pci_dev->addr.devid, - pci_dev->addr.function); - if (ret < 0) - return ret; - return 0; + if (rte_eth_dev_data == NULL) + rte_eth_dev_data_alloc(); + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (strcmp(rte_eth_dev_data[i].name, name) == 0) + break; + } + if (i == RTE_MAX_ETHPORTS) { + RTE_PMD_DEBUG_TRACE( + "device %s is not driven by the primary process\n", + name); + return NULL; + } + + eth_dev = eth_dev_get(i); + RTE_ASSERT(eth_dev->data->port_id == i); + + return eth_dev; } int @@ -245,9 +276,9 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) return 0; } -static int -rte_eth_dev_init(struct rte_pci_driver *pci_drv, - struct rte_pci_device *pci_dev) +int +rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev) { struct eth_driver *eth_drv; struct rte_eth_dev *eth_dev; @@ -257,40 
+288,43 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv, eth_drv = (struct eth_driver *)pci_drv; - /* Create unique Ethernet device name using PCI address */ - rte_eth_dev_create_unique_device_name(ethdev_name, - sizeof(ethdev_name), pci_dev); - - eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI); - if (eth_dev == NULL) - return -ENOMEM; + rte_eal_pci_device_name(&pci_dev->addr, ethdev_name, + sizeof(ethdev_name)); if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eth_dev = rte_eth_dev_allocate(ethdev_name); + if (eth_dev == NULL) + return -ENOMEM; + eth_dev->data->dev_private = rte_zmalloc("ethdev private structure", eth_drv->dev_private_size, RTE_CACHE_LINE_SIZE); if (eth_dev->data->dev_private == NULL) rte_panic("Cannot allocate memzone for private port data\n"); + } else { + eth_dev = eth_dev_attach_secondary(ethdev_name); + if (eth_dev == NULL) { + /* + * if we failed to attach a device, it means the + * device is skipped in primary process, due to + * some errors. If so, we return a positive value, + * to let EAL skip it for the secondary process + * as well. + */ + return 1; + } } - eth_dev->pci_dev = pci_dev; + eth_dev->device = &pci_dev->device; + eth_dev->intr_handle = &pci_dev->intr_handle; eth_dev->driver = eth_drv; - eth_dev->data->rx_mbuf_alloc_failed = 0; - - /* init user callbacks */ - TAILQ_INIT(&(eth_dev->link_intr_cbs)); - - /* - * Set the default MTU. 
- */ - eth_dev->data->mtu = ETHER_MTU; /* Invoke PMD device initialization function */ diag = (*eth_drv->eth_dev_init)(eth_dev); if (diag == 0) return 0; - RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%u device_id=0x%x) failed\n", - pci_drv->name, + RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%x device_id=0x%x) failed\n", + pci_drv->driver.name, (unsigned) pci_dev->id.vendor_id, (unsigned) pci_dev->id.device_id); if (rte_eal_process_type() == RTE_PROC_PRIMARY) @@ -299,8 +333,8 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv, return diag; } -static int -rte_eth_dev_uninit(struct rte_pci_device *pci_dev) +int +rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev) { const struct eth_driver *eth_drv; struct rte_eth_dev *eth_dev; @@ -310,9 +344,8 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev) if (pci_dev == NULL) return -EINVAL; - /* Create unique Ethernet device name using PCI address */ - rte_eth_dev_create_unique_device_name(ethdev_name, - sizeof(ethdev_name), pci_dev); + rte_eal_pci_device_name(&pci_dev->addr, ethdev_name, + sizeof(ethdev_name)); eth_dev = rte_eth_dev_allocated(ethdev_name); if (eth_dev == NULL) @@ -333,35 +366,13 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev) if (rte_eal_process_type() == RTE_PROC_PRIMARY) rte_free(eth_dev->data->dev_private); - eth_dev->pci_dev = NULL; + eth_dev->device = NULL; eth_dev->driver = NULL; eth_dev->data = NULL; return 0; } -/** - * Register an Ethernet [Poll Mode] driver. - * - * Function invoked by the initialization function of an Ethernet driver - * to simultaneously register itself as a PCI driver and as an Ethernet - * Poll Mode Driver. - * Invokes the rte_eal_pci_register() function to register the *pci_drv* - * structure embedded in the *eth_drv* structure, after having stored the - * address of the rte_eth_dev_init() function in the *devinit* field of - * the *pci_drv* structure. 
- * During the PCI probing phase, the rte_eth_dev_init() function is - * invoked for each PCI [Ethernet device] matching the embedded PCI - * identifiers provided by the driver. - */ -void -rte_eth_driver_register(struct eth_driver *eth_drv) -{ - eth_drv->pci_drv.devinit = rte_eth_dev_init; - eth_drv->pci_drv.devuninit = rte_eth_dev_uninit; - rte_eal_pci_register(&eth_drv->pci_drv); -} - int rte_eth_dev_is_valid_port(uint8_t port_id) { @@ -385,27 +396,6 @@ rte_eth_dev_count(void) return nb_ports; } -static enum rte_eth_dev_type -rte_eth_dev_get_device_type(uint8_t port_id) -{ - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, RTE_ETH_DEV_UNKNOWN); - return rte_eth_devices[port_id].dev_type; -} - -static int -rte_eth_dev_get_addr_by_port(uint8_t port_id, struct rte_pci_addr *addr) -{ - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - if (addr == NULL) { - RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); - return -EINVAL; - } - - *addr = rte_eth_devices[port_id].pci_dev->addr; - return 0; -} - int rte_eth_dev_get_name_by_port(uint8_t port_id, char *name) { @@ -435,6 +425,9 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) return -EINVAL; } + if (!nb_ports) + return -ENODEV; + *port_id = RTE_MAX_ETHPORTS; for (i = 0; i < RTE_MAX_ETHPORTS; i++) { @@ -450,35 +443,6 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) return -ENODEV; } -/* TREX_PATCH removed "static" */ -int -rte_eth_dev_get_port_by_addr(const struct rte_pci_addr *addr, uint8_t *port_id) -{ - int i; - struct rte_pci_device *pci_dev = NULL; - - if (addr == NULL) { - RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); - return -EINVAL; - } - - *port_id = RTE_MAX_ETHPORTS; - - for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - - pci_dev = rte_eth_devices[i].pci_dev; - - if (pci_dev && - !rte_eal_compare_pci_addr(&pci_dev->addr, addr)) { - - *port_id = i; - - return 0; - } - } - return -ENODEV; -} - static int rte_eth_dev_is_detachable(uint8_t port_id) { @@ -504,127 +468,49 @@ 
rte_eth_dev_is_detachable(uint8_t port_id) return 1; } -/* attach the new physical device, then store port_id of the device */ -static int -rte_eth_dev_attach_pdev(struct rte_pci_addr *addr, uint8_t *port_id) +/* attach the new device, then store port_id of the device */ +int +rte_eth_dev_attach(const char *devargs, uint8_t *port_id) { - /* re-construct pci_device_list */ - if (rte_eal_pci_scan()) - goto err; - /* Invoke probe func of the driver can handle the new device. */ - if (rte_eal_pci_probe_one(addr)) - goto err; + int ret = -1; + int current = rte_eth_dev_count(); + char *name = NULL; + char *args = NULL; - if (rte_eth_dev_get_port_by_addr(addr, port_id)) + if ((devargs == NULL) || (port_id == NULL)) { + ret = -EINVAL; goto err; + } - return 0; -err: - return -1; -} - -/* detach the new physical device, then store pci_addr of the device */ -static int -rte_eth_dev_detach_pdev(uint8_t port_id, struct rte_pci_addr *addr) -{ - struct rte_pci_addr freed_addr; - struct rte_pci_addr vp; - - /* get pci address by port id */ - if (rte_eth_dev_get_addr_by_port(port_id, &freed_addr)) + /* parse devargs, then retrieve device name and args */ + if (rte_eal_parse_devargs_str(devargs, &name, &args)) goto err; - /* Zeroed pci addr means the port comes from virtual device */ - vp.domain = vp.bus = vp.devid = vp.function = 0; - if (rte_eal_compare_pci_addr(&vp, &freed_addr) == 0) + ret = rte_eal_dev_attach(name, args); + if (ret < 0) goto err; - /* invoke devuninit func of the pci driver, - * also remove the device from pci_device_list */ - if (rte_eal_pci_detach(&freed_addr)) + /* no point looking at the port count if no port exists */ + if (!rte_eth_dev_count()) { + RTE_LOG(ERR, EAL, "No port found for device (%s)\n", name); + ret = -1; goto err; + } - *addr = freed_addr; - return 0; -err: - return -1; -} - -/* attach the new virtual device, then store port_id of the device */ -static int -rte_eth_dev_attach_vdev(const char *vdevargs, uint8_t *port_id) -{ - char *name = 
NULL, *args = NULL; - int ret = -1; - - /* parse vdevargs, then retrieve device name and args */ - if (rte_eal_parse_devargs_str(vdevargs, &name, &args)) - goto end; - - /* walk around dev_driver_list to find the driver of the device, - * then invoke probe function of the driver. - * rte_eal_vdev_init() updates port_id allocated after - * initialization. + /* if nothing happened, there is a bug here, since some driver told us + * it did attach a device, but did not create a port. */ - if (rte_eal_vdev_init(name, args)) - goto end; - - if (rte_eth_dev_get_port_by_name(name, port_id)) - goto end; - - ret = 0; -end: - free(name); - free(args); - - return ret; -} - -/* detach the new virtual device, then store the name of the device */ -static int -rte_eth_dev_detach_vdev(uint8_t port_id, char *vdevname) -{ - char name[RTE_ETH_NAME_MAX_LEN]; - - /* get device name by port id */ - if (rte_eth_dev_get_name_by_port(port_id, name)) - goto err; - /* walk around dev_driver_list to find the driver of the device, - * then invoke uninit function of the driver */ - if (rte_eal_vdev_uninit(name)) - goto err; - - strncpy(vdevname, name, sizeof(name)); - return 0; -err: - return -1; -} - -/* attach the new device, then store port_id of the device */ -int -rte_eth_dev_attach(const char *devargs, uint8_t *port_id) -{ - struct rte_pci_addr addr; - int ret = -1; - - if ((devargs == NULL) || (port_id == NULL)) { - ret = -EINVAL; + if (current == rte_eth_dev_count()) { + ret = -1; goto err; } - if (eal_parse_pci_DomBDF(devargs, &addr) == 0) { - ret = rte_eth_dev_attach_pdev(&addr, port_id); - if (ret < 0) - goto err; - } else { - ret = rte_eth_dev_attach_vdev(devargs, port_id); - if (ret < 0) - goto err; - } + *port_id = eth_dev_last_created_port; + ret = 0; - return 0; err: - RTE_LOG(ERR, EAL, "Driver, cannot attach the device\n"); + free(name); + free(args); return ret; } @@ -632,7 +518,6 @@ err: int rte_eth_dev_detach(uint8_t port_id, char *name) { - struct rte_pci_addr addr; int ret 
= -1; if (name == NULL) { @@ -640,33 +525,19 @@ rte_eth_dev_detach(uint8_t port_id, char *name) goto err; } - /* check whether the driver supports detach feature, or not */ + /* FIXME: move this to eal, once device flags are relocated there */ if (rte_eth_dev_is_detachable(port_id)) goto err; - if (rte_eth_dev_get_device_type(port_id) == RTE_ETH_DEV_PCI) { - ret = rte_eth_dev_get_addr_by_port(port_id, &addr); - if (ret < 0) - goto err; - - ret = rte_eth_dev_detach_pdev(port_id, &addr); - if (ret < 0) - goto err; - - snprintf(name, RTE_ETH_NAME_MAX_LEN, - "%04x:%02x:%02x.%d", - addr.domain, addr.bus, - addr.devid, addr.function); - } else { - ret = rte_eth_dev_detach_vdev(port_id, name); - if (ret < 0) - goto err; - } + snprintf(name, sizeof(rte_eth_devices[port_id].data->name), + "%s", rte_eth_devices[port_id].data->name); + ret = rte_eal_dev_detach(name); + if (ret < 0) + goto err; return 0; err: - RTE_LOG(ERR, EAL, "Driver, cannot detach the device\n"); return ret; } @@ -712,6 +583,9 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) for (i = nb_queues; i < old_nb_queues; i++) (*dev->dev_ops->rx_queue_release)(rxq[i]); + + rte_free(dev->data->rx_queues); + dev->data->rx_queues = NULL; } dev->data->nb_rx_queues = nb_queues; return 0; @@ -863,6 +737,9 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) for (i = nb_queues; i < old_nb_queues; i++) (*dev->dev_ops->tx_queue_release)(txq[i]); + + rte_free(dev->data->tx_queues); + dev->data->tx_queues = NULL; } dev->data->nb_tx_queues = nb_queues; return 0; @@ -1033,39 +910,61 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, return 0; } +void +_rte_eth_dev_reset(struct rte_eth_dev *dev) +{ + if (dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be stopped to allow reset\n", + dev->data->port_id); + return; + } + + rte_eth_dev_rx_queue_config(dev, 0); + rte_eth_dev_tx_queue_config(dev, 0); + + memset(&dev->data->dev_conf, 0, 
sizeof(dev->data->dev_conf)); +} + static void rte_eth_dev_config_restore(uint8_t port_id) { struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; - struct ether_addr addr; + struct ether_addr *addr; uint16_t i; uint32_t pool = 0; + uint64_t pool_mask; dev = &rte_eth_devices[port_id]; rte_eth_dev_info_get(port_id, &dev_info); - if (RTE_ETH_DEV_SRIOV(dev).active) - pool = RTE_ETH_DEV_SRIOV(dev).def_vmdq_idx; - - /* replay MAC address configuration */ - for (i = 0; i < dev_info.max_mac_addrs; i++) { - addr = dev->data->mac_addrs[i]; - - /* skip zero address */ - if (is_zero_ether_addr(&addr)) - continue; - - /* add address to the hardware */ - if (*dev->dev_ops->mac_addr_add && - (dev->data->mac_pool_sel[i] & (1ULL << pool))) - (*dev->dev_ops->mac_addr_add)(dev, &addr, i, pool); - else { - RTE_PMD_DEBUG_TRACE("port %d: MAC address array not supported\n", - port_id); - /* exit the loop but not return an error */ - break; + /* replay MAC address configuration including default MAC */ + addr = &dev->data->mac_addrs[0]; + if (*dev->dev_ops->mac_addr_set != NULL) + (*dev->dev_ops->mac_addr_set)(dev, addr); + else if (*dev->dev_ops->mac_addr_add != NULL) + (*dev->dev_ops->mac_addr_add)(dev, addr, 0, pool); + + if (*dev->dev_ops->mac_addr_add != NULL) { + for (i = 1; i < dev_info.max_mac_addrs; i++) { + addr = &dev->data->mac_addrs[i]; + + /* skip zero address */ + if (is_zero_ether_addr(addr)) + continue; + + pool = 0; + pool_mask = dev->data->mac_pool_sel[i]; + + do { + if (pool_mask & 1ULL) + (*dev->dev_ops->mac_addr_add)(dev, + addr, i, pool); + pool_mask >>= 1; + pool++; + } while (pool_mask); } } @@ -1191,6 +1090,7 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, uint32_t mbp_buf_size; struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; + void **rxq; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -1249,6 +1149,14 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, return -EINVAL; } + rxq = dev->data->rx_queues; + if 
(rxq[rx_queue_id]) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, + -ENOTSUP); + (*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]); + rxq[rx_queue_id] = NULL; + } + if (rx_conf == NULL) rx_conf = &dev_info.default_rxconf; @@ -1270,6 +1178,7 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, { struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; + void **txq; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -1302,6 +1211,14 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, return -EINVAL; } + txq = dev->data->tx_queues; + if (txq[tx_queue_id]) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, + -ENOTSUP); + (*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]); + txq[tx_queue_id] = NULL; + } + if (tx_conf == NULL) tx_conf = &dev_info.default_txconf; @@ -1480,54 +1397,6 @@ rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link) } } -// TREX_PATCH -// return in stats, statistics starting from start, for len counters. -int -rte_eth_fdir_stats_get(uint8_t port_id, uint32_t *stats, uint32_t start, uint32_t len) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - - // Only xl710 support this - i40e_trex_fdir_stats_get(dev, stats, start, len); - - return 0; -} - -// TREX_PATCH -// zero statistics counters, starting from start, for len counters. 
-int -rte_eth_fdir_stats_reset(uint8_t port_id, uint32_t *stats, uint32_t start, uint32_t len) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - - // Only xl710 support this - i40e_trex_fdir_stats_reset(dev, stats, start, len); - - return 0; -} - -// TREX_PATCH -int -rte_eth_get_fw_ver(int port_id, uint32_t *version) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - - // Only xl710 support this - return i40e_trex_get_fw_ver(dev, version); -} - int rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats) { @@ -1572,8 +1441,10 @@ get_xstats_count(uint8_t port_id) } else count = 0; count += RTE_NB_STATS; - count += dev->data->nb_rx_queues * RTE_NB_RXQ_STATS; - count += dev->data->nb_tx_queues * RTE_NB_TXQ_STATS; + count += RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS) * + RTE_NB_RXQ_STATS; + count += RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS) * + RTE_NB_TXQ_STATS; return count; } @@ -1587,6 +1458,7 @@ rte_eth_xstats_get_names(uint8_t port_id, int cnt_expected_entries; int cnt_driver_entries; uint32_t idx, id_queue; + uint16_t num_q; cnt_expected_entries = get_xstats_count(port_id); if (xstats_names == NULL || cnt_expected_entries < 0 || @@ -1603,7 +1475,8 @@ rte_eth_xstats_get_names(uint8_t port_id, "%s", rte_stats_strings[idx].name); cnt_used_entries++; } - for (id_queue = 0; id_queue < dev->data->nb_rx_queues; id_queue++) { + num_q = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + for (id_queue = 0; id_queue < num_q; id_queue++) { for (idx = 0; idx < RTE_NB_RXQ_STATS; idx++) { snprintf(xstats_names[cnt_used_entries].name, sizeof(xstats_names[0].name), @@ -1613,7 +1486,8 @@ rte_eth_xstats_get_names(uint8_t port_id, } } - for (id_queue = 0; id_queue < dev->data->nb_tx_queues; id_queue++) { + num_q = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + 
for (id_queue = 0; id_queue < num_q; id_queue++) { for (idx = 0; idx < RTE_NB_TXQ_STATS; idx++) { snprintf(xstats_names[cnt_used_entries].name, sizeof(xstats_names[0].name), @@ -1649,14 +1523,18 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, unsigned count = 0, i, q; signed xcount = 0; uint64_t val, *stats_ptr; + uint16_t nb_rxqs, nb_txqs; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); dev = &rte_eth_devices[port_id]; + nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + /* Return generic statistics */ - count = RTE_NB_STATS + (dev->data->nb_rx_queues * RTE_NB_RXQ_STATS) + - (dev->data->nb_tx_queues * RTE_NB_TXQ_STATS); + count = RTE_NB_STATS + (nb_rxqs * RTE_NB_RXQ_STATS) + + (nb_txqs * RTE_NB_TXQ_STATS); /* implemented by the driver */ if (dev->dev_ops->xstats_get != NULL) { @@ -1687,7 +1565,7 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, } /* per-rxq stats */ - for (q = 0; q < dev->data->nb_rx_queues; q++) { + for (q = 0; q < nb_rxqs; q++) { for (i = 0; i < RTE_NB_RXQ_STATS; i++) { stats_ptr = RTE_PTR_ADD(&eth_stats, rte_rxq_stats_strings[i].offset + @@ -1698,7 +1576,7 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, } /* per-txq stats */ - for (q = 0; q < dev->data->nb_tx_queues; q++) { + for (q = 0; q < nb_txqs; q++) { for (i = 0; i < RTE_NB_TXQ_STATS; i++) { stats_ptr = RTE_PTR_ADD(&eth_stats, rte_txq_stats_strings[i].offset + @@ -1708,8 +1586,11 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, } } - for (i = 0; i < count + xcount; i++) + for (i = 0; i < count; i++) xstats[i].id = i; + /* add an offset to driver-specific stats */ + for ( ; i < count + xcount; i++) + xstats[i].id += count; return count + xcount; } @@ -1766,6 +1647,18 @@ rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id, STAT_QMAP_RX); } +int +rte_eth_dev_fw_version_get(uint8_t port_id, 
char *fw_version, size_t fw_size) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->fw_version_get, -ENOTSUP); + return (*dev->dev_ops->fw_version_get)(dev, fw_version, fw_size); +} + void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info) { @@ -1785,7 +1678,6 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info) RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); (*dev->dev_ops->dev_infos_get)(dev, dev_info); - dev_info->pci_dev = dev->pci_dev; dev_info->driver_name = dev->data->drv_name; dev_info->nb_rx_queues = dev->data->nb_rx_queues; dev_info->nb_tx_queues = dev->data->nb_tx_queues; @@ -2354,32 +2246,6 @@ rte_eth_dev_default_mac_addr_set(uint8_t port_id, struct ether_addr *addr) return 0; } -int -rte_eth_dev_set_vf_rxmode(uint8_t port_id, uint16_t vf, - uint16_t rx_mode, uint8_t on) -{ - uint16_t num_vfs; - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - - num_vfs = dev_info.max_vfs; - if (vf > num_vfs) { - RTE_PMD_DEBUG_TRACE("set VF RX mode:invalid VF id %d\n", vf); - return -EINVAL; - } - - if (rx_mode == 0) { - RTE_PMD_DEBUG_TRACE("set VF RX mode:mode mask ca not be zero\n"); - return -EINVAL; - } - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx_mode, -ENOTSUP); - return (*dev->dev_ops->set_vf_rx_mode)(dev, vf, rx_mode, on); -} /* * Returns index into MAC address array of addr. 
Use 00:00:00:00:00:00 to find @@ -2469,76 +2335,6 @@ rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on) return (*dev->dev_ops->uc_all_hash_table_set)(dev, on); } -int -rte_eth_dev_set_vf_rx(uint8_t port_id, uint16_t vf, uint8_t on) -{ - uint16_t num_vfs; - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - - num_vfs = dev_info.max_vfs; - if (vf > num_vfs) { - RTE_PMD_DEBUG_TRACE("port %d: invalid vf id\n", port_id); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx, -ENOTSUP); - return (*dev->dev_ops->set_vf_rx)(dev, vf, on); -} - -int -rte_eth_dev_set_vf_tx(uint8_t port_id, uint16_t vf, uint8_t on) -{ - uint16_t num_vfs; - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - - num_vfs = dev_info.max_vfs; - if (vf > num_vfs) { - RTE_PMD_DEBUG_TRACE("set pool tx:invalid pool id=%d\n", vf); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_tx, -ENOTSUP); - return (*dev->dev_ops->set_vf_tx)(dev, vf, on); -} - -int -rte_eth_dev_set_vf_vlan_filter(uint8_t port_id, uint16_t vlan_id, - uint64_t vf_mask, uint8_t vlan_on) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - if (vlan_id > ETHER_MAX_VLAN_ID) { - RTE_PMD_DEBUG_TRACE("VF VLAN filter:invalid VLAN id=%d\n", - vlan_id); - return -EINVAL; - } - - if (vf_mask == 0) { - RTE_PMD_DEBUG_TRACE("VF VLAN filter:pool_mask can not be 0\n"); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_vlan_filter, -ENOTSUP); - return (*dev->dev_ops->set_vf_vlan_filter)(dev, vlan_id, - vf_mask, vlan_on); -} - int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, uint16_t tx_rate) { 
@@ -2569,45 +2365,12 @@ int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, return (*dev->dev_ops->set_queue_rate_limit)(dev, queue_idx, tx_rate); } -int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, uint16_t tx_rate, - uint64_t q_msk) -{ - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - struct rte_eth_link link; - - if (q_msk == 0) - return 0; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - link = dev->data->dev_link; - - if (vf > dev_info.max_vfs) { - RTE_PMD_DEBUG_TRACE("set VF rate limit:port %d: " - "invalid vf id=%d\n", port_id, vf); - return -EINVAL; - } - - if (tx_rate > link.link_speed) { - RTE_PMD_DEBUG_TRACE("set VF rate limit:invalid tx_rate=%d, " - "bigger than link speed= %d\n", - tx_rate, link.link_speed); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rate_limit, -ENOTSUP); - return (*dev->dev_ops->set_vf_rate_limit)(dev, vf, tx_rate, q_msk); -} - int rte_eth_mirror_rule_set(uint8_t port_id, struct rte_eth_mirror_conf *mirror_conf, uint8_t rule_id, uint8_t on) { - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_dev *dev; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); if (mirror_conf->rule_type == 0) { @@ -2643,7 +2406,7 @@ rte_eth_mirror_rule_set(uint8_t port_id, int rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id) { - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_dev *dev; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); @@ -2678,14 +2441,15 @@ rte_eth_dev_callback_register(uint8_t port_id, } /* create a new callback. 
*/ - if (user_cb == NULL) + if (user_cb == NULL) { user_cb = rte_zmalloc("INTR_USER_CALLBACK", sizeof(struct rte_eth_dev_callback), 0); - if (user_cb != NULL) { - user_cb->cb_fn = cb_fn; - user_cb->cb_arg = cb_arg; - user_cb->event = event; - TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next); + if (user_cb != NULL) { + user_cb->cb_fn = cb_fn; + user_cb->cb_arg = cb_arg; + user_cb->event = event; + TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next); + } } rte_spinlock_unlock(&rte_eth_dev_cb_lock); @@ -2737,7 +2501,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id, void _rte_eth_dev_callback_process(struct rte_eth_dev *dev, - enum rte_eth_event_type event) + enum rte_eth_event_type event, void *cb_arg) { struct rte_eth_dev_callback *cb_lst; struct rte_eth_dev_callback dev_cb; @@ -2748,6 +2512,9 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev, continue; dev_cb = *cb_lst; cb_lst->active = 1; + if (cb_arg != NULL) + dev_cb.cb_arg = (void *) cb_arg; + rte_spinlock_unlock(&rte_eth_dev_cb_lock); dev_cb.cb_fn(dev->data->port_id, dev_cb.event, dev_cb.cb_arg); @@ -2769,7 +2536,13 @@ rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data) RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); dev = &rte_eth_devices[port_id]; - intr_handle = &dev->pci_dev->intr_handle; + + if (!dev->intr_handle) { + RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n"); + return -ENOTSUP; + } + + intr_handle = dev->intr_handle; if (!intr_handle->intr_vec) { RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); return -EPERM; @@ -2797,7 +2570,7 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, const struct rte_memzone *mz; snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - dev->driver->pci_drv.name, ring_name, + dev->data->drv_name, ring_name, dev->data->port_id, queue_id); mz = rte_memzone_lookup(z_name); @@ -2829,7 +2602,12 @@ rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id, return -EINVAL; } - intr_handle = 
&dev->pci_dev->intr_handle; + if (!dev->intr_handle) { + RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n"); + return -ENOTSUP; + } + + intr_handle = dev->intr_handle; if (!intr_handle->intr_vec) { RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); return -EPERM; @@ -3431,15 +3209,15 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, struct rte_pci_device *pci_de return; } + eth_dev->intr_handle = &pci_dev->intr_handle; + eth_dev->data->dev_flags = 0; if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_DETACHABLE) - eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE; eth_dev->data->kdrv = pci_dev->kdrv; - eth_dev->data->numa_node = pci_dev->numa_node; - eth_dev->data->drv_name = pci_dev->driver->name; + eth_dev->data->numa_node = pci_dev->device.numa_node; + eth_dev->data->drv_name = pci_dev->driver->driver.name; } int diff --git a/src/dpdk/lib/librte_ether/rte_ethdev.h b/src/dpdk/lib/librte_ether/rte_ethdev.h index 5339d3be..c17bbda8 100644 --- a/src/dpdk/lib/librte_ether/rte_ethdev.h +++ b/src/dpdk/lib/librte_ether/rte_ethdev.h @@ -182,6 +182,7 @@ extern "C" { #include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> +#include <rte_errno.h> #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -190,6 +191,9 @@ struct rte_mbuf; /** * A structure used to retrieve statistics for an Ethernet port. + * Not all statistics fields in struct rte_eth_stats are supported + * by any type of network interface card (NIC). If any statistics + * field is not supported, its value is 0. */ struct rte_eth_stats { uint64_t ipackets; /**< Total number of successfully received packets. */ @@ -198,7 +202,7 @@ struct rte_eth_stats { uint64_t obytes; /**< Total number of successfully transmitted bytes. */ uint64_t imissed; /**< Total of RX packets dropped by the HW, - * because there are no available mbufs (i.e. RX queues are full). 
+ * because there are no available buffer (i.e. RX queues are full). */ uint64_t ierrors; /**< Total number of erroneous received packets. */ uint64_t oerrors; /**< Total number of failed transmitted packets. */ @@ -255,6 +259,7 @@ struct rte_eth_stats { /** * A structure used to retrieve link-level information of an Ethernet port. */ +__extension__ struct rte_eth_link { uint32_t link_speed; /**< ETH_SPEED_NUM_ */ uint16_t link_duplex : 1; /**< ETH_LINK_[HALF/FULL]_DUPLEX */ @@ -346,6 +351,7 @@ struct rte_eth_rxmode { enum rte_eth_rx_mq_mode mq_mode; uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */ uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/ + __extension__ uint16_t header_split : 1, /**< Header Split enable. */ hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */ hw_vlan_filter : 1, /**< VLAN filter enable. */ @@ -645,6 +651,7 @@ struct rte_eth_txmode { /* For i40e specifically */ uint16_t pvid; + __extension__ uint8_t hw_vlan_reject_tagged : 1, /**< If set, reject sending out tagged pkts */ hw_vlan_reject_untagged : 1, @@ -696,6 +703,29 @@ struct rte_eth_desc_lim { uint16_t nb_max; /**< Max allowed number of descriptors. */ uint16_t nb_min; /**< Min allowed number of descriptors. */ uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + + /** + * Max allowed number of segments per whole packet. + * + * - For TSO packet this is the total number of data descriptors allowed + * by device. + * + * @see nb_mtu_seg_max + */ + uint16_t nb_seg_max; + + /** + * Max number of segments per one MTU. + * + * - For non-TSO packet, this is the maximum allowed number of segments + * in a single transmit packet. + * + * - For TSO packet each segment within the TSO may span up to this + * value. + * + * @see nb_seg_max + */ + uint16_t nb_mtu_seg_max; }; /** @@ -767,8 +797,6 @@ struct rte_fdir_conf { struct rte_eth_fdir_masks mask; struct rte_eth_fdir_flex_conf flex_conf; /**< Flex payload configuration. 
*/ - // TREX_PATCH - uint8_t flexbytes_offset; }; /** @@ -853,6 +881,7 @@ struct rte_eth_conf { #define DEV_RX_OFFLOAD_TCP_LRO 0x00000010 #define DEV_RX_OFFLOAD_QINQ_STRIP 0x00000020 #define DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000040 +#define DEV_RX_OFFLOAD_MACSEC_STRIP 0x00000080 /** * TX offload capabilities of a device. @@ -866,6 +895,11 @@ struct rte_eth_conf { #define DEV_TX_OFFLOAD_UDP_TSO 0x00000040 #define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */ #define DEV_TX_OFFLOAD_QINQ_INSERT 0x00000100 +#define DEV_TX_OFFLOAD_VXLAN_TNL_TSO 0x00000200 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_GRE_TNL_TSO 0x00000400 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_IPIP_TNL_TSO 0x00000800 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_GENEVE_TNL_TSO 0x00001000 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_MACSEC_INSERT 0x00002000 /** * Ethernet device information @@ -930,23 +964,26 @@ struct rte_eth_txq_info { /** * An Ethernet device extended statistic structure * - * This structure is used by ethdev->eth_xstats_get() to provide - * statistics that are not provided in the generic rte_eth_stats + * This structure is used by rte_eth_xstats_get() to provide + * statistics that are not provided in the generic *rte_eth_stats* * structure. + * It maps a name id, corresponding to an index in the array returned + * by rte_eth_xstats_get_names(), to a statistic value. */ struct rte_eth_xstat { - uint64_t id; - uint64_t value; + uint64_t id; /**< The index in xstats name array. */ + uint64_t value; /**< The statistic counter value. */ }; /** - * A name-key lookup element for extended statistics. + * A name element for extended statistics. * - * This structure is used to map between names and ID numbers - * for extended ethernet statistics. + * An array of this structure is returned by rte_eth_xstats_get_names(). + * It lists the names of extended statistics for a PMD. 
The *rte_eth_xstat* + * structure references these names by their array index. */ struct rte_eth_xstat_name { - char name[RTE_ETH_XSTATS_NAME_SIZE]; + char name[RTE_ETH_XSTATS_NAME_SIZE]; /**< The statistic name. */ }; #define ETH_DCB_NUM_TCS 8 @@ -1142,6 +1179,10 @@ typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev, typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset); /**< @internal Check DD bit of specific RX descriptor */ +typedef int (*eth_fw_version_get_t)(struct rte_eth_dev *dev, + char *fw_version, size_t fw_size); +/**< @internal Get firmware information of an Ethernet device. */ + typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev, uint16_t rx_queue_id, struct rte_eth_rxq_info *qinfo); @@ -1183,6 +1224,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq, uint16_t nb_pkts); /**< @internal Send output packets on a transmit queue of an Ethernet device. */ +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. 
*/ + typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); /**< @internal Get current flow control parameter on an Ethernet device */ @@ -1241,39 +1287,11 @@ typedef int (*eth_uc_all_hash_table_set_t)(struct rte_eth_dev *dev, uint8_t on); /**< @internal Set all Unicast Hash bitmap */ -typedef int (*eth_set_vf_rx_mode_t)(struct rte_eth_dev *dev, - uint16_t vf, - uint16_t rx_mode, - uint8_t on); -/**< @internal Set a VF receive mode */ - -typedef int (*eth_set_vf_rx_t)(struct rte_eth_dev *dev, - uint16_t vf, - uint8_t on); -/**< @internal Set a VF receive mode */ - -typedef int (*eth_set_vf_tx_t)(struct rte_eth_dev *dev, - uint16_t vf, - uint8_t on); -/**< @internal Enable or disable a VF transmit */ - -typedef int (*eth_set_vf_vlan_filter_t)(struct rte_eth_dev *dev, - uint16_t vlan, - uint64_t vf_mask, - uint8_t vlan_on); -/**< @internal Set VF VLAN pool filter */ - typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t tx_rate); /**< @internal Set queue TX rate */ -typedef int (*eth_set_vf_rate_limit_t)(struct rte_eth_dev *dev, - uint16_t vf, - uint16_t tx_rate, - uint64_t q_msk); -/**< @internal Set VF TX rate */ - typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev, struct rte_eth_mirror_conf *mirror_conf, uint8_t rule_id, @@ -1423,11 +1441,18 @@ struct eth_dev_ops { eth_dev_set_link_up_t dev_set_link_up; /**< Device link up. */ eth_dev_set_link_down_t dev_set_link_down; /**< Device link down. */ eth_dev_close_t dev_close; /**< Close device. */ + eth_link_update_t link_update; /**< Get device link state. */ + eth_promiscuous_enable_t promiscuous_enable; /**< Promiscuous ON. */ eth_promiscuous_disable_t promiscuous_disable;/**< Promiscuous OFF. */ eth_allmulticast_enable_t allmulticast_enable;/**< RX multicast ON. */ eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OF. */ - eth_link_update_t link_update; /**< Get device link state. 
*/ + eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address. */ + eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address. */ + eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address. */ + eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs. */ + mtu_set_t mtu_set; /**< Set MTU. */ + eth_stats_get_t stats_get; /**< Get generic device statistics. */ eth_stats_reset_t stats_reset; /**< Reset generic device statistics. */ eth_xstats_get_t xstats_get; /**< Get extended device statistics. */ @@ -1436,109 +1461,93 @@ struct eth_dev_ops { /**< Get names of extended statistics. */ eth_queue_stats_mapping_set_t queue_stats_mapping_set; /**< Configure per queue stat counter mapping. */ + eth_dev_infos_get_t dev_infos_get; /**< Get device info. */ + eth_rxq_info_get_t rxq_info_get; /**< retrieve RX queue information. */ + eth_txq_info_get_t txq_info_get; /**< retrieve TX queue information. */ + eth_fw_version_get_t fw_version_get; /**< Get firmware version. */ eth_dev_supported_ptypes_get_t dev_supported_ptypes_get; - /**< Get packet types supported and identified by device*/ - mtu_set_t mtu_set; /**< Set MTU. */ - vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */ - vlan_tpid_set_t vlan_tpid_set; /**< Outer/Inner VLAN TPID Setup. */ + /**< Get packet types supported and identified by device. */ + + vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */ + vlan_tpid_set_t vlan_tpid_set; /**< Outer/Inner VLAN TPID Setup. */ vlan_strip_queue_set_t vlan_strip_queue_set; /**< VLAN Stripping on queue. */ vlan_offload_set_t vlan_offload_set; /**< Set VLAN Offload. 
*/ - vlan_pvid_set_t vlan_pvid_set; /**< Set port based TX VLAN insertion */ - eth_queue_start_t rx_queue_start;/**< Start RX for a queue.*/ - eth_queue_stop_t rx_queue_stop;/**< Stop RX for a queue.*/ - eth_queue_start_t tx_queue_start;/**< Start TX for a queue.*/ - eth_queue_stop_t tx_queue_stop;/**< Stop TX for a queue.*/ - eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue.*/ - eth_queue_release_t rx_queue_release;/**< Release RX queue.*/ - eth_rx_queue_count_t rx_queue_count; /**< Get Rx queue count. */ - eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit */ - /**< Enable Rx queue interrupt. */ - eth_rx_enable_intr_t rx_queue_intr_enable; - /**< Disable Rx queue interrupt.*/ - eth_rx_disable_intr_t rx_queue_intr_disable; - eth_tx_queue_setup_t tx_queue_setup;/**< Set up device TX queue.*/ - eth_queue_release_t tx_queue_release;/**< Release TX queue.*/ + vlan_pvid_set_t vlan_pvid_set; /**< Set port based TX VLAN insertion. */ + + eth_queue_start_t rx_queue_start;/**< Start RX for a queue. */ + eth_queue_stop_t rx_queue_stop; /**< Stop RX for a queue. */ + eth_queue_start_t tx_queue_start;/**< Start TX for a queue. */ + eth_queue_stop_t tx_queue_stop; /**< Stop TX for a queue. */ + eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue. */ + eth_queue_release_t rx_queue_release; /**< Release RX queue. */ + eth_rx_queue_count_t rx_queue_count;/**< Get Rx queue count. */ + eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit. */ + eth_rx_enable_intr_t rx_queue_intr_enable; /**< Enable Rx queue interrupt. */ + eth_rx_disable_intr_t rx_queue_intr_disable; /**< Disable Rx queue interrupt. */ + eth_tx_queue_setup_t tx_queue_setup;/**< Set up device TX queue. */ + eth_queue_release_t tx_queue_release; /**< Release TX queue. */ + eth_dev_led_on_t dev_led_on; /**< Turn on LED. */ eth_dev_led_off_t dev_led_off; /**< Turn off LED. */ + flow_ctrl_get_t flow_ctrl_get; /**< Get flow control. 
*/ flow_ctrl_set_t flow_ctrl_set; /**< Setup flow control. */ - priority_flow_ctrl_set_t priority_flow_ctrl_set; /**< Setup priority flow control.*/ - eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address */ - eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address */ - eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address */ - eth_uc_hash_table_set_t uc_hash_table_set; /**< Set Unicast Table Array */ - eth_uc_all_hash_table_set_t uc_all_hash_table_set; /**< Set Unicast hash bitmap */ - eth_mirror_rule_set_t mirror_rule_set; /**< Add a traffic mirror rule.*/ - eth_mirror_rule_reset_t mirror_rule_reset; /**< reset a traffic mirror rule.*/ - eth_set_vf_rx_mode_t set_vf_rx_mode; /**< Set VF RX mode */ - eth_set_vf_rx_t set_vf_rx; /**< enable/disable a VF receive */ - eth_set_vf_tx_t set_vf_tx; /**< enable/disable a VF transmit */ - eth_set_vf_vlan_filter_t set_vf_vlan_filter; /**< Set VF VLAN filter */ - /** Add UDP tunnel port. */ - eth_udp_tunnel_port_add_t udp_tunnel_port_add; - /** Del UDP tunnel port. */ - eth_udp_tunnel_port_del_t udp_tunnel_port_del; - eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit */ - eth_set_vf_rate_limit_t set_vf_rate_limit; /**< Set VF rate limit */ - /** Update redirection table. */ - reta_update_t reta_update; - /** Query redirection table. */ - reta_query_t reta_query; - - eth_get_reg_t get_reg; - /**< Get registers */ - eth_get_eeprom_length_t get_eeprom_length; - /**< Get eeprom length */ - eth_get_eeprom_t get_eeprom; - /**< Get eeprom data */ - eth_set_eeprom_t set_eeprom; - /**< Set eeprom */ - /* bypass control */ + priority_flow_ctrl_set_t priority_flow_ctrl_set; /**< Setup priority flow control. */ + + eth_uc_hash_table_set_t uc_hash_table_set; /**< Set Unicast Table Array. */ + eth_uc_all_hash_table_set_t uc_all_hash_table_set; /**< Set Unicast hash bitmap. */ + + eth_mirror_rule_set_t mirror_rule_set; /**< Add a traffic mirror rule. 
*/ + eth_mirror_rule_reset_t mirror_rule_reset; /**< reset a traffic mirror rule. */ + + eth_udp_tunnel_port_add_t udp_tunnel_port_add; /** Add UDP tunnel port. */ + eth_udp_tunnel_port_del_t udp_tunnel_port_del; /** Del UDP tunnel port. */ + eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf; + /** Config ether type of l2 tunnel. */ + eth_l2_tunnel_offload_set_t l2_tunnel_offload_set; + /** Enable/disable l2 tunnel offload functions. */ + + eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit. */ + + rss_hash_update_t rss_hash_update; /** Configure RSS hash protocols. */ + rss_hash_conf_get_t rss_hash_conf_get; /** Get current RSS hash configuration. */ + reta_update_t reta_update; /** Update redirection table. */ + reta_query_t reta_query; /** Query redirection table. */ + + eth_get_reg_t get_reg; /**< Get registers. */ + eth_get_eeprom_length_t get_eeprom_length; /**< Get eeprom length. */ + eth_get_eeprom_t get_eeprom; /**< Get eeprom data. */ + eth_set_eeprom_t set_eeprom; /**< Set eeprom. */ + + /* bypass control */ #ifdef RTE_NIC_BYPASS - bypass_init_t bypass_init; - bypass_state_set_t bypass_state_set; - bypass_state_show_t bypass_state_show; - bypass_event_set_t bypass_event_set; - bypass_event_show_t bypass_event_show; - bypass_wd_timeout_set_t bypass_wd_timeout_set; - bypass_wd_timeout_show_t bypass_wd_timeout_show; - bypass_ver_show_t bypass_ver_show; - bypass_wd_reset_t bypass_wd_reset; + bypass_init_t bypass_init; + bypass_state_set_t bypass_state_set; + bypass_state_show_t bypass_state_show; + bypass_event_set_t bypass_event_set; + bypass_event_show_t bypass_event_show; + bypass_wd_timeout_set_t bypass_wd_timeout_set; + bypass_wd_timeout_show_t bypass_wd_timeout_show; + bypass_ver_show_t bypass_ver_show; + bypass_wd_reset_t bypass_wd_reset; #endif - /** Configure RSS hash protocols. */ - rss_hash_update_t rss_hash_update; - /** Get current RSS hash configuration. 
*/ - rss_hash_conf_get_t rss_hash_conf_get; - eth_filter_ctrl_t filter_ctrl; - /**< common filter control. */ - eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs */ - eth_rxq_info_get_t rxq_info_get; - /**< retrieve RX queue information. */ - eth_txq_info_get_t txq_info_get; - /**< retrieve TX queue information. */ + eth_filter_ctrl_t filter_ctrl; /**< common filter control. */ + + eth_get_dcb_info get_dcb_info; /** Get DCB information. */ + + eth_timesync_enable_t timesync_enable; /** Turn IEEE1588/802.1AS timestamping on. */ - eth_timesync_enable_t timesync_enable; + eth_timesync_disable_t timesync_disable; /** Turn IEEE1588/802.1AS timestamping off. */ - eth_timesync_disable_t timesync_disable; - /** Read the IEEE1588/802.1AS RX timestamp. */ eth_timesync_read_rx_timestamp_t timesync_read_rx_timestamp; - /** Read the IEEE1588/802.1AS TX timestamp. */ + /** Read the IEEE1588/802.1AS RX timestamp. */ eth_timesync_read_tx_timestamp_t timesync_read_tx_timestamp; - - /** Get DCB information */ - eth_get_dcb_info get_dcb_info; - /** Adjust the device clock.*/ - eth_timesync_adjust_time timesync_adjust_time; - /** Get the device clock time. */ - eth_timesync_read_time timesync_read_time; - /** Set the device clock time. */ - eth_timesync_write_time timesync_write_time; - /** Config ether type of l2 tunnel */ - eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf; - /** Enable/disable l2 tunnel offload functions */ - eth_l2_tunnel_offload_set_t l2_tunnel_offload_set; + /** Read the IEEE1588/802.1AS TX timestamp. */ + eth_timesync_adjust_time timesync_adjust_time; /** Adjust the device clock. */ + eth_timesync_read_time timesync_read_time; /** Get the device clock time. */ + eth_timesync_write_time timesync_write_time; /** Set the device clock time. */ }; /** @@ -1605,17 +1614,6 @@ struct rte_eth_rxtx_callback { }; /** - * The eth device type. 
- */ -enum rte_eth_dev_type { - RTE_ETH_DEV_UNKNOWN, /**< unknown device type */ - RTE_ETH_DEV_PCI, - /**< Physical function and Virtual function of PCI devices */ - RTE_ETH_DEV_VIRTUAL, /**< non hardware device */ - RTE_ETH_DEV_MAX /**< max value of this enum */ -}; - -/** * @internal * The generic data structure associated with each ethernet device. * @@ -1628,10 +1626,12 @@ enum rte_eth_dev_type { struct rte_eth_dev { eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. */ struct rte_eth_dev_data *data; /**< Pointer to device data */ const struct eth_driver *driver;/**< Driver for this device */ const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ - struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */ + struct rte_device *device; /**< Backing device */ + struct rte_intr_handle *intr_handle; /**< Device interrupt handle */ /** User application callbacks for NIC interrupts */ struct rte_eth_dev_cb_list link_intr_cbs; /** @@ -1645,7 +1645,6 @@ struct rte_eth_dev { */ struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; uint8_t attached; /**< Flag indicating the port is attached */ - enum rte_eth_dev_type dev_type; /**< Flag indicating the device type */ } __rte_cache_aligned; struct rte_eth_dev_sriov { @@ -1693,6 +1692,7 @@ struct rte_eth_dev_data { struct ether_addr* hash_mac_addrs; /** Device Ethernet MAC addresses of hash filtering. */ uint8_t port_id; /**< Device [external] port identifier. */ + __extension__ uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */ scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */ all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). 
*/ @@ -1758,8 +1758,7 @@ struct rte_eth_dev *rte_eth_dev_allocated(const char *name); * @return * - Slot in the rte_dev_devices array for a new device; */ -struct rte_eth_dev *rte_eth_dev_allocate(const char *name, - enum rte_eth_dev_type type); +struct rte_eth_dev *rte_eth_dev_allocate(const char *name); /** * @internal @@ -1778,7 +1777,7 @@ int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev); * @param devargs * A pointer to a strings array describing the new device * to be attached. The strings should be a pci address like - * '0000:01:00.0' or virtual device name like 'eth_pcap0'. + * '0000:01:00.0' or virtual device name like 'net_pcap0'. * @param port_id * A pointer to a port identifier actually attached. * @return @@ -1873,18 +1872,6 @@ struct eth_driver { }; /** - * @internal - * A function invoked by the initialization function of an Ethernet driver - * to simultaneously register itself as a PCI driver and as an Ethernet - * Poll Mode Driver (PMD). - * - * @param eth_drv - * The pointer to the *eth_driver* structure associated with - * the Ethernet driver. - */ -void rte_eth_driver_register(struct eth_driver *eth_drv); - -/** * Convert a numerical speed in Mbps to a bitmap flag that can be used in * the bitmap link_speeds of the struct rte_eth_conf * @@ -1930,6 +1917,19 @@ int rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_queue, uint16_t nb_tx_queue, const struct rte_eth_conf *eth_conf); /** + * @internal + * Release device queues and clear its configuration to force the user + * application to reconfigure it. It is for internal use only. + * + * @param dev + * Pointer to struct rte_eth_dev. + * + * @return + * void + */ +void _rte_eth_dev_reset(struct rte_eth_dev *dev); + +/** * Allocate and set up a receive queue for an Ethernet device. * * The function allocates a contiguous block of memory for *nb_rx_desc* @@ -2288,18 +2288,19 @@ void rte_eth_stats_reset(uint8_t port_id); * @param port_id * The port identifier of the Ethernet device. 
* @param xstats_names - * Block of memory to insert names into. Must be at least size in capacity. - * If set to NULL, function returns required capacity. + * An rte_eth_xstat_name array of at least *size* elements to + * be filled. If set to NULL, the function returns the required number + * of elements. * @param size - * Capacity of xstats_names (number of names). + * The size of the xstats_names array (number of elements). * @return - * - positive value lower or equal to size: success. The return value + * - A positive value lower or equal to size: success. The return value * is the number of entries filled in the stats table. - * - positive value higher than size: error, the given statistics table + * - A positive value higher than size: error, the given statistics table * is too small. The return value corresponds to the size that should * be given to succeed. The entries in the table are not valid and * shall not be used by the caller. - * - negative value on error (invalid port id) + * - A negative value on error (invalid port id). */ int rte_eth_xstats_get_names(uint8_t port_id, struct rte_eth_xstat_name *xstats_names, @@ -2312,19 +2313,20 @@ int rte_eth_xstats_get_names(uint8_t port_id, * The port identifier of the Ethernet device. * @param xstats * A pointer to a table of structure of type *rte_eth_xstat* - * to be filled with device statistics ids and values. + * to be filled with device statistics ids and values: id is the + * index of the name string in xstats_names (see rte_eth_xstats_get_names()), + * and value is the statistic counter. * This parameter can be set to NULL if n is 0. * @param n - * The size of the stats table, which should be large enough to store - * all the statistics of the device. + * The size of the xstats array (number of elements). * @return - * - positive value lower or equal to n: success. The return value + * - A positive value lower or equal to n: success. 
The return value * is the number of entries filled in the stats table. - * - positive value higher than n: error, the given statistics table + * - A positive value higher than n: error, the given statistics table * is too small. The return value corresponds to the size that should * be given to succeed. The entries in the table are not valid and * shall not be used by the caller. - * - negative value on error (invalid port id) + * - A negative value on error (invalid port id). */ int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, unsigned n); @@ -2401,6 +2403,27 @@ void rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr); void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info); /** + * Retrieve the firmware version of a device. + * + * @param port_id + * The port identifier of the device. + * @param fw_version + * A pointer to a string array storing the firmware version of a device, + * the string includes terminating null. This pointer is allocated by caller. + * @param fw_size + * The size of the string array pointed by fw_version, which should be + * large enough to store firmware version of the device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if operation is not supported. + * - (-ENODEV) if *port_id* invalid. + * - (>0) if *fw_size* is not enough to store firmware version, return + * the size of the non truncated string. + */ +int rte_eth_dev_fw_version_get(uint8_t port_id, + char *fw_version, size_t fw_size); + +/** * Retrieve the supported packet types of an Ethernet device. * * When a packet type is announced as supported, it *must* be recognized by @@ -2835,6 +2858,115 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); } +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Process a burst of output packets on a transmit queue of an Ethernet device. 
+ * + * The rte_eth_tx_prepare() function is invoked to prepare output packets to be + * transmitted on the output queue *queue_id* of the Ethernet device designated + * by its *port_id*. + * The *nb_pkts* parameter is the number of packets to be prepared which are + * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them + * allocated from a pool created with rte_pktmbuf_pool_create(). + * For each packet to send, the rte_eth_tx_prepare() function performs + * the following operations: + * + * - Check if packet meets the device's requirements for tx offloads. + * + * - Check limitations about number of segments. + * + * - Check additional requirements when debug is enabled. + * + * - Update and/or reset required checksums when tx offload is set for packet. + * + * Since this function can modify packet data, provided mbufs must be safely + * writable (e.g. modified data cannot be in shared segment). + * + * The rte_eth_tx_prepare() function returns the number of packets ready to be + * sent. A return value equal to *nb_pkts* means that all packets are valid and + * ready to be sent; otherwise processing stops at the first invalid packet and + * the remaining packets are left untouched. + * + * When this functionality is not implemented in the driver, all packets are + * returned untouched. + * + * @param port_id + * The port identifier of the Ethernet device. + * The value must be a valid port id. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param tx_pkts + * The address of an array of *nb_pkts* pointers to *rte_mbuf* structures + * which contain the output packets. + * @param nb_pkts + * The maximum number of packets to process. + * @return + * The number of packets correct and ready to be sent. 
The return value can be + * less than the value of the *nb_pkts* parameter when some packet doesn't + * meet the device's requirements, with rte_errno set appropriately: + * - -EINVAL: offload flags are not correctly set + * - -ENOTSUP: the offload feature is not supported by the hardware + * + */ + +#ifndef RTE_ETHDEV_TX_PREPARE_NOOP + +static inline uint16_t +rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct rte_eth_dev *dev; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid TX port_id=%d\n", port_id); + rte_errno = -EINVAL; + return 0; + } +#endif + + dev = &rte_eth_devices[port_id]; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id); + rte_errno = -EINVAL; + return 0; + } +#endif + + if (!dev->tx_pkt_prepare) + return nb_pkts; + + return (*dev->tx_pkt_prepare)(dev->data->tx_queues[queue_id], + tx_pkts, nb_pkts); +} + +#else + +/* + * Native NOOP operation for compilation targets which don't require any + * preparation steps, and where a functional NOOP may introduce an unnecessary + * performance drop. + * + * Generally it is not a good idea to turn it on globally, and it should not + * be used if the behavior of tx_preparation can change. 
+ */ + +static inline uint16_t +rte_eth_tx_prepare(__rte_unused uint8_t port_id, __rte_unused uint16_t queue_id, + __rte_unused struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + return nb_pkts; +} + +#endif + typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count, void *userdata); @@ -3049,6 +3181,8 @@ enum rte_eth_event_type { /**< queue state event (enabled/disabled) */ RTE_ETH_EVENT_INTR_RESET, /**< reset interrupt event, sent to VF on PF reset */ + RTE_ETH_EVENT_VF_MBOX, /**< message from the VF received by PF */ + RTE_ETH_EVENT_MACSEC, /**< MACsec offload related event */ RTE_ETH_EVENT_MAX /**< max value of this enum */ }; @@ -3070,6 +3204,11 @@ typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, \ * @param cb_arg * Pointer to the parameters for the registered callback. * + * The user data is overwritten in the case of RTE_ETH_EVENT_VF_MBOX. + * This event occurs when a message from the VF is received by the PF. + * The user data is overwritten with struct rte_pmd_ixgbe_mb_event_param. + * This struct is defined in rte_pmd_ixgbe.h. + * * @return * - On success, zero. * - On failure, a negative value. @@ -3108,12 +3247,16 @@ int rte_eth_dev_callback_unregister(uint8_t port_id, * Pointer to struct rte_eth_dev. * @param event * Eth device interrupt event type. + * @param cb_arg + * Update callback parameter to pass data back to user application. + * This allows the user application to decide if a particular function + * is permitted or not. * * @return * void */ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev, - enum rte_eth_event_type event); + enum rte_eth_event_type event, void *cb_arg); /** * When there is no rx packet coming in Rx Queue for a long time, we can @@ -3409,93 +3552,6 @@ int rte_eth_dev_uc_hash_table_set(uint8_t port,struct ether_addr *addr, */ int rte_eth_dev_uc_all_hash_table_set(uint8_t port,uint8_t on); - /** - * Set RX L2 Filtering mode of a VF of an Ethernet device. 
- * - * @param port - * The port identifier of the Ethernet device. - * @param vf - * VF id. - * @param rx_mode - * The RX mode mask, which is one or more of accepting Untagged Packets, - * packets that match the PFUTA table, Broadcast and Multicast Promiscuous. - * ETH_VMDQ_ACCEPT_UNTAG,ETH_VMDQ_ACCEPT_HASH_UC, - * ETH_VMDQ_ACCEPT_BROADCAST and ETH_VMDQ_ACCEPT_MULTICAST will be used - * in rx_mode. - * @param on - * 1 - Enable a VF RX mode. - * 0 - Disable a VF RX mode. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mode, - uint8_t on); - -/** -* Enable or disable a VF traffic transmit of the Ethernet device. -* -* @param port -* The port identifier of the Ethernet device. -* @param vf -* VF id. -* @param on -* 1 - Enable a VF traffic transmit. -* 0 - Disable a VF traffic transmit. -* @return -* - (0) if successful. -* - (-ENODEV) if *port_id* invalid. -* - (-ENOTSUP) if hardware doesn't support. -* - (-EINVAL) if bad parameter. -*/ -int -rte_eth_dev_set_vf_tx(uint8_t port,uint16_t vf, uint8_t on); - -/** -* Enable or disable a VF traffic receive of an Ethernet device. -* -* @param port -* The port identifier of the Ethernet device. -* @param vf -* VF id. -* @param on -* 1 - Enable a VF traffic receive. -* 0 - Disable a VF traffic receive. -* @return -* - (0) if successful. -* - (-ENOTSUP) if hardware doesn't support. -* - (-ENODEV) if *port_id* invalid. -* - (-EINVAL) if bad parameter. -*/ -int -rte_eth_dev_set_vf_rx(uint8_t port,uint16_t vf, uint8_t on); - -/** -* Enable/Disable hardware VF VLAN filtering by an Ethernet device of -* received VLAN packets tagged with a given VLAN Tag Identifier. -* -* @param port id -* The port identifier of the Ethernet device. -* @param vlan_id -* The VLAN Tag Identifier whose filtering must be enabled or disabled. 
-* @param vf_mask -* Bitmap listing which VFs participate in the VLAN filtering. -* @param vlan_on -* 1 - Enable VFs VLAN filtering. -* 0 - Disable VFs VLAN filtering. -* @return -* - (0) if successful. -* - (-ENOTSUP) if hardware doesn't support. -* - (-ENODEV) if *port_id* invalid. -* - (-EINVAL) if bad parameter. -*/ -int -rte_eth_dev_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id, - uint64_t vf_mask, - uint8_t vlan_on); - /** * Set a traffic mirroring rule on an Ethernet device * @@ -3557,26 +3613,6 @@ int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, uint16_t tx_rate); /** - * Set the rate limitation for a vf on an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param vf - * VF id. - * @param tx_rate - * The tx rate allocated from the total link speed for this VF id. - * @param q_msk - * The queue mask which need to set the rate. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support this feature. - * - (-ENODEV) if *port_id* invalid. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, - uint16_t tx_rate, uint64_t q_msk); - -/** * Initialize bypass logic. This function needs to be called before * executing any other bypass API. * @@ -4343,7 +4379,7 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id, /** * Get the port id from pci adrress or device name -* Ex: 0000:2:00.0 or vdev name eth_pcap0 +* Ex: 0000:2:00.0 or vdev name net_pcap0 * * @param name * pci address or name of the device @@ -4370,6 +4406,21 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id); int rte_eth_dev_get_name_by_port(uint8_t port_id, char *name); +/** + * @internal + * Wrapper for use by pci drivers as a .probe function to attach to a ethdev + * interface. 
+ */ +int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev); + +/** + * @internal + * Wrapper for use by pci drivers as a .remove function to detach a ethdev + * interface. + */ +int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev); + #ifdef __cplusplus } #endif diff --git a/src/dpdk/lib/librte_ether/rte_flow.c b/src/dpdk/lib/librte_ether/rte_flow.c new file mode 100644 index 00000000..aaa70d68 --- /dev/null +++ b/src/dpdk/lib/librte_ether/rte_flow.c @@ -0,0 +1,159 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> + +#include <rte_errno.h> +#include <rte_branch_prediction.h> +#include "rte_ethdev.h" +#include "rte_flow_driver.h" +#include "rte_flow.h" + +/* Get generic flow operations structure from a port. */ +const struct rte_flow_ops * +rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops; + int code; + + if (unlikely(!rte_eth_dev_is_valid_port(port_id))) + code = ENODEV; + else if (unlikely(!dev->dev_ops->filter_ctrl || + dev->dev_ops->filter_ctrl(dev, + RTE_ETH_FILTER_GENERIC, + RTE_ETH_FILTER_GET, + &ops) || + !ops)) + code = ENOSYS; + else + return ops; + rte_flow_error_set(error, code, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(code)); + return NULL; +} + +/* Check whether a flow rule can be created on a given port. 
*/ +int +rte_flow_validate(uint8_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->validate)) + return ops->validate(dev, attr, pattern, actions, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Create a flow rule on a given port. */ +struct rte_flow * +rte_flow_create(uint8_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return NULL; + if (likely(!!ops->create)) + return ops->create(dev, attr, pattern, actions, error); + rte_flow_error_set(error, ENOSYS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); + return NULL; +} + +/* Destroy a flow rule on a given port. */ +int +rte_flow_destroy(uint8_t port_id, + struct rte_flow *flow, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->destroy)) + return ops->destroy(dev, flow, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Destroy all flow rules associated with a port. 
*/ +int +rte_flow_flush(uint8_t port_id, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->flush)) + return ops->flush(dev, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Query an existing flow rule. */ +int +rte_flow_query(uint8_t port_id, + struct rte_flow *flow, + enum rte_flow_action_type action, + void *data, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (!ops) + return -rte_errno; + if (likely(!!ops->query)) + return ops->query(dev, flow, action, data, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} diff --git a/src/dpdk/lib/librte_ether/rte_flow.h b/src/dpdk/lib/librte_ether/rte_flow.h new file mode 100644 index 00000000..171a5698 --- /dev/null +++ b/src/dpdk/lib/librte_ether/rte_flow.h @@ -0,0 +1,1090 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_FLOW_H_ +#define RTE_FLOW_H_ + +/** + * @file + * RTE generic flow API + * + * This interface provides the ability to program packet matching and + * associated actions in hardware through flow rules. + */ + +#include <rte_arp.h> +#include <rte_ether.h> +#include <rte_icmp.h> +#include <rte_ip.h> +#include <rte_sctp.h> +#include <rte_tcp.h> +#include <rte_udp.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Flow rule attributes. + * + * Priorities are set on two levels: per group and per rule within groups. + * + * Lower values denote higher priority, the highest priority for both levels + * is 0, so that a rule with priority 0 in group 8 is always matched after a + * rule with priority 8 in group 0. + * + * Although optional, applications are encouraged to group similar rules as + * much as possible to fully take advantage of hardware capabilities + * (e.g. optimized matching) and work around limitations (e.g. a single + * pattern type possibly allowed in a given group). 
+ * + * Group and priority levels are arbitrary and up to the application, they + * do not need to be contiguous nor start from 0, however the maximum number + * varies between devices and may be affected by existing flow rules. + * + * If a packet is matched by several rules of a given group for a given + * priority level, the outcome is undefined. It can take any path, may be + * duplicated or even cause unrecoverable errors. + * + * Note that support for more than a single group and priority level is not + * guaranteed. + * + * Flow rules can apply to inbound and/or outbound traffic (ingress/egress). + * + * Several pattern items and actions are valid and can be used in both + * directions. Those valid for only one direction are described as such. + * + * At least one direction must be specified. + * + * Specifying both directions at once for a given rule is not recommended + * but may be valid in a few cases (e.g. shared counter). + */ +struct rte_flow_attr { + uint32_t group; /**< Priority group. */ + uint32_t priority; /**< Priority level within group. */ + uint32_t ingress:1; /**< Rule applies to ingress traffic. */ + uint32_t egress:1; /**< Rule applies to egress traffic. */ + uint32_t reserved:30; /**< Reserved, must be zero. */ +}; + +/** + * Matching pattern item types. + * + * Pattern items fall in two categories: + * + * - Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4, + * IPV6, ICMP, UDP, TCP, SCTP, VXLAN and so on), usually associated with a + * specification structure. These must be stacked in the same order as the + * protocol layers to match, starting from the lowest. + * + * - Matching meta-data or affecting pattern processing (END, VOID, INVERT, + * PF, VF, PORT and so on), often without a specification structure. Since + * they do not match packet contents, these can be specified anywhere + * within item lists without affecting others. + * + * See the description of individual types for more information. 
Those + * marked with [META] fall into the second category. + */ +enum rte_flow_item_type { + /** + * [META] + * + * End marker for item lists. Prevents further processing of items, + * thereby ending the pattern. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_END, + + /** + * [META] + * + * Used as a placeholder for convenience. It is ignored and simply + * discarded by PMDs. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_VOID, + + /** + * [META] + * + * Inverted matching, i.e. process packets that do not match the + * pattern. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_INVERT, + + /** + * Matches any protocol in place of the current layer, a single ANY + * may also stand for several protocol layers. + * + * See struct rte_flow_item_any. + */ + RTE_FLOW_ITEM_TYPE_ANY, + + /** + * [META] + * + * Matches packets addressed to the physical function of the device. + * + * If the underlying device function differs from the one that would + * normally receive the matched traffic, specifying this item + * prevents it from reaching that device unless the flow rule + * contains a PF action. Packets are not duplicated between device + * instances by default. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_PF, + + /** + * [META] + * + * Matches packets addressed to a virtual function ID of the device. + * + * If the underlying device function differs from the one that would + * normally receive the matched traffic, specifying this item + * prevents it from reaching that device unless the flow rule + * contains a VF action. Packets are not duplicated between device + * instances by default. + * + * See struct rte_flow_item_vf. + */ + RTE_FLOW_ITEM_TYPE_VF, + + /** + * [META] + * + * Matches packets coming from the specified physical port of the + * underlying device. 
+ * + * The first PORT item overrides the physical port normally + * associated with the specified DPDK input port (port_id). This + * item can be provided several times to match additional physical + * ports. + * + * See struct rte_flow_item_port. + */ + RTE_FLOW_ITEM_TYPE_PORT, + + /** + * Matches a byte string of a given length at a given offset. + * + * See struct rte_flow_item_raw. + */ + RTE_FLOW_ITEM_TYPE_RAW, + + /** + * Matches an Ethernet header. + * + * See struct rte_flow_item_eth. + */ + RTE_FLOW_ITEM_TYPE_ETH, + + /** + * Matches an 802.1Q/ad VLAN tag. + * + * See struct rte_flow_item_vlan. + */ + RTE_FLOW_ITEM_TYPE_VLAN, + + /** + * Matches an IPv4 header. + * + * See struct rte_flow_item_ipv4. + */ + RTE_FLOW_ITEM_TYPE_IPV4, + + /** + * Matches an IPv6 header. + * + * See struct rte_flow_item_ipv6. + */ + RTE_FLOW_ITEM_TYPE_IPV6, + + /** + * Matches an ICMP header. + * + * See struct rte_flow_item_icmp. + */ + RTE_FLOW_ITEM_TYPE_ICMP, + + /** + * Matches a UDP header. + * + * See struct rte_flow_item_udp. + */ + RTE_FLOW_ITEM_TYPE_UDP, + + /** + * Matches a TCP header. + * + * See struct rte_flow_item_tcp. + */ + RTE_FLOW_ITEM_TYPE_TCP, + + /** + * Matches a SCTP header. + * + * See struct rte_flow_item_sctp. + */ + RTE_FLOW_ITEM_TYPE_SCTP, + + /** + * Matches a VXLAN header. + * + * See struct rte_flow_item_vxlan. + */ + RTE_FLOW_ITEM_TYPE_VXLAN, + + /** + * Matches a E_TAG header. + * + * See struct rte_flow_item_e_tag. + */ + RTE_FLOW_ITEM_TYPE_E_TAG, + + /** + * Matches a NVGRE header. + * + * See struct rte_flow_item_nvgre. + */ + RTE_FLOW_ITEM_TYPE_NVGRE, +}; + +/** + * RTE_FLOW_ITEM_TYPE_ANY + * + * Matches any protocol in place of the current layer, a single ANY may also + * stand for several protocol layers. + * + * This is usually specified as the first pattern item when looking for a + * protocol anywhere in a packet. + * + * A zeroed mask stands for any number of layers. 
+ */ +struct rte_flow_item_any { + uint32_t num; /**< Number of layers covered. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ANY. */ +static const struct rte_flow_item_any rte_flow_item_any_mask = { + .num = 0x00000000, +}; + +/** + * RTE_FLOW_ITEM_TYPE_VF + * + * Matches packets addressed to a virtual function ID of the device. + * + * If the underlying device function differs from the one that would + * normally receive the matched traffic, specifying this item prevents it + * from reaching that device unless the flow rule contains a VF + * action. Packets are not duplicated between device instances by default. + * + * - Likely to return an error or never match any traffic if this causes a + * VF device to match traffic addressed to a different VF. + * - Can be specified multiple times to match traffic addressed to several + * VF IDs. + * - Can be combined with a PF item to match both PF and VF traffic. + * + * A zeroed mask can be used to match any VF ID. + */ +struct rte_flow_item_vf { + uint32_t id; /**< Destination VF ID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VF. */ +static const struct rte_flow_item_vf rte_flow_item_vf_mask = { + .id = 0x00000000, +}; + +/** + * RTE_FLOW_ITEM_TYPE_PORT + * + * Matches packets coming from the specified physical port of the underlying + * device. + * + * The first PORT item overrides the physical port normally associated with + * the specified DPDK input port (port_id). This item can be provided + * several times to match additional physical ports. + * + * Note that physical ports are not necessarily tied to DPDK input ports + * (port_id) when those are not under DPDK control. Possible values are + * specific to each device, they are not necessarily indexed from zero and + * may not be contiguous. + * + * As a device property, the list of allowed values as well as the value + * associated with a port_id should be retrieved by other means. + * + * A zeroed mask can be used to match any port index. 
+ */ +struct rte_flow_item_port { + uint32_t index; /**< Physical port index. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_PORT. */ +static const struct rte_flow_item_port rte_flow_item_port_mask = { + .index = 0x00000000, +}; + +/** + * RTE_FLOW_ITEM_TYPE_RAW + * + * Matches a byte string of a given length at a given offset. + * + * Offset is either absolute (using the start of the packet) or relative to + * the end of the previous matched item in the stack, in which case negative + * values are allowed. + * + * If search is enabled, offset is used as the starting point. The search + * area can be delimited by setting limit to a nonzero value, which is the + * maximum number of bytes after offset where the pattern may start. + * + * Matching a zero-length pattern is allowed, doing so resets the relative + * offset for subsequent items. + * + * This type does not support ranges (struct rte_flow_item.last). + */ +struct rte_flow_item_raw { + uint32_t relative:1; /**< Look for pattern after the previous item. */ + uint32_t search:1; /**< Search pattern from offset (see also limit). */ + uint32_t reserved:30; /**< Reserved, must be set to zero. */ + int32_t offset; /**< Absolute or relative offset for pattern. */ + uint16_t limit; /**< Search area limit for start of pattern. */ + uint16_t length; /**< Pattern length. */ + uint8_t pattern[]; /**< Byte string to look for. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_RAW. */ +static const struct rte_flow_item_raw rte_flow_item_raw_mask = { + .relative = 1, + .search = 1, + .reserved = 0x3fffffff, + .offset = 0xffffffff, + .limit = 0xffff, + .length = 0xffff, +}; + +/** + * RTE_FLOW_ITEM_TYPE_ETH + * + * Matches an Ethernet header. + */ +struct rte_flow_item_eth { + struct ether_addr dst; /**< Destination MAC. */ + struct ether_addr src; /**< Source MAC. */ + uint16_t type; /**< EtherType. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ETH. 
*/ +static const struct rte_flow_item_eth rte_flow_item_eth_mask = { + .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .type = 0x0000, +}; + +/** + * RTE_FLOW_ITEM_TYPE_VLAN + * + * Matches an 802.1Q/ad VLAN tag. + * + * This type normally follows either RTE_FLOW_ITEM_TYPE_ETH or + * RTE_FLOW_ITEM_TYPE_VLAN. + */ +struct rte_flow_item_vlan { + uint16_t tpid; /**< Tag protocol identifier. */ + uint16_t tci; /**< Tag control information. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VLAN. */ +static const struct rte_flow_item_vlan rte_flow_item_vlan_mask = { + .tpid = 0x0000, + .tci = 0xffff, +}; + +/** + * RTE_FLOW_ITEM_TYPE_IPV4 + * + * Matches an IPv4 header. + * + * Note: IPv4 options are handled by dedicated pattern items. + */ +struct rte_flow_item_ipv4 { + struct ipv4_hdr hdr; /**< IPv4 header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_IPV4. */ +static const struct rte_flow_item_ipv4 rte_flow_item_ipv4_mask = { + .hdr = { + .src_addr = 0xffffffff, + .dst_addr = 0xffffffff, + }, +}; + +/** + * RTE_FLOW_ITEM_TYPE_IPV6. + * + * Matches an IPv6 header. + * + * Note: IPv6 options are handled by dedicated pattern items. + */ +struct rte_flow_item_ipv6 { + struct ipv6_hdr hdr; /**< IPv6 header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_IPV6. */ +static const struct rte_flow_item_ipv6 rte_flow_item_ipv6_mask = { + .hdr = { + .src_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + .dst_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + }, +}; + +/** + * RTE_FLOW_ITEM_TYPE_ICMP. + * + * Matches an ICMP header. + */ +struct rte_flow_item_icmp { + struct icmp_hdr hdr; /**< ICMP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP. */ +static const struct rte_flow_item_icmp rte_flow_item_icmp_mask = { + .hdr = { + .icmp_type = 0xff, + .icmp_code = 0xff, + }, +}; + +/** + * RTE_FLOW_ITEM_TYPE_UDP. 
+ * + * Matches a UDP header. + */ +struct rte_flow_item_udp { + struct udp_hdr hdr; /**< UDP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_UDP. */ +static const struct rte_flow_item_udp rte_flow_item_udp_mask = { + .hdr = { + .src_port = 0xffff, + .dst_port = 0xffff, + }, +}; + +/** + * RTE_FLOW_ITEM_TYPE_TCP. + * + * Matches a TCP header. + */ +struct rte_flow_item_tcp { + struct tcp_hdr hdr; /**< TCP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_TCP. */ +static const struct rte_flow_item_tcp rte_flow_item_tcp_mask = { + .hdr = { + .src_port = 0xffff, + .dst_port = 0xffff, + }, +}; + +/** + * RTE_FLOW_ITEM_TYPE_SCTP. + * + * Matches a SCTP header. + */ +struct rte_flow_item_sctp { + struct sctp_hdr hdr; /**< SCTP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_SCTP. */ +static const struct rte_flow_item_sctp rte_flow_item_sctp_mask = { + .hdr = { + .src_port = 0xffff, + .dst_port = 0xffff, + }, +}; + +/** + * RTE_FLOW_ITEM_TYPE_VXLAN. + * + * Matches a VXLAN header (RFC 7348). + */ +struct rte_flow_item_vxlan { + uint8_t flags; /**< Normally 0x08 (I flag). */ + uint8_t rsvd0[3]; /**< Reserved, normally 0x000000. */ + uint8_t vni[3]; /**< VXLAN identifier. */ + uint8_t rsvd1; /**< Reserved, normally 0x00. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VXLAN. */ +static const struct rte_flow_item_vxlan rte_flow_item_vxlan_mask = { + .vni = "\xff\xff\xff", +}; + +/** + * RTE_FLOW_ITEM_TYPE_E_TAG. + * + * Matches a E-tag header. + */ +struct rte_flow_item_e_tag { + uint16_t tpid; /**< Tag protocol identifier (0x893F). */ + /** + * E-Tag control information (E-TCI). + * E-PCP (3b), E-DEI (1b), ingress E-CID base (12b). + */ + uint16_t epcp_edei_in_ecid_b; + /** Reserved (2b), GRP (2b), E-CID base (12b). */ + uint16_t rsvd_grp_ecid_b; + uint8_t in_ecid_e; /**< Ingress E-CID ext. */ + uint8_t ecid_e; /**< E-CID ext. */ +}; + +/** + * RTE_FLOW_ITEM_TYPE_NVGRE. + * + * Matches a NVGRE header. 
+ */ +struct rte_flow_item_nvgre { + /** + * Checksum (1b), undefined (1b), key bit (1b), sequence number (1b), + * reserved 0 (9b), version (3b). + * + * c_k_s_rsvd0_ver must have value 0x2000 according to RFC 7637. + */ + uint16_t c_k_s_rsvd0_ver; + uint16_t protocol; /**< Protocol type (0x6558). */ + uint8_t tni[3]; /**< Virtual subnet ID. */ + uint8_t flow_id; /**< Flow ID. */ +}; + +/** + * Matching pattern item definition. + * + * A pattern is formed by stacking items starting from the lowest protocol + * layer to match. This stacking restriction does not apply to meta items + * which can be placed anywhere in the stack without affecting the meaning + * of the resulting pattern. + * + * Patterns are terminated by END items. + * + * The spec field should be a valid pointer to a structure of the related + * item type. It may remain unspecified (NULL) in many cases to request + * broad (nonspecific) matching. In such cases, last and mask must also be + * set to NULL. + * + * Optionally, last can point to a structure of the same type to define an + * inclusive range. This is mostly supported by integer and address fields, + * may cause errors otherwise. Fields that do not support ranges must be set + * to 0 or to the same value as the corresponding fields in spec. + * + * Only the fields defined to nonzero values in the default masks (see + * rte_flow_item_{name}_mask constants) are considered relevant by + * default. This can be overridden by providing a mask structure of the + * same type with applicable bits set to one. It can also be used to + * partially filter out specific fields (e.g. as an alternate mean to match + * ranges of IP addresses). + * + * Mask is a simple bit-mask applied before interpreting the contents of + * spec and last, which may yield unexpected results if not used + * carefully. 
For example, if for an IPv4 address field, spec provides + * 10.1.2.3, last provides 10.3.4.5 and mask provides 255.255.0.0, the + * effective range becomes 10.1.0.0 to 10.3.255.255. + */ +struct rte_flow_item { + enum rte_flow_item_type type; /**< Item type. */ + const void *spec; /**< Pointer to item specification structure. */ + const void *last; /**< Defines an inclusive range (spec to last). */ + const void *mask; /**< Bit-mask applied to spec and last. */ +}; + +/** + * Action types. + * + * Each possible action is represented by a type. Some have associated + * configuration structures. Several actions combined in a list can be + * assigned to a flow rule. That list is not ordered. + * + * They fall in three categories: + * + * - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent + * processing matched packets by subsequent flow rules, unless overridden + * with PASSTHRU. + * + * - Non terminating actions (PASSTHRU, DUP) that leave matched packets up + * for additional processing by subsequent flow rules. + * + * - Other non terminating meta actions that do not affect the fate of + * packets (END, VOID, MARK, FLAG, COUNT). + * + * When several actions are combined in a flow rule, they should all have + * different types (e.g. dropping a packet twice is not possible). + * + * Only the last action of a given type is taken into account. PMDs still + * perform error checking on the entire list. + * + * Note that PASSTHRU is the only action able to override a terminating + * rule. + */ +enum rte_flow_action_type { + /** + * [META] + * + * End marker for action lists. Prevents further processing of + * actions, thereby ending the list. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_END, + + /** + * [META] + * + * Used as a placeholder for convenience. It is ignored and simply + * discarded by PMDs. + * + * No associated configuration structure. 
+ */ + RTE_FLOW_ACTION_TYPE_VOID, + + /** + * Leaves packets up for additional processing by subsequent flow + * rules. This is the default when a rule does not contain a + * terminating action, but can be specified to force a rule to + * become non-terminating. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_PASSTHRU, + + /** + * [META] + * + * Attaches an integer value to packets and sets PKT_RX_FDIR and + * PKT_RX_FDIR_ID mbuf flags. + * + * See struct rte_flow_action_mark. + */ + RTE_FLOW_ACTION_TYPE_MARK, + + /** + * [META] + * + * Flags packets. Similar to MARK without a specific value; only + * sets the PKT_RX_FDIR mbuf flag. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_FLAG, + + /** + * Assigns packets to a given queue index. + * + * See struct rte_flow_action_queue. + */ + RTE_FLOW_ACTION_TYPE_QUEUE, + + /** + * Drops packets. + * + * PASSTHRU overrides this action if both are specified. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_DROP, + + /** + * [META] + * + * Enables counters for this rule. + * + * These counters can be retrieved and reset through rte_flow_query(), + * see struct rte_flow_query_count. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_COUNT, + + /** + * Duplicates packets to a given queue index. + * + * This is normally combined with QUEUE, however when used alone, it + * is actually similar to QUEUE + PASSTHRU. + * + * See struct rte_flow_action_dup. + */ + RTE_FLOW_ACTION_TYPE_DUP, + + /** + * Similar to QUEUE, except RSS is additionally performed on packets + * to spread them among several queues according to the provided + * parameters. + * + * See struct rte_flow_action_rss. + */ + RTE_FLOW_ACTION_TYPE_RSS, + + /** + * Redirects packets to the physical function (PF) of the current + * device. + * + * No associated configuration structure. 
+ */ + RTE_FLOW_ACTION_TYPE_PF, + + /** + * Redirects packets to the virtual function (VF) of the current + * device with the specified ID. + * + * See struct rte_flow_action_vf. + */ + RTE_FLOW_ACTION_TYPE_VF, +}; + +/** + * RTE_FLOW_ACTION_TYPE_MARK + * + * Attaches an integer value to packets and sets PKT_RX_FDIR and + * PKT_RX_FDIR_ID mbuf flags. + * + * This value is arbitrary and application-defined. Maximum allowed value + * depends on the underlying implementation. It is returned in the + * hash.fdir.hi mbuf field. + */ +struct rte_flow_action_mark { + uint32_t id; /**< Integer value to return with packets. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_QUEUE + * + * Assign packets to a given queue index. + * + * Terminating by default. + */ +struct rte_flow_action_queue { + uint16_t index; /**< Queue index to use. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_COUNT (query) + * + * Query structure to retrieve and reset flow rule counters. + */ +struct rte_flow_query_count { + uint32_t reset:1; /**< Reset counters after query [in]. */ + uint32_t hits_set:1; /**< hits field is set [out]. */ + uint32_t bytes_set:1; /**< bytes field is set [out]. */ + uint32_t reserved:29; /**< Reserved, must be zero [in, out]. */ + uint64_t hits; /**< Number of hits for this rule [out]. */ + uint64_t bytes; /**< Number of bytes through this rule [out]. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_DUP + * + * Duplicates packets to a given queue index. + * + * This is normally combined with QUEUE, however when used alone, it is + * actually similar to QUEUE + PASSTHRU. + * + * Non-terminating by default. + */ +struct rte_flow_action_dup { + uint16_t index; /**< Queue index to duplicate packets to. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_RSS + * + * Similar to QUEUE, except RSS is additionally performed on packets to + * spread them among several queues according to the provided parameters. + * + * Note: RSS hash result is stored in the hash.rss mbuf field which overlaps + * hash.fdir.lo. 
Since the MARK action sets the hash.fdir.hi field only, + * both can be requested simultaneously. + * + * Terminating by default. + */ +struct rte_flow_action_rss { + const struct rte_eth_rss_conf *rss_conf; /**< RSS parameters. */ + uint16_t num; /**< Number of entries in queue[]. */ + uint16_t queue[]; /**< Queues indices to use. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_VF + * + * Redirects packets to a virtual function (VF) of the current device. + * + * Packets matched by a VF pattern item can be redirected to their original + * VF ID instead of the specified one. This parameter may not be available + * and is not guaranteed to work properly if the VF part is matched by a + * prior flow rule or if packets are not addressed to a VF in the first + * place. + * + * Terminating by default. + */ +struct rte_flow_action_vf { + uint32_t original:1; /**< Use original VF ID if possible. */ + uint32_t reserved:31; /**< Reserved, must be zero. */ + uint32_t id; /**< VF ID to redirect packets to. */ +}; + +/** + * Definition of a single action. + * + * A list of actions is terminated by a END action. + * + * For simple actions without a configuration structure, conf remains NULL. + */ +struct rte_flow_action { + enum rte_flow_action_type type; /**< Action type. */ + const void *conf; /**< Pointer to action configuration structure. */ +}; + +/** + * Opaque type returned after successfully creating a flow. + * + * This handle can be used to manage and query the related flow (e.g. to + * destroy it or retrieve counters). + */ +struct rte_flow; + +/** + * Verbose error types. + * + * Most of them provide the type of the object referenced by struct + * rte_flow_error.cause. + */ +enum rte_flow_error_type { + RTE_FLOW_ERROR_TYPE_NONE, /**< No error. */ + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ + RTE_FLOW_ERROR_TYPE_HANDLE, /**< Flow rule (handle). */ + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, /**< Group field. */ + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, /**< Priority field. 
*/ + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, /**< Ingress field. */ + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */ + RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */ + RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */ + RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. */ + RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. */ + RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */ +}; + +/** + * Verbose error structure definition. + * + * This object is normally allocated by applications and set by PMDs, the + * message points to a constant string which does not need to be freed by + * the application, however its pointer can be considered valid only as long + * as its associated DPDK port remains configured. Closing the underlying + * device or unloading the PMD invalidates it. + * + * Both cause and message may be NULL regardless of the error type. + */ +struct rte_flow_error { + enum rte_flow_error_type type; /**< Cause field and error types. */ + const void *cause; /**< Object responsible for the error. */ + const char *message; /**< Human-readable error message. */ +}; + +/** + * Check whether a flow rule can be created on a given port. + * + * While this function has no effect on the target device, the flow rule is + * validated against its current configuration state and the returned value + * should be considered valid by the caller for that state only. + * + * The returned value is guaranteed to remain valid only as long as no + * successful calls to rte_flow_create() or rte_flow_destroy() are made in + * the meantime and no device parameter affecting flow rules in any way are + * modified, due to possible collisions or resource limitations (although in + * such cases EINVAL should not be returned). + * + * @param port_id + * Port identifier of Ethernet device. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). 
+ * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 if flow rule is valid and can be created. A negative errno value + * otherwise (rte_errno is also set), the following errors are defined: + * + * -ENOSYS: underlying device does not support this functionality. + * + * -EINVAL: unknown or invalid rule specification. + * + * -ENOTSUP: valid but unsupported rule specification (e.g. partial + * bit-masks are unsupported). + * + * -EEXIST: collision with an existing rule. + * + * -ENOMEM: not enough resources. + * + * -EBUSY: action cannot be performed due to busy device resources, may + * succeed if the affected queues or even the entire port are in a stopped + * state (see rte_eth_dev_rx_queue_stop() and rte_eth_dev_stop()). + */ +int +rte_flow_validate(uint8_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); + +/** + * Create a flow rule on a given port. + * + * @param port_id + * Port identifier of Ethernet device. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * A valid handle in case of success, NULL otherwise and rte_errno is set + * to the positive version of one of the error codes defined for + * rte_flow_validate(). 
+ */ +struct rte_flow * +rte_flow_create(uint8_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); + +/** + * Destroy a flow rule on a given port. + * + * Failure to destroy a flow rule handle may occur when other flow rules + * depend on it, and destroying it would result in an inconsistent state. + * + * This function is only guaranteed to succeed if handles are destroyed in + * reverse order of their creation. + * + * @param port_id + * Port identifier of Ethernet device. + * @param flow + * Flow rule handle to destroy. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_destroy(uint8_t port_id, + struct rte_flow *flow, + struct rte_flow_error *error); + +/** + * Destroy all flow rules associated with a port. + * + * In the unlikely event of failure, handles are still considered destroyed + * and no longer valid but the port must be assumed to be in an inconsistent + * state. + * + * @param port_id + * Port identifier of Ethernet device. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_flush(uint8_t port_id, + struct rte_flow_error *error); + +/** + * Query an existing flow rule. + * + * This function allows retrieving flow-specific data such as counters. + * Data is gathered by special actions which must be present in the flow + * rule definition. + * + * \see RTE_FLOW_ACTION_TYPE_COUNT + * + * @param port_id + * Port identifier of Ethernet device. + * @param flow + * Flow rule handle to query. + * @param action + * Action type to query. 
+ * @param[in, out] data + * Pointer to storage for the associated query data type. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_query(uint8_t port_id, + struct rte_flow *flow, + enum rte_flow_action_type action, + void *data, + struct rte_flow_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_FLOW_H_ */ diff --git a/src/dpdk/lib/librte_ether/rte_flow_driver.h b/src/dpdk/lib/librte_ether/rte_flow_driver.h new file mode 100644 index 00000000..da5749d5 --- /dev/null +++ b/src/dpdk/lib/librte_ether/rte_flow_driver.h @@ -0,0 +1,182 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_FLOW_DRIVER_H_ +#define RTE_FLOW_DRIVER_H_ + +/** + * @file + * RTE generic flow API (driver side) + * + * This file provides implementation helpers for internal use by PMDs, they + * are not intended to be exposed to applications and are not subject to ABI + * versioning. + */ + +#include <stdint.h> + +#include <rte_errno.h> +#include "rte_ethdev.h" +#include "rte_flow.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Generic flow operations structure implemented and returned by PMDs. + * + * To implement this API, PMDs must handle the RTE_ETH_FILTER_GENERIC filter + * type in their .filter_ctrl callback function (struct eth_dev_ops) as well + * as the RTE_ETH_FILTER_GET filter operation. + * + * If successful, this operation must result in a pointer to a PMD-specific + * struct rte_flow_ops written to the argument address as described below: + * + * \code + * + * // PMD filter_ctrl callback + * + * static const struct rte_flow_ops pmd_flow_ops = { ... }; + * + * switch (filter_type) { + * case RTE_ETH_FILTER_GENERIC: + * if (filter_op != RTE_ETH_FILTER_GET) + * return -EINVAL; + * *(const void **)arg = &pmd_flow_ops; + * return 0; + * } + * + * \endcode + * + * See also rte_flow_ops_get(). + * + * These callback functions are not supposed to be used by applications + * directly, which must rely on the API defined in rte_flow.h. 
+ * + * Public-facing wrapper functions perform a few consistency checks so that + * unimplemented (i.e. NULL) callbacks simply return -ENOTSUP. These + * callbacks otherwise only differ by their first argument (with port ID + * already resolved to a pointer to struct rte_eth_dev). + */ +struct rte_flow_ops { + /** See rte_flow_validate(). */ + int (*validate) + (struct rte_eth_dev *, + const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); + /** See rte_flow_create(). */ + struct rte_flow *(*create) + (struct rte_eth_dev *, + const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); + /** See rte_flow_destroy(). */ + int (*destroy) + (struct rte_eth_dev *, + struct rte_flow *, + struct rte_flow_error *); + /** See rte_flow_flush(). */ + int (*flush) + (struct rte_eth_dev *, + struct rte_flow_error *); + /** See rte_flow_query(). */ + int (*query) + (struct rte_eth_dev *, + struct rte_flow *, + enum rte_flow_action_type, + void *, + struct rte_flow_error *); +}; + +/** + * Initialize generic flow error structure. + * + * This function also sets rte_errno to a given value. + * + * @param[out] error + * Pointer to flow error structure (may be NULL). + * @param code + * Related error code (rte_errno). + * @param type + * Cause field and error types. + * @param cause + * Object responsible for the error. + * @param message + * Human-readable error message. + * + * @return + * Error code. + */ +static inline int +rte_flow_error_set(struct rte_flow_error *error, + int code, + enum rte_flow_error_type type, + const void *cause, + const char *message) +{ + if (error) { + *error = (struct rte_flow_error){ + .type = type, + .cause = cause, + .message = message, + }; + } + rte_errno = code; + return code; +} + +/** + * Get generic flow operations structure from a port. + * + * @param port_id + * Port identifier to query. 
+ * @param[out] error + * Pointer to flow error structure. + * + * @return + * The flow operations structure associated with port_id, NULL in case of + * error, in which case rte_errno is set and the error structure contains + * additional details. + */ +const struct rte_flow_ops * +rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_FLOW_DRIVER_H_ */ diff --git a/src/dpdk/lib/librte_hash/rte_cuckoo_hash.c b/src/dpdk/lib/librte_hash/rte_cuckoo_hash.c index 26e54f68..51db006a 100644 --- a/src/dpdk/lib/librte_hash/rte_cuckoo_hash.c +++ b/src/dpdk/lib/librte_hash/rte_cuckoo_hash.c @@ -98,6 +98,7 @@ rte_hash_find_existing(const char *name) void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) { + h->cmp_jump_table_idx = KEY_CUSTOM; h->rte_hash_custom_cmp_eq = func; } @@ -159,7 +160,8 @@ rte_hash_create(const struct rte_hash_parameters *params) num_key_slots = params->entries + 1; snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name); - r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots), + /* Create ring (Dummy slot index is not enqueued) */ + r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots - 1), params->socket_id, 0); if (r == NULL) { RTE_LOG(ERR, HASH, "memory allocation failed\n"); @@ -282,6 +284,15 @@ rte_hash_create(const struct rte_hash_parameters *params) h->free_slots = r; h->hw_trans_mem_support = hw_trans_mem_support; +#if defined(RTE_ARCH_X86) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) + h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2; + else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) + h->sig_cmp_fn = RTE_HASH_COMPARE_SSE; + else +#endif + h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR; + /* Turn on multi-writer only with explicit flat from user and TM * support. 
*/ @@ -408,6 +419,7 @@ rte_hash_reset(struct rte_hash *h) static inline int make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) { + static unsigned int nr_pushes; unsigned i, j; int ret; uint32_t next_bucket_idx; @@ -419,10 +431,10 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Search for space in alternative locations */ - next_bucket_idx = bkt->signatures[i].alt & h->bucket_bitmask; + next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask; next_bkt[i] = &h->buckets[next_bucket_idx]; for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) { - if (next_bkt[i]->signatures[j].sig == NULL_SIGNATURE) + if (next_bkt[i]->key_idx[j] == EMPTY_SLOT) break; } @@ -432,8 +444,8 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) /* Alternative location has spare room (end of recursive function) */ if (i != RTE_HASH_BUCKET_ENTRIES) { - next_bkt[i]->signatures[j].alt = bkt->signatures[i].current; - next_bkt[i]->signatures[j].current = bkt->signatures[i].alt; + next_bkt[i]->sig_alt[j] = bkt->sig_current[i]; + next_bkt[i]->sig_current[j] = bkt->sig_alt[i]; next_bkt[i]->key_idx[j] = bkt->key_idx[i]; return i; } @@ -444,11 +456,13 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) break; /* All entries have been pushed, so entry cannot be added */ - if (i == RTE_HASH_BUCKET_ENTRIES) + if (i == RTE_HASH_BUCKET_ENTRIES || nr_pushes > RTE_HASH_MAX_PUSHES) return -ENOSPC; /* Set flag to indicate that this entry is going to be pushed */ bkt->flag[i] = 1; + + nr_pushes++; /* Need room in alternative bucket to insert the pushed entry */ ret = make_space_bucket(h, next_bkt[i]); /* @@ -458,9 +472,10 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) * or return error */ bkt->flag[i] = 0; + nr_pushes = 0; if (ret >= 0) { - next_bkt[i]->signatures[ret].alt = bkt->signatures[i].current; - next_bkt[i]->signatures[ret].current = 
bkt->signatures[i].alt; + next_bkt[i]->sig_alt[ret] = bkt->sig_current[i]; + next_bkt[i]->sig_current[ret] = bkt->sig_alt[i]; next_bkt[i]->key_idx[ret] = bkt->key_idx[i]; return i; } else @@ -542,8 +557,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is already inserted in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (prim_bkt->signatures[i].current == sig && - prim_bkt->signatures[i].alt == alt_hash) { + if (prim_bkt->sig_current[i] == sig && + prim_bkt->sig_alt[i] == alt_hash) { k = (struct rte_hash_key *) ((char *)keys + prim_bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -562,8 +577,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is already inserted in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (sec_bkt->signatures[i].alt == sig && - sec_bkt->signatures[i].current == alt_hash) { + if (sec_bkt->sig_alt[i] == sig && + sec_bkt->sig_current[i] == alt_hash) { k = (struct rte_hash_key *) ((char *)keys + sec_bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -608,9 +623,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, #endif for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Check if slot is available */ - if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) { - prim_bkt->signatures[i].current = sig; - prim_bkt->signatures[i].alt = alt_hash; + if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) { + prim_bkt->sig_current[i] = sig; + prim_bkt->sig_alt[i] = alt_hash; prim_bkt->key_idx[i] = new_idx; break; } @@ -630,8 +645,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, */ ret = make_space_bucket(h, prim_bkt); if (ret >= 0) { - prim_bkt->signatures[ret].current = sig; - prim_bkt->signatures[ret].alt = alt_hash; + prim_bkt->sig_current[ret] = sig; + prim_bkt->sig_alt[ret] = alt_hash; 
prim_bkt->key_idx[ret] = new_idx; if (h->add_key == ADD_KEY_MULTIWRITER) rte_spinlock_unlock(h->multiwriter_lock); @@ -705,8 +720,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == sig && - bkt->signatures[i].sig != NULL_SIGNATURE) { + if (bkt->sig_current[i] == sig && + bkt->key_idx[i] != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -728,8 +743,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == alt_hash && - bkt->signatures[i].alt == sig) { + if (bkt->sig_current[i] == alt_hash && + bkt->sig_alt[i] == sig) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -783,7 +798,8 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) unsigned lcore_id, n_slots; struct lcore_cache *cached_free_slots; - bkt->signatures[i].sig = NULL_SIGNATURE; + bkt->sig_current[i] = NULL_SIGNATURE; + bkt->sig_alt[i] = NULL_SIGNATURE; if (h->hw_trans_mem_support) { lcore_id = rte_lcore_id(); cached_free_slots = &h->local_free_slots[lcore_id]; @@ -814,14 +830,15 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, unsigned i; struct rte_hash_bucket *bkt; struct rte_hash_key *k, *keys = h->key_store; + int32_t ret; bucket_idx = sig & h->bucket_bitmask; bkt = &h->buckets[bucket_idx]; /* Check if key is in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == sig && - bkt->signatures[i].sig != NULL_SIGNATURE) { + if (bkt->sig_current[i] == sig && + bkt->key_idx[i] != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + 
bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -831,7 +848,9 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, * Return index where key is stored, * substracting the first dummy index */ - return bkt->key_idx[i] - 1; + ret = bkt->key_idx[i] - 1; + bkt->key_idx[i] = EMPTY_SLOT; + return ret; } } } @@ -843,8 +862,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == alt_hash && - bkt->signatures[i].sig != NULL_SIGNATURE) { + if (bkt->sig_current[i] == alt_hash && + bkt->key_idx[i] != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -854,7 +873,9 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, * Return index where key is stored, * substracting the first dummy index */ - return bkt->key_idx[i] - 1; + ret = bkt->key_idx[i] - 1; + bkt->key_idx[i] = EMPTY_SLOT; + return ret; } } } @@ -897,280 +918,189 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position, return 0; } -/* Lookup bulk stage 0: Prefetch input key */ static inline void -lookup_stage0(unsigned *idx, uint64_t *lookup_mask, - const void * const *keys) +compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, + const struct rte_hash_bucket *prim_bkt, + const struct rte_hash_bucket *sec_bkt, + hash_sig_t prim_hash, hash_sig_t sec_hash, + enum rte_hash_sig_compare_function sig_cmp_fn) { - *idx = __builtin_ctzl(*lookup_mask); - if (*lookup_mask == 0) - *idx = 0; + unsigned int i; + + switch (sig_cmp_fn) { +#ifdef RTE_MACHINE_CPUFLAG_AVX2 + case RTE_HASH_COMPARE_AVX2: + *prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( + _mm256_load_si256( + (__m256i const *)prim_bkt->sig_current), + _mm256_set1_epi32(prim_hash))); + 
*sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( + _mm256_load_si256( + (__m256i const *)sec_bkt->sig_current), + _mm256_set1_epi32(sec_hash))); + break; +#endif +#ifdef RTE_MACHINE_CPUFLAG_SSE2 + case RTE_HASH_COMPARE_SSE: + /* Compare the first 4 signatures in the bucket */ + *prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)prim_bkt->sig_current), + _mm_set1_epi32(prim_hash))); + *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)&prim_bkt->sig_current[4]), + _mm_set1_epi32(prim_hash)))) << 4; + /* Compare the first 4 signatures in the bucket */ + *sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)sec_bkt->sig_current), + _mm_set1_epi32(sec_hash))); + *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)&sec_bkt->sig_current[4]), + _mm_set1_epi32(sec_hash)))) << 4; + break; +#endif + default: + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + *prim_hash_matches |= + ((prim_hash == prim_bkt->sig_current[i]) << i); + *sec_hash_matches |= + ((sec_hash == sec_bkt->sig_current[i]) << i); + } + } - rte_prefetch0(keys[*idx]); - *lookup_mask &= ~(1llu << *idx); } -/* - * Lookup bulk stage 1: Calculate primary/secondary hashes - * and prefetch primary/secondary buckets - */ +#define PREFETCH_OFFSET 4 static inline void -lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash, - const struct rte_hash_bucket **primary_bkt, - const struct rte_hash_bucket **secondary_bkt, - hash_sig_t *hash_vals, const void * const *keys, - const struct rte_hash *h) +__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, + int32_t num_keys, int32_t *positions, + uint64_t *hit_mask, void *data[]) { - *prim_hash = rte_hash_hash(h, keys[idx]); - hash_vals[idx] = *prim_hash; - *sec_hash = rte_hash_secondary_hash(*prim_hash); + uint64_t hits = 0; + int32_t i; 
+ uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX]; + const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; + const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; + uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; + + /* Prefetch first keys */ + for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++) + rte_prefetch0(keys[i]); - *primary_bkt = &h->buckets[*prim_hash & h->bucket_bitmask]; - *secondary_bkt = &h->buckets[*sec_hash & h->bucket_bitmask]; + /* + * Prefetch rest of the keys, calculate primary and + * secondary bucket and prefetch them + */ + for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) { + rte_prefetch0(keys[i + PREFETCH_OFFSET]); - rte_prefetch0(*primary_bkt); - rte_prefetch0(*secondary_bkt); -} + prim_hash[i] = rte_hash_hash(h, keys[i]); + sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]); -/* - * Lookup bulk stage 2: Search for match hashes in primary/secondary locations - * and prefetch first key slot - */ -static inline void -lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash, - const struct rte_hash_bucket *prim_bkt, - const struct rte_hash_bucket *sec_bkt, - const struct rte_hash_key **key_slot, int32_t *positions, - uint64_t *extra_hits_mask, const void *keys, - const struct rte_hash *h) -{ - unsigned prim_hash_matches, sec_hash_matches, key_idx, i; - unsigned total_hash_matches; + primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask]; + secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask]; - prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; - sec_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - prim_hash_matches |= ((prim_hash == prim_bkt->signatures[i].current) << i); - sec_hash_matches |= ((sec_hash == sec_bkt->signatures[i].current) << i); + rte_prefetch0(primary_bkt[i]); + rte_prefetch0(secondary_bkt[i]); } - key_idx 
= prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)]; - if (key_idx == 0) - key_idx = sec_bkt->key_idx[__builtin_ctzl(sec_hash_matches)]; + /* Calculate and prefetch rest of the buckets */ + for (; i < num_keys; i++) { + prim_hash[i] = rte_hash_hash(h, keys[i]); + sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]); - total_hash_matches = (prim_hash_matches | - (sec_hash_matches << (RTE_HASH_BUCKET_ENTRIES + 1))); - *key_slot = (const struct rte_hash_key *) ((const char *)keys + - key_idx * h->key_entry_size); + primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask]; + secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask]; - rte_prefetch0(*key_slot); - /* - * Return index where key is stored, - * substracting the first dummy index - */ - positions[idx] = (key_idx - 1); + rte_prefetch0(primary_bkt[i]); + rte_prefetch0(secondary_bkt[i]); + } - *extra_hits_mask |= (uint64_t)(__builtin_popcount(total_hash_matches) > 3) << idx; + /* Compare signatures and prefetch key slot of first hit */ + for (i = 0; i < num_keys; i++) { + compare_signatures(&prim_hitmask[i], &sec_hitmask[i], + primary_bkt[i], secondary_bkt[i], + prim_hash[i], sec_hash[i], h->sig_cmp_fn); + + if (prim_hitmask[i]) { + uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]); + uint32_t key_idx = primary_bkt[i]->key_idx[first_hit]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + rte_prefetch0(key_slot); + continue; + } -} + if (sec_hitmask[i]) { + uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]); + uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + rte_prefetch0(key_slot); + } + } + /* Compare keys, first hits in primary first */ + for (i = 0; i < num_keys; i++) { + positions[i] = -ENOENT; + while (prim_hitmask[i]) { + uint32_t hit_index = 
__builtin_ctzl(prim_hitmask[i]); + + uint32_t key_idx = primary_bkt[i]->key_idx[hit_index]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + /* + * If key index is 0, do not compare key, + * as it is checking the dummy slot + */ + if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) { + if (data != NULL) + data[i] = key_slot->pdata; -/* Lookup bulk stage 3: Check if key matches, update hit mask and return data */ -static inline void -lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * const *keys, - const int32_t *positions, void *data[], uint64_t *hits, - const struct rte_hash *h) -{ - unsigned hit; - unsigned key_idx; + hits |= 1ULL << i; + positions[i] = key_idx - 1; + goto next_key; + } + prim_hitmask[i] &= ~(1 << (hit_index)); + } - hit = !rte_hash_cmp_eq(key_slot->key, keys[idx], h); - if (data != NULL) - data[idx] = key_slot->pdata; + while (sec_hitmask[i]) { + uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]); - key_idx = positions[idx] + 1; - /* - * If key index is 0, force hit to be 0, in case key to be looked up - * is all zero (as in the dummy slot), which would result in a wrong hit - */ - *hits |= (uint64_t)(hit && !!key_idx) << idx; -} + uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + /* + * If key index is 0, do not compare key, + * as it is checking the dummy slot + */ -static inline void -__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, - uint32_t num_keys, int32_t *positions, - uint64_t *hit_mask, void *data[]) -{ - uint64_t hits = 0; - uint64_t extra_hits_mask = 0; - uint64_t lookup_mask, miss_mask; - unsigned idx; - const void *key_store = h->key_store; - int ret; - hash_sig_t hash_vals[RTE_HASH_LOOKUP_BULK_MAX]; - - unsigned idx00, idx01, idx10, idx11, 
idx20, idx21, idx30, idx31; - const struct rte_hash_bucket *primary_bkt10, *primary_bkt11; - const struct rte_hash_bucket *secondary_bkt10, *secondary_bkt11; - const struct rte_hash_bucket *primary_bkt20, *primary_bkt21; - const struct rte_hash_bucket *secondary_bkt20, *secondary_bkt21; - const struct rte_hash_key *k_slot20, *k_slot21, *k_slot30, *k_slot31; - hash_sig_t primary_hash10, primary_hash11; - hash_sig_t secondary_hash10, secondary_hash11; - hash_sig_t primary_hash20, primary_hash21; - hash_sig_t secondary_hash20, secondary_hash21; - - lookup_mask = (uint64_t) -1 >> (64 - num_keys); - miss_mask = lookup_mask; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - - idx10 = idx00, idx11 = idx01; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - idx10 = idx00, idx11 = idx01; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, - secondary_bkt20, &k_slot20, positions, &extra_hits_mask, - key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, - secondary_bkt21, 
&k_slot21, positions, &extra_hits_mask, - key_store, h); - - while (lookup_mask) { - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - idx10 = idx00, idx11 = idx01; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - lookup_stage2(idx20, primary_hash20, secondary_hash20, - primary_bkt20, secondary_bkt20, &k_slot20, positions, - &extra_hits_mask, key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, - primary_bkt21, secondary_bkt21, &k_slot21, positions, - &extra_hits_mask, key_store, h); - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - } + if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) { + if (data != NULL) + data[i] = key_slot->pdata; - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - idx10 = idx00, idx11 = idx01; - - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - 
&primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, - secondary_bkt20, &k_slot20, positions, &extra_hits_mask, - key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, - secondary_bkt21, &k_slot21, positions, &extra_hits_mask, - key_store, h); - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - - lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, - secondary_bkt20, &k_slot20, positions, &extra_hits_mask, - key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, - secondary_bkt21, &k_slot21, positions, &extra_hits_mask, - key_store, h); - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - - /* ignore any items we have already found */ - extra_hits_mask &= ~hits; - - if (unlikely(extra_hits_mask)) { - /* run a single search for each remaining item */ - do { - idx = __builtin_ctzl(extra_hits_mask); - if (data != NULL) { - ret = rte_hash_lookup_with_hash_data(h, - keys[idx], hash_vals[idx], &data[idx]); - if (ret >= 0) - hits |= 1ULL << idx; - } else { - positions[idx] = rte_hash_lookup_with_hash(h, - keys[idx], hash_vals[idx]); - if (positions[idx] 
>= 0) - hits |= 1llu << idx; + hits |= 1ULL << i; + positions[i] = key_idx - 1; + goto next_key; } - extra_hits_mask &= ~(1llu << idx); - } while (extra_hits_mask); - } + sec_hitmask[i] &= ~(1 << (hit_index)); + } - miss_mask &= ~hits; - if (unlikely(miss_mask)) { - do { - idx = __builtin_ctzl(miss_mask); - positions[idx] = -ENOENT; - miss_mask &= ~(1llu << idx); - } while (miss_mask); +next_key: + continue; } if (hit_mask != NULL) @@ -1223,7 +1153,7 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32 idx = *next % RTE_HASH_BUCKET_ENTRIES; /* If current position is empty, go to the next one */ - while (h->buckets[bucket_idx].signatures[idx].sig == NULL_SIGNATURE) { + while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) { (*next)++; /* End of table */ if (*next == total_entries) diff --git a/src/dpdk/lib/librte_hash/rte_cuckoo_hash.h b/src/dpdk/lib/librte_hash/rte_cuckoo_hash.h index 6c76700f..1b8ffed8 100644 --- a/src/dpdk/lib/librte_hash/rte_cuckoo_hash.h +++ b/src/dpdk/lib/librte_hash/rte_cuckoo_hash.h @@ -130,14 +130,18 @@ enum add_key_case { }; /** Number of items per bucket. 
*/ -#define RTE_HASH_BUCKET_ENTRIES 4 +#define RTE_HASH_BUCKET_ENTRIES 8 #define NULL_SIGNATURE 0 +#define EMPTY_SLOT 0 + #define KEY_ALIGNMENT 16 #define LCORE_CACHE_SIZE 64 +#define RTE_HASH_MAX_PUSHES 100 + #define RTE_HASH_BFS_QUEUE_MAX_LEN 1000 #define RTE_XABORT_CUCKOO_PATH_INVALIDED 0x4 @@ -149,17 +153,6 @@ struct lcore_cache { void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */ } __rte_cache_aligned; -/* Structure storing both primary and secondary hashes */ -struct rte_hash_signatures { - union { - struct { - hash_sig_t current; - hash_sig_t alt; - }; - uint64_t sig; - }; -}; - /* Structure that stores key-value pair */ struct rte_hash_key { union { @@ -170,11 +163,22 @@ struct rte_hash_key { char key[0]; } __attribute__((aligned(KEY_ALIGNMENT))); +/* All different signature compare functions */ +enum rte_hash_sig_compare_function { + RTE_HASH_COMPARE_SCALAR = 0, + RTE_HASH_COMPARE_SSE, + RTE_HASH_COMPARE_AVX2, + RTE_HASH_COMPARE_NUM +}; + /** Bucket structure */ struct rte_hash_bucket { - struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES]; - /* Includes dummy key index that always contains index 0 */ - uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1]; + hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES]; + + uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES]; + + hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES]; + uint8_t flag[RTE_HASH_BUCKET_ENTRIES]; } __rte_cache_aligned; @@ -183,30 +187,38 @@ struct rte_hash { char name[RTE_HASH_NAMESIZE]; /**< Name of the hash. */ uint32_t entries; /**< Total table entries. */ uint32_t num_buckets; /**< Number of buckets in table. */ - uint32_t key_len; /**< Length of hash key. 
*/ + + struct rte_ring *free_slots; + /**< Ring that stores all indexes of the free slots in the key table */ + uint8_t hw_trans_mem_support; + /**< Hardware transactional memory support */ + struct lcore_cache *local_free_slots; + /**< Local cache per lcore, storing some indexes of the free slots */ + enum add_key_case add_key; /**< Multi-writer hash add behavior */ + + rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */ + + /* Fields used in lookup */ + + uint32_t key_len __rte_cache_aligned; + /**< Length of hash key. */ rte_hash_function hash_func; /**< Function used to calculate hash. */ uint32_t hash_func_init_val; /**< Init value used by hash_func. */ rte_hash_cmp_eq_t rte_hash_custom_cmp_eq; /**< Custom function used to compare keys. */ enum cmp_jump_table_case cmp_jump_table_idx; /**< Indicates which compare function to use. */ - uint32_t bucket_bitmask; /**< Bitmask for getting bucket index - from hash signature. */ + enum rte_hash_sig_compare_function sig_cmp_fn; + /**< Indicates which signature compare function to use. */ + uint32_t bucket_bitmask; + /**< Bitmask for getting bucket index from hash signature. */ uint32_t key_entry_size; /**< Size of each key entry. */ - struct rte_ring *free_slots; /**< Ring that stores all indexes - of the free slots in the key table */ void *key_store; /**< Table storing all keys and data */ - struct rte_hash_bucket *buckets; /**< Table with buckets storing all the - hash values and key indexes - to the key table*/ - uint8_t hw_trans_mem_support; /**< Hardware transactional - memory support */ - struct lcore_cache *local_free_slots; - /**< Local cache per lcore, storing some indexes of the free slots */ - enum add_key_case add_key; /**< Multi-writer hash add behavior */ - - rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */ + struct rte_hash_bucket *buckets; + /**< Table with buckets storing all the hash values and key indexes + * to the key table. 
+ */ } __rte_cache_aligned; struct queue_node { diff --git a/src/dpdk/lib/librte_hash/rte_cuckoo_hash_x86.h b/src/dpdk/lib/librte_hash/rte_cuckoo_hash_x86.h index fa5630b7..0c94244a 100644 --- a/src/dpdk/lib/librte_hash/rte_cuckoo_hash_x86.h +++ b/src/dpdk/lib/librte_hash/rte_cuckoo_hash_x86.h @@ -53,10 +53,9 @@ rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt, */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Check if slot is available */ - if (likely(prim_bkt->signatures[i].sig == - NULL_SIGNATURE)) { - prim_bkt->signatures[i].current = sig; - prim_bkt->signatures[i].alt = alt_hash; + if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) { + prim_bkt->sig_current[i] = sig; + prim_bkt->sig_alt[i] = alt_hash; prim_bkt->key_idx[i] = new_idx; break; } @@ -102,7 +101,7 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, prev_slot = curr_node->prev_slot; prev_alt_bkt_idx - = prev_bkt->signatures[prev_slot].alt + = prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask; if (unlikely(&h->buckets[prev_alt_bkt_idx] @@ -114,10 +113,10 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, * Cuckoo insert to move elements back to its * primary bucket if available */ - curr_bkt->signatures[curr_slot].alt = - prev_bkt->signatures[prev_slot].current; - curr_bkt->signatures[curr_slot].current = - prev_bkt->signatures[prev_slot].alt; + curr_bkt->sig_alt[curr_slot] = + prev_bkt->sig_current[prev_slot]; + curr_bkt->sig_current[curr_slot] = + prev_bkt->sig_alt[prev_slot]; curr_bkt->key_idx[curr_slot] = prev_bkt->key_idx[prev_slot]; @@ -126,8 +125,8 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, curr_bkt = curr_node->bkt; } - curr_bkt->signatures[curr_slot].current = sig; - curr_bkt->signatures[curr_slot].alt = alt_hash; + curr_bkt->sig_current[curr_slot] = sig; + curr_bkt->sig_alt[curr_slot] = alt_hash; curr_bkt->key_idx[curr_slot] = new_idx; rte_xend(); @@ -168,10 +167,11 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h, /* Cuckoo 
bfs Search */ while (likely(tail != head && head < - queue + RTE_HASH_BFS_QUEUE_MAX_LEN - 4)) { + queue + RTE_HASH_BFS_QUEUE_MAX_LEN - + RTE_HASH_BUCKET_ENTRIES)) { curr_bkt = tail->bkt; for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (curr_bkt->signatures[i].sig == NULL_SIGNATURE) { + if (curr_bkt->key_idx[i] == EMPTY_SLOT) { if (likely(rte_hash_cuckoo_move_insert_mw_tm(h, tail, i, sig, alt_hash, new_idx) == 0)) @@ -179,7 +179,7 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h, } /* Enqueue new node and keep prev node info */ - alt_bkt = &(h->buckets[curr_bkt->signatures[i].alt + alt_bkt = &(h->buckets[curr_bkt->sig_alt[i] & h->bucket_bitmask]); head->bkt = alt_bkt; head->prev = tail; diff --git a/src/dpdk/lib/librte_hash/rte_fbk_hash.h b/src/dpdk/lib/librte_hash/rte_fbk_hash.h index a430961d..bd46048f 100644 --- a/src/dpdk/lib/librte_hash/rte_fbk_hash.h +++ b/src/dpdk/lib/librte_hash/rte_fbk_hash.h @@ -115,7 +115,7 @@ struct rte_fbk_hash_table { uint32_t init_val; /**< For initialising hash function. */ /** A flat table of all buckets. 
*/ - union rte_fbk_hash_entry t[0]; + union rte_fbk_hash_entry t[]; }; /** diff --git a/src/dpdk/lib/librte_hash/rte_thash.h b/src/dpdk/lib/librte_hash/rte_thash.h index d98e98e7..a4886a8c 100644 --- a/src/dpdk/lib/librte_hash/rte_thash.h +++ b/src/dpdk/lib/librte_hash/rte_thash.h @@ -54,6 +54,7 @@ extern "C" { #include <stdint.h> #include <rte_byteorder.h> #include <rte_ip.h> +#include <rte_common.h> #ifdef __SSE3__ #include <rte_vect.h> @@ -102,6 +103,7 @@ static const __m128i rte_thash_ipv6_bswap_mask = { struct rte_ipv4_tuple { uint32_t src_addr; uint32_t dst_addr; + RTE_STD_C11 union { struct { uint16_t dport; @@ -119,6 +121,7 @@ struct rte_ipv4_tuple { struct rte_ipv6_tuple { uint8_t src_addr[16]; uint8_t dst_addr[16]; + RTE_STD_C11 union { struct { uint16_t dport; diff --git a/src/dpdk/lib/librte_kvargs/rte_kvargs.c b/src/dpdk/lib/librte_kvargs/rte_kvargs.c index 8d56abd4..854ac83f 100644 --- a/src/dpdk/lib/librte_kvargs/rte_kvargs.c +++ b/src/dpdk/lib/librte_kvargs/rte_kvargs.c @@ -92,9 +92,9 @@ rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params) * into a list of valid keys. */ static int -is_valid_key(const char *valid[], const char *key_match) +is_valid_key(const char * const valid[], const char *key_match) { - const char **valid_ptr; + const char * const *valid_ptr; for (valid_ptr = valid; *valid_ptr != NULL; valid_ptr++) { if (strcmp(key_match, *valid_ptr) == 0) @@ -109,7 +109,7 @@ is_valid_key(const char *valid[], const char *key_match) */ static int check_for_valid_keys(struct rte_kvargs *kvlist, - const char *valid[]) + const char * const valid[]) { unsigned i, ret; struct rte_kvargs_pair *pair; @@ -187,7 +187,7 @@ rte_kvargs_free(struct rte_kvargs *kvlist) * check if only valid keys were used. 
*/ struct rte_kvargs * -rte_kvargs_parse(const char *args, const char *valid_keys[]) +rte_kvargs_parse(const char *args, const char * const valid_keys[]) { struct rte_kvargs *kvlist; diff --git a/src/dpdk/lib/librte_kvargs/rte_kvargs.h b/src/dpdk/lib/librte_kvargs/rte_kvargs.h index ae9ae79f..5821c726 100644 --- a/src/dpdk/lib/librte_kvargs/rte_kvargs.h +++ b/src/dpdk/lib/librte_kvargs/rte_kvargs.h @@ -97,7 +97,8 @@ struct rte_kvargs { * - A pointer to an allocated rte_kvargs structure on success * - NULL on error */ -struct rte_kvargs *rte_kvargs_parse(const char *args, const char *valid_keys[]); +struct rte_kvargs *rte_kvargs_parse(const char *args, + const char *const valid_keys[]); /** * Free a rte_kvargs structure diff --git a/src/dpdk/lib/librte_mbuf/rte_mbuf.c b/src/dpdk/lib/librte_mbuf/rte_mbuf.c index 4846b897..72ad91e4 100644 --- a/src/dpdk/lib/librte_mbuf/rte_mbuf.c +++ b/src/dpdk/lib/librte_mbuf/rte_mbuf.c @@ -53,12 +53,12 @@ #include <rte_lcore.h> #include <rte_atomic.h> #include <rte_branch_prediction.h> -#include <rte_ring.h> #include <rte_mempool.h> #include <rte_mbuf.h> #include <rte_string_fns.h> #include <rte_hexdump.h> #include <rte_errno.h> +#include <rte_memcpy.h> /* * ctrlmbuf constructor, given as a callback function to @@ -174,10 +174,12 @@ rte_pktmbuf_pool_create(const char *name, unsigned n, if (mp == NULL) return NULL; - rte_errno = rte_mempool_set_ops_byname(mp, - RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL); - if (rte_errno != 0) { + ret = rte_mempool_set_ops_byname(mp, + RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL); + if (ret != 0) { RTE_LOG(ERR, MBUF, "error setting mempool handler\n"); + rte_mempool_free(mp); + rte_errno = -ret; return NULL; } rte_pktmbuf_pool_init(mp, &mbp_priv); @@ -262,6 +264,40 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len) } } +/* read len data bytes in a mbuf at specified offset (internal) */ +const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off, + uint32_t len, void *buf) +{ + const 
struct rte_mbuf *seg = m; + uint32_t buf_off = 0, copy_len; + + if (off + len > rte_pktmbuf_pkt_len(m)) + return NULL; + + while (off >= rte_pktmbuf_data_len(seg)) { + off -= rte_pktmbuf_data_len(seg); + seg = seg->next; + } + + if (off + len <= rte_pktmbuf_data_len(seg)) + return rte_pktmbuf_mtod_offset(seg, char *, off); + + /* rare case: header is split among several segments */ + while (len > 0) { + copy_len = rte_pktmbuf_data_len(seg) - off; + if (copy_len > len) + copy_len = len; + rte_memcpy((char *)buf + buf_off, + rte_pktmbuf_mtod_offset(seg, char *, off), copy_len); + off = 0; + buf_off += copy_len; + len -= copy_len; + seg = seg->next; + } + + return buf; +} + /* * Get the name of a RX offload flag. Must be kept synchronized with flag * definitions in rte_mbuf.h. @@ -273,16 +309,78 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask) case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH"; case PKT_RX_FDIR: return "PKT_RX_FDIR"; case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD"; + case PKT_RX_L4_CKSUM_GOOD: return "PKT_RX_L4_CKSUM_GOOD"; + case PKT_RX_L4_CKSUM_NONE: return "PKT_RX_L4_CKSUM_NONE"; case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD"; + case PKT_RX_IP_CKSUM_GOOD: return "PKT_RX_IP_CKSUM_GOOD"; + case PKT_RX_IP_CKSUM_NONE: return "PKT_RX_IP_CKSUM_NONE"; case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD"; case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED"; case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP"; case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST"; case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED"; + case PKT_RX_LRO: return "PKT_RX_LRO"; default: return NULL; } } +struct flag_mask { + uint64_t flag; + uint64_t mask; + const char *default_name; +}; + +/* write the list of rx ol flags in buffer buf */ +int +rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) +{ + const struct flag_mask rx_flags[] = { + { PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, NULL }, + { PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, NULL }, + 
{ PKT_RX_FDIR, PKT_RX_FDIR, NULL }, + { PKT_RX_L4_CKSUM_BAD, PKT_RX_L4_CKSUM_MASK, NULL }, + { PKT_RX_L4_CKSUM_GOOD, PKT_RX_L4_CKSUM_MASK, NULL }, + { PKT_RX_L4_CKSUM_NONE, PKT_RX_L4_CKSUM_MASK, NULL }, + { PKT_RX_L4_CKSUM_UNKNOWN, PKT_RX_L4_CKSUM_MASK, + "PKT_RX_L4_CKSUM_UNKNOWN" }, + { PKT_RX_IP_CKSUM_BAD, PKT_RX_IP_CKSUM_MASK, NULL }, + { PKT_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_MASK, NULL }, + { PKT_RX_IP_CKSUM_NONE, PKT_RX_IP_CKSUM_MASK, NULL }, + { PKT_RX_IP_CKSUM_UNKNOWN, PKT_RX_IP_CKSUM_MASK, + "PKT_RX_IP_CKSUM_UNKNOWN" }, + { PKT_RX_EIP_CKSUM_BAD, PKT_RX_EIP_CKSUM_BAD, NULL }, + { PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN_STRIPPED, NULL }, + { PKT_RX_IEEE1588_PTP, PKT_RX_IEEE1588_PTP, NULL }, + { PKT_RX_IEEE1588_TMST, PKT_RX_IEEE1588_TMST, NULL }, + { PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL }, + { PKT_RX_LRO, PKT_RX_LRO, NULL }, + }; + const char *name; + unsigned int i; + int ret; + + if (buflen == 0) + return -1; + + buf[0] = '\0'; + for (i = 0; i < RTE_DIM(rx_flags); i++) { + if ((mask & rx_flags[i].mask) != rx_flags[i].flag) + continue; + name = rte_get_rx_ol_flag_name(rx_flags[i].flag); + if (name == NULL) + name = rx_flags[i].default_name; + ret = snprintf(buf, buflen, "%s ", name); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + + return 0; +} + /* * Get the name of a TX offload flag. Must be kept synchronized with flag * definitions in rte_mbuf.h. 
@@ -302,6 +400,65 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask) case PKT_TX_OUTER_IP_CKSUM: return "PKT_TX_OUTER_IP_CKSUM"; case PKT_TX_OUTER_IPV4: return "PKT_TX_OUTER_IPV4"; case PKT_TX_OUTER_IPV6: return "PKT_TX_OUTER_IPV6"; + case PKT_TX_TUNNEL_VXLAN: return "PKT_TX_TUNNEL_VXLAN"; + case PKT_TX_TUNNEL_GRE: return "PKT_TX_TUNNEL_GRE"; + case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP"; + case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE"; + case PKT_TX_MACSEC: return "PKT_TX_MACSEC"; default: return NULL; } } + +/* write the list of tx ol flags in buffer buf */ +int +rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) +{ + const struct flag_mask tx_flags[] = { + { PKT_TX_VLAN_PKT, PKT_TX_VLAN_PKT, NULL }, + { PKT_TX_IP_CKSUM, PKT_TX_IP_CKSUM, NULL }, + { PKT_TX_TCP_CKSUM, PKT_TX_L4_MASK, NULL }, + { PKT_TX_SCTP_CKSUM, PKT_TX_L4_MASK, NULL }, + { PKT_TX_UDP_CKSUM, PKT_TX_L4_MASK, NULL }, + { PKT_TX_L4_NO_CKSUM, PKT_TX_L4_MASK, "PKT_TX_L4_NO_CKSUM" }, + { PKT_TX_IEEE1588_TMST, PKT_TX_IEEE1588_TMST, NULL }, + { PKT_TX_TCP_SEG, PKT_TX_TCP_SEG, NULL }, + { PKT_TX_IPV4, PKT_TX_IPV4, NULL }, + { PKT_TX_IPV6, PKT_TX_IPV6, NULL }, + { PKT_TX_OUTER_IP_CKSUM, PKT_TX_OUTER_IP_CKSUM, NULL }, + { PKT_TX_OUTER_IPV4, PKT_TX_OUTER_IPV4, NULL }, + { PKT_TX_OUTER_IPV6, PKT_TX_OUTER_IPV6, NULL }, + { PKT_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_GRE, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_IPIP, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_MACSEC, PKT_TX_MACSEC, NULL }, + }; + const char *name; + unsigned int i; + int ret; + + if (buflen == 0) + return -1; + + buf[0] = '\0'; + for (i = 0; i < RTE_DIM(tx_flags); i++) { + if ((mask & tx_flags[i].mask) != tx_flags[i].flag) + continue; + name = rte_get_tx_ol_flag_name(tx_flags[i].flag); + if (name == NULL) + name = tx_flags[i].default_name; + ret = 
snprintf(buf, buflen, "%s ", name); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + + return 0; +} diff --git a/src/dpdk/lib/librte_mbuf/rte_mbuf.h b/src/dpdk/lib/librte_mbuf/rte_mbuf.h index 9e607992..0d01167c 100644 --- a/src/dpdk/lib/librte_mbuf/rte_mbuf.h +++ b/src/dpdk/lib/librte_mbuf/rte_mbuf.h @@ -44,7 +44,7 @@ * buffers. The message buffers are stored in a mempool, using the * RTE mempool library. * - * This library provide an API to allocate/free packet mbufs, which are + * This library provides an API to allocate/free packet mbufs, which are * used to carry network packets. * * To understand the concepts of packet buffers or mbufs, you @@ -60,6 +60,7 @@ #include <rte_atomic.h> #include <rte_prefetch.h> #include <rte_branch_prediction.h> +#include <rte_mbuf_ptype.h> #ifdef __cplusplus extern "C" { @@ -90,8 +91,25 @@ extern "C" { #define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */ #define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */ -#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) /**< L4 cksum of RX pkt. is not OK. */ -#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) /**< IP cksum of RX pkt. is not OK. */ + +/** + * Deprecated. + * Checking this flag alone is deprecated: check the 2 bits of + * PKT_RX_L4_CKSUM_MASK. + * This flag was set when the L4 checksum of a packet was detected as + * wrong by the hardware. + */ +#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) + +/** + * Deprecated. + * Checking this flag alone is deprecated: check the 2 bits of + * PKT_RX_IP_CKSUM_MASK. + * This flag was set when the IP checksum of a packet was detected as + * wrong by the hardware. + */ +#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) + #define PKT_RX_EIP_CKSUM_BAD (1ULL << 5) /**< External IP header checksum error. 
*/ /** @@ -101,7 +119,35 @@ extern "C" { */ #define PKT_RX_VLAN_STRIPPED (1ULL << 6) -/* hole, some bits can be reused here */ +/** + * Mask of bits used to determine the status of RX IP checksum. + * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum + * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong + * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid + * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet + * data, but the integrity of the IP header is verified. + */ +#define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7)) + +#define PKT_RX_IP_CKSUM_UNKNOWN 0 +#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) +#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7) +#define PKT_RX_IP_CKSUM_NONE ((1ULL << 4) | (1ULL << 7)) + +/** + * Mask of bits used to determine the status of RX L4 checksum. + * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum + * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong + * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid + * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet + * data, but the integrity of the L4 data is verified. + */ +#define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8)) + +#define PKT_RX_L4_CKSUM_UNKNOWN 0 +#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) +#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8) +#define PKT_RX_L4_CKSUM_NONE ((1ULL << 3) | (1ULL << 8)) #define PKT_RX_IEEE1588_PTP (1ULL << 9) /**< RX IEEE1588 L2 Ethernet PT Packet. */ #define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/ @@ -124,11 +170,36 @@ extern "C" { */ #define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED +/** + * When packets are coalesced by a hardware or virtual driver, this flag + * can be set in the RX mbuf, meaning that the m->tso_segsz field is + * valid and is set to the segment size of original packets. 
+ */ +#define PKT_RX_LRO (1ULL << 16) + /* add new RX flags here */ /* add new TX flags here */ /** + * Offload the MACsec. This flag must be set by the application to enable + * this offload feature for a packet to be transmitted. + */ +#define PKT_TX_MACSEC (1ULL << 44) + +/** + * Bits 45:48 used for the tunnel type. + * When doing Tx offload like TSO or checksum, the HW needs to configure the + * tunnel type into the HW descriptors. + */ +#define PKT_TX_TUNNEL_VXLAN (0x1ULL << 45) +#define PKT_TX_TUNNEL_GRE (0x2ULL << 45) +#define PKT_TX_TUNNEL_IPIP (0x3ULL << 45) +#define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45) +/* add new TX TUNNEL type here */ +#define PKT_TX_TUNNEL_MASK (0xFULL << 45) + +/** * Second VLAN insertion (QinQ) flag. */ #define PKT_TX_QINQ_PKT (1ULL << 49) /**< TX packet with double VLAN inserted. */ @@ -218,6 +289,19 @@ extern "C" { */ #define PKT_TX_OUTER_IPV6 (1ULL << 60) +/** + * Bitmask of all supported packet Tx offload features flags, + * which can be set for packet. + */ +#define PKT_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK | \ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG | \ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT | \ + PKT_TX_TUNNEL_MASK) + #define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */ #define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */ @@ -225,500 +309,6 @@ extern "C" { /* Use final bit of flags to indicate a control mbuf */ #define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */ -/* - * 32 bits are divided into several fields to mark packet types. Note that - * each field is indexical. - * - Bit 3:0 is for L2 types. - * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types. - * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types. - * - Bit 15:12 is for tunnel types. - * - Bit 19:16 is for inner L2 types. - * - Bit 23:20 is for inner L3 types. - * - Bit 27:24 is for inner L4 types. - * - Bit 31:28 is reserved. 
- * - * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT, - * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP - * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits. - * - * Note that L3 types values are selected for checking IPV4/IPV6 header from - * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and - * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values. - * - * Note that the packet types of the same packet recognized by different - * hardware may be different, as different hardware may have different - * capability of packet type recognition. - * - * examples: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=0x29 - * | 'version'=6, 'next header'=0x3A - * | 'ICMPv6 header'> - * will be recognized on i40e hardware as packet type combination of, - * RTE_PTYPE_L2_ETHER | - * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - * RTE_PTYPE_TUNNEL_IP | - * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - * RTE_PTYPE_INNER_L4_ICMP. - * - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=0x2F - * | 'GRE header' - * | 'version'=6, 'next header'=0x11 - * | 'UDP header'> - * will be recognized on i40e hardware as packet type combination of, - * RTE_PTYPE_L2_ETHER | - * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - * RTE_PTYPE_TUNNEL_GRENAT | - * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - * RTE_PTYPE_INNER_L4_UDP. - */ -#define RTE_PTYPE_UNKNOWN 0x00000000 -/** - * Ethernet packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=[0x0800|0x86DD]> - */ -#define RTE_PTYPE_L2_ETHER 0x00000001 -/** - * Ethernet packet type for time sync. - * - * Packet format: - * <'ether type'=0x88F7> - */ -#define RTE_PTYPE_L2_ETHER_TIMESYNC 0x00000002 -/** - * ARP (Address Resolution Protocol) packet type. - * - * Packet format: - * <'ether type'=0x0806> - */ -#define RTE_PTYPE_L2_ETHER_ARP 0x00000003 -/** - * LLDP (Link Layer Discovery Protocol) packet type. 
- * - * Packet format: - * <'ether type'=0x88CC> - */ -#define RTE_PTYPE_L2_ETHER_LLDP 0x00000004 -/** - * NSH (Network Service Header) packet type. - * - * Packet format: - * <'ether type'=0x894F> - */ -#define RTE_PTYPE_L2_ETHER_NSH 0x00000005 -/** - * Mask of layer 2 packet types. - * It is used for outer packet for tunneling cases. - */ -#define RTE_PTYPE_L2_MASK 0x0000000f -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for outer packet for tunneling cases, and does not contain any - * header option. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=5> - */ -#define RTE_PTYPE_L3_IPV4 0x00000010 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for outer packet for tunneling cases, and contains header - * options. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=[6-15], 'options'> - */ -#define RTE_PTYPE_L3_IPV4_EXT 0x00000030 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for outer packet for tunneling cases, and does not contain any - * extension header. - * - * Packet format: - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=0x3B> - */ -#define RTE_PTYPE_L3_IPV6 0x00000040 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for outer packet for tunneling cases, and may or maynot contain - * header options. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=[5-15], <'options'>> - */ -#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN 0x00000090 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for outer packet for tunneling cases, and contains extension - * headers. - * - * Packet format: - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], - * 'extension headers'> - */ -#define RTE_PTYPE_L3_IPV6_EXT 0x000000c0 -/** - * IP (Internet Protocol) version 6 packet type. 
- * It is used for outer packet for tunneling cases, and may or maynot contain - * extension headers. - * - * Packet format: - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], - * <'extension headers'>> - */ -#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN 0x000000e0 -/** - * Mask of layer 3 packet types. - * It is used for outer packet for tunneling cases. - */ -#define RTE_PTYPE_L3_MASK 0x000000f0 -/** - * TCP (Transmission Control Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=6, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=6> - */ -#define RTE_PTYPE_L4_TCP 0x00000100 -/** - * UDP (User Datagram Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=17, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=17> - */ -#define RTE_PTYPE_L4_UDP 0x00000200 -/** - * Fragmented IP (Internet Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * It refers to those packets of any IP types, which can be recognized as - * fragmented. A fragmented packet cannot be recognized as any other L4 types - * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP, - * RTE_PTYPE_L4_NONFRAG). - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'MF'=1> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=44> - */ -#define RTE_PTYPE_L4_FRAG 0x00000300 -/** - * SCTP (Stream Control Transmission Protocol) packet type. - * It is used for outer packet for tunneling cases. 
- * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=132, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=132> - */ -#define RTE_PTYPE_L4_SCTP 0x00000400 -/** - * ICMP (Internet Control Message Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=1, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=1> - */ -#define RTE_PTYPE_L4_ICMP 0x00000500 -/** - * Non-fragmented IP (Internet Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * It refers to those packets of any IP types, while cannot be recognized as - * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, - * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP). - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'!=[6|17|44|132|1]> - */ -#define RTE_PTYPE_L4_NONFRAG 0x00000600 -/** - * Mask of layer 4 packet types. - * It is used for outer packet for tunneling cases. - */ -#define RTE_PTYPE_L4_MASK 0x00000f00 -/** - * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=[4|41]> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[4|41]> - */ -#define RTE_PTYPE_TUNNEL_IP 0x00001000 -/** - * GRE (Generic Routing Encapsulation) tunneling packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=47> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=47> - */ -#define RTE_PTYPE_TUNNEL_GRE 0x00002000 -/** - * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type. 
- * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=17 - * | 'destination port'=4798> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=17 - * | 'destination port'=4798> - */ -#define RTE_PTYPE_TUNNEL_VXLAN 0x00003000 -/** - * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling - * packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=47 - * | 'protocol type'=0x6558> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=47 - * | 'protocol type'=0x6558'> - */ -#define RTE_PTYPE_TUNNEL_NVGRE 0x00004000 -/** - * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=17 - * | 'destination port'=6081> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=17 - * | 'destination port'=6081> - */ -#define RTE_PTYPE_TUNNEL_GENEVE 0x00005000 -/** - * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area - * Network) or GRE (Generic Routing Encapsulation) could be recognized as this - * packet type, if they can not be recognized independently as of hardware - * capability. - */ -#define RTE_PTYPE_TUNNEL_GRENAT 0x00006000 -/** - * Mask of tunneling packet types. - */ -#define RTE_PTYPE_TUNNEL_MASK 0x0000f000 -/** - * Ethernet packet type. - * It is used for inner packet type only. - * - * Packet format (inner only): - * <'ether type'=[0x800|0x86DD]> - */ -#define RTE_PTYPE_INNER_L2_ETHER 0x00010000 -/** - * Ethernet packet type with VLAN (Virtual Local Area Network) tag. - * - * Packet format (inner only): - * <'ether type'=[0x800|0x86DD], vlan=[1-4095]> - */ -#define RTE_PTYPE_INNER_L2_ETHER_VLAN 0x00020000 -/** - * Mask of inner layer 2 packet types. - */ -#define RTE_PTYPE_INNER_L2_MASK 0x000f0000 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for inner packet only, and does not contain any header option. 
- * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=5> - */ -#define RTE_PTYPE_INNER_L3_IPV4 0x00100000 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for inner packet only, and contains header options. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=[6-15], 'options'> - */ -#define RTE_PTYPE_INNER_L3_IPV4_EXT 0x00200000 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for inner packet only, and does not contain any extension header. - * - * Packet format (inner only): - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=0x3B> - */ -#define RTE_PTYPE_INNER_L3_IPV6 0x00300000 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for inner packet only, and may or maynot contain header options. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=[5-15], <'options'>> - */ -#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for inner packet only, and contains extension headers. - * - * Packet format (inner only): - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], - * 'extension headers'> - */ -#define RTE_PTYPE_INNER_L3_IPV6_EXT 0x00500000 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for inner packet only, and may or maynot contain extension - * headers. - * - * Packet format (inner only): - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], - * <'extension headers'>> - */ -#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000 -/** - * Mask of inner layer 3 packet types. - */ -#define RTE_PTYPE_INNER_L3_MASK 0x00f00000 -/** - * TCP (Transmission Control Protocol) packet type. - * It is used for inner packet only. 
- * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=6, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=6> - */ -#define RTE_PTYPE_INNER_L4_TCP 0x01000000 -/** - * UDP (User Datagram Protocol) packet type. - * It is used for inner packet only. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=17, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=17> - */ -#define RTE_PTYPE_INNER_L4_UDP 0x02000000 -/** - * Fragmented IP (Internet Protocol) packet type. - * It is used for inner packet only, and may or maynot have layer 4 packet. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'MF'=1> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=44> - */ -#define RTE_PTYPE_INNER_L4_FRAG 0x03000000 -/** - * SCTP (Stream Control Transmission Protocol) packet type. - * It is used for inner packet only. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=132, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=132> - */ -#define RTE_PTYPE_INNER_L4_SCTP 0x04000000 -/** - * ICMP (Internet Control Message Protocol) packet type. - * It is used for inner packet only. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=1, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=1> - */ -#define RTE_PTYPE_INNER_L4_ICMP 0x05000000 -/** - * Non-fragmented IP (Internet Protocol) packet type. - * It is used for inner packet only, and may or maynot have other unknown layer - * 4 packet types. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'!=[6|17|44|132|1]> - */ -#define RTE_PTYPE_INNER_L4_NONFRAG 0x06000000 -/** - * Mask of inner layer 4 packet types. 
- */ -#define RTE_PTYPE_INNER_L4_MASK 0x0f000000 - -/** - * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by - * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can - * determine if it is an IPV4 packet. - */ -#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4) - -/** - * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by - * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can - * determine if it is an IPV4 packet. - */ -#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6) - -/* Check if it is a tunneling packet */ -#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & (RTE_PTYPE_TUNNEL_MASK | \ - RTE_PTYPE_INNER_L2_MASK | \ - RTE_PTYPE_INNER_L3_MASK | \ - RTE_PTYPE_INNER_L4_MASK)) - /** Alignment constraint of mbuf private area. */ #define RTE_MBUF_PRIV_ALIGN 8 @@ -733,6 +323,20 @@ extern "C" { const char *rte_get_rx_ol_flag_name(uint64_t mask); /** + * Dump the list of RX offload flags in a buffer + * + * @param mask + * The mask describing the RX flags. + * @param buf + * The output buffer. + * @param buflen + * The length of the buffer. + * @return + * 0 on success, (-1) on error. + */ +int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen); + +/** * Get the name of a TX offload flag * * @param mask @@ -745,6 +349,20 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask); const char *rte_get_tx_ol_flag_name(uint64_t mask); /** + * Dump the list of TX offload flags in a buffer + * + * @param mask + * The mask describing the TX flags. + * @param buf + * The output buffer. + * @param buflen + * The length of the buffer. + * @return + * 0 on success, (-1) on error. + */ +int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen); + +/** * Some NICs need at least 2KB buffer to RX standard Ethernet frame without * splitting it into multiple segments. 
* So, for mbufs that planned to be involved into RX/TX, the recommended @@ -756,8 +374,11 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask); /* define a set of marker types that can be used to refer to set points in the * mbuf */ +__extension__ typedef void *MARKER[0]; /**< generic marker for a point in a structure */ +__extension__ typedef uint8_t MARKER8[0]; /**< generic marker with 1B alignment */ +__extension__ typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes * with a single assignment */ @@ -784,6 +405,7 @@ struct rte_mbuf { * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC * config option. */ + RTE_STD_C11 union { rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */ uint16_t refcnt; /**< Non-atomically accessed refcnt */ @@ -803,6 +425,7 @@ struct rte_mbuf { * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the * vlan is stripped from the data. */ + RTE_STD_C11 union { uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */ struct { @@ -824,6 +447,7 @@ struct rte_mbuf { union { uint32_t rss; /**< RSS hash result if RSS enabled */ struct { + RTE_STD_C11 union { struct { uint16_t hash; @@ -851,6 +475,7 @@ struct rte_mbuf { /* second cache line - fields only used in slow path or on TX */ MARKER cacheline1 __rte_cache_min_aligned; + RTE_STD_C11 union { void *userdata; /**< Can be used for external metadata */ uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */ @@ -860,10 +485,15 @@ struct rte_mbuf { struct rte_mbuf *next; /**< Next segment of scattered packet. */ /* fields to support TX offloads */ + RTE_STD_C11 union { uint64_t tx_offload; /**< combined for easy fetch */ + __extension__ struct { - uint64_t l2_len:7; /**< L2 (MAC) Header Length. */ + uint64_t l2_len:7; + /**< L2 (MAC) Header Length for non-tunneling pkt. + * Outer_L4_len + ... + Inner_L2_len for tunneling pkt. + */ uint64_t l3_len:9; /**< L3 (IP) Header Length. 
*/ uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */ uint64_t tso_segsz:16; /**< TCP TSO segment size */ @@ -1059,9 +689,6 @@ rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value) static inline uint16_t rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) { - // TREX_PATCH - The code in #if 0 caused tx queue to hang when running: - // sudo ./t-rex-64-o -f avl/sfr_delay_10_1g_no_bundeling.yaml -m 35 -p -d 100 -#if 0 /* * The atomic_add is an expensive operation, so we don't want to * call it in the case where we know we are the uniq holder of @@ -1073,7 +700,7 @@ rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) rte_mbuf_refcnt_set(m, 1 + value); return 1 + value; } -#endif + return (uint16_t)(rte_atomic16_add_return(&m->refcnt_atomic, value)); } @@ -1160,13 +787,6 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp) return m; } -/* compat with older versions */ -__rte_deprecated static inline struct rte_mbuf * -__rte_mbuf_raw_alloc(struct rte_mempool *mp) -{ - return rte_mbuf_raw_alloc(mp); -} - /** * @internal Put mbuf back into its original mempool. * The use of that function is reserved for RTE internal needs. @@ -1388,6 +1008,19 @@ rte_pktmbuf_priv_size(struct rte_mempool *mp) } /** + * Reset the data_off field of a packet mbuf to its default value. + * + * The given mbuf must have only one segment, which should be empty. + * + * @param m + * The packet mbuf's data_off field has to be reset. + */ +static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m) +{ + m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len); +} + +/** * Reset the fields of a packet mbuf to their default values. * * The given mbuf must have only one segment. @@ -1407,8 +1040,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m) m->ol_flags = 0; m->packet_type = 0; - m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ? 
- RTE_PKTMBUF_HEADROOM : m->buf_len; + rte_pktmbuf_reset_headroom(m); m->data_len = 0; __rte_mbuf_sanity_check(m, 1); @@ -1526,7 +1158,6 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m) mi->buf_addr = m->buf_addr; mi->buf_len = m->buf_len; - mi->next = m->next; mi->data_off = m->data_off; mi->data_len = m->data_len; mi->port = m->port; @@ -1572,7 +1203,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m) m->buf_addr = (char *)m + mbuf_size; m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size; m->buf_len = (uint16_t)buf_len; - m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len); + rte_pktmbuf_reset_headroom(m); m->data_len = 0; m->ol_flags = 0; @@ -1961,6 +1592,41 @@ static inline int rte_pktmbuf_is_contiguous(const struct rte_mbuf *m) } /** + * @internal used by rte_pktmbuf_read(). + */ +const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off, + uint32_t len, void *buf); + +/** + * Read len data bytes in a mbuf at specified offset. + * + * If the data is contiguous, return the pointer in the mbuf data, else + * copy the data in the buffer provided by the user and return its + * pointer. + * + * @param m + * The pointer to the mbuf. + * @param off + * The offset of the data in the mbuf. + * @param len + * The amount of bytes to read. + * @param buf + * The buffer where data is copied if it is not contigous in mbuf + * data. Its length should be at least equal to the len parameter. + * @return + * The pointer to the data, either in the mbuf if it is contiguous, + * or in the user buffer. If mbuf is too small, NULL is returned. + */ +static inline const void *rte_pktmbuf_read(const struct rte_mbuf *m, + uint32_t off, uint32_t len, void *buf) +{ + if (likely(off + len <= rte_pktmbuf_data_len(m))) + return rte_pktmbuf_mtod_offset(m, char *, off); + else + return __rte_pktmbuf_read(m, off, len, buf); +} + +/** * Chain an mbuf to another, thereby creating a segmented packet. 
* * Note: The implementation will do a linear walk over the segments to find @@ -1999,7 +1665,109 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail } /** - * Dump an mbuf structure to the console. + * Validate general requirements for Tx offload in mbuf. + * + * This function checks correctness and completeness of Tx offload settings. + * + * @param m + * The packet mbuf to be validated. + * @return + * 0 if packet is valid + */ +static inline int +rte_validate_tx_offload(const struct rte_mbuf *m) +{ + uint64_t ol_flags = m->ol_flags; + uint64_t inner_l3_offset = m->l2_len; + + /* Does packet set any of available offloads? */ + if (!(ol_flags & PKT_TX_OFFLOAD_MASK)) + return 0; + + if (ol_flags & PKT_TX_OUTER_IP_CKSUM) + inner_l3_offset += m->outer_l2_len + m->outer_l3_len; + + /* Headers are fragmented */ + if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len) + return -ENOTSUP; + + /* IP checksum can be counted only for IPv4 packet */ + if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6)) + return -EINVAL; + + /* IP type not set when required */ + if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG)) + if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6))) + return -EINVAL; + + /* Check requirements for TSO packet */ + if (ol_flags & PKT_TX_TCP_SEG) + if ((m->tso_segsz == 0) || + ((ol_flags & PKT_TX_IPV4) && + !(ol_flags & PKT_TX_IP_CKSUM))) + return -EINVAL; + + /* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */ + if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) && + !(ol_flags & PKT_TX_OUTER_IPV4)) + return -EINVAL; + + return 0; +} + +/** + * Linearize data in mbuf. + * + * This function moves the mbuf data in the first segment if there is enough + * tailroom. The subsequent segments are unchained and freed. 
+ * + * @param mbuf + * mbuf to linearize + * @return + * - 0, on success + * - -1, on error + */ +static inline int +rte_pktmbuf_linearize(struct rte_mbuf *mbuf) +{ + int seg_len, copy_len; + struct rte_mbuf *m; + struct rte_mbuf *m_next; + char *buffer; + + if (rte_pktmbuf_is_contiguous(mbuf)) + return 0; + + /* Extend first segment to the total packet length */ + copy_len = rte_pktmbuf_pkt_len(mbuf) - rte_pktmbuf_data_len(mbuf); + + if (unlikely(copy_len > rte_pktmbuf_tailroom(mbuf))) + return -1; + + buffer = rte_pktmbuf_mtod_offset(mbuf, char *, mbuf->data_len); + mbuf->data_len = (uint16_t)(mbuf->pkt_len); + + /* Append data from next segments to the first one */ + m = mbuf->next; + while (m != NULL) { + m_next = m->next; + + seg_len = rte_pktmbuf_data_len(m); + rte_memcpy(buffer, rte_pktmbuf_mtod(m, char *), seg_len); + buffer += seg_len; + + rte_pktmbuf_free_seg(m); + m = m_next; + } + + mbuf->next = NULL; + mbuf->nb_segs = 1; + + return 0; +} + +/** + * Dump an mbuf structure to a file. * * Dump all fields for the given packet mbuf and all its associated * segments (in the case of a chained buffer). diff --git a/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.c b/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.c new file mode 100644 index 00000000..e5c4fae3 --- /dev/null +++ b/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.c @@ -0,0 +1,227 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. 
nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> + +#include <rte_mbuf.h> +#include <rte_mbuf_ptype.h> + +/* get the name of the l2 packet type */ +const char *rte_get_ptype_l2_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_L2_MASK) { + case RTE_PTYPE_L2_ETHER: return "L2_ETHER"; + case RTE_PTYPE_L2_ETHER_TIMESYNC: return "L2_ETHER_TIMESYNC"; + case RTE_PTYPE_L2_ETHER_ARP: return "L2_ETHER_ARP"; + case RTE_PTYPE_L2_ETHER_LLDP: return "L2_ETHER_LLDP"; + case RTE_PTYPE_L2_ETHER_NSH: return "L2_ETHER_NSH"; + case RTE_PTYPE_L2_ETHER_VLAN: return "L2_ETHER_VLAN"; + case RTE_PTYPE_L2_ETHER_QINQ: return "L2_ETHER_QINQ"; + default: return "L2_UNKNOWN"; + } +} + +/* get the name of the l3 packet type */ +const char *rte_get_ptype_l3_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_L3_MASK) { + case RTE_PTYPE_L3_IPV4: return "L3_IPV4"; + case RTE_PTYPE_L3_IPV4_EXT: return "L3_IPV4_EXT"; + case RTE_PTYPE_L3_IPV6: return "L3_IPV6"; + case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN: return "L3_IPV4_EXT_UNKNOWN"; + case RTE_PTYPE_L3_IPV6_EXT: return "L3_IPV6_EXT"; 
+ case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN: return "L3_IPV6_EXT_UNKNOWN"; + default: return "L3_UNKNOWN"; + } +} + +/* get the name of the l4 packet type */ +const char *rte_get_ptype_l4_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_L4_MASK) { + case RTE_PTYPE_L4_TCP: return "L4_TCP"; + case RTE_PTYPE_L4_UDP: return "L4_UDP"; + case RTE_PTYPE_L4_FRAG: return "L4_FRAG"; + case RTE_PTYPE_L4_SCTP: return "L4_SCTP"; + case RTE_PTYPE_L4_ICMP: return "L4_ICMP"; + case RTE_PTYPE_L4_NONFRAG: return "L4_NONFRAG"; + default: return "L4_UNKNOWN"; + } +} + +/* get the name of the tunnel packet type */ +const char *rte_get_ptype_tunnel_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_TUNNEL_MASK) { + case RTE_PTYPE_TUNNEL_IP: return "TUNNEL_IP"; + case RTE_PTYPE_TUNNEL_GRE: return "TUNNEL_GRE"; + case RTE_PTYPE_TUNNEL_VXLAN: return "TUNNEL_VXLAN"; + case RTE_PTYPE_TUNNEL_NVGRE: return "TUNNEL_NVGRE"; + case RTE_PTYPE_TUNNEL_GENEVE: return "TUNNEL_GENEVE"; + case RTE_PTYPE_TUNNEL_GRENAT: return "TUNNEL_GRENAT"; + default: return "TUNNEL_UNKNOWN"; + } +} + +/* get the name of the inner_l2 packet type */ +const char *rte_get_ptype_inner_l2_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_INNER_L2_MASK) { + case RTE_PTYPE_INNER_L2_ETHER: return "INNER_L2_ETHER"; + case RTE_PTYPE_INNER_L2_ETHER_VLAN: return "INNER_L2_ETHER_VLAN"; + case RTE_PTYPE_INNER_L2_ETHER_QINQ: return "INNER_L2_ETHER_QINQ"; + default: return "INNER_L2_UNKNOWN"; + } +} + +/* get the name of the inner_l3 packet type */ +const char *rte_get_ptype_inner_l3_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_INNER_L3_MASK) { + case RTE_PTYPE_INNER_L3_IPV4: return "INNER_L3_IPV4"; + case RTE_PTYPE_INNER_L3_IPV4_EXT: return "INNER_L3_IPV4_EXT"; + case RTE_PTYPE_INNER_L3_IPV6: return "INNER_L3_IPV6"; + case RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN: + return "INNER_L3_IPV4_EXT_UNKNOWN"; + case RTE_PTYPE_INNER_L3_IPV6_EXT: return "INNER_L3_IPV6_EXT"; + case RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN: + return 
"INNER_L3_IPV6_EXT_UNKNOWN"; + default: return "INNER_L3_UNKNOWN"; + } +} + +/* get the name of the inner_l4 packet type */ +const char *rte_get_ptype_inner_l4_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_INNER_L4_MASK) { + case RTE_PTYPE_INNER_L4_TCP: return "INNER_L4_TCP"; + case RTE_PTYPE_INNER_L4_UDP: return "INNER_L4_UDP"; + case RTE_PTYPE_INNER_L4_FRAG: return "INNER_L4_FRAG"; + case RTE_PTYPE_INNER_L4_SCTP: return "INNER_L4_SCTP"; + case RTE_PTYPE_INNER_L4_ICMP: return "INNER_L4_ICMP"; + case RTE_PTYPE_INNER_L4_NONFRAG: return "INNER_L4_NONFRAG"; + default: return "INNER_L4_UNKNOWN"; + } +} + +/* write the packet type name into the buffer */ +int rte_get_ptype_name(uint32_t ptype, char *buf, size_t buflen) +{ + int ret; + + if (buflen == 0) + return -1; + + buf[0] = '\0'; + if ((ptype & RTE_PTYPE_ALL_MASK) == RTE_PTYPE_UNKNOWN) { + ret = snprintf(buf, buflen, "UNKNOWN"); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + return 0; + } + + if ((ptype & RTE_PTYPE_L2_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_l2_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_L3_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_l3_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_L4_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_l4_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_TUNNEL_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_tunnel_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_INNER_L2_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_inner_l2_name(ptype)); + if (ret < 
0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_INNER_L3_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_inner_l3_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_INNER_L4_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_inner_l4_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + + return 0; +} diff --git a/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.h b/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.h new file mode 100644 index 00000000..ff6de9d1 --- /dev/null +++ b/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.h @@ -0,0 +1,668 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. + * Copyright 2014-2016 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MBUF_PTYPE_H_ +#define _RTE_MBUF_PTYPE_H_ + +/** + * @file + * RTE Mbuf Packet Types + * + * This file contains declarations for features related to mbuf packet + * types. The packet type gives information about the data carried by the + * mbuf, and is stored in the mbuf in a 32 bits field. + * + * The 32 bits are divided into several fields to mark packet types. Note that + * each field is indexical. + * - Bit 3:0 is for L2 types. + * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types. + * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types. + * - Bit 15:12 is for tunnel types. + * - Bit 19:16 is for inner L2 types. + * - Bit 23:20 is for inner L3 types. + * - Bit 27:24 is for inner L4 types. + * - Bit 31:28 is reserved. + * + * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT, + * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP + * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits. + * + * Note that L3 types values are selected for checking IPV4/IPV6 header from + * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and + * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values. + * + * Note that the packet types of the same packet recognized by different + * hardware may be different, as different hardware may have different + * capability of packet type recognition. 
+ * + * examples: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=0x29 + * | 'version'=6, 'next header'=0x3A + * | 'ICMPv6 header'> + * will be recognized on i40e hardware as packet type combination of, + * RTE_PTYPE_L2_ETHER | + * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + * RTE_PTYPE_TUNNEL_IP | + * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + * RTE_PTYPE_INNER_L4_ICMP. + * + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=0x2F + * | 'GRE header' + * | 'version'=6, 'next header'=0x11 + * | 'UDP header'> + * will be recognized on i40e hardware as packet type combination of, + * RTE_PTYPE_L2_ETHER | + * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + * RTE_PTYPE_TUNNEL_GRENAT | + * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + * RTE_PTYPE_INNER_L4_UDP. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * No packet type information. + */ +#define RTE_PTYPE_UNKNOWN 0x00000000 +/** + * Ethernet packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=[0x0800|0x86DD]> + */ +#define RTE_PTYPE_L2_ETHER 0x00000001 +/** + * Ethernet packet type for time sync. + * + * Packet format: + * <'ether type'=0x88F7> + */ +#define RTE_PTYPE_L2_ETHER_TIMESYNC 0x00000002 +/** + * ARP (Address Resolution Protocol) packet type. + * + * Packet format: + * <'ether type'=0x0806> + */ +#define RTE_PTYPE_L2_ETHER_ARP 0x00000003 +/** + * LLDP (Link Layer Discovery Protocol) packet type. + * + * Packet format: + * <'ether type'=0x88CC> + */ +#define RTE_PTYPE_L2_ETHER_LLDP 0x00000004 +/** + * NSH (Network Service Header) packet type. + * + * Packet format: + * <'ether type'=0x894F> + */ +#define RTE_PTYPE_L2_ETHER_NSH 0x00000005 +/** + * VLAN packet type. + * + * Packet format: + * <'ether type'=[0x8100]> + */ +#define RTE_PTYPE_L2_ETHER_VLAN 0x00000006 +/** + * QinQ packet type. + * + * Packet format: + * <'ether type'=[0x88A8]> + */ +#define RTE_PTYPE_L2_ETHER_QINQ 0x00000007 +/** + * Mask of layer 2 packet types. 
+ * It is used for outer packet for tunneling cases. + */ +#define RTE_PTYPE_L2_MASK 0x0000000f +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for outer packet for tunneling cases, and does not contain any + * header option. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=5> + */ +#define RTE_PTYPE_L3_IPV4 0x00000010 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for outer packet for tunneling cases, and contains header + * options. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[6-15], 'options'> + */ +#define RTE_PTYPE_L3_IPV4_EXT 0x00000030 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for outer packet for tunneling cases, and does not contain any + * extension header. + * + * Packet format: + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=0x3B> + */ +#define RTE_PTYPE_L3_IPV6 0x00000040 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for outer packet for tunneling cases, and may or maynot contain + * header options. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[5-15], <'options'>> + */ +#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN 0x00000090 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for outer packet for tunneling cases, and contains extension + * headers. + * + * Packet format: + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * 'extension headers'> + */ +#define RTE_PTYPE_L3_IPV6_EXT 0x000000c0 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for outer packet for tunneling cases, and may or maynot contain + * extension headers. + * + * Packet format: + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * <'extension headers'>> + */ +#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN 0x000000e0 +/** + * Mask of layer 3 packet types. 
+ * It is used for outer packet for tunneling cases. + */ +#define RTE_PTYPE_L3_MASK 0x000000f0 +/** + * TCP (Transmission Control Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=6, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=6> + */ +#define RTE_PTYPE_L4_TCP 0x00000100 +/** + * UDP (User Datagram Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17> + */ +#define RTE_PTYPE_L4_UDP 0x00000200 +/** + * Fragmented IP (Internet Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * It refers to those packets of any IP types, which can be recognized as + * fragmented. A fragmented packet cannot be recognized as any other L4 types + * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP, + * RTE_PTYPE_L4_NONFRAG). + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'MF'=1> + * or, + * <'ether type'=0x0800 + * | 'version'=4, 'frag_offset'!=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=44> + */ +#define RTE_PTYPE_L4_FRAG 0x00000300 +/** + * SCTP (Stream Control Transmission Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=132, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=132> + */ +#define RTE_PTYPE_L4_SCTP 0x00000400 +/** + * ICMP (Internet Control Message Protocol) packet type. + * It is used for outer packet for tunneling cases. 
+ * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=1, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=1> + */ +#define RTE_PTYPE_L4_ICMP 0x00000500 +/** + * Non-fragmented IP (Internet Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * It refers to those packets of any IP types, while cannot be recognized as + * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, + * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP). + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'!=[6|17|44|132|1]> + */ +#define RTE_PTYPE_L4_NONFRAG 0x00000600 +/** + * Mask of layer 4 packet types. + * It is used for outer packet for tunneling cases. + */ +#define RTE_PTYPE_L4_MASK 0x00000f00 +/** + * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=[4|41]> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[4|41]> + */ +#define RTE_PTYPE_TUNNEL_IP 0x00001000 +/** + * GRE (Generic Routing Encapsulation) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=47> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=47> + */ +#define RTE_PTYPE_TUNNEL_GRE 0x00002000 +/** + * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'destination port'=4789> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'destination port'=4789> + */ +#define RTE_PTYPE_TUNNEL_VXLAN 0x00003000 +/** + * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling + * packet type.
+ * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=47 + * | 'protocol type'=0x6558> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=47 + * | 'protocol type'=0x6558'> + */ +#define RTE_PTYPE_TUNNEL_NVGRE 0x00004000 +/** + * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'destination port'=6081> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'destination port'=6081> + */ +#define RTE_PTYPE_TUNNEL_GENEVE 0x00005000 +/** + * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area + * Network) or GRE (Generic Routing Encapsulation) could be recognized as this + * packet type, if they can not be recognized independently as of hardware + * capability. + */ +#define RTE_PTYPE_TUNNEL_GRENAT 0x00006000 +/** + * Mask of tunneling packet types. + */ +#define RTE_PTYPE_TUNNEL_MASK 0x0000f000 +/** + * Ethernet packet type. + * It is used for inner packet type only. + * + * Packet format (inner only): + * <'ether type'=[0x800|0x86DD]> + */ +#define RTE_PTYPE_INNER_L2_ETHER 0x00010000 +/** + * Ethernet packet type with VLAN (Virtual Local Area Network) tag. + * + * Packet format (inner only): + * <'ether type'=[0x800|0x86DD], vlan=[1-4095]> + */ +#define RTE_PTYPE_INNER_L2_ETHER_VLAN 0x00020000 +/** + * QinQ packet type. + * + * Packet format: + * <'ether type'=[0x88A8]> + */ +#define RTE_PTYPE_INNER_L2_ETHER_QINQ 0x00030000 +/** + * Mask of inner layer 2 packet types. + */ +#define RTE_PTYPE_INNER_L2_MASK 0x000f0000 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for inner packet only, and does not contain any header option. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=5> + */ +#define RTE_PTYPE_INNER_L3_IPV4 0x00100000 +/** + * IP (Internet Protocol) version 4 packet type. 
+ * It is used for inner packet only, and contains header options. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[6-15], 'options'> + */ +#define RTE_PTYPE_INNER_L3_IPV4_EXT 0x00200000 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for inner packet only, and does not contain any extension header. + * + * Packet format (inner only): + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=0x3B> + */ +#define RTE_PTYPE_INNER_L3_IPV6 0x00300000 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for inner packet only, and may or maynot contain header options. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[5-15], <'options'>> + */ +#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for inner packet only, and contains extension headers. + * + * Packet format (inner only): + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * 'extension headers'> + */ +#define RTE_PTYPE_INNER_L3_IPV6_EXT 0x00500000 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for inner packet only, and may or maynot contain extension + * headers. + * + * Packet format (inner only): + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * <'extension headers'>> + */ +#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000 +/** + * Mask of inner layer 3 packet types. + */ +#define RTE_PTYPE_INNER_L3_MASK 0x00f00000 +/** + * TCP (Transmission Control Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=6, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=6> + */ +#define RTE_PTYPE_INNER_L4_TCP 0x01000000 +/** + * UDP (User Datagram Protocol) packet type. 
+ * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17> + */ +#define RTE_PTYPE_INNER_L4_UDP 0x02000000 +/** + * Fragmented IP (Internet Protocol) packet type. + * It is used for inner packet only, and may or maynot have layer 4 packet. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'MF'=1> + * or, + * <'ether type'=0x0800 + * | 'version'=4, 'frag_offset'!=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=44> + */ +#define RTE_PTYPE_INNER_L4_FRAG 0x03000000 +/** + * SCTP (Stream Control Transmission Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=132, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=132> + */ +#define RTE_PTYPE_INNER_L4_SCTP 0x04000000 +/** + * ICMP (Internet Control Message Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=1, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=1> + */ +#define RTE_PTYPE_INNER_L4_ICMP 0x05000000 +/** + * Non-fragmented IP (Internet Protocol) packet type. + * It is used for inner packet only, and may or maynot have other unknown layer + * 4 packet types. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'!=[6|17|44|132|1]> + */ +#define RTE_PTYPE_INNER_L4_NONFRAG 0x06000000 +/** + * Mask of inner layer 4 packet types. + */ +#define RTE_PTYPE_INNER_L4_MASK 0x0f000000 +/** + * All valid layer masks. 
+ */ +#define RTE_PTYPE_ALL_MASK 0x0fffffff + +/** + * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by + * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can + * determine if it is an IPV4 packet. + */ +#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4) + +/** + * Check if the (outer) L3 header is IPv6. To avoid comparing IPv6 types one by + * one, bit 6 is selected to be used for IPv6 only. Then checking bit 6 can + * determine if it is an IPV6 packet. + */ +#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6) + +/* Check if it is a tunneling packet */ +#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & \ + (RTE_PTYPE_TUNNEL_MASK | \ + RTE_PTYPE_INNER_L2_MASK | \ + RTE_PTYPE_INNER_L3_MASK | \ + RTE_PTYPE_INNER_L4_MASK)) + +/** + * Get the name of the l2 packet type + * + * @param ptype + * The packet type value. + * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_l2_name(uint32_t ptype); + +/** + * Get the name of the l3 packet type + * + * @param ptype + * The packet type value. + * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_l3_name(uint32_t ptype); + +/** + * Get the name of the l4 packet type + * + * @param ptype + * The packet type value. + * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_l4_name(uint32_t ptype); + +/** + * Get the name of the tunnel packet type + * + * @param ptype + * The packet type value. + * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_tunnel_name(uint32_t ptype); + +/** + * Get the name of the inner_l2 packet type + * + * @param ptype + * The packet type value. + * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_inner_l2_name(uint32_t ptype); + +/** + * Get the name of the inner_l3 packet type + * + * @param ptype + * The packet type value.
+ * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_inner_l3_name(uint32_t ptype); + +/** + * Get the name of the inner_l4 packet type + * + * @param ptype + * The packet type value. + * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_inner_l4_name(uint32_t ptype); + +/** + * Write the packet type name into the buffer + * + * @param ptype + * The packet type value. + * @param buf + * The buffer where the string is written. + * @param buflen + * The length of the buffer. + * @return + * - 0 on success + * - (-1) if the buffer is too small + */ +int rte_get_ptype_name(uint32_t ptype, char *buf, size_t buflen); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MBUF_PTYPE_H_ */ diff --git a/src/dpdk/lib/librte_mempool/rte_mempool.c b/src/dpdk/lib/librte_mempool/rte_mempool.c index 2e28e2e8..1c2aed8c 100644 --- a/src/dpdk/lib/librte_mempool/rte_mempool.c +++ b/src/dpdk/lib/librte_mempool/rte_mempool.c @@ -55,7 +55,6 @@ #include <rte_per_lcore.h> #include <rte_lcore.h> #include <rte_branch_prediction.h> -#include <rte_ring.h> #include <rte_errno.h> #include <rte_string_fns.h> #include <rte_spinlock.h> @@ -429,7 +428,7 @@ rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr, /* populate with the largest group of contiguous pages */ for (n = 1; (i + n) < pg_num && - paddr[i] + pg_sz == paddr[i+n]; n++) + paddr[i + n - 1] + pg_sz == paddr[i + n]; n++) ; ret = rte_mempool_populate_phys(mp, vaddr + i * pg_sz, @@ -579,8 +578,10 @@ rte_mempool_populate_default(struct rte_mempool *mp) mz->len, pg_sz, rte_mempool_memchunk_mz_free, (void *)(uintptr_t)mz); - if (ret < 0) + if (ret < 0) { + rte_memzone_free(mz); goto fail; + } } return mp->size; @@ -817,7 +818,6 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size, goto exit_unlock; } mp->mz = mz; - mp->socket_id = socket_id; mp->size = n; mp->flags = flags; mp->socket_id = socket_id; @@ -879,7 +879,7 @@ 
rte_mempool_create(const char *name, unsigned n, unsigned elt_size, * Since we have 4 combinations of the SP/SC/MP/MC examine the flags to * set the correct index into the table of ops structs. */ - if (flags & (MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET)) + if ((flags & MEMPOOL_F_SP_PUT) && (flags & MEMPOOL_F_SC_GET)) rte_mempool_set_ops_byname(mp, "ring_sp_sc", NULL); else if (flags & MEMPOOL_F_SP_PUT) rte_mempool_set_ops_byname(mp, "ring_sp_mc", NULL); @@ -909,9 +909,8 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, /* * Create the mempool over already allocated chunk of memory. * That external memory buffer can consists of physically disjoint pages. - * Setting vaddr to NULL, makes mempool to fallback to original behaviour - * and allocate space for mempool and it's elements as one big chunk of - * physically continuos memory. + * Setting vaddr to NULL, makes mempool to fallback to rte_mempool_create() + * behavior. */ struct rte_mempool * rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, diff --git a/src/dpdk/lib/librte_mempool/rte_mempool.h b/src/dpdk/lib/librte_mempool/rte_mempool.h index 059ad9e5..d0f5b27a 100644 --- a/src/dpdk/lib/librte_mempool/rte_mempool.h +++ b/src/dpdk/lib/librte_mempool/rte_mempool.h @@ -75,6 +75,7 @@ #include <rte_branch_prediction.h> #include <rte_ring.h> #include <rte_memcpy.h> +#include <rte_common.h> #ifdef __cplusplus extern "C" { @@ -216,6 +217,7 @@ struct rte_mempool { * RTE_MEMPOOL_NAMESIZE next time the ABI changes */ char name[RTE_MEMZONE_NAMESIZE]; /**< Name of mempool. */ + RTE_STD_C11 union { void *pool_data; /**< Ring or pool to store objects. */ uint64_t pool_id; /**< External mempool identifier. */ @@ -355,7 +357,7 @@ void rte_mempool_check_cookies(const struct rte_mempool *mp, * Prototype for implementation specific data provisioning function. 
* * The function should provide the implementation specific memory for - * for use by the other mempool ops functions in a given mempool ops struct. + * use by the other mempool ops functions in a given mempool ops struct. * E.g. the default ops provides an instance of the rte_ring for this purpose. * it will most likely point to a different type of data structure, and * will be transparent to the application programmer. @@ -549,7 +551,7 @@ int rte_mempool_register_ops(const struct rte_mempool_ops *ops); /** * Macro to statically register the ops of a mempool handler. * Note that the rte_mempool_register_ops fails silently here when - * more then RTE_MEMPOOL_MAX_OPS_IDX is registered. + * more than RTE_MEMPOOL_MAX_OPS_IDX is registered. */ #define MEMPOOL_REGISTER_OPS(ops) \ void mp_hdlr_init_##ops(void); \ @@ -587,10 +589,8 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *); /** * Create a new mempool named *name* in memory. * - * This function uses ``memzone_reserve()`` to allocate memory. The + * This function uses ``rte_memzone_reserve()`` to allocate memory. The * pool contains n elements of elt_size. Its size is set to n. - * All elements of the mempool are allocated together with the mempool header, - * in one physically continuous chunk of memory. * * @param name * The name of the mempool. @@ -610,9 +610,7 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *); * never be used. The access to the per-lcore table is of course * faster than the multi-producer/consumer pool. The cache can be * disabled if the cache_size argument is set to 0; it can be useful to - * avoid losing objects in cache. Note that even if not used, the - * memory space for cache is always reserved in a mempool structure, - * except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0. + * avoid losing objects in cache. * @param private_data_size * The size of the private data appended after the mempool * structure. 
This is useful for storing some private data after the @@ -746,7 +744,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, * * The mempool is allocated and initialized, but it is not populated: no * memory is allocated for the mempool elements. The user has to call - * rte_mempool_populate_*() or to add memory chunks to the pool. Once + * rte_mempool_populate_*() to add memory chunks to the pool. Once * populated, the user may also want to initialize each object with * rte_mempool_obj_iter(). * @@ -798,6 +796,10 @@ rte_mempool_free(struct rte_mempool *mp); * Add a virtually and physically contiguous memory chunk in the pool * where objects can be instanciated. * + * If the given physical address is unknown (paddr = RTE_BAD_PHYS_ADDR), + * the chunk doesn't need to be physically contiguous (only virtually), + * and allocated objects may span two pages. + * * @param mp * A pointer to the mempool structure. * @param vaddr @@ -946,7 +948,7 @@ uint32_t rte_mempool_mem_iter(struct rte_mempool *mp, rte_mempool_mem_cb_t *mem_cb, void *mem_cb_arg); /** - * Dump the status of the mempool to the console. + * Dump the status of the mempool to a file. 
* * @param f * A pointer to a file for output @@ -1036,19 +1038,15 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id) */ static inline void __attribute__((always_inline)) __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, - unsigned n, struct rte_mempool_cache *cache, int flags) + unsigned n, struct rte_mempool_cache *cache) { void **cache_objs; /* increment stat now, adding in mempool always success */ __MEMPOOL_STAT_ADD(mp, put, n); - /* No cache provided or single producer */ - if (unlikely(cache == NULL || flags & MEMPOOL_F_SP_PUT)) - goto ring_enqueue; - - /* Go straight to ring if put would overflow mem allocated for cache */ - if (unlikely(n > RTE_MEMPOOL_CACHE_MAX_SIZE)) + /* No cache provided or if put would overflow mem allocated for cache */ + if (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE)) goto ring_enqueue; cache_objs = &cache->objs[cache->len]; @@ -1102,10 +1100,11 @@ ring_enqueue: */ static inline void __attribute__((always_inline)) rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, - unsigned n, struct rte_mempool_cache *cache, int flags) + unsigned n, struct rte_mempool_cache *cache, + __rte_unused int flags) { __mempool_check_cookies(mp, obj_table, n, 0); - __mempool_generic_put(mp, obj_table, n, cache, flags); + __mempool_generic_put(mp, obj_table, n, cache); } /** @@ -1242,15 +1241,14 @@ rte_mempool_put(struct rte_mempool *mp, void *obj) */ static inline int __attribute__((always_inline)) __mempool_generic_get(struct rte_mempool *mp, void **obj_table, - unsigned n, struct rte_mempool_cache *cache, int flags) + unsigned n, struct rte_mempool_cache *cache) { int ret; uint32_t index, len; void **cache_objs; - /* No cache provided or single consumer */ - if (unlikely(cache == NULL || flags & MEMPOOL_F_SC_GET || - n >= cache->size)) + /* No cache provided or cannot be satisfied from cache */ + if (unlikely(cache == NULL || n >= cache->size)) goto ring_dequeue; cache_objs = 
cache->objs; @@ -1324,10 +1322,10 @@ ring_dequeue: */ static inline int __attribute__((always_inline)) rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n, - struct rte_mempool_cache *cache, int flags) + struct rte_mempool_cache *cache, __rte_unused int flags) { int ret; - ret = __mempool_generic_get(mp, obj_table, n, cache, flags); + ret = __mempool_generic_get(mp, obj_table, n, cache); if (ret == 0) __mempool_check_cookies(mp, obj_table, n, 1); return ret; diff --git a/src/dpdk/lib/librte_mempool/rte_mempool_stack.c b/src/dpdk/lib/librte_mempool/rte_mempool_stack.c index 5fd8af24..817f77e6 100644 --- a/src/dpdk/lib/librte_mempool/rte_mempool_stack.c +++ b/src/dpdk/lib/librte_mempool/rte_mempool_stack.c @@ -118,7 +118,7 @@ stack_dequeue(struct rte_mempool *mp, void **obj_table, s->len -= n; rte_spinlock_unlock(&s->sl); - return n; + return 0; } static unsigned diff --git a/src/dpdk/lib/librte_ether/rte_ether.h b/src/dpdk/lib/librte_net/rte_ether.h index 1d62d8e5..ff3d0654 100644 --- a/src/dpdk/lib/librte_ether/rte_ether.h +++ b/src/dpdk/lib/librte_net/rte_ether.h @@ -84,7 +84,7 @@ extern "C" { * See http://standards.ieee.org/regauth/groupmac/tutorial.html */ struct ether_addr { - uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Address bytes in transmission order */ + uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Addr bytes in tx order */ } __attribute__((__packed__)); #define ETHER_LOCAL_ADMIN_ADDR 0x02 /**< Locally assigned Eth. address. */ @@ -224,7 +224,7 @@ static inline int is_local_admin_ether_addr(const struct ether_addr *ea) */ static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea) { - return is_unicast_ether_addr(ea) && (! 
is_zero_ether_addr(ea)); + return is_unicast_ether_addr(ea) && (!is_zero_ether_addr(ea)); } /** @@ -236,7 +236,7 @@ static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea) static inline void eth_random_addr(uint8_t *addr) { uint64_t rand = rte_rand(); - uint8_t *p = (uint8_t*)&rand; + uint8_t *p = (uint8_t *)&rand; rte_memcpy(addr, p, ETHER_ADDR_LEN); addr[0] &= ~ETHER_GROUP_ADDR; /* clear multicast bit */ @@ -329,6 +329,7 @@ struct vxlan_hdr { #define ETHER_TYPE_ARP 0x0806 /**< Arp Protocol. */ #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */ #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */ +#define ETHER_TYPE_QINQ 0x88A8 /**< IEEE 802.1ad QinQ tagging. */ #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ #define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ #define ETHER_TYPE_TEB 0x6558 /**< Transparent Ethernet Bridging. */ diff --git a/src/dpdk/lib/librte_eal/common/include/rte_warnings.h b/src/dpdk/lib/librte_net/rte_gre.h index 54b545c9..46568ff5 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_warnings.h +++ b/src/dpdk/lib/librte_net/rte_gre.h @@ -1,8 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. + * Copyright 2016 6WIND S.A. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,54 +30,42 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/** - * @file - * Definitions of warnings for use of various insecure functions - */ - -#ifndef _RTE_WARNINGS_H_ -#define _RTE_WARNINGS_H_ - -#ifdef RTE_INSECURE_FUNCTION_WARNING - -/* we need to include all used standard header files so that they appear - * _before_ we poison the function names. 
- */ +#ifndef _RTE_GRE_H_ +#define _RTE_GRE_H_ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> -#include <errno.h> -#ifdef RTE_EXEC_ENV_LINUXAPP -#include <dirent.h> -#endif - -/* the following function are deemed not fully secure for use e.g. they - * do not always null-terminate arguments */ -#pragma GCC poison sprintf strtok snprintf vsnprintf -#pragma GCC poison strlen strcpy strcat -#pragma GCC poison sscanf - -/* other unsafe functions may be implemented as macros so just undef them */ -#ifdef strsep -#undef strsep -#else -#pragma GCC poison strsep -#endif +#include <stdint.h> +#include <rte_byteorder.h> -#ifdef strncpy -#undef strncpy -#else -#pragma GCC poison strncpy +#ifdef __cplusplus +extern "C" { #endif -#ifdef strncat -#undef strncat -#else -#pragma GCC poison strncat +/** + * GRE Header + */ +struct gre_hdr { +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + uint16_t res2:4; /**< Reserved */ + uint16_t s:1; /**< Sequence Number Present bit */ + uint16_t k:1; /**< Key Present bit */ + uint16_t res1:1; /**< Reserved */ + uint16_t c:1; /**< Checksum Present bit */ + uint16_t ver:3; /**< Version Number */ + uint16_t res3:5; /**< Reserved */ +#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN + uint16_t c:1; /**< Checksum Present bit */ + uint16_t res1:1; /**< Reserved */ + uint16_t k:1; /**< Key Present bit */ + uint16_t s:1; /**< Sequence Number Present bit */ + uint16_t res2:4; /**< Reserved */ + uint16_t res3:5; /**< Reserved */ + uint16_t ver:3; /**< Version Number */ #endif + uint16_t proto; /**< Protocol Type */ +} __attribute__((__packed__)); +#ifdef __cplusplus +} #endif -#endif /* RTE_WARNINGS_H */ +#endif /* RTE_GRE_H_ */ diff --git a/src/dpdk/lib/librte_net/rte_ip.h b/src/dpdk/lib/librte_net/rte_ip.h index 5b7554ab..4491b86e 100644 --- a/src/dpdk/lib/librte_net/rte_ip.h +++ b/src/dpdk/lib/librte_net/rte_ip.h @@ -230,6 +230,77 @@ rte_raw_cksum(const void *buf, size_t len) } /** + * Compute the raw (non complemented) checksum of 
a packet. + * + * @param m + * The pointer to the mbuf. + * @param off + * The offset in bytes to start the checksum. + * @param len + * The length in bytes of the data to checksum. + * @param cksum + * A pointer to the checksum, filled on success. + * @return + * 0 on success, -1 on error (bad length or offset). + */ +static inline int +rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len, + uint16_t *cksum) +{ + const struct rte_mbuf *seg; + const char *buf; + uint32_t sum, tmp; + uint32_t seglen, done; + + /* easy case: all data in the first segment */ + if (off + len <= rte_pktmbuf_data_len(m)) { + *cksum = rte_raw_cksum(rte_pktmbuf_mtod_offset(m, + const char *, off), len); + return 0; + } + + if (unlikely(off + len > rte_pktmbuf_pkt_len(m))) + return -1; /* invalid params, return a dummy value */ + + /* else browse the segment to find offset */ + seglen = 0; + for (seg = m; seg != NULL; seg = seg->next) { + seglen = rte_pktmbuf_data_len(seg); + if (off < seglen) + break; + off -= seglen; + } + seglen -= off; + buf = rte_pktmbuf_mtod_offset(seg, const char *, off); + if (seglen >= len) { + /* all in one segment */ + *cksum = rte_raw_cksum(buf, len); + return 0; + } + + /* hard case: process checksum of several segments */ + sum = 0; + done = 0; + for (;;) { + tmp = __rte_raw_cksum(buf, seglen, 0); + if (done & 1) + tmp = rte_bswap16(tmp); + sum += tmp; + done += seglen; + if (done == len) + break; + seg = seg->next; + buf = rte_pktmbuf_mtod(seg, const char *); + seglen = rte_pktmbuf_data_len(seg); + if (seglen > len - done) + seglen = len - done; + } + + *cksum = __rte_raw_cksum_reduce(sum); + return 0; +} + +/** * Process the IPv4 checksum of an IPv4 header. * * The checksum field must be set to 0 by the caller.
diff --git a/src/dpdk/lib/librte_net/rte_net.c b/src/dpdk/lib/librte_net/rte_net.c new file mode 100644 index 00000000..a8c7aff9 --- /dev/null +++ b/src/dpdk/lib/librte_net/rte_net.c @@ -0,0 +1,517 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> + +#include <rte_mbuf.h> +#include <rte_mbuf_ptype.h> +#include <rte_byteorder.h> +#include <rte_ether.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_sctp.h> +#include <rte_gre.h> +#include <rte_net.h> + +/* get l3 packet type from ip6 next protocol */ +static uint32_t +ptype_l3_ip6(uint8_t ip6_proto) +{ + static const uint32_t ip6_ext_proto_map[256] = { + [IPPROTO_HOPOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6, + [IPPROTO_ROUTING] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6, + [IPPROTO_FRAGMENT] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6, + [IPPROTO_ESP] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6, + [IPPROTO_AH] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6, + [IPPROTO_DSTOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6, + }; + + return RTE_PTYPE_L3_IPV6 + ip6_ext_proto_map[ip6_proto]; +} + +/* get l3 packet type from ip version and header length */ +static uint32_t +ptype_l3_ip(uint8_t ipv_ihl) +{ + static const uint32_t ptype_l3_ip_proto_map[256] = { + [0x45] = RTE_PTYPE_L3_IPV4, + [0x46] = RTE_PTYPE_L3_IPV4_EXT, + [0x47] = RTE_PTYPE_L3_IPV4_EXT, + [0x48] = RTE_PTYPE_L3_IPV4_EXT, + [0x49] = RTE_PTYPE_L3_IPV4_EXT, + [0x4A] = RTE_PTYPE_L3_IPV4_EXT, + [0x4B] = RTE_PTYPE_L3_IPV4_EXT, + [0x4C] = RTE_PTYPE_L3_IPV4_EXT, + [0x4D] = RTE_PTYPE_L3_IPV4_EXT, + [0x4E] = RTE_PTYPE_L3_IPV4_EXT, + [0x4F] = RTE_PTYPE_L3_IPV4_EXT, + }; + + return ptype_l3_ip_proto_map[ipv_ihl]; +} + +/* get l4 packet type from proto */ +static uint32_t +ptype_l4(uint8_t proto) +{ + static const uint32_t ptype_l4_proto[256] = { + [IPPROTO_UDP] = RTE_PTYPE_L4_UDP, + [IPPROTO_TCP] = RTE_PTYPE_L4_TCP, + [IPPROTO_SCTP] = RTE_PTYPE_L4_SCTP, + }; + + return ptype_l4_proto[proto]; +} + +/* get inner l3 packet type from ip6 next protocol */ +static uint32_t +ptype_inner_l3_ip6(uint8_t ip6_proto) +{ + static const uint32_t ptype_inner_ip6_ext_proto_map[256] = { + [IPPROTO_HOPOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT - + 
RTE_PTYPE_INNER_L3_IPV6, + [IPPROTO_ROUTING] = RTE_PTYPE_INNER_L3_IPV6_EXT - + RTE_PTYPE_INNER_L3_IPV6, + [IPPROTO_FRAGMENT] = RTE_PTYPE_INNER_L3_IPV6_EXT - + RTE_PTYPE_INNER_L3_IPV6, + [IPPROTO_ESP] = RTE_PTYPE_INNER_L3_IPV6_EXT - + RTE_PTYPE_INNER_L3_IPV6, + [IPPROTO_AH] = RTE_PTYPE_INNER_L3_IPV6_EXT - + RTE_PTYPE_INNER_L3_IPV6, + [IPPROTO_DSTOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT - + RTE_PTYPE_INNER_L3_IPV6, + }; + + return RTE_PTYPE_INNER_L3_IPV6 + + ptype_inner_ip6_ext_proto_map[ip6_proto]; +} + +/* get inner l3 packet type from ip version and header length */ +static uint32_t +ptype_inner_l3_ip(uint8_t ipv_ihl) +{ + static const uint32_t ptype_inner_l3_ip_proto_map[256] = { + [0x45] = RTE_PTYPE_INNER_L3_IPV4, + [0x46] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x47] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x48] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x49] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4A] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4B] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4C] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4D] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4E] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4F] = RTE_PTYPE_INNER_L3_IPV4_EXT, + }; + + return ptype_inner_l3_ip_proto_map[ipv_ihl]; +} + +/* get inner l4 packet type from proto */ +static uint32_t +ptype_inner_l4(uint8_t proto) +{ + static const uint32_t ptype_inner_l4_proto[256] = { + [IPPROTO_UDP] = RTE_PTYPE_INNER_L4_UDP, + [IPPROTO_TCP] = RTE_PTYPE_INNER_L4_TCP, + [IPPROTO_SCTP] = RTE_PTYPE_INNER_L4_SCTP, + }; + + return ptype_inner_l4_proto[proto]; +} + +/* get the tunnel packet type if any, update proto and off. 
*/ +static uint32_t +ptype_tunnel(uint16_t *proto, const struct rte_mbuf *m, + uint32_t *off) +{ + switch (*proto) { + case IPPROTO_GRE: { + static const uint8_t opt_len[16] = { + [0x0] = 4, + [0x1] = 8, + [0x2] = 8, + [0x8] = 8, + [0x3] = 12, + [0x9] = 12, + [0xa] = 12, + [0xb] = 16, + }; + const struct gre_hdr *gh; + struct gre_hdr gh_copy; + uint16_t flags; + + gh = rte_pktmbuf_read(m, *off, sizeof(*gh), &gh_copy); + if (unlikely(gh == NULL)) + return 0; + + flags = rte_be_to_cpu_16(*(const uint16_t *)gh); + flags >>= 12; + if (opt_len[flags] == 0) + return 0; + + *off += opt_len[flags]; + *proto = gh->proto; + if (*proto == rte_cpu_to_be_16(ETHER_TYPE_TEB)) + return RTE_PTYPE_TUNNEL_NVGRE; + else + return RTE_PTYPE_TUNNEL_GRE; + } + case IPPROTO_IPIP: + *proto = rte_cpu_to_be_16(ETHER_TYPE_IPv4); + return RTE_PTYPE_TUNNEL_IP; + case IPPROTO_IPV6: + *proto = rte_cpu_to_be_16(ETHER_TYPE_IPv6); + return RTE_PTYPE_TUNNEL_IP; /* IP is also valid for IPv6 */ + default: + return 0; + } +} + +/* get the ipv4 header length */ +static uint8_t +ip4_hlen(const struct ipv4_hdr *hdr) +{ + return (hdr->version_ihl & 0xf) * 4; +} + +/* parse ipv6 extended headers, update offset and return next proto */ +static uint16_t +skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off, + int *frag) +{ + struct ext_hdr { + uint8_t next_hdr; + uint8_t len; + }; + const struct ext_hdr *xh; + struct ext_hdr xh_copy; + unsigned int i; + + *frag = 0; + +#define MAX_EXT_HDRS 5 + for (i = 0; i < MAX_EXT_HDRS; i++) { + switch (proto) { + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + xh = rte_pktmbuf_read(m, *off, sizeof(*xh), + &xh_copy); + if (xh == NULL) + return 0; + *off += (xh->len + 1) * 8; + proto = xh->next_hdr; + break; + case IPPROTO_FRAGMENT: + xh = rte_pktmbuf_read(m, *off, sizeof(*xh), + &xh_copy); + if (xh == NULL) + return 0; + *off += 8; + proto = xh->next_hdr; + *frag = 1; + return proto; /* this is always the last ext hdr */ + case 
IPPROTO_NONE: + return 0; + default: + return proto; + } + } + return 0; +} + +/* parse mbuf data to get packet type */ +uint32_t rte_net_get_ptype(const struct rte_mbuf *m, + struct rte_net_hdr_lens *hdr_lens, uint32_t layers) +{ + struct rte_net_hdr_lens local_hdr_lens; + const struct ether_hdr *eh; + struct ether_hdr eh_copy; + uint32_t pkt_type = RTE_PTYPE_L2_ETHER; + uint32_t off = 0; + uint16_t proto; + + if (hdr_lens == NULL) + hdr_lens = &local_hdr_lens; + + eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy); + if (unlikely(eh == NULL)) + return 0; + proto = eh->ether_type; + off = sizeof(*eh); + hdr_lens->l2_len = off; + + if ((layers & RTE_PTYPE_L2_MASK) == 0) + return 0; + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) + goto l3; /* fast path if packet is IPv4 */ + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { + const struct vlan_hdr *vh; + struct vlan_hdr vh_copy; + + pkt_type = RTE_PTYPE_L2_ETHER_VLAN; + vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy); + if (unlikely(vh == NULL)) + return pkt_type; + off += sizeof(*vh); + hdr_lens->l2_len += sizeof(*vh); + proto = vh->eth_proto; + } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) { + const struct vlan_hdr *vh; + struct vlan_hdr vh_copy; + + pkt_type = RTE_PTYPE_L2_ETHER_QINQ; + vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh), + &vh_copy); + if (unlikely(vh == NULL)) + return pkt_type; + off += 2 * sizeof(*vh); + hdr_lens->l2_len += 2 * sizeof(*vh); + proto = vh->eth_proto; + } + + l3: + if ((layers & RTE_PTYPE_L3_MASK) == 0) + return pkt_type; + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { + const struct ipv4_hdr *ip4h; + struct ipv4_hdr ip4h_copy; + + ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy); + if (unlikely(ip4h == NULL)) + return pkt_type; + + pkt_type |= ptype_l3_ip(ip4h->version_ihl); + hdr_lens->l3_len = ip4_hlen(ip4h); + off += hdr_lens->l3_len; + + if ((layers & RTE_PTYPE_L4_MASK) == 0) + return pkt_type; + + if (ip4h->fragment_offset & 
rte_cpu_to_be_16( + IPV4_HDR_OFFSET_MASK | IPV4_HDR_MF_FLAG)) { + pkt_type |= RTE_PTYPE_L4_FRAG; + hdr_lens->l4_len = 0; + return pkt_type; + } + proto = ip4h->next_proto_id; + pkt_type |= ptype_l4(proto); + } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { + const struct ipv6_hdr *ip6h; + struct ipv6_hdr ip6h_copy; + int frag = 0; + + ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy); + if (unlikely(ip6h == NULL)) + return pkt_type; + + proto = ip6h->proto; + hdr_lens->l3_len = sizeof(*ip6h); + off += hdr_lens->l3_len; + pkt_type |= ptype_l3_ip6(proto); + if ((pkt_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT) { + proto = skip_ip6_ext(proto, m, &off, &frag); + hdr_lens->l3_len = off - hdr_lens->l2_len; + } + if (proto == 0) + return pkt_type; + + if ((layers & RTE_PTYPE_L4_MASK) == 0) + return pkt_type; + + if (frag) { + pkt_type |= RTE_PTYPE_L4_FRAG; + hdr_lens->l4_len = 0; + return pkt_type; + } + pkt_type |= ptype_l4(proto); + } + + if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP) { + hdr_lens->l4_len = sizeof(struct udp_hdr); + return pkt_type; + } else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP) { + const struct tcp_hdr *th; + struct tcp_hdr th_copy; + + th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy); + if (unlikely(th == NULL)) + return pkt_type & (RTE_PTYPE_L2_MASK | + RTE_PTYPE_L3_MASK); + hdr_lens->l4_len = (th->data_off & 0xf0) >> 2; + return pkt_type; + } else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) { + hdr_lens->l4_len = sizeof(struct sctp_hdr); + return pkt_type; + } else { + uint32_t prev_off = off; + + hdr_lens->l4_len = 0; + + if ((layers & RTE_PTYPE_TUNNEL_MASK) == 0) + return pkt_type; + + pkt_type |= ptype_tunnel(&proto, m, &off); + hdr_lens->tunnel_len = off - prev_off; + } + + /* same job for inner header: we need to duplicate the code + * because the packet types do not have the same value. 
+ */ + if ((layers & RTE_PTYPE_INNER_L2_MASK) == 0) + return pkt_type; + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_TEB)) { + eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy); + if (unlikely(eh == NULL)) + return pkt_type; + pkt_type |= RTE_PTYPE_INNER_L2_ETHER; + proto = eh->ether_type; + off += sizeof(*eh); + hdr_lens->inner_l2_len = sizeof(*eh); + } + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { + const struct vlan_hdr *vh; + struct vlan_hdr vh_copy; + + pkt_type &= ~RTE_PTYPE_INNER_L2_MASK; + pkt_type |= RTE_PTYPE_INNER_L2_ETHER_VLAN; + vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy); + if (unlikely(vh == NULL)) + return pkt_type; + off += sizeof(*vh); + hdr_lens->inner_l2_len += sizeof(*vh); + proto = vh->eth_proto; + } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) { + const struct vlan_hdr *vh; + struct vlan_hdr vh_copy; + + pkt_type &= ~RTE_PTYPE_INNER_L2_MASK; + pkt_type |= RTE_PTYPE_INNER_L2_ETHER_QINQ; + vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh), + &vh_copy); + if (unlikely(vh == NULL)) + return pkt_type; + off += 2 * sizeof(*vh); + hdr_lens->inner_l2_len += 2 * sizeof(*vh); + proto = vh->eth_proto; + } + + if ((layers & RTE_PTYPE_INNER_L3_MASK) == 0) + return pkt_type; + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { + const struct ipv4_hdr *ip4h; + struct ipv4_hdr ip4h_copy; + + ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy); + if (unlikely(ip4h == NULL)) + return pkt_type; + + pkt_type |= ptype_inner_l3_ip(ip4h->version_ihl); + hdr_lens->inner_l3_len = ip4_hlen(ip4h); + off += hdr_lens->inner_l3_len; + + if ((layers & RTE_PTYPE_INNER_L4_MASK) == 0) + return pkt_type; + if (ip4h->fragment_offset & + rte_cpu_to_be_16(IPV4_HDR_OFFSET_MASK | + IPV4_HDR_MF_FLAG)) { + pkt_type |= RTE_PTYPE_INNER_L4_FRAG; + hdr_lens->inner_l4_len = 0; + return pkt_type; + } + proto = ip4h->next_proto_id; + pkt_type |= ptype_inner_l4(proto); + } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { + const struct ipv6_hdr 
*ip6h; + struct ipv6_hdr ip6h_copy; + int frag = 0; + + ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy); + if (unlikely(ip6h == NULL)) + return pkt_type; + + proto = ip6h->proto; + hdr_lens->inner_l3_len = sizeof(*ip6h); + off += hdr_lens->inner_l3_len; + pkt_type |= ptype_inner_l3_ip6(proto); + if ((pkt_type & RTE_PTYPE_INNER_L3_MASK) == + RTE_PTYPE_INNER_L3_IPV6_EXT) { + uint32_t prev_off; + + prev_off = off; + proto = skip_ip6_ext(proto, m, &off, &frag); + hdr_lens->inner_l3_len += off - prev_off; + } + if (proto == 0) + return pkt_type; + + if ((layers & RTE_PTYPE_INNER_L4_MASK) == 0) + return pkt_type; + + if (frag) { + pkt_type |= RTE_PTYPE_INNER_L4_FRAG; + hdr_lens->inner_l4_len = 0; + return pkt_type; + } + pkt_type |= ptype_inner_l4(proto); + } + + if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_UDP) { + hdr_lens->inner_l4_len = sizeof(struct udp_hdr); + } else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == + RTE_PTYPE_INNER_L4_TCP) { + const struct tcp_hdr *th; + struct tcp_hdr th_copy; + + th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy); + if (unlikely(th == NULL)) + return pkt_type & (RTE_PTYPE_INNER_L2_MASK | + RTE_PTYPE_INNER_L3_MASK); + hdr_lens->inner_l4_len = (th->data_off & 0xf0) >> 2; + } else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == + RTE_PTYPE_INNER_L4_SCTP) { + hdr_lens->inner_l4_len = sizeof(struct sctp_hdr); + } else { + hdr_lens->inner_l4_len = 0; + } + + return pkt_type; +} diff --git a/src/dpdk/lib/librte_net/rte_net.h b/src/dpdk/lib/librte_net/rte_net.h new file mode 100644 index 00000000..548eaedb --- /dev/null +++ b/src/dpdk/lib/librte_net/rte_net.h @@ -0,0 +1,204 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_NET_PTYPE_H_ +#define _RTE_NET_PTYPE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <rte_ip.h> +#include <rte_udp.h> +#include <rte_tcp.h> +#include <rte_sctp.h> + +/** + * Structure containing header lengths associated to a packet, filled + * by rte_net_get_ptype(). 
 */
struct rte_net_hdr_lens {
	uint8_t l2_len;       /**< outer Ethernet header plus any VLAN/QinQ tags */
	uint8_t l3_len;       /**< outer IPv4/IPv6 header, options/extensions included */
	uint8_t l4_len;       /**< outer TCP/UDP/SCTP header; 0 for fragments or other protocols */
	uint8_t tunnel_len;   /**< bytes consumed by the tunnel header (0 for IP-in-IP) */
	uint8_t inner_l2_len; /**< inner Ethernet header plus any VLAN/QinQ tags */
	uint8_t inner_l3_len; /**< inner IPv4/IPv6 header, options/extensions included */
	uint8_t inner_l4_len; /**< inner TCP/UDP/SCTP header; 0 for fragments or other protocols */
};

/**
 * Parse an Ethernet packet to get its packet type.
 *
 * This function parses the network headers in mbuf data and returns its
 * packet type.
 *
 * If it is provided by the user, it also fills a rte_net_hdr_lens
 * structure that contains the lengths of the parsed network
 * headers. Each length field is valid only if the associated packet
 * type is set. For instance, hdr_lens->l2_len is valid only if
 * (retval & RTE_PTYPE_L2_MASK) != RTE_PTYPE_UNKNOWN.
 *
 * Supported packet types are:
 *   L2: Ether, Vlan, QinQ
 *   L3: IPv4, IPv6
 *   L4: TCP, UDP, SCTP
 *   Tunnels: IPv4, IPv6, Gre, Nvgre
 *
 * @param m
 *   The packet mbuf to be parsed.
 * @param hdr_lens
 *   A pointer to a structure where the header lengths will be returned,
 *   or NULL.
 * @param layers
 *   List of layers to parse. The function will stop at the first
 *   empty layer. Examples:
 *   - To parse all known layers, use RTE_PTYPE_ALL_MASK.
 *   - To parse only L2 and L3, use RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK
 * @return
 *   The packet type of the packet.
 */
uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
	struct rte_net_hdr_lens *hdr_lens, uint32_t layers);

/**
 * Prepare pseudo header checksum
 *
 * This function prepares pseudo header checksum for TSO and non-TSO tcp/udp in
 * provided mbufs packet data and based on the requested offload flags.
 *
 * - for non-TSO tcp/udp packets full pseudo-header checksum is counted and set
 *   in packet data,
 * - for TSO the IP payload length is not included in pseudo header.
 *
 * This function expects that used headers are in the first data segment of
 * mbuf, are not fragmented and can be safely modified.
 *
 * @param m
 *   The packet mbuf to be fixed.
 * @param ol_flags
 *   TX offloads flags to use with this packet.
+ * @return + * 0 if checksum is initialized properly + */ +static inline int +rte_net_intel_cksum_flags_prepare(struct rte_mbuf *m, uint64_t ol_flags) +{ + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + struct tcp_hdr *tcp_hdr; + struct udp_hdr *udp_hdr; + uint64_t inner_l3_offset = m->l2_len; + + if (ol_flags & PKT_TX_OUTER_IP_CKSUM) + inner_l3_offset += m->outer_l2_len + m->outer_l3_len; + + if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) { + if (ol_flags & PKT_TX_IPV4) { + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + inner_l3_offset); + + if (ol_flags & PKT_TX_IP_CKSUM) + ipv4_hdr->hdr_checksum = 0; + + udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + + m->l3_len); + udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr, + ol_flags); + } else { + ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, + inner_l3_offset); + /* non-TSO udp */ + udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, + inner_l3_offset + m->l3_len); + udp_hdr->dgram_cksum = rte_ipv6_phdr_cksum(ipv6_hdr, + ol_flags); + } + } else if ((ol_flags & PKT_TX_TCP_CKSUM) || + (ol_flags & PKT_TX_TCP_SEG)) { + if (ol_flags & PKT_TX_IPV4) { + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + inner_l3_offset); + + if (ol_flags & PKT_TX_IP_CKSUM) + ipv4_hdr->hdr_checksum = 0; + + /* non-TSO tcp or TSO */ + tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + + m->l3_len); + tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr, + ol_flags); + } else { + ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, + inner_l3_offset); + /* non-TSO tcp or TSO */ + tcp_hdr = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, + inner_l3_offset + m->l3_len); + tcp_hdr->cksum = rte_ipv6_phdr_cksum(ipv6_hdr, + ol_flags); + } + } + + return 0; +} + +/** + * Prepare pseudo header checksum + * + * This function prepares pseudo header checksum for TSO and non-TSO tcp/udp in + * provided mbufs packet data. 
+ * + * - for non-TSO tcp/udp packets full pseudo-header checksum is counted and set + * in packet data, + * - for TSO the IP payload length is not included in pseudo header. + * + * This function expects that used headers are in the first data segment of + * mbuf, are not fragmented and can be safely modified. + * + * @param m + * The packet mbuf to be fixed. + * @return + * 0 if checksum is initialized properly + */ +static inline int +rte_net_intel_cksum_prepare(struct rte_mbuf *m) +{ + return rte_net_intel_cksum_flags_prepare(m, m->ol_flags); +} + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_NET_PTYPE_H_ */ diff --git a/src/dpdk/lib/librte_pipeline/rte_pipeline.h b/src/dpdk/lib/librte_pipeline/rte_pipeline.h index 84d18025..f3663483 100644 --- a/src/dpdk/lib/librte_pipeline/rte_pipeline.h +++ b/src/dpdk/lib/librte_pipeline/rte_pipeline.h @@ -87,6 +87,7 @@ extern "C" { #include <rte_port.h> #include <rte_table.h> +#include <rte_common.h> struct rte_mbuf; @@ -244,6 +245,7 @@ struct rte_pipeline_table_entry { /** Reserved action */ enum rte_pipeline_action action; + RTE_STD_C11 union { /** Output port ID (meta-data for "Send packet to output port" action) */ @@ -252,7 +254,7 @@ struct rte_pipeline_table_entry { uint32_t table_id; }; /** Start of table entry area for user defined actions and meta-data */ - uint8_t action_data[0]; + __extension__ uint8_t action_data[0]; }; /** diff --git a/src/dpdk/lib/librte_port/rte_port_fd.c b/src/dpdk/lib/librte_port/rte_port_fd.c new file mode 100644 index 00000000..0d640f34 --- /dev/null +++ b/src/dpdk/lib/librte_port/rte_port_fd.c @@ -0,0 +1,552 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <string.h> +#include <stdint.h> +#include <unistd.h> + +#include <rte_mbuf.h> +#include <rte_malloc.h> + +#include "rte_port_fd.h" + +/* + * Port FD Reader + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val) \ + do { port->stats.n_pkts_in += val; } while (0) +#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val) \ + do { port->stats.n_pkts_drop += val; } while (0) + +#else + +#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_fd_reader { + struct rte_port_in_stats stats; + int fd; + uint32_t mtu; + struct rte_mempool *mempool; +}; + +static void * +rte_port_fd_reader_create(void *params, int socket_id) +{ + struct rte_port_fd_reader_params *conf = + (struct rte_port_fd_reader_params *) params; + struct rte_port_fd_reader *port; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__); + return NULL; + } + if (conf->fd < 0) { + RTE_LOG(ERR, PORT, "%s: Invalid file descriptor\n", __func__); + return NULL; + } + if (conf->mtu == 0) { + RTE_LOG(ERR, PORT, "%s: Invalid MTU\n", __func__); + return NULL; + } + if (conf->mempool == NULL) { + RTE_LOG(ERR, PORT, "%s: Invalid mempool\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->fd = conf->fd; + port->mtu = conf->mtu; + port->mempool = conf->mempool; + + return port; +} + +static int +rte_port_fd_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_fd_reader *p = (struct rte_port_fd_reader *) port; + uint32_t i; + + if (rte_mempool_get_bulk(p->mempool, (void **) pkts, n_pkts) != 0) + return 0; + + for (i = 0; i < n_pkts; i++) { + 
rte_mbuf_refcnt_set(pkts[i], 1); + rte_pktmbuf_reset(pkts[i]); + } + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = pkts[i]; + void *pkt_data = rte_pktmbuf_mtod(pkt, void *); + ssize_t n_bytes; + + n_bytes = read(p->fd, pkt_data, (size_t) p->mtu); + if (n_bytes <= 0) + break; + + pkt->data_len = n_bytes; + pkt->pkt_len = n_bytes; + } + + for ( ; i < n_pkts; i++) + rte_pktmbuf_free(pkts[i]); + + RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(p, i); + + return n_pkts; +} + +static int +rte_port_fd_reader_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_free(port); + + return 0; +} + +static int rte_port_fd_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_fd_reader *p = + (struct rte_port_fd_reader *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port FD Writer + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val) \ + do { port->stats.n_pkts_in += val; } while (0) +#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + do { port->stats.n_pkts_drop += val; } while (0) + +#else + +#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_fd_writer { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint16_t tx_buf_count; + uint32_t fd; +}; + +static void * +rte_port_fd_writer_create(void *params, int socket_id) +{ + struct rte_port_fd_writer_params *conf = + (struct rte_port_fd_writer_params *) params; + struct rte_port_fd_writer *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, 
PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->fd = conf->fd; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + + return port; +} + +static inline void +send_burst(struct rte_port_fd_writer *p) +{ + uint32_t i; + + for (i = 0; i < p->tx_buf_count; i++) { + struct rte_mbuf *pkt = p->tx_buf[i]; + void *pkt_data = rte_pktmbuf_mtod(pkt, void*); + size_t n_bytes = rte_pktmbuf_data_len(pkt); + ssize_t ret; + + ret = write(p->fd, pkt_data, n_bytes); + if (ret < 0) + break; + } + + RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - i); + + for (i = 0; i < p->tx_buf_count; i++) + rte_pktmbuf_free(p->tx_buf[i]); + + p->tx_buf_count = 0; +} + +static int +rte_port_fd_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_fd_writer *p = + (struct rte_port_fd_writer *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_fd_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_fd_writer *p = + (struct rte_port_fd_writer *) port; + uint32_t tx_buf_count = p->tx_buf_count; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) + p->tx_buf[tx_buf_count++] = pkts[i]; + RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, n_pkts); + } else + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + 
p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_fd_writer_flush(void *port) +{ + struct rte_port_fd_writer *p = + (struct rte_port_fd_writer *) port; + + if (p->tx_buf_count > 0) + send_burst(p); + + return 0; +} + +static int +rte_port_fd_writer_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_fd_writer_flush(port); + rte_free(port); + + return 0; +} + +static int rte_port_fd_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_fd_writer *p = + (struct rte_port_fd_writer *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port FD Writer Nodrop + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \ + do { port->stats.n_pkts_in += val; } while (0) +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \ + do { port->stats.n_pkts_drop += val; } while (0) + +#else + +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_fd_writer_nodrop { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint16_t tx_buf_count; + uint64_t n_retries; + uint32_t fd; +}; + +static void * +rte_port_fd_writer_nodrop_create(void *params, int socket_id) +{ + struct rte_port_fd_writer_nodrop_params *conf = + (struct rte_port_fd_writer_nodrop_params *) params; + struct rte_port_fd_writer_nodrop *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->fd < 0) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input 
parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->fd = conf->fd; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + + /* + * When n_retries is 0 it means that we should wait for every packet to + * send no matter how many retries should it take. To limit number of + * branches in fast path, we use UINT64_MAX instead of branching. + */ + port->n_retries = (conf->n_retries == 0) ? UINT64_MAX : conf->n_retries; + + return port; +} + +static inline void +send_burst_nodrop(struct rte_port_fd_writer_nodrop *p) +{ + uint64_t n_retries; + uint32_t i; + + n_retries = 0; + for (i = 0; (i < p->tx_buf_count) && (n_retries < p->n_retries); i++) { + struct rte_mbuf *pkt = p->tx_buf[i]; + void *pkt_data = rte_pktmbuf_mtod(pkt, void*); + size_t n_bytes = rte_pktmbuf_data_len(pkt); + + for ( ; n_retries < p->n_retries; n_retries++) { + ssize_t ret; + + ret = write(p->fd, pkt_data, n_bytes); + if (ret) + break; + } + } + + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - i); + + for (i = 0; i < p->tx_buf_count; i++) + rte_pktmbuf_free(p->tx_buf[i]); + + p->tx_buf_count = 0; +} + +static int +rte_port_fd_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_fd_writer_nodrop *p = + (struct rte_port_fd_writer_nodrop *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_fd_writer_nodrop_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_fd_writer_nodrop *p = + (struct rte_port_fd_writer_nodrop *) port; + uint32_t tx_buf_count = p->tx_buf_count; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t 
n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) + p->tx_buf[tx_buf_count++] = pkts[i]; + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts); + } else + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_fd_writer_nodrop_flush(void *port) +{ + struct rte_port_fd_writer_nodrop *p = + (struct rte_port_fd_writer_nodrop *) port; + + if (p->tx_buf_count > 0) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_fd_writer_nodrop_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_fd_writer_nodrop_flush(port); + rte_free(port); + +return 0; +} + +static int rte_port_fd_writer_nodrop_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_fd_writer_nodrop *p = + (struct rte_port_fd_writer_nodrop *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_fd_reader_ops = { + .f_create = rte_port_fd_reader_create, + .f_free = rte_port_fd_reader_free, + .f_rx = rte_port_fd_reader_rx, + .f_stats = rte_port_fd_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_fd_writer_ops = { + .f_create = rte_port_fd_writer_create, + .f_free = rte_port_fd_writer_free, + .f_tx = rte_port_fd_writer_tx, + .f_tx_bulk = rte_port_fd_writer_tx_bulk, + .f_flush = rte_port_fd_writer_flush, + .f_stats = rte_port_fd_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_fd_writer_nodrop_ops = { + 
.f_create = rte_port_fd_writer_nodrop_create, + .f_free = rte_port_fd_writer_nodrop_free, + .f_tx = rte_port_fd_writer_nodrop_tx, + .f_tx_bulk = rte_port_fd_writer_nodrop_tx_bulk, + .f_flush = rte_port_fd_writer_nodrop_flush, + .f_stats = rte_port_fd_writer_nodrop_stats_read, +}; diff --git a/src/dpdk/lib/librte_port/rte_port_fd.h b/src/dpdk/lib/librte_port/rte_port_fd.h new file mode 100644 index 00000000..77a2d31b --- /dev/null +++ b/src/dpdk/lib/librte_port/rte_port_fd.h @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_FD_H__ +#define __INCLUDE_RTE_PORT_FD_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port FD Device + * + * fd_reader: input port built on top of valid non-blocking file descriptor + * fd_writer: output port built on top of valid non-blocking file descriptor + * + ***/ + +#include <stdint.h> + +#include <rte_mempool.h> +#include "rte_port.h" + +/** fd_reader port parameters */ +struct rte_port_fd_reader_params { + /** File descriptor */ + int fd; + + /** Maximum Transfer Unit (MTU) */ + uint32_t mtu; + + /** Pre-initialized buffer pool */ + struct rte_mempool *mempool; +}; + +/** fd_reader port operations */ +extern struct rte_port_in_ops rte_port_fd_reader_ops; + +/** fd_writer port parameters */ +struct rte_port_fd_writer_params { + /** File descriptor */ + int fd; + + /**< Recommended write burst size. The actual burst size can be + * bigger or smaller than this value. + */ + uint32_t tx_burst_sz; +}; + +/** fd_writer port operations */ +extern struct rte_port_out_ops rte_port_fd_writer_ops; + +/** fd_writer_nodrop port parameters */ +struct rte_port_fd_writer_nodrop_params { + /** File descriptor */ + int fd; + + /**< Recommended write burst size. The actual burst size can be + * bigger or smaller than this value. 
+ */ + uint32_t tx_burst_sz; + + /** Maximum number of retries, 0 for no limit */ + uint32_t n_retries; +}; + +/** fd_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_fd_writer_nodrop_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk/lib/librte_port/rte_port_source_sink.h b/src/dpdk/lib/librte_port/rte_port_source_sink.h index 4db8a8a8..be585a77 100644 --- a/src/dpdk/lib/librte_port/rte_port_source_sink.h +++ b/src/dpdk/lib/librte_port/rte_port_source_sink.h @@ -55,7 +55,7 @@ struct rte_port_source_params { struct rte_mempool *mempool; /** The full path of the pcap file to read packets from */ - char *file_name; + const char *file_name; /** The number of bytes to be read from each packet in the * pcap file. If this value is 0, the whole packet is read; * if it is bigger than packet size, the generated packets @@ -69,7 +69,7 @@ extern struct rte_port_in_ops rte_port_source_ops; /** sink port parameters */ struct rte_port_sink_params { /** The full path of the pcap file to write the packets to */ - char *file_name; + const char *file_name; /** The maximum number of packets write to the pcap file. * If this value is 0, the "infinite" write will be carried * out. diff --git a/src/dpdk/lib/librte_ring/rte_ring.h b/src/dpdk/lib/librte_ring/rte_ring.h index 0e22e694..e359affc 100644 --- a/src/dpdk/lib/librte_ring/rte_ring.h +++ b/src/dpdk/lib/librte_ring/rte_ring.h @@ -106,7 +106,7 @@ extern "C" { enum rte_ring_queue_behavior { RTE_RING_QUEUE_FIXED = 0, /* Enq/Deq a fixed number of items from a ring */ - RTE_RING_QUEUE_VARIABLE /* Enq/Deq as many items a possible from ring */ + RTE_RING_QUEUE_VARIABLE /* Enq/Deq as many items as possible from ring */ }; #ifdef RTE_LIBRTE_RING_DEBUG @@ -187,7 +187,7 @@ struct rte_ring { struct rte_ring_debug_stats stats[RTE_MAX_LCORE]; #endif - void * ring[0] __rte_cache_aligned; /**< Memory space of ring starts here. + void *ring[] __rte_cache_aligned; /**< Memory space of ring starts here. 
* not volatile so need to be careful * about compiler re-ordering */ }; @@ -341,7 +341,7 @@ void rte_ring_free(struct rte_ring *r); int rte_ring_set_water_mark(struct rte_ring *r, unsigned count); /** - * Dump the status of the ring to the console. + * Dump the status of the ring to a file. * * @param f * A pointer to a file for output diff --git a/src/dpdk/lib/librte_table/rte_table_acl.c b/src/dpdk/lib/librte_table/rte_table_acl.c index 8f1f8ceb..94b69a98 100644 --- a/src/dpdk/lib/librte_table/rte_table_acl.c +++ b/src/dpdk/lib/librte_table/rte_table_acl.c @@ -792,7 +792,7 @@ rte_table_acl_lookup( pkts_mask &= ~pkt_mask; - if (action_table_pos != RTE_ACL_INVALID_USERDATA) { + if (action_table_pos != 0) { pkts_out_mask |= pkt_mask; entries[pkt_pos] = (void *) &acl->memory[action_table_pos * diff --git a/src/dpdk/lib/librte_table/rte_table_hash.h b/src/dpdk/lib/librte_table/rte_table_hash.h index 9d17516a..57505a6f 100644 --- a/src/dpdk/lib/librte_table/rte_table_hash.h +++ b/src/dpdk/lib/librte_table/rte_table_hash.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -363,6 +363,35 @@ struct rte_table_hash_key32_ext_params { /** Extendible bucket hash table operations */ extern struct rte_table_ops rte_table_hash_key32_ext_ops; +/** Cuckoo hash table parameters */ +struct rte_table_hash_cuckoo_params { + /** Key size (number of bytes */ + uint32_t key_size; + + /** Maximum number of hash table entries */ + uint32_t n_keys; + + /** Hash function used to calculate hash */ + rte_table_hash_op_hash f_hash; + + /** Seed value or Init value used by f_hash */ + uint32_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. 
*/ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; + + /** Hash table name */ + const char *name; +}; + +/** Cuckoo hash table operations */ +extern struct rte_table_ops rte_table_hash_cuckoo_dosig_ops; + #ifdef __cplusplus } #endif diff --git a/src/dpdk/lib/librte_table/rte_table_hash_cuckoo.c b/src/dpdk/lib/librte_table/rte_table_hash_cuckoo.c new file mode 100644 index 00000000..ff7baee3 --- /dev/null +++ b/src/dpdk/lib/librte_table/rte_table_hash_cuckoo.c @@ -0,0 +1,382 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <string.h> +#include <stdio.h> + +#include <rte_common.h> +#include <rte_mbuf.h> +#include <rte_memory.h> +#include <rte_malloc.h> +#include <rte_log.h> + +#include <rte_hash.h> +#include "rte_table_hash.h" + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(table, val) \ + (table->stats.n_pkts_in += val) +#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(table, val) \ + (table->stats.n_pkts_lookup_miss += val) + +#else + +#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + + +struct rte_table_hash { + struct rte_table_stats stats; + + /* Input parameters */ + uint32_t key_size; + uint32_t entry_size; + uint32_t n_keys; + rte_table_hash_op_hash f_hash; + uint32_t seed; + uint32_t signature_offset; + uint32_t key_offset; + const char *name; + + /* cuckoo hash table object */ + struct rte_hash *h_table; + + /* Lookup table */ + uint8_t memory[0] __rte_cache_aligned; }; + +static int +check_params_create_hash_cuckoo(const struct +rte_table_hash_cuckoo_params *params) { + /* Check for valid parameters */ + if (params == NULL) { + RTE_LOG(ERR, TABLE, "NULL Input Parameters.\n"); + return -EINVAL; + } + + if (params->key_size == 0) { + RTE_LOG(ERR, TABLE, "Invalid key_size.\n"); + return -EINVAL; + } + + if (params->n_keys == 0) { + RTE_LOG(ERR, TABLE, "Invalid n_keys.\n"); + return -EINVAL; + } + + if 
(params->f_hash == NULL) { + RTE_LOG(ERR, TABLE, "f_hash is NULL.\n"); + return -EINVAL; + } + + if (params->name == NULL) { + RTE_LOG(ERR, TABLE, "Table name is NULL.\n"); + return -EINVAL; + } + + return 0; +} + +static void * +rte_table_hash_cuckoo_create(void *params, + int socket_id, + uint32_t entry_size) +{ + struct rte_hash *rte_hash_handle; + struct rte_table_hash *t; + uint32_t total_size, total_cl_size; + + /* Check input parameters */ + struct rte_table_hash_cuckoo_params *p = + (struct rte_table_hash_cuckoo_params *) params; + + if (check_params_create_hash_cuckoo(params)) + return NULL; + + /* Memory allocation */ + total_cl_size = + (sizeof(struct rte_table_hash) + + RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE; + total_cl_size += (p->n_keys * entry_size + + RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE; + total_size = total_cl_size * RTE_CACHE_LINE_SIZE; + + t = rte_zmalloc_socket("TABLE", + total_size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (t == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for Cuckoo hash table\n", + __func__, + (uint32_t)sizeof(struct rte_table_hash)); + return NULL; + } + + /* Create cuckoo hash table */ + struct rte_hash_parameters hash_cuckoo_params = { + .entries = p->n_keys, + .key_len = p->key_size, + .hash_func = (rte_hash_function)(p->f_hash), + .hash_func_init_val = p->seed, + .socket_id = socket_id, + .name = p->name + }; + + rte_hash_handle = rte_hash_find_existing(p->name); + if (rte_hash_handle == NULL) { + rte_hash_handle = rte_hash_create(&hash_cuckoo_params); + if (NULL == rte_hash_handle) { + RTE_LOG(ERR, TABLE, + "%s: failed to create cuckoo hash table. 
keysize: %u", + __func__, hash_cuckoo_params.key_len); + rte_free(t); + return NULL; + } + } + + /* initialize the cuckoo hash parameters */ + t->key_size = p->key_size; + t->entry_size = entry_size; + t->n_keys = p->n_keys; + t->f_hash = p->f_hash; + t->seed = p->seed; + t->signature_offset = p->signature_offset; + t->key_offset = p->key_offset; + t->name = p->name; + t->h_table = rte_hash_handle; + + RTE_LOG(INFO, TABLE, + "%s: Cuckoo Hash table memory footprint is %u bytes\n", + __func__, total_size); + return t; +} + +static int +rte_table_hash_cuckoo_free(void *table) { + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + struct rte_table_hash *t = (struct rte_table_hash *)table; + + rte_hash_free(t->h_table); + rte_free(t); + + return 0; +} + +static int +rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry, + int *key_found, void **entry_ptr) { + int pos = 0; + + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + if (key == NULL) { + RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__); + return -EINVAL; + } + + if (entry == NULL) { + RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__); + return -EINVAL; + } + + struct rte_table_hash *t = (struct rte_table_hash *)table; + + /* Find Existing entries */ + pos = rte_hash_lookup(t->h_table, key); + if (pos >= 0) { + uint8_t *existing_entry; + + *key_found = 1; + existing_entry = &t->memory[pos * t->entry_size]; + memcpy(existing_entry, entry, t->entry_size); + *entry_ptr = existing_entry; + + return 0; +} else if (pos == -ENOENT) { + /* Entry not found. 
Adding new entry */ + uint8_t *new_entry; + + pos = rte_hash_add_key(t->h_table, key); + if (pos < 0) { + RTE_LOG(ERR, TABLE, + "%s: Entry not added, status : %u\n", + __func__, pos); + return pos; + } + + new_entry = &t->memory[pos * t->entry_size]; + memcpy(new_entry, entry, t->entry_size); + + *key_found = 0; + *entry_ptr = new_entry; + return 0; + } + return pos; +} + +static int +rte_table_hash_cuckoo_entry_delete(void *table, void *key, + int *key_found, __rte_unused void *entry) { + int pos = 0; + + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + if (key == NULL) { + RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__); + return -EINVAL; + } + + struct rte_table_hash *t = (struct rte_table_hash *)table; + + pos = rte_hash_del_key(t->h_table, key); + if (pos >= 0) { + *key_found = 1; + uint8_t *entry_ptr = &t->memory[pos * t->entry_size]; + + if (entry) + memcpy(entry, entry_ptr, t->entry_size); + + memset(&t->memory[pos * t->entry_size], 0, t->entry_size); + } + + return pos; +} + + +static int +rte_table_hash_cuckoo_lookup_dosig(void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *t = (struct rte_table_hash *)table; + uint64_t pkts_mask_out = 0; + uint32_t i; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + + RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(t, n_pkts_in); + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + const uint8_t *keys[64]; + int32_t positions[64], status; + + /* Keys for bulk lookup */ + for (i = 0; i < n_pkts_in; i++) + keys[i] = RTE_MBUF_METADATA_UINT8_PTR(pkts[i], + t->key_offset); + + /* Bulk Lookup */ + status = rte_hash_lookup_bulk(t->h_table, + (const void **) keys, + n_pkts_in, + positions); + + if (status == 0) { + for (i = 0; i < n_pkts_in; i++) { + if (likely(positions[i] >= 0)) { + uint64_t pkt_mask = 1LLU << i; + + entries[i] = &t->memory[positions[i] + * 
t->entry_size]; + pkts_mask_out |= pkt_mask; + } + } + } + } else { + for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX + - __builtin_clzll(pkts_mask)); i++) { + uint64_t pkt_mask = 1LLU << i; + + if (pkt_mask & pkts_mask) { + struct rte_mbuf *pkt = pkts[i]; + uint8_t *key = RTE_MBUF_METADATA_UINT8_PTR(pkt, + t->key_offset); + int pos; + + pos = rte_hash_lookup(t->h_table, key); + if (likely(pos >= 0)) { + entries[i] = &t->memory[pos + * t->entry_size]; + pkts_mask_out |= pkt_mask; + } + } + } + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(t, + n_pkts_in - __builtin_popcountll(pkts_mask_out)); + + return 0; + +} + +static int +rte_table_hash_cuckoo_stats_read(void *table, struct rte_table_stats *stats, + int clear) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + + if (stats != NULL) + memcpy(stats, &t->stats, sizeof(t->stats)); + + if (clear) + memset(&t->stats, 0, sizeof(t->stats)); + + return 0; +} + +struct rte_table_ops rte_table_hash_cuckoo_dosig_ops = { + .f_create = rte_table_hash_cuckoo_create, + .f_free = rte_table_hash_cuckoo_free, + .f_add = rte_table_hash_cuckoo_entry_add, + .f_delete = rte_table_hash_cuckoo_entry_delete, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_cuckoo_lookup_dosig, + .f_stats = rte_table_hash_cuckoo_stats_read, +}; diff --git a/src/dpdk/lib/librte_table/rte_table_hash_key16.c b/src/dpdk/lib/librte_table/rte_table_hash_key16.c index b7e000fd..08d4d77e 100644 --- a/src/dpdk/lib/librte_table/rte_table_hash_key16.c +++ b/src/dpdk/lib/librte_table/rte_table_hash_key16.c @@ -130,7 +130,7 @@ rte_table_hash_create_key16_lru(void *params, /* Check input parameters */ if ((check_params_create_lru(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0)) + ((sizeof(struct rte_bucket_4_16) % 64) != 0)) return NULL; n_entries_per_bucket = 4; key_size = 16; @@ -344,7 
+344,7 @@ rte_table_hash_create_key16_ext(void *params, /* Check input parameters */ if ((check_params_create_ext(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0)) + ((sizeof(struct rte_bucket_4_16) % 64) != 0)) return NULL; n_entries_per_bucket = 4; diff --git a/src/dpdk/lib/librte_table/rte_table_hash_key32.c b/src/dpdk/lib/librte_table/rte_table_hash_key32.c index a7aba492..161f6b7a 100644 --- a/src/dpdk/lib/librte_table/rte_table_hash_key32.c +++ b/src/dpdk/lib/librte_table/rte_table_hash_key32.c @@ -129,7 +129,7 @@ rte_table_hash_create_key32_lru(void *params, /* Check input parameters */ if ((check_params_create_lru(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) { + ((sizeof(struct rte_bucket_4_32) % 64) != 0)) { return NULL; } n_entries_per_bucket = 4; @@ -337,7 +337,7 @@ rte_table_hash_create_key32_ext(void *params, /* Check input parameters */ if ((check_params_create_ext(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) + ((sizeof(struct rte_bucket_4_32) % 64) != 0)) return NULL; n_entries_per_bucket = 4; diff --git a/src/dpdk/lib/librte_table/rte_table_hash_key8.c b/src/dpdk/lib/librte_table/rte_table_hash_key8.c index e2e2bdc4..b04f60dc 100644 --- a/src/dpdk/lib/librte_table/rte_table_hash_key8.c +++ b/src/dpdk/lib/librte_table/rte_table_hash_key8.c @@ -125,7 +125,7 @@ rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size) /* Check input parameters */ if ((check_params_create_lru(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0)) { + ((sizeof(struct rte_bucket_4_8) % 64) != 0)) { return NULL; } n_entries_per_bucket = 4; @@ -332,7 +332,7 @@ 
rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size) /* Check input parameters */ if ((check_params_create_ext(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0)) + ((sizeof(struct rte_bucket_4_8) % 64) != 0)) return NULL; n_entries_per_bucket = 4; |