diff options
author | Luca Boccassi <luca.boccassi@gmail.com> | 2018-11-12 16:14:45 +0000 |
---|---|---|
committer | Luca Boccassi <luca.boccassi@gmail.com> | 2018-11-12 16:15:06 +0000 |
commit | 88fab00d4402af240c1b7cc2566133aece115488 (patch) | |
tree | 54525f2b8784dd20ce6886b429ef85d24df04532 | |
parent | 8d01b9cd70a67cdafd5b965a70420c3bd7fb3f82 (diff) |
New upstream version 18.11-rc2upstream/18.11-rc2
Change-Id: I43ca4edd0747b2dfc38c574ebf3c0aac17d7392c
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
131 files changed, 10714 insertions, 1766 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index e60379df..19353ac8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1271,6 +1271,10 @@ F: doc/guides/sample_app_ug/ethtool.rst F: examples/exception_path/ F: doc/guides/sample_app_ug/exception_path.rst +M: Marko Kovacevic <marko.kovacevic@intel.com> +F: examples/fips_validation/ +F: doc/guides/sample_app_ug/fips_validation.rst + M: Ori Kam <orika@mellanox.com> F: examples/flow_filtering/ F: doc/guides/sample_app_ug/flow_filtering.rst diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index 1050fde9..5e08a1b9 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -728,7 +728,8 @@ static void cmd_help_long_parsed(void *parsed_result, " show all queue region related configuration info\n\n" "add port tm node shaper profile (port_id) (shaper_profile_id)" - " (tb_rate) (tb_size) (packet_length_adjust)\n" + " (cmit_tb_rate) (cmit_tb_size) (peak_tb_rate) (peak_tb_size)" + " (packet_length_adjust)\n" " Add port tm node private shaper profile.\n\n" "del port tm node shaper profile (port_id) (shaper_profile_id)\n" @@ -5976,6 +5977,8 @@ static void cmd_create_bonded_device_parsed(void *parsed_result, nb_ports = rte_eth_dev_count_avail(); reconfig(port_id, res->socket); rte_eth_promiscuous_enable(port_id); + ports[port_id].need_setup = 0; + ports[port_id].port_status = RTE_PORT_STOPPED; } } diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 23ea7cc8..91e2e350 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -3248,26 +3248,15 @@ parse_vc_action_rss(struct context *ctx, const struct token *token, .func = RTE_ETH_HASH_FUNCTION_DEFAULT, .level = 0, .types = rss_hf, - .key_len = sizeof(action_rss_data->key), + .key_len = 0, .queue_num = RTE_MIN(nb_rxq, ACTION_RSS_QUEUE_NUM), - .key = action_rss_data->key, + .key = NULL, .queue = action_rss_data->queue, }, - .key = "testpmd's default RSS hash key, " - "override it for better balancing", .queue = { 0 }, }; for (i = 0; i < action_rss_data->conf.queue_num; ++i) action_rss_data->queue[i] = i; - if (!port_id_is_invalid(ctx->port, DISABLED_WARN) && - ctx->port != (portid_t)RTE_PORT_ALL) { - struct rte_eth_dev_info info; - - rte_eth_dev_info_get(ctx->port, &info); - action_rss_data->conf.key_len = - RTE_MIN(sizeof(action_rss_data->key), - info.hash_key_size); - } action->conf = &action_rss_data->conf; return ret; } diff --git a/app/test-pmd/cmdline_mtr.c b/app/test-pmd/cmdline_mtr.c index 32a47305..63f32828 100644 --- a/app/test-pmd/cmdline_mtr.c +++ b/app/test-pmd/cmdline_mtr.c @@ -1440,7 +1440,7 @@ static void cmd_show_port_meter_stats_parsed(void *parsed_result, printf("\tPkts R: %" PRIu64 "\n", stats.n_pkts[RTE_MTR_RED]); if (stats_mask & RTE_MTR_STATS_N_BYTES_RED) - printf("\tBytes Y: %" PRIu64 "\n", + printf("\tBytes R: %" PRIu64 "\n", stats.n_bytes[RTE_MTR_RED]); if (stats_mask & RTE_MTR_STATS_N_PKTS_DROPPED) printf("\tPkts DROPPED: %" PRIu64 "\n", diff --git a/app/test-pmd/cmdline_tm.c b/app/test-pmd/cmdline_tm.c index b4307974..4c763482 100644 --- a/app/test-pmd/cmdline_tm.c +++ b/app/test-pmd/cmdline_tm.c @@ -771,8 +771,10 @@ struct cmd_add_port_tm_node_shaper_profile_result { cmdline_fixed_string_t profile; uint16_t port_id; uint32_t shaper_id; - uint64_t tb_rate; - uint64_t tb_size; + uint64_t cmit_tb_rate; + uint64_t cmit_tb_size; + uint64_t peak_tb_rate; + uint64_t peak_tb_size; uint32_t pktlen_adjust; }; @@ -807,14 +809,22 @@ cmdline_parse_token_num_t cmd_add_port_tm_node_shaper_profile_shaper_id = TOKEN_NUM_INITIALIZER( struct cmd_add_port_tm_node_shaper_profile_result, shaper_id, UINT32); -cmdline_parse_token_num_t cmd_add_port_tm_node_shaper_profile_tb_rate = +cmdline_parse_token_num_t cmd_add_port_tm_node_shaper_profile_cmit_tb_rate = TOKEN_NUM_INITIALIZER( struct cmd_add_port_tm_node_shaper_profile_result, - tb_rate, UINT64); -cmdline_parse_token_num_t cmd_add_port_tm_node_shaper_profile_tb_size = + cmit_tb_rate, UINT64); +cmdline_parse_token_num_t cmd_add_port_tm_node_shaper_profile_cmit_tb_size = TOKEN_NUM_INITIALIZER( struct cmd_add_port_tm_node_shaper_profile_result, - tb_size, UINT64); + cmit_tb_size, UINT64); +cmdline_parse_token_num_t cmd_add_port_tm_node_shaper_profile_peak_tb_rate = + TOKEN_NUM_INITIALIZER( + struct cmd_add_port_tm_node_shaper_profile_result, + peak_tb_rate, UINT64); +cmdline_parse_token_num_t cmd_add_port_tm_node_shaper_profile_peak_tb_size = + TOKEN_NUM_INITIALIZER( + struct cmd_add_port_tm_node_shaper_profile_result, + peak_tb_size, UINT64); cmdline_parse_token_num_t cmd_add_port_tm_node_shaper_profile_pktlen_adjust = TOKEN_NUM_INITIALIZER( struct cmd_add_port_tm_node_shaper_profile_result, @@ -838,8 +848,10 @@ static void cmd_add_port_tm_node_shaper_profile_parsed(void *parsed_result, /* Private shaper profile params */ memset(&sp, 0, sizeof(struct rte_tm_shaper_params)); memset(&error, 0, sizeof(struct rte_tm_error)); - sp.peak.rate = res->tb_rate; - sp.peak.size = res->tb_size; + sp.committed.rate = res->cmit_tb_rate; + sp.committed.size = res->cmit_tb_size; + sp.peak.rate = res->peak_tb_rate; + sp.peak.size = res->peak_tb_size; sp.pkt_length_adjust = pkt_len_adjust; ret = rte_tm_shaper_profile_add(port_id, shaper_id, &sp, &error); @@ -862,8 +874,10 @@ cmdline_parse_inst_t cmd_add_port_tm_node_shaper_profile = { (void *)&cmd_add_port_tm_node_shaper_profile_profile, (void *)&cmd_add_port_tm_node_shaper_profile_port_id, (void *)&cmd_add_port_tm_node_shaper_profile_shaper_id, - (void *)&cmd_add_port_tm_node_shaper_profile_tb_rate, - (void *)&cmd_add_port_tm_node_shaper_profile_tb_size, + (void *)&cmd_add_port_tm_node_shaper_profile_cmit_tb_rate, + (void *)&cmd_add_port_tm_node_shaper_profile_cmit_tb_size, + (void *)&cmd_add_port_tm_node_shaper_profile_peak_tb_rate, + (void *)&cmd_add_port_tm_node_shaper_profile_peak_tb_size, (void *)&cmd_add_port_tm_node_shaper_profile_pktlen_adjust, NULL, }, diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c index 0531b5d2..3214e3c9 100644 --- a/app/test-pmd/flowgen.c +++ b/app/test-pmd/flowgen.c @@ -123,7 +123,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs) struct ipv4_hdr *ip_hdr; struct udp_hdr *udp_hdr; uint16_t vlan_tci, vlan_tci_outer; - uint64_t ol_flags; + uint64_t ol_flags = 0; uint16_t nb_rx; uint16_t nb_tx; uint16_t nb_pkt; @@ -155,7 +155,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs) tx_offloads = ports[fs->tx_port].dev_conf.txmode.offloads; if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) - ol_flags = PKT_TX_VLAN_PKT; + ol_flags |= PKT_TX_VLAN_PKT; if (tx_offloads & DEV_TX_OFFLOAD_QINQ_INSERT) ol_flags |= PKT_TX_QINQ_PKT; if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT) diff --git a/devtools/check-forbidden-tokens.awk b/devtools/check-forbidden-tokens.awk index fd77cdd8..8c89de3d 100755 --- a/devtools/check-forbidden-tokens.awk +++ b/devtools/check-forbidden-tokens.awk @@ -63,12 +63,7 @@ BEGIN { END { if (count > 0) { print "Warning in " substr(last_file,6) ":" - print "are you sure you want to add the following:" - for (key in expressions) { - if (expressions[key] > 0) { - print key - } - } + print MESSAGE exit RET_ON_FAIL } } diff --git a/devtools/checkpatches.sh b/devtools/checkpatches.sh index bf3114f9..ee8debec 100755 --- a/devtools/checkpatches.sh +++ b/devtools/checkpatches.sh @@ -43,13 +43,23 @@ print_usage () { END_OF_HELP } -check_forbidden_additions() { +check_forbidden_additions() { # <patch> # refrain from new additions of rte_panic() and rte_exit() # multiple folders and expressions are separated by spaces awk -v FOLDERS="lib drivers" \ -v EXPRESSIONS="rte_panic\\\( rte_exit\\\(" \ -v RET_ON_FAIL=1 \ - -f $(dirname $(readlink -e $0))/check-forbidden-tokens.awk - + -v MESSAGE='Using rte_panic/rte_exit' \ + -f $(dirname $(readlink -e $0))/check-forbidden-tokens.awk \ + "$1" + # svg figures must be included with wildcard extension + # because of png conversion for pdf docs + awk -v FOLDERS='doc' \ + -v EXPRESSIONS='::[[:space:]]*[^[:space:]]*\\.svg' \ + -v RET_ON_FAIL=1 \ + -v MESSAGE='Using explicit .svg extension instead of .*' \ + -f $(dirname $(readlink -e $0))/check-forbidden-tokens.awk \ + "$1" } number=0 @@ -115,7 +125,7 @@ check () { # <patch> <commit> <title> fi ! $verbose || printf '\nChecking forbidden tokens additions:\n' - report=$(check_forbidden_additions <"$tmpinput") + report=$(check_forbidden_additions "$tmpinput") if [ $? -ne 0 ] ; then $headline_printed || print_headline "$3" printf '%s\n' "$report" diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst index b2dfeb00..1db98685 100644 --- a/doc/guides/cryptodevs/qat.rst +++ b/doc/guides/cryptodevs/qat.rst @@ -156,6 +156,7 @@ These are the build configuration options affecting QAT, and their default value CONFIG_RTE_LIBRTE_PMD_QAT_SYM=n CONFIG_RTE_PMD_QAT_MAX_PCI_DEVICES=48 CONFIG_RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS=16 + CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE=65536 CONFIG_RTE_LIBRTE_PMD_QAT must be enabled for any QAT PMD to be built. @@ -168,14 +169,30 @@ options and is built by default. The number of VFs per PF varies - see table below. If multiple QAT packages are installed on a platform then CONFIG_RTE_PMD_QAT_MAX_PCI_DEVICES should be adjusted to the number of VFs which the QAT common code will need to handle. -Note, there is a separate config item for max cryptodevs CONFIG_RTE_CRYPTO_MAX_DEVS, -if necessary this should be adjusted to handle the total of QAT and other devices -which the process will use. +Note, there are separate config items for max cryptodevs CONFIG_RTE_CRYPTO_MAX_DEVS +and max compressdevs CONFIG_RTE_COMPRESS_MAX_DEVS, if necessary these should be +adjusted to handle the total of QAT and other devices which the process will use. QAT allocates internal structures to handle SGLs. For the compression service CONFIG_RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS can be changed if more segments are needed. An extra (max_inflight_ops x 16) bytes per queue_pair will be used for every increment. +QAT compression PMD needs intermediate buffers to support Deflate compression +with Dynamic Huffman encoding. CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE +specifies the size of a single buffer, the PMD will allocate a multiple of these, +plus some extra space for associated meta-data. For GEN2 devices, 20 buffers plus +1472 bytes are allocated. + +.. Note:: + + If the compressed output of a Deflate operation using Dynamic Huffman + Encoding is too big to fit in an intermediate buffer, then the + operation will return RTE_COMP_OP_STATUS_ERROR and an error will be + displayed. Options for the application in this case + are to split the input data into smaller chunks and resubmit + in multiple operations or to configure QAT with + larger intermediate buffers. + Device and driver naming ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/guides/howto/telemetry.rst b/doc/guides/howto/telemetry.rst index 3fcb0619..00f8f7a8 100644 --- a/doc/guides/howto/telemetry.rst +++ b/doc/guides/howto/telemetry.rst @@ -29,7 +29,7 @@ formatted in JSON and sent back to the requesting client. Pre-requisites ~~~~~~~~~~~~~~ -* Python ≥ 2.5 +* Python >= 2.5 * Jansson library for JSON serialization diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst index 3fa5cb74..d3f90483 100644 --- a/doc/guides/nics/features.rst +++ b/doc/guides/nics/features.rst @@ -208,7 +208,7 @@ Supports TCP Segmentation Offloading. * **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_TCP_TSO``. * **[uses] rte_eth_desc_lim**: ``nb_seg_max``, ``nb_mtu_seg_max``. -* **[uses] mbuf**: ``mbuf.ol_flags:PKT_TX_TCP_SEG``. +* **[uses] mbuf**: ``mbuf.ol_flags:`` ``PKT_TX_TCP_SEG``, ``PKT_TX_IPV4``, ``PKT_TX_IPV6``, ``PKT_TX_IP_CKSUM``. * **[uses] mbuf**: ``mbuf.tso_segsz``, ``mbuf.l2_len``, ``mbuf.l3_len``, ``mbuf.l4_len``. * **[implements] datapath**: ``TSO functionality``. * **[provides] rte_eth_dev_info**: ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_TCP_TSO,DEV_TX_OFFLOAD_UDP_TSO``. diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst index ab3928a6..bfacbd11 100644 --- a/doc/guides/nics/i40e.rst +++ b/doc/guides/nics/i40e.rst @@ -172,6 +172,13 @@ Runtime Config Options -w 84:00.0,use-latest-supported-vec=1 +Vector RX Pre-conditions +~~~~~~~~~~~~~~~~~~~~~~~~ +For Vector RX it is assumed that the number of descriptor rings will be a power +of 2. With this pre-condition, the ring pointer can easily scroll back to the +head after hitting the tail without a conditional check. In addition Vector RX +can use this assumption to do a bit mask using ``ring_size - 1``. + Driver compilation and testing ------------------------------ diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 67696283..7af5ead8 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -54,6 +54,7 @@ Features - Support for scattered TX and RX frames. - IPv4, IPv6, TCPv4, TCPv6, UDPv4 and UDPv6 RSS on any number of queues. - Several RSS hash keys, one for each flow type. +- Default RSS operation with no hash key specification. - Configurable RETA table. - Support for multiple MAC addresses. - VLAN filtering. @@ -246,6 +247,24 @@ Run-time configuration - x86_64 with ConnectX-4, ConnectX-4 LX, ConnectX-5 and Bluefield. - POWER8 and ARMv8 with ConnectX-4 LX, ConnectX-5 and Bluefield. +- ``rxq_cqe_pad_en`` parameter [int] + + A nonzero value enables 128B padding of CQE on RX side. The size of CQE + is aligned with the size of a cacheline of the core. If cacheline size is + 128B, the CQE size is configured to be 128B even though the device writes + only 64B data on the cacheline. This is to avoid unnecessary cache + invalidation by device's two consecutive writes on to one cacheline. + However in some architecture, it is more beneficial to update entire + cacheline with padding the rest 64B rather than striding because + read-modify-write could drop performance a lot. On the other hand, + writing extra data will consume more PCIe bandwidth and could also drop + the maximum throughput. It is recommended to empirically set this + parameter. Disabled by default. + + Supported on: + + - CPU having 128B cacheline with ConnectX-5 and Bluefield. + - ``mprq_en`` parameter [int] A nonzero value enables configuring Multi-Packet Rx queues. Rx queue is @@ -320,6 +339,20 @@ Run-time configuration - Set to 8 by default. +- ``txqs_max_vec`` parameter [int] + + Enable vectorized Tx only when the number of TX queues is less than or + equal to this value. Effective only when ``tx_vec_en`` is enabled. + + On ConnectX-5: + + - Set to 8 by default on ARMv8. + - Set to 4 by default otherwise. + + On Bluefield + + - Set to 16 by default. + - ``txq_mpw_en`` parameter [int] A nonzero value enables multi-packet send (MPS) for ConnectX-4 Lx and @@ -365,7 +398,7 @@ Run-time configuration - ``tx_vec_en`` parameter [int] A nonzero value enables Tx vector on ConnectX-5 and Bluefield NICs if the number of - global Tx queues on the port is lesser than MLX5_VPMD_MIN_TXQS. + global Tx queues on the port is less than ``txqs_max_vec``. This option cannot be used with certain offloads such as ``DEV_TX_OFFLOAD_TCP_TSO, DEV_TX_OFFLOAD_VXLAN_TNL_TSO, DEV_TX_OFFLOAD_GRE_TNL_TSO, DEV_TX_OFFLOAD_VLAN_INSERT``. diff --git a/doc/guides/nics/mvpp2.rst b/doc/guides/nics/mvpp2.rst index 82b9383e..b2ddeab5 100644 --- a/doc/guides/nics/mvpp2.rst +++ b/doc/guides/nics/mvpp2.rst @@ -644,7 +644,7 @@ Node which has a parent is called a leaf whereas node without parent is called a non-leaf (root). MVPP2 PMD supports two level hierarchy where level 0 represents ports and level 1 represents tx queues of a given port. -.. figure:: img/mvpp2_tm.svg +.. figure:: img/mvpp2_tm.* Nodes hold following types of settings: diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst index 376128f6..51d00758 100644 --- a/doc/guides/rel_notes/release_18_11.rst +++ b/doc/guides/rel_notes/release_18_11.rst @@ -63,12 +63,22 @@ New Features * **Added check for ensuring allocated memory addressable by devices.** Some devices can have addressing limitations so a new function, - ``rte_eal_check_dma_mask``, has been added for checking allocated memory is + ``rte_mem_check_dma_mask``, has been added for checking allocated memory is not out of the device range. Because now memory can be dynamically allocated after initialization, a dma mask is kept and any new allocated memory will be checked out against that dma mask and rejected if out of range. If more than one device has addressing limitations, the dma mask is the more restricted one. +* **Updated the C11 memory model version of ring library.** + + The latency is decreased for architectures using the C11 memory model + version of the ring library. + + On Cavium ThunderX2 platform, the changes decreased latency by 27~29% + and 3~15% for MPMC and SPSC cases respectively (with 2 lcores). The + real improvements may vary with the number of contending lcores and + the size of ring. + * **Added hot-unplug handle mechanism.** ``rte_dev_hotplug_handle_enable`` and ``rte_dev_hotplug_handle_disable`` are @@ -285,6 +295,20 @@ New Features this application doesn't need to launch dedicated worker threads for vhost enqueue/dequeue operations. +* **Added cryptodev FIPS validation example application.** + + Added an example application to parse and perform symmetric cryptography + computation to the NIST Cryptographic Algorithm Validation Program (CAVP) + test vectors. + +* **Allow unit test binary to take parameters from the environment** + + The unit test "test", or "dpdk-test", binary is often called from scripts, + which can make passing additional parameters, such as a coremask, to it more + awkward. Support has been added to the application to allow it to take + additional command-line parameter values from the "DPDK_TEST_PARAMS" + environment variable to make this application easier to use. + API Changes ----------- diff --git a/doc/guides/sample_app_ug/fips_validation.rst b/doc/guides/sample_app_ug/fips_validation.rst new file mode 100644 index 00000000..aeacfacb --- /dev/null +++ b/doc/guides/sample_app_ug/fips_validation.rst @@ -0,0 +1,132 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Intel Corporation. + +Federal Information Processing Standards (FIPS) CryptoDev Validation +==================================================================== + +Overview +-------- + +Federal Information Processing Standards (FIPS) are publicly announced standards +developed by the United States federal government for use in computer systems by +non-military government agencies and government contractors. + +This application is used to parse and perform symmetric cryptography +computation to the NIST Cryptographic Algorithm Validation Program (CAVP) test +vectors. + +For an algorithm implementation to be listed on a cryptographic module +validation certificate as an Approved security function, the algorithm +implementation must meet all the requirements of FIPS 140-2 and must +successfully complete the cryptographic algorithm validation process. + +Limitations +----------- + +* Only NIST CAVP request files are parsed by this application. +* The version of request file supported is ``CAVS 21.0`` +* If the header comment in a ``.req`` file does not contain a Algo tag + i.e ``AES,TDES,GCM`` you need to manually add it into the header comment for + example:: + + # VARIABLE KEY - KAT for CBC / # TDES VARIABLE KEY - KAT for CBC + +* The application does not supply the test vectors. The user is expected to + obtain the test vector files from `NIST + <https://csrc.nist.gov/projects/cryptographic-algorithm-validation- + program/block-ciphers>`_ website. To obtain the ``.req`` files you need to + email a person from the NIST website and pay for the ``.req`` files. + The ``.rsp`` files from the site can be used to validate and compare with + the ``.rsp`` files created by the FIPS application. + +* Supported test vectors + * AES-CBC (128,192,256) - GFSbox, KeySbox, MCT, MMT + * AES-GCM (128,192,256) - EncryptExtIV, Decrypt + * AES-CCM (128) - VADT, VNT, VPT, VTT, DVPT + * AES-CMAC (128) - Generate, Verify + * HMAC (SHA1, SHA224, SHA256, SHA384, SHA512) + * TDES-CBC (1 Key, 2 Keys, 3 Keys) - MMT, Monte, Permop, Subkey, Varkey, + VarText + +Application Information +----------------------- + +If a ``.req`` is used as the input file after the application is finished +running it will generate a response file or ``.rsp``. Differences between the +two files are, the ``.req`` file has missing information for instance if doing +encryption you will not have the cipher text and that will be generated in the +response file. Also if doing decryption it will not have the plain text until it +finished the work and in the response file it will be added onto the end of each +operation. + +The application can be run with a ``.rsp`` file and what the outcome of that +will be is it will add a extra line in the generated ``.rsp`` which should be +the same as the ``.rsp`` used to run the application, this is useful for +validating if the application has done the operation correctly. + + +Compiling the Application +------------------------- + +* Compile Application + + .. code-block:: console + + make -C examples/fips_validation + +* Run ``dos2unix`` on the request files + + .. code-block:: console + + dos2unix AES/req/* + dos2unix AES_GCM/req/* + dos2unix CCM/req/* + dos2unix CMAC/req/* + dos2unix HMAC/req/* + dos2unix TDES/req/* + +Running the Application +----------------------- + +The application requires a number of command line options: + + .. code-block:: console + + ./fips_validation [EAL options] + -- --req-file FILE_PATH/FOLDER_PATH + --rsp-file FILE_PATH/FOLDER_PATH + [--cryptodev DEVICE_NAME] [--cryptodev-id ID] [--path-is-folder] + +where, + * req-file: The path of the request file or folder, separated by + ``path-is-folder`` option. + + * rsp-file: The path that the response file or folder is stored. separated by + ``path-is-folder`` option. + + * cryptodev: The name of the target DPDK Crypto device to be validated. + + * cryptodev-id: The id of the target DPDK Crypto device to be validated. + + * path-is-folder: If presented the application expects req-file and rsp-file + are folder paths. + + +To run the application in linuxapp environment to test one AES FIPS test data +file for crypto_aesni_mb PMD, issue the command: + +.. code-block:: console + + $ ./fips_validation --vdev crypto_aesni_mb -- + --req-file /PATH/TO/REQUEST/FILE.req --rsp-file ./PATH/TO/RESPONSE/FILE.rsp + --cryptodev crypto_aesni_mb + +To run the application in linuxapp environment to test all AES-GCM FIPS test +data files in one folder for crypto_aesni_gcm PMD, issue the command: + +.. code-block:: console + + $ ./fips_validation --vdev crypto_aesni_gcm0 -- + --req-file /PATH/TO/REQUEST/FILE/FOLDER/ + --rsp-file ./PATH/TO/RESPONSE/FILE/FOLDER/ + --cryptodev-id 0 --path-is-folder diff --git a/doc/guides/sample_app_ug/index.rst b/doc/guides/sample_app_ug/index.rst index 74b12af8..b2455e09 100644 --- a/doc/guides/sample_app_ug/index.rst +++ b/doc/guides/sample_app_ug/index.rst @@ -55,6 +55,7 @@ Sample Applications User Guides tep_termination ptpclient performance_thread + fips_validation ipsec_secgw bbdev_app diff --git a/doc/guides/sample_app_ug/qos_metering.rst b/doc/guides/sample_app_ug/qos_metering.rst index 6391841c..2e8e0175 100644 --- a/doc/guides/sample_app_ug/qos_metering.rst +++ b/doc/guides/sample_app_ug/qos_metering.rst @@ -149,3 +149,7 @@ In this particular case: * Every packet which color has improved is dropped (this particular case can't happen, so these values will not be used). * For the rest of the cases, the color is changed to red. + +.. note:: + * In color blind mode, first row GREEN colour is only valid. + * To drop the packet, policer_table action has to be set to DROP. diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst index e23079b6..056f8bb3 100644 --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst @@ -2585,13 +2585,16 @@ Add port traffic management private shaper profile Add the port traffic management private shaper profile:: testpmd> add port tm node shaper profile (port_id) (shaper_profile_id) \ - (tb_rate) (tb_size) (packet_length_adjust) + (cmit_tb_rate) (cmit_tb_size) (peak_tb_rate) (peak_tb_size) \ + (packet_length_adjust) where: * ``shaper_profile id``: Shaper profile ID for the new profile. -* ``tb_rate``: Token bucket rate (bytes per second). -* ``tb_size``: Token bucket size (bytes). +* ``cmit_tb_rate``: Committed token bucket rate (bytes per second). +* ``cmit_tb_size``: Committed token bucket size (bytes). +* ``peak_tb_rate``: Peak token bucket rate (bytes per second). +* ``peak_tb_size``: Peak token bucket size (bytes). * ``packet_length_adjust``: The value (bytes) to be added to the length of each packet for the purpose of shaping. This parameter value can be used to correct the packet length with the framing overhead bytes that are consumed @@ -3499,7 +3502,7 @@ accordingly. Possible assignment tokens are: - ``spec``: match value according to configured bit-mask. - ``last``: specify upper bound to establish a range. - ``mask``: specify bit-mask with relevant bits set to one. -- ``prefix``: generate bit-mask from a prefix length. +- ``prefix``: generate bit-mask with <prefix-length> most-significant bits set to one. These yield identical results:: diff --git a/drivers/bus/dpaa/base/fman/fman.c b/drivers/bus/dpaa/base/fman/fman.c index bdb70042..06762e0f 100644 --- a/drivers/bus/dpaa/base/fman/fman.c +++ b/drivers/bus/dpaa/base/fman/fman.c @@ -13,6 +13,7 @@ #include <fman.h> #include <of.h> #include <rte_dpaa_logs.h> +#include <rte_string_fns.h> #define QMI_PORT_REGS_OFFSET 0x400 @@ -183,7 +184,7 @@ fman_if_init(const struct device_node *dpa_node) } memset(__if, 0, sizeof(*__if)); INIT_LIST_HEAD(&__if->__if.bpool_list); - strncpy(__if->node_path, dpa_node->full_name, PATH_MAX - 1); + strlcpy(__if->node_path, dpa_node->full_name, PATH_MAX - 1); __if->node_path[PATH_MAX - 1] = '\0'; /* Obtain the MAC node used by this interface except macless */ diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index 45c24ef7..c99d523f 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -349,11 +349,36 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) if (ret < 0) { rte_pci_insert_device(dev2, dev); } else { /* already registered */ - dev2->kdrv = dev->kdrv; - dev2->max_vfs = dev->max_vfs; - pci_name_set(dev2); - memmove(dev2->mem_resource, dev->mem_resource, - sizeof(dev->mem_resource)); + if (!rte_dev_is_probed(&dev2->device)) { + dev2->kdrv = dev->kdrv; + dev2->max_vfs = dev->max_vfs; + pci_name_set(dev2); + memmove(dev2->mem_resource, + dev->mem_resource, + sizeof(dev->mem_resource)); + } else { + /** + * If device is plugged and driver is + * probed already, (This happens when + * we call rte_dev_probe which will + * scan all device on the bus) we don't + * need to do anything here unless... + **/ + if (dev2->kdrv != dev->kdrv || + dev2->max_vfs != dev->max_vfs) + /* + * This should not happens. + * But it is still possible if + * we unbind a device from + * vfio or uio before hotplug + * remove and rebind it with + * a different configure. + * So we just print out the + * error as an alarm. + */ + RTE_LOG(ERR, EAL, "Unexpected device scan at %s!\n", + filename); + } free(dev); } return 0; @@ -590,7 +615,16 @@ pci_one_device_iommu_support_va(struct rte_pci_device *dev) mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1; - return rte_eal_check_dma_mask(mgaw) == 0 ? true : false; + /* + * Assuming there is no limitation by now. We can not know at this point + * because the memory has not been initialized yet. Setting the dma mask + * will force a check once memory initialization is done. We can not do + * a fallback to IOVA PA now, but if the dma check fails, the error + * message should advice for using '--iova-mode pa' if IOVA VA is the + * current mode. + */ + rte_mem_set_dma_mask(mgaw); + return true; } #elif defined(RTE_ARCH_PPC_64) static bool @@ -679,6 +713,7 @@ int rte_pci_read_config(const struct rte_pci_device *device, switch (device->kdrv) { case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: return pci_uio_read_config(intr_handle, buf, len, offset); #ifdef VFIO_PRESENT case RTE_KDRV_VFIO: @@ -702,6 +737,7 @@ int rte_pci_write_config(const struct rte_pci_device *device, switch (device->kdrv) { case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: return pci_uio_write_config(intr_handle, buf, len, offset); #ifdef VFIO_PRESENT case RTE_KDRV_VFIO: diff --git a/drivers/bus/vmbus/rte_bus_vmbus.h b/drivers/bus/vmbus/rte_bus_vmbus.h index 2839fef5..4cf73ce8 100644 --- a/drivers/bus/vmbus/rte_bus_vmbus.h +++ b/drivers/bus/vmbus/rte_bus_vmbus.h @@ -407,8 +407,7 @@ void rte_vmbus_unregister(struct rte_vmbus_driver *driver); /** Helper for VMBUS device registration from driver instance */ #define RTE_PMD_REGISTER_VMBUS(nm, vmbus_drv) \ - RTE_INIT(vmbusinitfn_ ##nm); \ - static void vmbusinitfn_ ##nm(void) \ + RTE_INIT(vmbusinitfn_ ##nm) \ { \ (vmbus_drv).driver.name = RTE_STR(nm); \ rte_vmbus_register(&vmbus_drv); \ diff --git a/drivers/compress/isal/isal_compress_pmd.c b/drivers/compress/isal/isal_compress_pmd.c index e943336b..9f1e9688 100644 --- a/drivers/compress/isal/isal_compress_pmd.c +++ b/drivers/compress/isal/isal_compress_pmd.c @@ -314,24 +314,23 @@ chained_mbuf_decompression(struct rte_comp_op *op, struct isal_comp_qp *qp) ret = isal_inflate(qp->state); - /* Check for first segment, offset needs to be accounted for */ - if (remaining_data == op->src.length) { - consumed_data = src->data_len - qp->state->avail_in - - src_remaining_offset; - } else - consumed_data = src->data_len - qp->state->avail_in; - - op->consumed += consumed_data; - remaining_data -= consumed_data; - if (ret != ISAL_DECOMP_OK) { ISAL_PMD_LOG(ERR, "Decompression operation failed\n"); op->status = RTE_COMP_OP_STATUS_ERROR; return ret; } + /* Check for first segment, offset needs to be accounted for */ + if (remaining_data == op->src.length) { + consumed_data = src->data_len - src_remaining_offset; + } else + consumed_data = src->data_len; + if (qp->state->avail_in == 0 && op->consumed != op->src.length) { + op->consumed += consumed_data; + remaining_data -= consumed_data; + if (src->next != NULL) { src = src->next; qp->state->next_in = @@ -460,8 +459,9 @@ process_isal_deflate(struct rte_comp_op *op, struct isal_comp_qp *qp, return ret; } } - op->consumed = qp->stream->total_in; - op->produced = qp->stream->total_out; + op->consumed = qp->stream->total_in; + op->produced = qp->stream->total_out; + isal_deflate_reset(qp->stream); return ret; } @@ -538,6 +538,7 @@ process_isal_inflate(struct rte_comp_op *op, struct isal_comp_qp *qp) op->consumed = op->src.length - qp->state->avail_in; } op->produced = qp->state->total_out; + isal_inflate_reset(qp->state); return ret; } diff --git a/drivers/compress/octeontx/otx_zip_pmd.c b/drivers/compress/octeontx/otx_zip_pmd.c index 67ff5066..a1651b22 100644 --- a/drivers/compress/octeontx/otx_zip_pmd.c +++ b/drivers/compress/octeontx/otx_zip_pmd.c @@ -647,10 +647,7 @@ static struct rte_pci_driver octtx_zip_pmd = { RTE_PMD_REGISTER_PCI(COMPRESSDEV_NAME_ZIP_PMD, octtx_zip_pmd); RTE_PMD_REGISTER_PCI_TABLE(COMPRESSDEV_NAME_ZIP_PMD, pci_id_octtx_zipvf_table); -RTE_INIT(octtx_zip_init_log); - -static void -octtx_zip_init_log(void) +RTE_INIT(octtx_zip_init_log) { octtx_zip_logtype_driver = rte_log_register("pmd.compress.octeontx"); if (octtx_zip_logtype_driver >= 0) diff --git a/drivers/compress/qat/qat_comp.c b/drivers/compress/qat/qat_comp.c index d70c5949..27547428 100644 --- a/drivers/compress/qat/qat_comp.c +++ b/drivers/compress/qat/qat_comp.c @@ -141,6 +141,14 @@ qat_comp_process_response(void **op, uint8_t *resp) resp_msg->comn_resp.comn_status)) != ICP_QAT_FW_COMN_STATUS_FLAG_OK) { + if (unlikely((ICP_QAT_FW_COMN_RESP_XLAT_STAT_GET( + resp_msg->comn_resp.comn_status) != + ICP_QAT_FW_COMN_STATUS_FLAG_OK) && + (qat_xform->qat_comp_request_type + == QAT_COMP_REQUEST_DYNAMIC_COMP_STATELESS))) + QAT_DP_LOG(ERR, "QAT intermediate buffer may be too " + "small for output, try configuring a larger size"); + rx_op->status = RTE_COMP_OP_STATUS_ERROR; rx_op->debug_status = *((uint16_t *)(&resp_msg->comn_resp.comn_error)); diff --git a/drivers/compress/qat/qat_comp_pmd.c b/drivers/compress/qat/qat_comp_pmd.c index 01dd7361..ea930772 100644 --- a/drivers/compress/qat/qat_comp_pmd.c +++ b/drivers/compress/qat/qat_comp_pmd.c @@ -165,11 +165,14 @@ qat_comp_setup_inter_buffers(struct qat_comp_dev_private *comp_dev, } /* Create a memzone to hold intermediate buffers and associated - * meta-data needed by the firmware. The memzone contains: + * meta-data needed by the firmware. The memzone contains 3 parts: * - a list of num_im_sgls physical pointers to sgls - * - the num_im_sgl sgl structures, each pointing to 2 flat buffers - * - the flat buffers: num_im_sgl * 2 - * where num_im_sgls depends on the hardware generation of the device + * - the num_im_sgl sgl structures, each pointing to + * QAT_NUM_BUFS_IN_IM_SGL flat buffers + * - the flat buffers: num_im_sgl * QAT_NUM_BUFS_IN_IM_SGL + * buffers, each of buff_size + * num_im_sgls depends on the hardware generation of the device + * buff_size comes from the user via the config file */ size_of_ptr_array = num_im_sgls * sizeof(phys_addr_t); @@ -202,30 +205,31 @@ qat_comp_setup_inter_buffers(struct qat_comp_dev_private *comp_dev, offset_of_sgls + i * sizeof(struct qat_inter_sgl); struct qat_inter_sgl *sgl = (struct qat_inter_sgl *)(mz_start + curr_sgl_offset); + int lb; array_of_pointers->pointer[i] = mz_start_phys + curr_sgl_offset; sgl->num_bufs = QAT_NUM_BUFS_IN_IM_SGL; sgl->num_mapped_bufs = 0; sgl->resrvd = 0; - sgl->buffers[0].addr = mz_start_phys + offset_of_flat_buffs + - ((i * QAT_NUM_BUFS_IN_IM_SGL) * buff_size); - sgl->buffers[0].len = buff_size; - sgl->buffers[0].resrvd = 0; - sgl->buffers[1].addr = mz_start_phys + offset_of_flat_buffs + - (((i * QAT_NUM_BUFS_IN_IM_SGL) + 1) * buff_size); - sgl->buffers[1].len = buff_size; - sgl->buffers[1].resrvd = 0; #if QAT_IM_BUFFER_DEBUG QAT_LOG(DEBUG, " : phys addr of sgl[%i] in array_of_pointers" - "= 0x%"PRIx64, i, array_of_pointers->pointer[i]); + " = 0x%"PRIx64, i, array_of_pointers->pointer[i]); QAT_LOG(DEBUG, " : virt address of sgl[%i] = %p", i, sgl); - QAT_LOG(DEBUG, " : sgl->buffers[0].addr = 0x%"PRIx64", len=%d", - sgl->buffers[0].addr, sgl->buffers[0].len); - QAT_LOG(DEBUG, " : sgl->buffers[1].addr = 0x%"PRIx64", len=%d", - sgl->buffers[1].addr, sgl->buffers[1].len); +#endif + for (lb = 0; lb < QAT_NUM_BUFS_IN_IM_SGL; lb++) { + sgl->buffers[lb].addr = + mz_start_phys + offset_of_flat_buffs + + (((i * QAT_NUM_BUFS_IN_IM_SGL) + lb) * buff_size); + sgl->buffers[lb].len = buff_size; + sgl->buffers[lb].resrvd = 0; +#if QAT_IM_BUFFER_DEBUG + QAT_LOG(DEBUG, + " : sgl->buffers[%d].addr = 0x%"PRIx64", len=%d", + lb, sgl->buffers[lb].addr, sgl->buffers[lb].len); #endif } + } #if QAT_IM_BUFFER_DEBUG QAT_DP_HEXDUMP_LOG(DEBUG, "IM buffer memzone start:", mz_start, offset_of_flat_buffs + 32); diff --git a/drivers/compress/zlib/zlib_pmd.c b/drivers/compress/zlib/zlib_pmd.c index 7d6871b1..5a4d47d4 100644 --- a/drivers/compress/zlib/zlib_pmd.c +++ b/drivers/compress/zlib/zlib_pmd.c @@ -425,10 +425,8 @@ static struct rte_vdev_driver zlib_pmd_drv = { }; RTE_PMD_REGISTER_VDEV(COMPRESSDEV_NAME_ZLIB_PMD, zlib_pmd_drv); -RTE_INIT(zlib_init_log); -static void -zlib_init_log(void) +RTE_INIT(zlib_init_log) { zlib_logtype_driver = rte_log_register("pmd.compress.zlib"); if (zlib_logtype_driver >= 0) diff --git a/drivers/crypto/caam_jr/caam_jr_uio.c b/drivers/crypto/caam_jr/caam_jr_uio.c index c07d9db0..d94101c2 100644 --- a/drivers/crypto/caam_jr/caam_jr_uio.c +++ b/drivers/crypto/caam_jr/caam_jr_uio.c @@ -332,7 +332,7 @@ free_job_ring(uint32_t uio_fd) struct uio_job_ring *job_ring = NULL; int i; - if (!job_ring->uio_fd) + if (!uio_fd) return; for (i = 0; i < MAX_SEC_JOB_RINGS; i++) { diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c index 003116dc..11ea0d19 100644 --- a/drivers/crypto/openssl/rte_openssl_pmd.c +++ b/drivers/crypto/openssl/rte_openssl_pmd.c @@ -1843,6 +1843,9 @@ process_openssl_rsa_op(struct rte_crypto_op *cop, struct rte_crypto_asym_op *op = cop->asym; RSA *rsa = sess->u.r.rsa; uint32_t pad = (op->rsa.pad); + uint8_t *tmp; + + cop->status = RTE_CRYPTO_OP_STATUS_SUCCESS; switch (pad) { case RTE_CRYPTO_RSA_PKCS1_V1_5_BT0: @@ -1895,9 +1898,15 @@ process_openssl_rsa_op(struct rte_crypto_op *cop, break; case RTE_CRYPTO_ASYM_OP_VERIFY: + tmp = rte_malloc(NULL, op->rsa.sign.length, 0); + if (tmp == NULL) { + OPENSSL_LOG(ERR, "Memory allocation failed"); + cop->status = RTE_CRYPTO_OP_STATUS_ERROR; + break; + } ret = RSA_public_decrypt(op->rsa.sign.length, op->rsa.sign.data, - op->rsa.sign.data, + tmp, rsa, pad); @@ -1905,13 +1914,12 @@ process_openssl_rsa_op(struct rte_crypto_op *cop, "Length of public_decrypt %d " "length of message %zd\n", ret, op->rsa.message.length); - - if (memcmp(op->rsa.sign.data, op->rsa.message.data, - op->rsa.message.length)) { - OPENSSL_LOG(ERR, - "RSA sign Verification failed"); - return -1; + if ((ret <= 0) || (memcmp(tmp, op->rsa.message.data, + op->rsa.message.length))) { + OPENSSL_LOG(ERR, "RSA sign Verification failed"); + cop->status = RTE_CRYPTO_OP_STATUS_ERROR; } + rte_free(tmp); break; default: diff --git a/drivers/crypto/scheduler/scheduler_pmd.c b/drivers/crypto/scheduler/scheduler_pmd.c index 20198ccb..a1632a2b 100644 --- a/drivers/crypto/scheduler/scheduler_pmd.c +++ b/drivers/crypto/scheduler/scheduler_pmd.c @@ -369,7 +369,7 @@ parse_name_arg(const char *key __rte_unused, return -EINVAL; } - strncpy(params->name, value, RTE_CRYPTODEV_NAME_MAX_LEN); + strlcpy(params->name, value, RTE_CRYPTODEV_NAME_MAX_LEN); return 0; } diff --git a/drivers/net/avf/base/avf_register.h b/drivers/net/avf/base/avf_register.h index ba5a9f3f..adb98958 100644 --- a/drivers/net/avf/base/avf_register.h +++ b/drivers/net/avf/base/avf_register.h @@ -76,7 +76,7 @@ POSSIBILITY OF SUCH DAMAGE. #define AVF_ARQLEN1_ARQCRIT_SHIFT 30 #define AVF_ARQLEN1_ARQCRIT_MASK AVF_MASK(0x1, AVF_ARQLEN1_ARQCRIT_SHIFT) #define AVF_ARQLEN1_ARQENABLE_SHIFT 31 -#define AVF_ARQLEN1_ARQENABLE_MASK AVF_MASK(0x1, AVF_ARQLEN1_ARQENABLE_SHIFT) +#define AVF_ARQLEN1_ARQENABLE_MASK AVF_MASK(0x1U, AVF_ARQLEN1_ARQENABLE_SHIFT) #define AVF_ARQT1 0x00007000 /* Reset: EMPR */ #define AVF_ARQT1_ARQT_SHIFT 0 #define AVF_ARQT1_ARQT_MASK AVF_MASK(0x3FF, AVF_ARQT1_ARQT_SHIFT) @@ -99,7 +99,7 @@ POSSIBILITY OF SUCH DAMAGE. #define AVF_ATQLEN1_ATQCRIT_SHIFT 30 #define AVF_ATQLEN1_ATQCRIT_MASK AVF_MASK(0x1, AVF_ATQLEN1_ATQCRIT_SHIFT) #define AVF_ATQLEN1_ATQENABLE_SHIFT 31 -#define AVF_ATQLEN1_ATQENABLE_MASK AVF_MASK(0x1, AVF_ATQLEN1_ATQENABLE_SHIFT) +#define AVF_ATQLEN1_ATQENABLE_MASK AVF_MASK(0x1U, AVF_ATQLEN1_ATQENABLE_SHIFT) #define AVF_ATQT1 0x00008400 /* Reset: EMPR */ #define AVF_ATQT1_ATQT_SHIFT 0 #define AVF_ATQT1_ATQT_MASK AVF_MASK(0x3FF, AVF_ATQT1_ATQT_SHIFT) diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c index c7bc8848..1bfc63d9 100644 --- a/drivers/net/bnxt/bnxt_rxr.c +++ b/drivers/net/bnxt/bnxt_rxr.c @@ -102,25 +102,6 @@ static inline void bnxt_reuse_rx_mbuf(struct bnxt_rx_ring_info *rxr, rxr->rx_prod = prod; } -#ifdef BNXT_DEBUG -static void bnxt_reuse_ag_mbuf(struct bnxt_rx_ring_info *rxr, uint16_t cons, - struct rte_mbuf *mbuf) -{ - uint16_t prod = rxr->ag_prod; - struct bnxt_sw_rx_bd *prod_rx_buf; - struct rx_prod_pkt_bd *prod_bd, *cons_bd; - - prod_rx_buf = &rxr->ag_buf_ring[prod]; - - prod_rx_buf->mbuf = mbuf; - - prod_bd = &rxr->ag_desc_ring[prod]; - cons_bd = &rxr->ag_desc_ring[cons]; - - prod_bd->address = cons_bd->addr; -} -#endif - static inline struct rte_mbuf *bnxt_consume_rx_buf(struct bnxt_rx_ring_info *rxr, uint16_t cons) @@ -377,9 +358,6 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt, uint32_t tmp_raw_cons = *raw_cons; uint16_t cons, prod, cp_cons = RING_CMP(cpr->cp_ring_struct, tmp_raw_cons); -#ifdef BNXT_DEBUG - uint16_t ag_cons; -#endif struct rte_mbuf *mbuf; int rc = 0; uint8_t agg_buf = 0; @@ -482,8 +460,6 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt, if (rxcmp1->errors_v2 & RX_CMP_L2_ERRORS) { /* Re-install the mbuf back to the rx ring */ bnxt_reuse_rx_mbuf(rxr, cons, mbuf); - if (agg_buf) - bnxt_reuse_ag_mbuf(rxr, ag_cons, mbuf); rc = -EIO; goto next_rx; diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c index 156f31c6..1a6d8e4d 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c @@ -3216,8 +3216,6 @@ bond_probe(struct rte_vdev_device *dev) internals = rte_eth_devices[port_id].data->dev_private; internals->kvlist = kvlist; - rte_eth_dev_probing_finish(&rte_eth_devices[port_id]); - if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) { if (rte_kvargs_process(kvlist, PMD_BOND_AGG_MODE_KVARG, @@ -3230,12 +3228,12 @@ bond_probe(struct rte_vdev_device *dev) } if (internals->mode == BONDING_MODE_8023AD) - rte_eth_bond_8023ad_agg_selection_set(port_id, - agg_mode); + internals->mode4.agg_selection = agg_mode; } else { - rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE); + internals->mode4.agg_selection = AGG_STABLE; } + rte_eth_dev_probing_finish(&rte_eth_devices[port_id]); RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on " "socket %u.", name, port_id, bonding_mode, socket_id); return 0; diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c index 25ff5f68..ab0a80e1 100644 --- a/drivers/net/e1000/igb_rxtx.c +++ b/drivers/net/e1000/igb_rxtx.c @@ -50,6 +50,10 @@ #endif /* Bit Mask to indicate what bits required for building TX context */ #define IGB_TX_OFFLOAD_MASK ( \ + PKT_TX_OUTER_IPV6 | \ + PKT_TX_OUTER_IPV4 | \ + PKT_TX_IPV6 | \ + PKT_TX_IPV4 | \ PKT_TX_VLAN_PKT | \ PKT_TX_IP_CKSUM | \ PKT_TX_L4_MASK | \ diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c index 0c0ed930..abe1e7bd 100644 --- a/drivers/net/ena/ena_ethdev.c +++ b/drivers/net/ena/ena_ethdev.c @@ -509,6 +509,8 @@ err: static void ena_close(struct rte_eth_dev *dev) { + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ena_adapter *adapter = (struct ena_adapter *)(dev->data->dev_private); @@ -518,6 +520,25 @@ static void ena_close(struct rte_eth_dev *dev) ena_rx_queue_release_all(dev); ena_tx_queue_release_all(dev); + + rte_free(adapter->drv_stats); + adapter->drv_stats = NULL; + + rte_intr_disable(intr_handle); + rte_intr_callback_unregister(intr_handle, + ena_interrupt_handler_rte, + adapter); + + /* + * Pass the information to the rte_eth_dev_close() that it should also + * release the private port resources. + */ + dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; + /* + * MAC is not allocated dynamically. Setting NULL should prevent from + * release of the resource in the rte_eth_dev_release_port(). + */ + dev->data->mac_addrs = NULL; } static int @@ -1683,8 +1704,6 @@ err: static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); - struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ena_adapter *adapter = (struct ena_adapter *)(eth_dev->data->dev_private); @@ -1699,14 +1718,6 @@ static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) eth_dev->tx_pkt_burst = NULL; eth_dev->tx_pkt_prepare = NULL; - rte_free(adapter->drv_stats); - adapter->drv_stats = NULL; - - rte_intr_disable(intr_handle); - rte_intr_callback_unregister(intr_handle, - ena_interrupt_handler_rte, - adapter); - adapter->state = ENA_ADAPTER_STATE_FREE; return 0; diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c index e81c3f3b..c3869de3 100644 --- a/drivers/net/enic/enic_main.c +++ b/drivers/net/enic/enic_main.c @@ -518,7 +518,7 @@ static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx) * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is used * used when that file is not compiled. */ -bool __attribute__((weak)) +__rte_weak bool enic_use_vector_rx_handler(__rte_unused struct enic *enic) { return false; diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 8bfa2517..e1152ff0 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -69,7 +69,7 @@ I40E_TX_IEEE1588_TMST) #define I40E_TX_OFFLOAD_NOTSUP_MASK \ - (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) + ~(PKT_TX_OFFLOAD_MASK & I40E_TX_OFFLOAD_MASK) static inline void i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp) @@ -1741,6 +1741,11 @@ i40e_dev_rx_queue_setup_runtime(struct rte_eth_dev *dev, ad->rx_bulk_alloc_allowed = false; i40e_set_rx_function(dev); return 0; + } else if (ad->rx_vec_allowed && !rte_is_power_of_2(rxq->nb_rx_desc)) { + PMD_DRV_LOG(ERR, "Vector mode is allowed, but descriptor" + " number %d of queue %d isn't power of 2", + rxq->nb_rx_desc, rxq->queue_id); + return -EINVAL; } /* check bulk alloc conflict */ diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h index f00f6d64..0e6ffa00 100644 --- a/drivers/net/i40e/i40e_rxtx_vec_common.h +++ b/drivers/net/i40e/i40e_rxtx_vec_common.h @@ -192,8 +192,13 @@ static inline int i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev) { #ifndef RTE_LIBRTE_IEEE1588 + struct i40e_adapter *ad = + I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf; + struct i40e_rx_queue *rxq; + uint16_t desc, i; + bool first_queue; /* no fdir support */ if (fconf->mode != RTE_FDIR_MODE_NONE) @@ -207,6 +212,39 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev) if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_EXTEND) return -1; + /** + * Vector mode is allowed only when number of Rx queue + * descriptor is power of 2. + */ + if (!dev->data->dev_started) { + first_queue = true; + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + if (!rxq) + continue; + desc = rxq->nb_rx_desc; + if (first_queue) + ad->rx_vec_allowed = + rte_is_power_of_2(desc); + else + ad->rx_vec_allowed = + ad->rx_vec_allowed ? + rte_is_power_of_2(desc) : + ad->rx_vec_allowed; + first_queue = false; + } + } else { + /* Only check the first queue's descriptor number */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + if (!rxq) + continue; + desc = rxq->nb_rx_desc; + ad->rx_vec_allowed = rte_is_power_of_2(desc); + break; + } + } + return 0; #else RTE_SET_USED(dev); diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 269595b7..c9e82d51 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -220,6 +220,8 @@ static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev); static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev); static void ixgbe_dev_interrupt_handler(void *param); static void ixgbe_dev_interrupt_delayed_handler(void *param); +static void ixgbe_dev_setup_link_alarm_handler(void *param); + static int ixgbe_add_rar(struct rte_eth_dev *dev, struct ether_addr *mac_addr, uint32_t index, uint32_t pool); static void ixgbe_remove_rar(struct rte_eth_dev *dev, uint32_t index); @@ -1303,7 +1305,7 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev) PMD_INIT_FUNC_TRACE(); if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return -EPERM; + return 0; hw = IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); @@ -1702,7 +1704,7 @@ eth_ixgbevf_dev_uninit(struct rte_eth_dev *eth_dev) PMD_INIT_FUNC_TRACE(); if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return -EPERM; + return 0; hw = IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); @@ -2793,6 +2795,8 @@ ixgbe_dev_stop(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); + rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev); + /* disable interrupts */ ixgbe_disable_intr(hw); @@ -3971,6 +3975,25 @@ out: return ret_val; } +static void +ixgbe_dev_setup_link_alarm_handler(void *param) +{ + struct rte_eth_dev *dev = (struct rte_eth_dev *)param; + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_interrupt *intr = + IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + u32 speed; + bool autoneg = false; + + speed = hw->phy.autoneg_advertised; + if (!speed) + ixgbe_get_link_capabilities(hw, &speed, &autoneg); + + ixgbe_setup_link(hw, speed, true); + + intr->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG; +} + /* return 0 means link status changed, -1 means not changed */ int ixgbe_dev_link_update_share(struct rte_eth_dev *dev, @@ -3983,9 +4006,7 @@ ixgbe_dev_link_update_share(struct rte_eth_dev *dev, IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private); int link_up; int diag; - u32 speed = 0; int wait = 1; - bool autoneg = false; memset(&link, 0, sizeof(link)); link.link_status = ETH_LINK_DOWN; @@ -3995,13 +4016,8 @@ ixgbe_dev_link_update_share(struct rte_eth_dev *dev, hw->mac.get_link_status = true; - if ((intr->flags & IXGBE_FLAG_NEED_LINK_CONFIG) && - ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) { - speed = hw->phy.autoneg_advertised; - if (!speed) - ixgbe_get_link_capabilities(hw, &speed, &autoneg); - ixgbe_setup_link(hw, speed, true); - } + if (intr->flags & IXGBE_FLAG_NEED_LINK_CONFIG) + return rte_eth_linkstatus_set(dev, &link); /* check if it needs to wait to complete, if lsc interrupt is enabled */ if (wait_to_complete == 0 || dev->data->dev_conf.intr_conf.lsc != 0) @@ -4019,11 +4035,14 @@ ixgbe_dev_link_update_share(struct rte_eth_dev *dev, } if (link_up == 0) { - intr->flags |= IXGBE_FLAG_NEED_LINK_CONFIG; + if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) { + intr->flags |= IXGBE_FLAG_NEED_LINK_CONFIG; + rte_eal_alarm_set(10, + ixgbe_dev_setup_link_alarm_handler, dev); + } return rte_eth_linkstatus_set(dev, &link); } - intr->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG; link.link_status = ETH_LINK_UP; link.link_duplex = ETH_LINK_FULL_DUPLEX; @@ -5128,6 +5147,8 @@ ixgbevf_dev_stop(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); + rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev); + ixgbevf_intr_disable(dev); hw->adapter_stopped = 1; diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index fecb57c1..7a50bccd 100644 --- a/drivers/net/mlx5/Makefile +++ b/drivers/net/mlx5/Makefile @@ -51,6 +51,7 @@ CFLAGS += -D_DEFAULT_SOURCE CFLAGS += -D_XOPEN_SOURCE=600 CFLAGS += $(WERROR_FLAGS) CFLAGS += -Wno-strict-prototypes +CFLAGS += $(shell pkg-config --cflags libmnl) ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y) CFLAGS += -DMLX5_GLUE='"$(LIB_GLUE)"' CFLAGS += -DMLX5_GLUE_VERSION='"$(LIB_GLUE_VERSION)"' @@ -59,7 +60,7 @@ LDLIBS += -ldl else LDLIBS += -libverbs -lmlx5 endif -LDLIBS += -lmnl +LDLIBS += $(shell pkg-config --libs libmnl) LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs LDLIBS += -lrte_bus_pci @@ -137,9 +138,14 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh enum MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP \ $(AUTOCONF_OUTPUT) $Q sh -- '$<' '$@' \ + HAVE_IBV_MLX5_MOD_CQE_128B_PAD \ + infiniband/mlx5dv.h \ + enum MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ HAVE_IBV_FLOW_DV_SUPPORT \ infiniband/mlx5dv.h \ - enum MLX5DV_FLOW_ACTION_TAG \ + func mlx5dv_create_flow_action_packet_reformat \ $(AUTOCONF_OUTPUT) $Q sh -- '$<' '$@' \ HAVE_ETHTOOL_LINK_MODE_25G \ @@ -212,6 +218,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh enum IFLA_PHYS_PORT_NAME \ $(AUTOCONF_OUTPUT) $Q sh -- '$<' '$@' \ + HAVE_IFLA_VXLAN_COLLECT_METADATA \ + linux/if_link.h \ + enum IFLA_VXLAN_COLLECT_METADATA \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ HAVE_TCA_CHAIN \ linux/rtnetlink.h \ enum TCA_CHAIN \ @@ -372,6 +383,86 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh enum TCA_VLAN_PUSH_VLAN_PRIORITY \ $(AUTOCONF_OUTPUT) $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_KEY_ID \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_KEY_ID \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_IPV4_SRC \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_IPV4_DST \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_IPV4_DST_MASK \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_IPV6_SRC \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_IPV6_DST \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_IPV6_DST_MASK \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_UDP_SRC_PORT \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_UDP_DST_PORT \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK \ + linux/pkt_cls.h \ + enum TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TC_ACT_TUNNEL_KEY \ + linux/tc_act/tc_tunnel_key.h \ + define TCA_ACT_TUNNEL_KEY \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT \ + linux/tc_act/tc_tunnel_key.h \ + enum TCA_TUNNEL_KEY_ENC_DST_PORT \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_TCA_TUNNEL_KEY_NO_CSUM \ + linux/tc_act/tc_tunnel_key.h \ + enum TCA_TUNNEL_KEY_NO_CSUM \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ HAVE_TC_ACT_PEDIT \ linux/tc_act/tc_pedit.h \ enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \ diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build index e8cbe3ee..28938db0 100644 --- a/drivers/net/mlx5/meson.build +++ b/drivers/net/mlx5/meson.build @@ -96,8 +96,10 @@ if build 'MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED' ], [ 'HAVE_IBV_MLX5_MOD_CQE_128B_COMP', 'infiniband/mlx5dv.h', 'MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP' ], + [ 'HAVE_IBV_MLX5_MOD_CQE_128B_PAD', 'infiniband/mlx5dv.h', + 'MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD' ], [ 'HAVE_IBV_FLOW_DV_SUPPORT', 'infiniband/mlx5dv.h', - 'MLX5DV_FLOW_ACTION_TAG' ], + 'mlx5dv_create_flow_action_packet_reformat' ], [ 'HAVE_IBV_DEVICE_MPLS_SUPPORT', 'infiniband/verbs.h', 'IBV_FLOW_SPEC_MPLS' ], [ 'HAVE_IBV_WQ_FLAG_RX_END_PADDING', 'infiniband/verbs.h', @@ -128,6 +130,8 @@ if build 'IFLA_PHYS_SWITCH_ID' ], [ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h', 'IFLA_PHYS_PORT_NAME' ], + [ 'HAVE_IFLA_VXLAN_COLLECT_METADATA', 'linux/if_link.h', + 'IFLA_VXLAN_COLLECT_METADATA' ], [ 'HAVE_TCA_CHAIN', 'linux/rtnetlink.h', 'TCA_CHAIN' ], [ 'HAVE_TCA_FLOWER_ACT', 'linux/pkt_cls.h', @@ -192,6 +196,38 @@ if build 'TC_ACT_GOTO_CHAIN' ], [ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h', 'TCA_VLAN_PUSH_VLAN_PRIORITY' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_KEY_ID', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_KEY_ID' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_IPV4_SRC' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_IPV4_DST' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_IPV4_DST_MASK' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_IPV6_SRC' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_IPV6_DST' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_IPV6_DST_MASK' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_UDP_SRC_PORT' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_UDP_DST_PORT' ], + [ 'HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK', 'linux/pkt_cls.h', + 'TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK' ], + [ 'HAVE_TC_ACT_TUNNEL_KEY', 'linux/tc_act/tc_tunnel_key.h', + 'TCA_ACT_TUNNEL_KEY' ], + [ 'HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT', 'linux/tc_act/tc_tunnel_key.h', + 'TCA_TUNNEL_KEY_ENC_DST_PORT' ], + [ 'HAVE_TCA_TUNNEL_KEY_NO_CSUM', 'linux/tc_act/tc_tunnel_key.h', + 'TCA_TUNNEL_KEY_NO_CSUM' ], [ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h', 'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ], [ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h', diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index a277b573..ed1fcfc7 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -51,6 +51,9 @@ /* Device parameter to enable RX completion queue compression. */ #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en" +/* Device parameter to enable RX completion entry padding to 128B. */ +#define MLX5_RXQ_CQE_PAD_EN "rxq_cqe_pad_en" + /* Device parameter to enable Multi-Packet Rx queue. */ #define MLX5_RX_MPRQ_EN "mprq_en" @@ -72,6 +75,12 @@ */ #define MLX5_TXQS_MIN_INLINE "txqs_min_inline" +/* + * Device parameter to configure the number of TX queues threshold for + * enabling vectorized Tx. + */ +#define MLX5_TXQS_MAX_VEC "txqs_max_vec" + /* Device parameter to enable multi-packet send WQEs. */ #define MLX5_TXQ_MPW_EN "txq_mpw_en" @@ -390,6 +399,7 @@ const struct eth_dev_ops mlx5_dev_ops = { .filter_ctrl = mlx5_dev_filter_ctrl, .rx_descriptor_status = mlx5_rx_descriptor_status, .tx_descriptor_status = mlx5_tx_descriptor_status, + .rx_queue_count = mlx5_rx_queue_count, .rx_queue_intr_enable = mlx5_rx_intr_enable, .rx_queue_intr_disable = mlx5_rx_intr_disable, .is_removed = mlx5_is_removed, @@ -479,6 +489,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque) } if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { config->cqe_comp = !!tmp; + } else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) { + config->cqe_pad = !!tmp; } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) { config->mprq.enabled = !!tmp; } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) { @@ -491,6 +503,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque) config->txq_inline = tmp; } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { config->txqs_inline = tmp; + } else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) { + config->txqs_vec = tmp; } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { config->mps = !!tmp; } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) { @@ -531,12 +545,14 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) { const char **params = (const char *[]){ MLX5_RXQ_CQE_COMP_EN, + MLX5_RXQ_CQE_PAD_EN, MLX5_RX_MPRQ_EN, MLX5_RX_MPRQ_LOG_STRIDE_NUM, MLX5_RX_MPRQ_MAX_MEMCPY_LEN, MLX5_RXQS_MIN_MPRQ, MLX5_TXQ_INLINE, MLX5_TXQS_MIN_INLINE, + MLX5_TXQS_MAX_VEC, MLX5_TXQ_MPW_EN, MLX5_TXQ_MPW_HDR_DSEG_EN, MLX5_TXQ_MAX_INLINE_LEN, @@ -698,8 +714,8 @@ mlx5_uar_init_secondary(struct rte_eth_dev *dev) * Backing DPDK device. * @param ibv_dev * Verbs device. - * @param vf - * If nonzero, enable VF-specific features. + * @param config + * Device configuration parameters. * @param[in] switch_info * Switch properties of Ethernet device. * @@ -713,7 +729,7 @@ mlx5_uar_init_secondary(struct rte_eth_dev *dev) static struct rte_eth_dev * mlx5_dev_spawn(struct rte_device *dpdk_dev, struct ibv_device *ibv_dev, - int vf, + struct mlx5_dev_config config, const struct mlx5_switch_info *switch_info) { struct ibv_context *ctx; @@ -721,28 +737,12 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, struct ibv_port_attr port_attr; struct ibv_pd *pd = NULL; struct mlx5dv_context dv_attr = { .comp_mask = 0 }; - struct mlx5_dev_config config = { - .vf = !!vf, - .mps = MLX5_ARG_UNSET, - .tx_vec_en = 1, - .rx_vec_en = 1, - .mpw_hdr_dseg = 0, - .txq_inline = MLX5_ARG_UNSET, - .txqs_inline = MLX5_ARG_UNSET, - .inline_max_packet_sz = MLX5_ARG_UNSET, - .vf_nl_en = 1, - .mprq = { - .enabled = 0, - .stride_num_n = MLX5_MPRQ_STRIDE_NUM_N, - .max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN, - .min_rxqs_num = MLX5_MPRQ_MIN_RXQS, - }, - }; struct rte_eth_dev *eth_dev = NULL; struct priv *priv = NULL; int err = 0; unsigned int mps; unsigned int cqe_comp; + unsigned int cqe_pad = 0; unsigned int tunnel_en = 0; unsigned int mpls_en = 0; unsigned int swp = 0; @@ -863,6 +863,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, else cqe_comp = 1; config.cqe_comp = cqe_comp; +#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD + /* Whether device supports 128B Rx CQE padding. */ + cqe_pad = RTE_CACHE_LINE_SIZE == 128 && + (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD); +#endif #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) { tunnel_en = ((dv_attr.tunnel_offloads_caps & @@ -1079,6 +1084,12 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, DRV_LOG(WARNING, "Rx CQE compression isn't supported"); config.cqe_comp = 0; } + if (config.cqe_pad && !cqe_pad) { + DRV_LOG(WARNING, "Rx CQE padding isn't supported"); + config.cqe_pad = 0; + } else if (config.cqe_pad) { + DRV_LOG(INFO, "Rx CQE padding is enabled"); + } if (config.mprq.enabled && mprq) { if (config.mprq.stride_num_n > mprq_max_stride_num_n || config.mprq.stride_num_n < mprq_min_stride_num_n) { @@ -1157,7 +1168,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, eth_dev->dev_ops = &mlx5_dev_ops; /* Register MAC address. */ claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0)); - if (vf && config.vf_nl_en) + if (config.vf && config.vf_nl_en) mlx5_nl_mac_addr_sync(eth_dev); priv->tcf_context = mlx5_flow_tcf_context_create(); if (!priv->tcf_context) { @@ -1326,7 +1337,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, { struct ibv_device **ibv_list; unsigned int n = 0; - int vf; + struct mlx5_dev_config dev_config; int ret; assert(pci_drv == &mlx5_driver); @@ -1424,21 +1435,46 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, */ if (n) qsort(list, n, sizeof(*list), mlx5_dev_spawn_data_cmp); + /* Default configuration. */ + dev_config = (struct mlx5_dev_config){ + .mps = MLX5_ARG_UNSET, + .tx_vec_en = 1, + .rx_vec_en = 1, + .txq_inline = MLX5_ARG_UNSET, + .txqs_inline = MLX5_ARG_UNSET, + .txqs_vec = MLX5_ARG_UNSET, + .inline_max_packet_sz = MLX5_ARG_UNSET, + .vf_nl_en = 1, + .mprq = { + .enabled = 0, /* Disabled by default. */ + .stride_num_n = MLX5_MPRQ_STRIDE_NUM_N, + .max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN, + .min_rxqs_num = MLX5_MPRQ_MIN_RXQS, + }, + }; + /* Device speicific configuration. */ switch (pci_dev->id.device_id) { + case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF: + dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS_BLUEFIELD; + break; case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF: case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF: case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF: case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF: - vf = 1; + dev_config.vf = 1; break; default: - vf = 0; + break; } + /* Set architecture-dependent default value if unset. */ + if (dev_config.txqs_vec == MLX5_ARG_UNSET) + dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS; for (i = 0; i != n; ++i) { uint32_t restore; - list[i].eth_dev = mlx5_dev_spawn - (&pci_dev->device, list[i].ibv_dev, vf, &list[i].info); + list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device, + list[i].ibv_dev, dev_config, + &list[i].info); if (!list[i].eth_dev) { if (rte_errno != EBUSY && rte_errno != EEXIST) break; diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 74d87c05..bc500b2b 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -115,6 +115,7 @@ struct mlx5_dev_config { /* Whether tunnel stateless offloads are supported. */ unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */ unsigned int cqe_comp:1; /* CQE compression is enabled. */ + unsigned int cqe_pad:1; /* CQE padding is enabled. */ unsigned int tso:1; /* Whether TSO is supported. */ unsigned int tx_vec_en:1; /* Tx vector is enabled. */ unsigned int rx_vec_en:1; /* Rx vector is enabled. */ @@ -139,6 +140,7 @@ struct mlx5_dev_config { unsigned int ind_table_max_size; /* Maximum indirection table size. */ int txq_inline; /* Maximum packet size for inlining. */ int txqs_inline; /* Queue number threshold for inlining. */ + int txqs_vec; /* Queue number threshold for vectorized Tx. */ int inline_max_packet_sz; /* Max packet size for inlining. */ }; @@ -219,6 +221,7 @@ struct priv { /* Verbs Indirection tables. */ LIST_HEAD(ind_tables, mlx5_ind_table_ibv) ind_tbls; LIST_HEAD(matchers, mlx5_flow_dv_matcher) matchers; + LIST_HEAD(encap_decap, mlx5_flow_dv_encap_decap_resource) encaps_decaps; uint32_t link_speed_capa; /* Link speed capabilities. */ struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */ int primary_socket; /* Unix socket for primary process. */ diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h index f2a16795..bfe66558 100644 --- a/drivers/net/mlx5/mlx5_defs.h +++ b/drivers/net/mlx5/mlx5_defs.h @@ -60,8 +60,13 @@ /* Maximum Packet headers size (L2+L3+L4) for TSO. */ #define MLX5_MAX_TSO_HEADER 192 -/* Default minimum number of Tx queues for vectorized Tx. */ -#define MLX5_VPMD_MIN_TXQS 4 +/* Default maximum number of Tx queues for vectorized Tx. */ +#if defined(RTE_ARCH_ARM64) +#define MLX5_VPMD_MAX_TXQS 8 +#else +#define MLX5_VPMD_MAX_TXQS 4 +#endif +#define MLX5_VPMD_MAX_TXQS_BLUEFIELD 16 /* Threshold of buffer replenishment for vectorized Rx. */ #define MLX5_VPMD_RXQ_RPLNSH_THRESH(n) \ diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 280af0ab..3c2ac4b3 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -239,7 +239,6 @@ static const struct rte_flow_ops mlx5_flow_ops = { /* Convert FDIR request to Generic flow. */ struct mlx5_fdir { struct rte_flow_attr attr; - struct rte_flow_action actions[2]; struct rte_flow_item items[4]; struct rte_flow_item_eth l2; struct rte_flow_item_eth l2_mask; @@ -259,6 +258,7 @@ struct mlx5_fdir { struct rte_flow_item_udp udp; struct rte_flow_item_tcp tcp; } l4_mask; + struct rte_flow_action actions[2]; struct rte_flow_action_queue queue; }; @@ -275,7 +275,7 @@ static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = { /* Tunnel information. */ struct mlx5_flow_tunnel_info { - uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */ + uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */ uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */ }; @@ -912,7 +912,13 @@ mlx5_flow_validate_action_rss(const struct rte_flow_action *action, RTE_FLOW_ERROR_TYPE_ACTION_CONF, &rss->level, "tunnel RSS is not supported"); - if (rss->key_len < MLX5_RSS_HASH_KEY_LEN) + /* allow RSS key_len 0 in case of NULL (default) RSS key. */ + if (rss->key_len == 0 && rss->key != NULL) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + &rss->key_len, + "RSS hash key length 0"); + if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION_CONF, &rss->key_len, @@ -1046,15 +1052,13 @@ mlx5_flow_validate_item_eth(const struct rte_flow_item *item, }; int ret; int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : + MLX5_FLOW_LAYER_OUTER_L2; - if (item_flags & MLX5_FLOW_LAYER_OUTER_L2) + if (item_flags & ethm) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, - "3 levels of l2 are not supported"); - if ((item_flags & MLX5_FLOW_LAYER_INNER_L2) && !tunnel) - return rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ITEM, item, - "2 L2 without tunnel are not supported"); + "multiple L2 layers not supported"); if (!mask) mask = &rte_flow_item_eth_mask; ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, @@ -1079,7 +1083,7 @@ mlx5_flow_validate_item_eth(const struct rte_flow_item *item, */ int mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, - int64_t item_flags, + uint64_t item_flags, struct rte_flow_error *error) { const struct rte_flow_item_vlan *spec = item->spec; @@ -1091,17 +1095,17 @@ mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, uint16_t vlan_tag = 0; const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); int ret; - const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 | + const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 | MLX5_FLOW_LAYER_INNER_L4) : (MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4); - const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : + const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : MLX5_FLOW_LAYER_OUTER_VLAN; if (item_flags & vlanm) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, - "VLAN layer already configured"); + "multiple VLAN layers not supported"); else if ((item_flags & l34m) != 0) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, @@ -1145,7 +1149,7 @@ mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, */ int mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, - int64_t item_flags, + uint64_t item_flags, struct rte_flow_error *error) { const struct rte_flow_item_ipv4 *mask = item->mask; @@ -1158,15 +1162,17 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, }, }; const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : + MLX5_FLOW_LAYER_OUTER_L3; + const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : + MLX5_FLOW_LAYER_OUTER_L4; int ret; - if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : - MLX5_FLOW_LAYER_OUTER_L3)) + if (item_flags & l3m) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, "multiple L3 layers not supported"); - else if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : - MLX5_FLOW_LAYER_OUTER_L4)) + else if (item_flags & l4m) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, "L3 cannot follow an L4 layer."); @@ -1214,15 +1220,17 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, }, }; const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : + MLX5_FLOW_LAYER_OUTER_L3; + const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : + MLX5_FLOW_LAYER_OUTER_L4; int ret; - if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : - MLX5_FLOW_LAYER_OUTER_L3)) + if (item_flags & l3m) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, "multiple L3 layers not supported"); - else if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : - MLX5_FLOW_LAYER_OUTER_L4)) + else if (item_flags & l4m) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, "L3 cannot follow an L4 layer."); @@ -1273,6 +1281,10 @@ mlx5_flow_validate_item_udp(const struct rte_flow_item *item, { const struct rte_flow_item_udp *mask = item->mask; const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : + MLX5_FLOW_LAYER_OUTER_L3; + const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : + MLX5_FLOW_LAYER_OUTER_L4; int ret; if (target_protocol != 0xff && target_protocol != IPPROTO_UDP) @@ -1280,16 +1292,14 @@ mlx5_flow_validate_item_udp(const struct rte_flow_item *item, RTE_FLOW_ERROR_TYPE_ITEM, item, "protocol filtering not compatible" " with UDP layer"); - if (!(item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : - MLX5_FLOW_LAYER_OUTER_L3))) + if (!(item_flags & l3m)) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, "L3 is mandatory to filter on L4"); - if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : - MLX5_FLOW_LAYER_OUTER_L4)) + if (item_flags & l4m) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, - "L4 layer is already present"); + "multiple L4 layers not supported"); if (!mask) mask = &rte_flow_item_udp_mask; ret = mlx5_flow_item_acceptable @@ -1325,6 +1335,10 @@ mlx5_flow_validate_item_tcp(const struct rte_flow_item *item, { const struct rte_flow_item_tcp *mask = item->mask; const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : + MLX5_FLOW_LAYER_OUTER_L3; + const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : + MLX5_FLOW_LAYER_OUTER_L4; int ret; assert(flow_mask); @@ -1333,16 +1347,14 @@ mlx5_flow_validate_item_tcp(const struct rte_flow_item *item, RTE_FLOW_ERROR_TYPE_ITEM, item, "protocol filtering not compatible" " with TCP layer"); - if (!(item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : - MLX5_FLOW_LAYER_OUTER_L3))) + if (!(item_flags & l3m)) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, "L3 is mandatory to filter on L4"); - if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : - MLX5_FLOW_LAYER_OUTER_L4)) + if (item_flags & l4m) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, - "L4 layer is already present"); + "multiple L4 layers not supported"); if (!mask) mask = &rte_flow_item_tcp_mask; ret = mlx5_flow_item_acceptable @@ -1387,7 +1399,8 @@ mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, if (item_flags & MLX5_FLOW_LAYER_TUNNEL) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, - "a tunnel is already present"); + "multiple tunnel layers not" + " supported"); /* * Verify only UDPv4 is present as defined in * https://tools.ietf.org/html/rfc7348 @@ -1473,7 +1486,8 @@ mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, if (item_flags & MLX5_FLOW_LAYER_TUNNEL) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, - "a tunnel is already present"); + "multiple tunnel layers not" + " supported"); /* * Verify only UDPv4 is present as defined in * https://tools.ietf.org/html/rfc7348 @@ -1556,7 +1570,8 @@ mlx5_flow_validate_item_gre(const struct rte_flow_item *item, if (item_flags & MLX5_FLOW_LAYER_TUNNEL) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, - "a tunnel is already present"); + "multiple tunnel layers not" + " supported"); if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, @@ -1610,11 +1625,13 @@ mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused, RTE_FLOW_ERROR_TYPE_ITEM, item, "protocol filtering not compatible" " with MPLS layer"); - if (item_flags & MLX5_FLOW_LAYER_TUNNEL) + /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */ + if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && + !(item_flags & MLX5_FLOW_LAYER_GRE)) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, - "a tunnel is already" - " present"); + "multiple tunnel layers not" + " supported"); if (!mask) mask = &rte_flow_item_mpls_mask; ret = mlx5_flow_item_acceptable @@ -1646,8 +1663,6 @@ static struct mlx5_flow * flow_null_prepare(const struct rte_flow_attr *attr __rte_unused, const struct rte_flow_item items[] __rte_unused, const struct rte_flow_action actions[] __rte_unused, - uint64_t *item_flags __rte_unused, - uint64_t *action_flags __rte_unused, struct rte_flow_error *error __rte_unused) { rte_errno = ENOTSUP; @@ -1775,16 +1790,19 @@ flow_drv_validate(struct rte_eth_dev *dev, * calculates the size of memory required for device flow, allocates the memory, * initializes the device flow and returns the pointer. * + * @note + * This function initializes device flow structure such as dv, tcf or verbs in + * struct mlx5_flow. However, it is caller's responsibility to initialize the + * rest. For example, adding returning device flow to flow->dev_flow list and + * setting backward reference to the flow should be done out of this function. + * layers field is not filled either. + * * @param[in] attr * Pointer to the flow attributes. * @param[in] items * Pointer to the list of items. * @param[in] actions * Pointer to the list of actions. - * @param[out] item_flags - * Pointer to bit mask of all items detected. - * @param[out] action_flags - * Pointer to bit mask of all actions detected. * @param[out] error * Pointer to the error structure. * @@ -1792,12 +1810,10 @@ flow_drv_validate(struct rte_eth_dev *dev, * Pointer to device flow on success, otherwise NULL and rte_ernno is set. */ static inline struct mlx5_flow * -flow_drv_prepare(struct rte_flow *flow, +flow_drv_prepare(const struct rte_flow *flow, const struct rte_flow_attr *attr, const struct rte_flow_item items[], const struct rte_flow_action actions[], - uint64_t *item_flags, - uint64_t *action_flags, struct rte_flow_error *error) { const struct mlx5_flow_driver_ops *fops; @@ -1805,8 +1821,7 @@ flow_drv_prepare(struct rte_flow *flow, assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); fops = flow_get_drv_ops(type); - return fops->prepare(attr, items, actions, item_flags, action_flags, - error); + return fops->prepare(attr, items, actions, error); } /** @@ -1815,6 +1830,12 @@ flow_drv_prepare(struct rte_flow *flow, * translates a generic flow into a driver flow. flow_drv_prepare() must * precede. * + * @note + * dev_flow->layers could be filled as a result of parsing during translation + * if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled + * if necessary. As a flow can have multiple dev_flows by RSS flow expansion, + * flow->actions could be overwritten even though all the expanded dev_flows + * have the same actions. * * @param[in] dev * Pointer to the rte dev structure. @@ -1878,7 +1899,7 @@ flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, * Flow driver remove API. This abstracts calling driver specific functions. * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow * on device. All the resources of the flow should be freed by calling - * flow_dv_destroy(). + * flow_drv_destroy(). * * @param[in] dev * Pointer to Ethernet device. @@ -2009,8 +2030,6 @@ flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list, { struct rte_flow *flow = NULL; struct mlx5_flow *dev_flow; - uint64_t action_flags = 0; - uint64_t item_flags = 0; const struct rte_flow_action_rss *rss; union { struct rte_flow_expand_rss buf; @@ -2053,16 +2072,10 @@ flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list, } for (i = 0; i < buf->entries; ++i) { dev_flow = flow_drv_prepare(flow, attr, buf->entry[i].pattern, - actions, &item_flags, &action_flags, - error); + actions, error); if (!dev_flow) goto error; dev_flow->flow = flow; - dev_flow->layers = item_flags; - /* Store actions once as expanded flows have same actions. */ - if (i == 0) - flow->actions = action_flags; - assert(flow->actions == action_flags); LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next); ret = flow_drv_translate(dev, dev_flow, attr, buf->entry[i].pattern, @@ -2127,6 +2140,7 @@ flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list, */ if (dev->data->dev_started) flow_rxq_flags_trim(dev, flow); + rte_free(flow->fdir); rte_free(flow); } @@ -2444,7 +2458,7 @@ mlx5_flow_query(struct rte_eth_dev *dev, * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx5_fdir_filter_convert(struct rte_eth_dev *dev, +flow_fdir_filter_convert(struct rte_eth_dev *dev, const struct rte_eth_fdir_filter *fdir_filter, struct mlx5_fdir *attributes) { @@ -2616,6 +2630,69 @@ mlx5_fdir_filter_convert(struct rte_eth_dev *dev, return 0; } +#define FLOW_FDIR_CMP(f1, f2, fld) \ + memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld)) + +/** + * Compare two FDIR flows. If items and actions are identical, the two flows are + * regarded as same. + * + * @param dev + * Pointer to Ethernet device. + * @param f1 + * FDIR flow to compare. + * @param f2 + * FDIR flow to compare. + * + * @return + * Zero on match, 1 otherwise. + */ +static int +flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2) +{ + if (FLOW_FDIR_CMP(f1, f2, attr) || + FLOW_FDIR_CMP(f1, f2, l2) || + FLOW_FDIR_CMP(f1, f2, l2_mask) || + FLOW_FDIR_CMP(f1, f2, l3) || + FLOW_FDIR_CMP(f1, f2, l3_mask) || + FLOW_FDIR_CMP(f1, f2, l4) || + FLOW_FDIR_CMP(f1, f2, l4_mask) || + FLOW_FDIR_CMP(f1, f2, actions[0])) + return 1; + if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE && + FLOW_FDIR_CMP(f1, f2, queue)) + return 1; + return 0; +} + +/** + * Search device flow list to find out a matched FDIR flow. + * + * @param dev + * Pointer to Ethernet device. + * @param fdir_flow + * FDIR flow to lookup. + * + * @return + * Pointer of flow if found, NULL otherwise. + */ +static struct rte_flow * +flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow) +{ + struct priv *priv = dev->data->dev_private; + struct rte_flow *flow = NULL; + + assert(fdir_flow); + TAILQ_FOREACH(flow, &priv->flows, next) { + if (flow->fdir && !flow_fdir_cmp(flow->fdir, fdir_flow)) { + DRV_LOG(DEBUG, "port %u found FDIR flow %p", + dev->data->port_id, (void *)flow); + break; + } + } + return flow; +} + /** * Add new flow director filter and store it in list. * @@ -2628,32 +2705,38 @@ mlx5_fdir_filter_convert(struct rte_eth_dev *dev, * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx5_fdir_filter_add(struct rte_eth_dev *dev, +flow_fdir_filter_add(struct rte_eth_dev *dev, const struct rte_eth_fdir_filter *fdir_filter) { struct priv *priv = dev->data->dev_private; - struct mlx5_fdir attributes = { - .attr.group = 0, - .l2_mask = { - .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00", - .src.addr_bytes = "\x00\x00\x00\x00\x00\x00", - .type = 0, - }, - }; - struct rte_flow_error error; + struct mlx5_fdir *fdir_flow; struct rte_flow *flow; int ret; - ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes); + fdir_flow = rte_zmalloc(__func__, sizeof(*fdir_flow), 0); + if (!fdir_flow) { + rte_errno = ENOMEM; + return -rte_errno; + } + ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow); if (ret) - return ret; - flow = flow_list_create(dev, &priv->flows, &attributes.attr, - attributes.items, attributes.actions, &error); + goto error; + flow = flow_fdir_filter_lookup(dev, fdir_flow); if (flow) { - DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id, - (void *)flow); - return 0; + rte_errno = EEXIST; + goto error; } + flow = flow_list_create(dev, &priv->flows, &fdir_flow->attr, + fdir_flow->items, fdir_flow->actions, NULL); + if (!flow) + goto error; + assert(!flow->fdir); + flow->fdir = fdir_flow; + DRV_LOG(DEBUG, "port %u created FDIR flow %p", + dev->data->port_id, (void *)flow); + return 0; +error: + rte_free(fdir_flow); return -rte_errno; } @@ -2669,12 +2752,28 @@ mlx5_fdir_filter_add(struct rte_eth_dev *dev, * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused, - const struct rte_eth_fdir_filter *fdir_filter - __rte_unused) +flow_fdir_filter_delete(struct rte_eth_dev *dev, + const struct rte_eth_fdir_filter *fdir_filter) { - rte_errno = ENOTSUP; - return -rte_errno; + struct priv *priv = dev->data->dev_private; + struct rte_flow *flow; + struct mlx5_fdir fdir_flow = { + .attr.group = 0, + }; + int ret; + + ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow); + if (ret) + return -rte_errno; + flow = flow_fdir_filter_lookup(dev, &fdir_flow); + if (!flow) { + rte_errno = ENOENT; + return -rte_errno; + } + flow_list_destroy(dev, &priv->flows, flow); + DRV_LOG(DEBUG, "port %u deleted FDIR flow %p", + dev->data->port_id, (void *)flow); + return 0; } /** @@ -2689,15 +2788,15 @@ mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused, * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx5_fdir_filter_update(struct rte_eth_dev *dev, +flow_fdir_filter_update(struct rte_eth_dev *dev, const struct rte_eth_fdir_filter *fdir_filter) { int ret; - ret = mlx5_fdir_filter_delete(dev, fdir_filter); + ret = flow_fdir_filter_delete(dev, fdir_filter); if (ret) return ret; - return mlx5_fdir_filter_add(dev, fdir_filter); + return flow_fdir_filter_add(dev, fdir_filter); } /** @@ -2707,7 +2806,7 @@ mlx5_fdir_filter_update(struct rte_eth_dev *dev, * Pointer to Ethernet device. */ static void -mlx5_fdir_filter_flush(struct rte_eth_dev *dev) +flow_fdir_filter_flush(struct rte_eth_dev *dev) { struct priv *priv = dev->data->dev_private; @@ -2723,7 +2822,7 @@ mlx5_fdir_filter_flush(struct rte_eth_dev *dev) * Resulting flow director information. */ static void -mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) +flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) { struct rte_eth_fdir_masks *mask = &dev->data->dev_conf.fdir_conf.mask; @@ -2753,7 +2852,7 @@ mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, +flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg) { enum rte_fdir_mode fdir_mode = @@ -2770,16 +2869,16 @@ mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, } switch (filter_op) { case RTE_ETH_FILTER_ADD: - return mlx5_fdir_filter_add(dev, arg); + return flow_fdir_filter_add(dev, arg); case RTE_ETH_FILTER_UPDATE: - return mlx5_fdir_filter_update(dev, arg); + return flow_fdir_filter_update(dev, arg); case RTE_ETH_FILTER_DELETE: - return mlx5_fdir_filter_delete(dev, arg); + return flow_fdir_filter_delete(dev, arg); case RTE_ETH_FILTER_FLUSH: - mlx5_fdir_filter_flush(dev); + flow_fdir_filter_flush(dev); break; case RTE_ETH_FILTER_INFO: - mlx5_fdir_info_get(dev, arg); + flow_fdir_info_get(dev, arg); break; default: DRV_LOG(DEBUG, "port %u unknown operation %u", @@ -2820,7 +2919,7 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, *(const void **)arg = &mlx5_flow_ops; return 0; case RTE_ETH_FILTER_FDIR: - return mlx5_fdir_ctrl_func(dev, filter_op, arg); + return flow_fdir_ctrl_func(dev, filter_op, arg); default: DRV_LOG(ERR, "port %u filter type (%d) not supported", dev->data->port_id, filter_type); diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 61299d66..51ab47fe 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -92,10 +92,24 @@ #define MLX5_FLOW_ACTION_DEC_TTL (1u << 19) #define MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20) #define MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21) +#define MLX5_FLOW_ACTION_VXLAN_ENCAP (1u << 22) +#define MLX5_FLOW_ACTION_VXLAN_DECAP (1u << 23) +#define MLX5_FLOW_ACTION_NVGRE_ENCAP (1u << 24) +#define MLX5_FLOW_ACTION_NVGRE_DECAP (1u << 25) +#define MLX5_FLOW_ACTION_RAW_ENCAP (1u << 26) +#define MLX5_FLOW_ACTION_RAW_DECAP (1u << 27) #define MLX5_FLOW_FATE_ACTIONS \ (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS) +#define MLX5_FLOW_ENCAP_ACTIONS (MLX5_FLOW_ACTION_VXLAN_ENCAP | \ + MLX5_FLOW_ACTION_NVGRE_ENCAP | \ + MLX5_FLOW_ACTION_RAW_ENCAP) + +#define MLX5_FLOW_DECAP_ACTIONS (MLX5_FLOW_ACTION_VXLAN_DECAP | \ + MLX5_FLOW_ACTION_NVGRE_DECAP | \ + MLX5_FLOW_ACTION_RAW_DECAP) + #ifndef IPPROTO_MPLS #define IPPROTO_MPLS 137 #endif @@ -156,6 +170,7 @@ struct mlx5_flow_dv_match_params { }; #define MLX5_DV_MAX_NUMBER_OF_ACTIONS 8 +#define MLX5_ENCAP_MAX_LEN 132 /* Matcher structure. */ struct mlx5_flow_dv_matcher { @@ -169,6 +184,19 @@ struct mlx5_flow_dv_matcher { struct mlx5_flow_dv_match_params mask; /**< Matcher mask. */ }; +/* Encap/decap resource structure. */ +struct mlx5_flow_dv_encap_decap_resource { + LIST_ENTRY(mlx5_flow_dv_encap_decap_resource) next; + /* Pointer to next element. */ + rte_atomic32_t refcnt; /**< Reference counter. */ + struct ibv_flow_action *verbs_action; + /**< Verbs encap/decap action object. */ + uint8_t buf[MLX5_ENCAP_MAX_LEN]; + size_t size; + uint8_t reformat_type; + uint8_t ft_type; +}; + /* DV flows structure. */ struct mlx5_flow_dv { uint64_t hash_fields; /**< Fields that participate in the hash. */ @@ -177,6 +205,8 @@ struct mlx5_flow_dv { struct mlx5_flow_dv_matcher *matcher; /**< Cache to matcher. */ struct mlx5_flow_dv_match_params value; /**< Holds the value that the packet is compared to. */ + struct mlx5_flow_dv_encap_decap_resource *encap_decap; + /**< Pointer to encap/decap resource in cache. */ struct ibv_flow *flow; /**< Installed flow. */ #ifdef HAVE_IBV_FLOW_DV_SUPPORT struct mlx5dv_flow_action_attr actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS]; @@ -189,6 +219,15 @@ struct mlx5_flow_dv { struct mlx5_flow_tcf { struct nlmsghdr *nlh; struct tcmsg *tcm; + union { /**< Tunnel encap/decap descriptor. */ + struct flow_tcf_tunnel_hdr *tunnel; + struct flow_tcf_vxlan_decap *vxlan_decap; + struct flow_tcf_vxlan_encap *vxlan_encap; + }; + uint32_t applied:1; /**< Whether rule is currently applied. */ +#ifndef NDEBUG + uint32_t nlsize; /**< Size of NL message buffer for debug check. */ +#endif }; /* Verbs specification header. */ @@ -253,7 +292,9 @@ struct rte_flow { /**< Device flows that are part of the flow. */ uint64_t actions; /**< Bit-fields of detected actions, see MLX5_FLOW_ACTION_*. */ + struct mlx5_fdir *fdir; /**< Pointer to associated FDIR if any. */ }; + typedef int (*mlx5_flow_validate_t)(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, const struct rte_flow_item items[], @@ -261,8 +302,7 @@ typedef int (*mlx5_flow_validate_t)(struct rte_eth_dev *dev, struct rte_flow_error *error); typedef struct mlx5_flow *(*mlx5_flow_prepare_t) (const struct rte_flow_attr *attr, const struct rte_flow_item items[], - const struct rte_flow_action actions[], uint64_t *item_flags, - uint64_t *action_flags, struct rte_flow_error *error); + const struct rte_flow_action actions[], struct rte_flow_error *error); typedef int (*mlx5_flow_translate_t)(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, const struct rte_flow_attr *attr, @@ -336,7 +376,7 @@ int mlx5_flow_validate_item_gre(const struct rte_flow_item *item, uint8_t target_protocol, struct rte_flow_error *error); int mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, - int64_t item_flags, + uint64_t item_flags, struct rte_flow_error *error); int mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, uint64_t item_flags, @@ -355,7 +395,7 @@ int mlx5_flow_validate_item_udp(const struct rte_flow_item *item, uint8_t target_protocol, struct rte_flow_error *error); int mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, - int64_t item_flags, + uint64_t item_flags, struct rte_flow_error *error); int mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, uint64_t item_flags, diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 8f729f44..79096153 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -25,6 +25,7 @@ #include <rte_flow_driver.h> #include <rte_malloc.h> #include <rte_ip.h> +#include <rte_gre.h> #include "mlx5.h" #include "mlx5_defs.h" @@ -96,6 +97,613 @@ flow_dv_validate_item_meta(struct rte_eth_dev *dev, } /** + * Validate the L2 encap action. + * + * @param[in] action_flags + * Holds the actions detected until now. + * @param[in] action + * Pointer to the encap action. + * @param[in] attr + * Pointer to flow attributes + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_dv_validate_action_l2_encap(uint64_t action_flags, + const struct rte_flow_action *action, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) +{ + if (!(action->conf)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "configuration cannot be null"); + if (action_flags & MLX5_FLOW_ACTION_DROP) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "can't drop and encap in same flow"); + if (action_flags & (MLX5_FLOW_ENCAP_ACTIONS | MLX5_FLOW_DECAP_ACTIONS)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "can only have a single encap or" + " decap action in a flow"); + if (attr->ingress) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + NULL, + "encap action not supported for " + "ingress"); + return 0; +} + +/** + * Validate the L2 decap action. + * + * @param[in] action_flags + * Holds the actions detected until now. + * @param[in] attr + * Pointer to flow attributes + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_dv_validate_action_l2_decap(uint64_t action_flags, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) +{ + if (action_flags & MLX5_FLOW_ACTION_DROP) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "can't drop and decap in same flow"); + if (action_flags & (MLX5_FLOW_ENCAP_ACTIONS | MLX5_FLOW_DECAP_ACTIONS)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "can only have a single encap or" + " decap action in a flow"); + if (attr->egress) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + NULL, + "decap action not supported for " + "egress"); + return 0; +} + +/** + * Validate the raw encap action. + * + * @param[in] action_flags + * Holds the actions detected until now. + * @param[in] action + * Pointer to the encap action. + * @param[in] attr + * Pointer to flow attributes + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_dv_validate_action_raw_encap(uint64_t action_flags, + const struct rte_flow_action *action, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) +{ + if (!(action->conf)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "configuration cannot be null"); + if (action_flags & MLX5_FLOW_ACTION_DROP) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "can't drop and encap in same flow"); + if (action_flags & MLX5_FLOW_ENCAP_ACTIONS) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "can only have a single encap" + " action in a flow"); + /* encap without preceding decap is not supported for ingress */ + if (attr->ingress && !(action_flags & MLX5_FLOW_ACTION_RAW_DECAP)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + NULL, + "encap action not supported for " + "ingress"); + return 0; +} + +/** + * Validate the raw decap action. + * + * @param[in] action_flags + * Holds the actions detected until now. + * @param[in] action + * Pointer to the encap action. + * @param[in] attr + * Pointer to flow attributes + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_dv_validate_action_raw_decap(uint64_t action_flags, + const struct rte_flow_action *action, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) +{ + if (action_flags & MLX5_FLOW_ACTION_DROP) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "can't drop and decap in same flow"); + if (action_flags & MLX5_FLOW_ENCAP_ACTIONS) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "can't have encap action before" + " decap action"); + if (action_flags & MLX5_FLOW_DECAP_ACTIONS) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "can only have a single decap" + " action in a flow"); + /* decap action is valid on egress only if it is followed by encap */ + if (attr->egress) { + for (; action->type != RTE_FLOW_ACTION_TYPE_END && + action->type != RTE_FLOW_ACTION_TYPE_RAW_ENCAP; + action++) { + } + if (action->type != RTE_FLOW_ACTION_TYPE_RAW_ENCAP) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + NULL, "decap action not supported" + " for egress"); + } + return 0; +} + + +/** + * Find existing encap/decap resource or create and register a new one. + * + * @param dev[in, out] + * Pointer to rte_eth_dev structure. + * @param[in, out] resource + * Pointer to encap/decap resource. + * @parm[in, out] dev_flow + * Pointer to the dev_flow. + * @param[out] error + * pointer to error structure. + * + * @return + * 0 on success otherwise -errno and errno is set. + */ +static int +flow_dv_encap_decap_resource_register + (struct rte_eth_dev *dev, + struct mlx5_flow_dv_encap_decap_resource *resource, + struct mlx5_flow *dev_flow, + struct rte_flow_error *error) +{ + struct priv *priv = dev->data->dev_private; + struct mlx5_flow_dv_encap_decap_resource *cache_resource; + + /* Lookup a matching resource from cache. */ + LIST_FOREACH(cache_resource, &priv->encaps_decaps, next) { + if (resource->reformat_type == cache_resource->reformat_type && + resource->ft_type == cache_resource->ft_type && + resource->size == cache_resource->size && + !memcmp((const void *)resource->buf, + (const void *)cache_resource->buf, + resource->size)) { + DRV_LOG(DEBUG, "encap/decap resource %p: refcnt %d++", + (void *)cache_resource, + rte_atomic32_read(&cache_resource->refcnt)); + rte_atomic32_inc(&cache_resource->refcnt); + dev_flow->dv.encap_decap = cache_resource; + return 0; + } + } + /* Register new encap/decap resource. */ + cache_resource = rte_calloc(__func__, 1, sizeof(*cache_resource), 0); + if (!cache_resource) + return rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "cannot allocate resource memory"); + *cache_resource = *resource; + cache_resource->verbs_action = + mlx5_glue->dv_create_flow_action_packet_reformat + (priv->ctx, cache_resource->size, + (cache_resource->size ? cache_resource->buf : NULL), + cache_resource->reformat_type, + cache_resource->ft_type); + if (!cache_resource->verbs_action) { + rte_free(cache_resource); + return rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "cannot create action"); + } + rte_atomic32_init(&cache_resource->refcnt); + rte_atomic32_inc(&cache_resource->refcnt); + LIST_INSERT_HEAD(&priv->encaps_decaps, cache_resource, next); + dev_flow->dv.encap_decap = cache_resource; + DRV_LOG(DEBUG, "new encap/decap resource %p: refcnt %d++", + (void *)cache_resource, + rte_atomic32_read(&cache_resource->refcnt)); + return 0; +} + +/** + * Get the size of specific rte_flow_item_type + * + * @param[in] item_type + * Tested rte_flow_item_type. + * + * @return + * sizeof struct item_type, 0 if void or irrelevant. + */ +static size_t +flow_dv_get_item_len(const enum rte_flow_item_type item_type) +{ + size_t retval; + + switch (item_type) { + case RTE_FLOW_ITEM_TYPE_ETH: + retval = sizeof(struct rte_flow_item_eth); + break; + case RTE_FLOW_ITEM_TYPE_VLAN: + retval = sizeof(struct rte_flow_item_vlan); + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + retval = sizeof(struct rte_flow_item_ipv4); + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + retval = sizeof(struct rte_flow_item_ipv6); + break; + case RTE_FLOW_ITEM_TYPE_UDP: + retval = sizeof(struct rte_flow_item_udp); + break; + case RTE_FLOW_ITEM_TYPE_TCP: + retval = sizeof(struct rte_flow_item_tcp); + break; + case RTE_FLOW_ITEM_TYPE_VXLAN: + retval = sizeof(struct rte_flow_item_vxlan); + break; + case RTE_FLOW_ITEM_TYPE_GRE: + retval = sizeof(struct rte_flow_item_gre); + break; + case RTE_FLOW_ITEM_TYPE_NVGRE: + retval = sizeof(struct rte_flow_item_nvgre); + break; + case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: + retval = sizeof(struct rte_flow_item_vxlan_gpe); + break; + case RTE_FLOW_ITEM_TYPE_MPLS: + retval = sizeof(struct rte_flow_item_mpls); + break; + case RTE_FLOW_ITEM_TYPE_VOID: /* Fall through. */ + default: + retval = 0; + break; + } + return retval; +} + +#define MLX5_ENCAP_IPV4_VERSION 0x40 +#define MLX5_ENCAP_IPV4_IHL_MIN 0x05 +#define MLX5_ENCAP_IPV4_TTL_DEF 0x40 +#define MLX5_ENCAP_IPV6_VTC_FLOW 0x60000000 +#define MLX5_ENCAP_IPV6_HOP_LIMIT 0xff +#define MLX5_ENCAP_VXLAN_FLAGS 0x08000000 +#define MLX5_ENCAP_VXLAN_GPE_FLAGS 0x04 + +/** + * Convert the encap action data from list of rte_flow_item to raw buffer + * + * @param[in] items + * Pointer to rte_flow_item objects list. + * @param[out] buf + * Pointer to the output buffer. + * @param[out] size + * Pointer to the output buffer size. + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_dv_convert_encap_data(const struct rte_flow_item *items, uint8_t *buf, + size_t *size, struct rte_flow_error *error) +{ + struct ether_hdr *eth = NULL; + struct vlan_hdr *vlan = NULL; + struct ipv4_hdr *ipv4 = NULL; + struct ipv6_hdr *ipv6 = NULL; + struct udp_hdr *udp = NULL; + struct vxlan_hdr *vxlan = NULL; + struct vxlan_gpe_hdr *vxlan_gpe = NULL; + struct gre_hdr *gre = NULL; + size_t len; + size_t temp_size = 0; + + if (!items) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "invalid empty data"); + for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { + len = flow_dv_get_item_len(items->type); + if (len + temp_size > MLX5_ENCAP_MAX_LEN) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + (void *)items->type, + "items total size is too big" + " for encap action"); + rte_memcpy((void *)&buf[temp_size], items->spec, len); + switch (items->type) { + case RTE_FLOW_ITEM_TYPE_ETH: + eth = (struct ether_hdr *)&buf[temp_size]; + break; + case RTE_FLOW_ITEM_TYPE_VLAN: + vlan = (struct vlan_hdr *)&buf[temp_size]; + if (!eth) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + (void *)items->type, + "eth header not found"); + if (!eth->ether_type) + eth->ether_type = RTE_BE16(ETHER_TYPE_VLAN); + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + ipv4 = (struct ipv4_hdr *)&buf[temp_size]; + if (!vlan && !eth) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + (void *)items->type, + "neither eth nor vlan" + " header found"); + if (vlan && !vlan->eth_proto) + vlan->eth_proto = RTE_BE16(ETHER_TYPE_IPv4); + else if (eth && !eth->ether_type) + eth->ether_type = RTE_BE16(ETHER_TYPE_IPv4); + if (!ipv4->version_ihl) + ipv4->version_ihl = MLX5_ENCAP_IPV4_VERSION | + MLX5_ENCAP_IPV4_IHL_MIN; + if (!ipv4->time_to_live) + ipv4->time_to_live = MLX5_ENCAP_IPV4_TTL_DEF; + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + ipv6 = (struct ipv6_hdr *)&buf[temp_size]; + if (!vlan && !eth) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + (void *)items->type, + "neither eth nor vlan" + " header found"); + if (vlan && !vlan->eth_proto) + vlan->eth_proto = RTE_BE16(ETHER_TYPE_IPv6); + else if (eth && !eth->ether_type) + eth->ether_type = RTE_BE16(ETHER_TYPE_IPv6); + if (!ipv6->vtc_flow) + ipv6->vtc_flow = + RTE_BE32(MLX5_ENCAP_IPV6_VTC_FLOW); + if (!ipv6->hop_limits) + ipv6->hop_limits = MLX5_ENCAP_IPV6_HOP_LIMIT; + break; + case RTE_FLOW_ITEM_TYPE_UDP: + udp = (struct udp_hdr *)&buf[temp_size]; + if (!ipv4 && !ipv6) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + (void *)items->type, + "ip header not found"); + if (ipv4 && !ipv4->next_proto_id) + ipv4->next_proto_id = IPPROTO_UDP; + else if (ipv6 && !ipv6->proto) + ipv6->proto = IPPROTO_UDP; + break; + case RTE_FLOW_ITEM_TYPE_VXLAN: + vxlan = (struct vxlan_hdr *)&buf[temp_size]; + if (!udp) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + (void *)items->type, + "udp header not found"); + if (!udp->dst_port) + udp->dst_port = RTE_BE16(MLX5_UDP_PORT_VXLAN); + if (!vxlan->vx_flags) + vxlan->vx_flags = + RTE_BE32(MLX5_ENCAP_VXLAN_FLAGS); + break; + case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: + vxlan_gpe = (struct vxlan_gpe_hdr *)&buf[temp_size]; + if (!udp) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + (void *)items->type, + "udp header not found"); + if (!vxlan_gpe->proto) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + (void *)items->type, + "next protocol not found"); + if (!udp->dst_port) + udp->dst_port = + RTE_BE16(MLX5_UDP_PORT_VXLAN_GPE); + if (!vxlan_gpe->vx_flags) + vxlan_gpe->vx_flags = + MLX5_ENCAP_VXLAN_GPE_FLAGS; + break; + case RTE_FLOW_ITEM_TYPE_GRE: + case RTE_FLOW_ITEM_TYPE_NVGRE: + gre = (struct gre_hdr *)&buf[temp_size]; + if (!gre->proto) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + (void *)items->type, + "next protocol not found"); + if (!ipv4 && !ipv6) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + (void *)items->type, + "ip header not found"); + if (ipv4 && !ipv4->next_proto_id) + ipv4->next_proto_id = IPPROTO_GRE; + else if (ipv6 && !ipv6->proto) + ipv6->proto = IPPROTO_GRE; + break; + case RTE_FLOW_ITEM_TYPE_VOID: + break; + default: + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + (void *)items->type, + "unsupported item type"); + break; + } + temp_size += len; + } + *size = temp_size; + return 0; +} + +/** + * Convert L2 encap action to DV specification. + * + * @param[in] dev + * Pointer to rte_eth_dev structure. + * @param[in] action + * Pointer to action structure. + * @param[in, out] dev_flow + * Pointer to the mlx5_flow. + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_dv_create_action_l2_encap(struct rte_eth_dev *dev, + const struct rte_flow_action *action, + struct mlx5_flow *dev_flow, + struct rte_flow_error *error) +{ + const struct rte_flow_item *encap_data; + const struct rte_flow_action_raw_encap *raw_encap_data; + struct mlx5_flow_dv_encap_decap_resource res = { + .reformat_type = + MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL, + .ft_type = MLX5DV_FLOW_TABLE_TYPE_NIC_TX, + }; + + if (action->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) { + raw_encap_data = + (const struct rte_flow_action_raw_encap *)action->conf; + res.size = raw_encap_data->size; + memcpy(res.buf, raw_encap_data->data, res.size); + } else { + if (action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) + encap_data = + ((const struct rte_flow_action_vxlan_encap *) + action->conf)->definition; + else + encap_data = + ((const struct rte_flow_action_nvgre_encap *) + action->conf)->definition; + if (flow_dv_convert_encap_data(encap_data, res.buf, + &res.size, error)) + return -rte_errno; + } + if (flow_dv_encap_decap_resource_register(dev, &res, dev_flow, error)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "can't create L2 encap action"); + return 0; +} + +/** + * Convert L2 decap action to DV specification. + * + * @param[in] dev + * Pointer to rte_eth_dev structure. + * @param[in, out] dev_flow + * Pointer to the mlx5_flow. + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_dv_create_action_l2_decap(struct rte_eth_dev *dev, + struct mlx5_flow *dev_flow, + struct rte_flow_error *error) +{ + struct mlx5_flow_dv_encap_decap_resource res = { + .size = 0, + .reformat_type = + MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2, + .ft_type = MLX5DV_FLOW_TABLE_TYPE_NIC_RX, + }; + + if (flow_dv_encap_decap_resource_register(dev, &res, dev_flow, error)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "can't create L2 decap action"); + return 0; +} + +/** + * Convert raw decap/encap (L3 tunnel) action to DV specification. + * + * @param[in] dev + * Pointer to rte_eth_dev structure. + * @param[in] action + * Pointer to action structure. + * @param[in, out] dev_flow + * Pointer to the mlx5_flow. + * @param[in] attr + * Pointer to the flow attributes. + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_dv_create_action_raw_encap(struct rte_eth_dev *dev, + const struct rte_flow_action *action, + struct mlx5_flow *dev_flow, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) +{ + const struct rte_flow_action_raw_encap *encap_data; + struct mlx5_flow_dv_encap_decap_resource res; + + encap_data = (const struct rte_flow_action_raw_encap *)action->conf; + res.size = encap_data->size; + memcpy(res.buf, encap_data->data, res.size); + res.reformat_type = attr->egress ? + MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL : + MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + res.ft_type = attr->egress ? MLX5DV_FLOW_TABLE_TYPE_NIC_TX : + MLX5DV_FLOW_TABLE_TYPE_NIC_RX; + if (flow_dv_encap_decap_resource_register(dev, &res, dev_flow, error)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "can't create encap action"); + return 0; +} + +/** * Verify the @p attributes will be correctly understood by the NIC and store * them in the @p flow if everything is correct. * @@ -339,6 +947,49 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, action_flags |= MLX5_FLOW_ACTION_COUNT; ++actions_n; break; + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: + case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: + ret = flow_dv_validate_action_l2_encap(action_flags, + actions, attr, + error); + if (ret < 0) + return ret; + action_flags |= actions->type == + RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP ? + MLX5_FLOW_ACTION_VXLAN_ENCAP : + MLX5_FLOW_ACTION_NVGRE_ENCAP; + ++actions_n; + break; + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: + case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: + ret = flow_dv_validate_action_l2_decap(action_flags, + attr, error); + if (ret < 0) + return ret; + action_flags |= actions->type == + RTE_FLOW_ACTION_TYPE_VXLAN_DECAP ? + MLX5_FLOW_ACTION_VXLAN_DECAP : + MLX5_FLOW_ACTION_NVGRE_DECAP; + ++actions_n; + break; + case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: + ret = flow_dv_validate_action_raw_encap(action_flags, + actions, attr, + error); + if (ret < 0) + return ret; + action_flags |= MLX5_FLOW_ACTION_RAW_ENCAP; + ++actions_n; + break; + case RTE_FLOW_ACTION_TYPE_RAW_DECAP: + ret = flow_dv_validate_action_raw_decap(action_flags, + actions, attr, + error); + if (ret < 0) + return ret; + action_flags |= MLX5_FLOW_ACTION_RAW_DECAP; + ++actions_n; + break; default: return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, @@ -363,10 +1014,6 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, * Pointer to the list of items. * @param[in] actions * Pointer to the list of actions. - * @param[out] item_flags - * Pointer to bit mask of all items detected. - * @param[out] action_flags - * Pointer to bit mask of all actions detected. * @param[out] error * Pointer to the error structure. * @@ -378,8 +1025,6 @@ static struct mlx5_flow * flow_dv_prepare(const struct rte_flow_attr *attr __rte_unused, const struct rte_flow_item items[] __rte_unused, const struct rte_flow_action actions[] __rte_unused, - uint64_t *item_flags __rte_unused, - uint64_t *action_flags __rte_unused, struct rte_flow_error *error) { uint32_t size = sizeof(struct mlx5_flow); @@ -951,161 +1596,6 @@ flow_dv_translate_item_meta(void *matcher, void *key, } } -/** - * Update the matcher and the value based the selected item. - * - * @param[in, out] matcher - * Flow matcher. - * @param[in, out] key - * Flow matcher value. - * @param[in] item - * Flow pattern to translate. - * @param[in, out] dev_flow - * Pointer to the mlx5_flow. - * @param[in] inner - * Item is inner pattern. - */ -static void -flow_dv_create_item(void *matcher, void *key, - const struct rte_flow_item *item, - struct mlx5_flow *dev_flow, - int inner) -{ - struct mlx5_flow_dv_matcher *tmatcher = matcher; - - switch (item->type) { - case RTE_FLOW_ITEM_TYPE_ETH: - flow_dv_translate_item_eth(tmatcher->mask.buf, key, item, - inner); - tmatcher->priority = MLX5_PRIORITY_MAP_L2; - break; - case RTE_FLOW_ITEM_TYPE_VLAN: - flow_dv_translate_item_vlan(tmatcher->mask.buf, key, item, - inner); - break; - case RTE_FLOW_ITEM_TYPE_IPV4: - flow_dv_translate_item_ipv4(tmatcher->mask.buf, key, item, - inner); - tmatcher->priority = MLX5_PRIORITY_MAP_L3; - dev_flow->dv.hash_fields |= - mlx5_flow_hashfields_adjust(dev_flow, inner, - MLX5_IPV4_LAYER_TYPES, - MLX5_IPV4_IBV_RX_HASH); - break; - case RTE_FLOW_ITEM_TYPE_IPV6: - flow_dv_translate_item_ipv6(tmatcher->mask.buf, key, item, - inner); - tmatcher->priority = MLX5_PRIORITY_MAP_L3; - dev_flow->dv.hash_fields |= - mlx5_flow_hashfields_adjust(dev_flow, inner, - MLX5_IPV6_LAYER_TYPES, - MLX5_IPV6_IBV_RX_HASH); - break; - case RTE_FLOW_ITEM_TYPE_TCP: - flow_dv_translate_item_tcp(tmatcher->mask.buf, key, item, - inner); - tmatcher->priority = MLX5_PRIORITY_MAP_L4; - dev_flow->dv.hash_fields |= - mlx5_flow_hashfields_adjust(dev_flow, inner, - ETH_RSS_TCP, - (IBV_RX_HASH_SRC_PORT_TCP | - IBV_RX_HASH_DST_PORT_TCP)); - break; - case RTE_FLOW_ITEM_TYPE_UDP: - flow_dv_translate_item_udp(tmatcher->mask.buf, key, item, - inner); - tmatcher->priority = MLX5_PRIORITY_MAP_L4; - dev_flow->verbs.hash_fields |= - mlx5_flow_hashfields_adjust(dev_flow, inner, - ETH_RSS_UDP, - (IBV_RX_HASH_SRC_PORT_UDP | - IBV_RX_HASH_DST_PORT_UDP)); - break; - case RTE_FLOW_ITEM_TYPE_GRE: - flow_dv_translate_item_gre(tmatcher->mask.buf, key, item, - inner); - break; - case RTE_FLOW_ITEM_TYPE_NVGRE: - flow_dv_translate_item_nvgre(tmatcher->mask.buf, key, item, - inner); - break; - case RTE_FLOW_ITEM_TYPE_VXLAN: - case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: - flow_dv_translate_item_vxlan(tmatcher->mask.buf, key, item, - inner); - break; - case RTE_FLOW_ITEM_TYPE_META: - flow_dv_translate_item_meta(tmatcher->mask.buf, key, item); - break; - default: - break; - } -} - -/** - * Store the requested actions in an array. - * - * @param[in] action - * Flow action to translate. - * @param[in, out] dev_flow - * Pointer to the mlx5_flow. - */ -static void -flow_dv_create_action(const struct rte_flow_action *action, - struct mlx5_flow *dev_flow) -{ - const struct rte_flow_action_queue *queue; - const struct rte_flow_action_rss *rss; - int actions_n = dev_flow->dv.actions_n; - struct rte_flow *flow = dev_flow->flow; - - switch (action->type) { - case RTE_FLOW_ACTION_TYPE_VOID: - break; - case RTE_FLOW_ACTION_TYPE_FLAG: - dev_flow->dv.actions[actions_n].type = MLX5DV_FLOW_ACTION_TAG; - dev_flow->dv.actions[actions_n].tag_value = - mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT); - actions_n++; - flow->actions |= MLX5_FLOW_ACTION_FLAG; - break; - case RTE_FLOW_ACTION_TYPE_MARK: - dev_flow->dv.actions[actions_n].type = MLX5DV_FLOW_ACTION_TAG; - dev_flow->dv.actions[actions_n].tag_value = - mlx5_flow_mark_set - (((const struct rte_flow_action_mark *) - (action->conf))->id); - flow->actions |= MLX5_FLOW_ACTION_MARK; - actions_n++; - break; - case RTE_FLOW_ACTION_TYPE_DROP: - dev_flow->dv.actions[actions_n].type = MLX5DV_FLOW_ACTION_DROP; - flow->actions |= MLX5_FLOW_ACTION_DROP; - break; - case RTE_FLOW_ACTION_TYPE_QUEUE: - queue = action->conf; - flow->rss.queue_num = 1; - (*flow->queue)[0] = queue->index; - flow->actions |= MLX5_FLOW_ACTION_QUEUE; - break; - case RTE_FLOW_ACTION_TYPE_RSS: - rss = action->conf; - if (flow->queue) - memcpy((*flow->queue), rss->queue, - rss->queue_num * sizeof(uint16_t)); - flow->rss.queue_num = rss->queue_num; - memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN); - flow->rss.types = rss->types; - flow->rss.level = rss->level; - /* Added to array only in apply since we need the QP */ - flow->actions |= MLX5_FLOW_ACTION_RSS; - break; - default: - break; - } - dev_flow->dv.actions_n = actions_n; -} - static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 }; #define HEADER_IS_ZERO(match_criteria, headers) \ @@ -1203,10 +1693,12 @@ flow_dv_matcher_register(struct rte_eth_dev *dev, dv_attr.flags |= IBV_FLOW_ATTR_FLAGS_EGRESS; cache_matcher->matcher_object = mlx5_glue->dv_create_flow_matcher(priv->ctx, &dv_attr); - if (!cache_matcher->matcher_object) + if (!cache_matcher->matcher_object) { + rte_free(cache_matcher); return rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "cannot create matcher"); + } rte_atomic32_inc(&cache_matcher->refcnt); LIST_INSERT_HEAD(&priv->matchers, cache_matcher, next); dev_flow->dv.matcher = cache_matcher; @@ -1217,7 +1709,6 @@ flow_dv_matcher_register(struct rte_eth_dev *dev, return 0; } - /** * Fill the flow with DV spec. * @@ -1242,37 +1733,264 @@ flow_dv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, const struct rte_flow_attr *attr, const struct rte_flow_item items[], - const struct rte_flow_action actions[] __rte_unused, + const struct rte_flow_action actions[], struct rte_flow_error *error) { struct priv *priv = dev->data->dev_private; + struct rte_flow *flow = dev_flow->flow; + uint64_t item_flags = 0; + uint64_t action_flags = 0; uint64_t priority = attr->priority; struct mlx5_flow_dv_matcher matcher = { .mask = { .size = sizeof(matcher.mask.buf), }, }; - void *match_value = dev_flow->dv.value.buf; - int tunnel = 0; + int actions_n = 0; if (priority == MLX5_FLOW_PRIO_RSVD) priority = priv->config.flow_prio - 1; for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { - tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL); - flow_dv_create_item(&matcher, match_value, items, dev_flow, - tunnel); + int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + void *match_mask = matcher.mask.buf; + void *match_value = dev_flow->dv.value.buf; + + switch (items->type) { + case RTE_FLOW_ITEM_TYPE_ETH: + flow_dv_translate_item_eth(match_mask, match_value, + items, tunnel); + matcher.priority = MLX5_PRIORITY_MAP_L2; + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 : + MLX5_FLOW_LAYER_OUTER_L2; + break; + case RTE_FLOW_ITEM_TYPE_VLAN: + flow_dv_translate_item_vlan(match_mask, match_value, + items, tunnel); + matcher.priority = MLX5_PRIORITY_MAP_L2; + item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 | + MLX5_FLOW_LAYER_INNER_VLAN) : + (MLX5_FLOW_LAYER_OUTER_L2 | + MLX5_FLOW_LAYER_OUTER_VLAN); + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + flow_dv_translate_item_ipv4(match_mask, match_value, + items, tunnel); + matcher.priority = MLX5_PRIORITY_MAP_L3; + dev_flow->dv.hash_fields |= + mlx5_flow_hashfields_adjust + (dev_flow, tunnel, + MLX5_IPV4_LAYER_TYPES, + MLX5_IPV4_IBV_RX_HASH); + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : + MLX5_FLOW_LAYER_OUTER_L3_IPV4; + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + flow_dv_translate_item_ipv6(match_mask, match_value, + items, tunnel); + matcher.priority = MLX5_PRIORITY_MAP_L3; + dev_flow->dv.hash_fields |= + mlx5_flow_hashfields_adjust + (dev_flow, tunnel, + MLX5_IPV6_LAYER_TYPES, + MLX5_IPV6_IBV_RX_HASH); + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : + MLX5_FLOW_LAYER_OUTER_L3_IPV6; + break; + case RTE_FLOW_ITEM_TYPE_TCP: + flow_dv_translate_item_tcp(match_mask, match_value, + items, tunnel); + matcher.priority = MLX5_PRIORITY_MAP_L4; + dev_flow->dv.hash_fields |= + mlx5_flow_hashfields_adjust + (dev_flow, tunnel, ETH_RSS_TCP, + IBV_RX_HASH_SRC_PORT_TCP | + IBV_RX_HASH_DST_PORT_TCP); + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : + MLX5_FLOW_LAYER_OUTER_L4_TCP; + break; + case RTE_FLOW_ITEM_TYPE_UDP: + flow_dv_translate_item_udp(match_mask, match_value, + items, tunnel); + matcher.priority = MLX5_PRIORITY_MAP_L4; + dev_flow->verbs.hash_fields |= + mlx5_flow_hashfields_adjust + (dev_flow, tunnel, ETH_RSS_UDP, + IBV_RX_HASH_SRC_PORT_UDP | + IBV_RX_HASH_DST_PORT_UDP); + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP : + MLX5_FLOW_LAYER_OUTER_L4_UDP; + break; + case RTE_FLOW_ITEM_TYPE_GRE: + flow_dv_translate_item_gre(match_mask, match_value, + items, tunnel); + item_flags |= MLX5_FLOW_LAYER_GRE; + break; + case RTE_FLOW_ITEM_TYPE_NVGRE: + flow_dv_translate_item_nvgre(match_mask, match_value, + items, tunnel); + item_flags |= MLX5_FLOW_LAYER_GRE; + break; + case RTE_FLOW_ITEM_TYPE_VXLAN: + flow_dv_translate_item_vxlan(match_mask, match_value, + items, tunnel); + item_flags |= MLX5_FLOW_LAYER_VXLAN; + break; + case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: + flow_dv_translate_item_vxlan(match_mask, match_value, + items, tunnel); + item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE; + break; + case RTE_FLOW_ITEM_TYPE_META: + flow_dv_translate_item_meta(match_mask, match_value, + items); + item_flags |= MLX5_FLOW_ITEM_METADATA; + break; + default: + break; + } } + dev_flow->layers = item_flags; + /* Register matcher. */ matcher.crc = rte_raw_cksum((const void *)matcher.mask.buf, - matcher.mask.size); - if (priority == MLX5_FLOW_PRIO_RSVD) - priority = priv->config.flow_prio - 1; + matcher.mask.size); matcher.priority = mlx5_flow_adjust_priority(dev, priority, matcher.priority); matcher.egress = attr->egress; if (flow_dv_matcher_register(dev, &matcher, dev_flow, error)) return -rte_errno; - for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) - flow_dv_create_action(actions, dev_flow); + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { + const struct rte_flow_action_queue *queue; + const struct rte_flow_action_rss *rss; + const struct rte_flow_action *action = actions; + const uint8_t *rss_key; + + switch (actions->type) { + case RTE_FLOW_ACTION_TYPE_VOID: + break; + case RTE_FLOW_ACTION_TYPE_FLAG: + dev_flow->dv.actions[actions_n].type = + MLX5DV_FLOW_ACTION_TAG; + dev_flow->dv.actions[actions_n].tag_value = + mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT); + actions_n++; + action_flags |= MLX5_FLOW_ACTION_FLAG; + break; + case RTE_FLOW_ACTION_TYPE_MARK: + dev_flow->dv.actions[actions_n].type = + MLX5DV_FLOW_ACTION_TAG; + dev_flow->dv.actions[actions_n].tag_value = + mlx5_flow_mark_set + (((const struct rte_flow_action_mark *) + (actions->conf))->id); + actions_n++; + action_flags |= MLX5_FLOW_ACTION_MARK; + break; + case RTE_FLOW_ACTION_TYPE_DROP: + dev_flow->dv.actions[actions_n].type = + MLX5DV_FLOW_ACTION_DROP; + action_flags |= MLX5_FLOW_ACTION_DROP; + break; + case RTE_FLOW_ACTION_TYPE_QUEUE: + queue = actions->conf; + flow->rss.queue_num = 1; + (*flow->queue)[0] = queue->index; + action_flags |= MLX5_FLOW_ACTION_QUEUE; + break; + case RTE_FLOW_ACTION_TYPE_RSS: + rss = actions->conf; + if (flow->queue) + memcpy((*flow->queue), rss->queue, + rss->queue_num * sizeof(uint16_t)); + flow->rss.queue_num = rss->queue_num; + /* NULL RSS key indicates default RSS key. */ + rss_key = !rss->key ? rss_hash_default_key : rss->key; + memcpy(flow->key, rss_key, MLX5_RSS_HASH_KEY_LEN); + /* RSS type 0 indicates default RSS type ETH_RSS_IP. */ + flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types; + flow->rss.level = rss->level; + action_flags |= MLX5_FLOW_ACTION_RSS; + break; + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: + case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: + if (flow_dv_create_action_l2_encap(dev, actions, + dev_flow, error)) + return -rte_errno; + dev_flow->dv.actions[actions_n].type = + MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION; + dev_flow->dv.actions[actions_n].action = + dev_flow->dv.encap_decap->verbs_action; + actions_n++; + action_flags |= actions->type == + RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP ? + MLX5_FLOW_ACTION_VXLAN_ENCAP : + MLX5_FLOW_ACTION_NVGRE_ENCAP; + break; + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: + case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: + if (flow_dv_create_action_l2_decap(dev, dev_flow, + error)) + return -rte_errno; + dev_flow->dv.actions[actions_n].type = + MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION; + dev_flow->dv.actions[actions_n].action = + dev_flow->dv.encap_decap->verbs_action; + actions_n++; + action_flags |= actions->type == + RTE_FLOW_ACTION_TYPE_VXLAN_DECAP ? + MLX5_FLOW_ACTION_VXLAN_DECAP : + MLX5_FLOW_ACTION_NVGRE_DECAP; + break; + case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: + /* Handle encap with preceding decap. */ + if (action_flags & MLX5_FLOW_ACTION_RAW_DECAP) { + if (flow_dv_create_action_raw_encap + (dev, actions, dev_flow, attr, error)) + return -rte_errno; + dev_flow->dv.actions[actions_n].type = + MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION; + dev_flow->dv.actions[actions_n].action = + dev_flow->dv.encap_decap->verbs_action; + } else { + /* Handle encap without preceding decap. */ + if (flow_dv_create_action_l2_encap(dev, actions, + dev_flow, + error)) + return -rte_errno; + dev_flow->dv.actions[actions_n].type = + MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION; + dev_flow->dv.actions[actions_n].action = + dev_flow->dv.encap_decap->verbs_action; + } + actions_n++; + action_flags |= MLX5_FLOW_ACTION_RAW_ENCAP; + break; + case RTE_FLOW_ACTION_TYPE_RAW_DECAP: + /* Check if this decap is followed by encap. */ + for (; action->type != RTE_FLOW_ACTION_TYPE_END && + action->type != RTE_FLOW_ACTION_TYPE_RAW_ENCAP; + action++) { + } + /* Handle decap only if it isn't followed by encap. */ + if (action->type != RTE_FLOW_ACTION_TYPE_RAW_ENCAP) { + if (flow_dv_create_action_l2_decap(dev, + dev_flow, + error)) + return -rte_errno; + dev_flow->dv.actions[actions_n].type = + MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION; + dev_flow->dv.actions[actions_n].action = + dev_flow->dv.encap_decap->verbs_action; + actions_n++; + } + /* If decap is followed by encap, handle it at encap. */ + action_flags |= MLX5_FLOW_ACTION_RAW_DECAP; + break; + default: + break; + } + } + dev_flow->dv.actions_n = actions_n; + flow->actions = action_flags; return 0; } @@ -1403,6 +2121,37 @@ flow_dv_matcher_release(struct rte_eth_dev *dev, } /** + * Release an encap/decap resource. + * + * @param flow + * Pointer to mlx5_flow. + * + * @return + * 1 while a reference on it exists, 0 when freed. + */ +static int +flow_dv_encap_decap_resource_release(struct mlx5_flow *flow) +{ + struct mlx5_flow_dv_encap_decap_resource *cache_resource = + flow->dv.encap_decap; + + assert(cache_resource->verbs_action); + DRV_LOG(DEBUG, "encap/decap resource %p: refcnt %d--", + (void *)cache_resource, + rte_atomic32_read(&cache_resource->refcnt)); + if (rte_atomic32_dec_and_test(&cache_resource->refcnt)) { + claim_zero(mlx5_glue->destroy_flow_action + (cache_resource->verbs_action)); + LIST_REMOVE(cache_resource, next); + rte_free(cache_resource); + DRV_LOG(DEBUG, "encap/decap resource %p: removed", + (void *)cache_resource); + return 0; + } + return 1; +} + +/** * Remove the flow from the NIC but keeps it in memory. * * @param[in] dev @@ -1457,6 +2206,8 @@ flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) LIST_REMOVE(dev_flow, next); if (dev_flow->dv.matcher) flow_dv_matcher_release(dev, dev_flow); + if (dev_flow->dv.encap_decap) + flow_dv_encap_decap_resource_release(dev_flow); rte_free(dev_flow); } } diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c index 719fb106..fb817b23 100644 --- a/drivers/net/mlx5/mlx5_flow_tcf.c +++ b/drivers/net/mlx5/mlx5_flow_tcf.c @@ -113,6 +113,39 @@ struct tc_pedit_sel { #endif /* HAVE_TC_ACT_VLAN */ +#ifdef HAVE_TC_ACT_TUNNEL_KEY + +#include <linux/tc_act/tc_tunnel_key.h> + +#ifndef HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT +#define TCA_TUNNEL_KEY_ENC_DST_PORT 9 +#endif + +#ifndef HAVE_TCA_TUNNEL_KEY_NO_CSUM +#define TCA_TUNNEL_KEY_NO_CSUM 10 +#endif + +#else /* HAVE_TC_ACT_TUNNEL_KEY */ + +#define TCA_ACT_TUNNEL_KEY 17 +#define TCA_TUNNEL_KEY_ACT_SET 1 +#define TCA_TUNNEL_KEY_ACT_RELEASE 2 +#define TCA_TUNNEL_KEY_PARMS 2 +#define TCA_TUNNEL_KEY_ENC_IPV4_SRC 3 +#define TCA_TUNNEL_KEY_ENC_IPV4_DST 4 +#define TCA_TUNNEL_KEY_ENC_IPV6_SRC 5 +#define TCA_TUNNEL_KEY_ENC_IPV6_DST 6 +#define TCA_TUNNEL_KEY_ENC_KEY_ID 7 +#define TCA_TUNNEL_KEY_ENC_DST_PORT 9 +#define TCA_TUNNEL_KEY_NO_CSUM 10 + +struct tc_tunnel_key { + tc_gen; + int t_action; +}; + +#endif /* HAVE_TC_ACT_TUNNEL_KEY */ + /* Normally found in linux/netlink.h. */ #ifndef NETLINK_CAP_ACK #define NETLINK_CAP_ACK 10 @@ -211,6 +244,45 @@ struct tc_pedit_sel { #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25 #endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID +#define TCA_FLOWER_KEY_ENC_KEY_ID 26 +#endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC +#define TCA_FLOWER_KEY_ENC_IPV4_SRC 27 +#endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK +#define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28 +#endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST +#define TCA_FLOWER_KEY_ENC_IPV4_DST 29 +#endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK +#define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30 +#endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC +#define TCA_FLOWER_KEY_ENC_IPV6_SRC 31 +#endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK +#define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32 +#endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST +#define TCA_FLOWER_KEY_ENC_IPV6_DST 33 +#endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK +#define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34 +#endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT +#define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43 +#endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK +#define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44 +#endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT +#define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45 +#endif +#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK +#define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46 +#endif #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS #define TCA_FLOWER_KEY_TCP_FLAGS 71 #endif @@ -241,6 +313,28 @@ struct tc_pedit_sel { #define TCA_ACT_MAX_PRIO 32 #endif +/** UDP port range of VXLAN devices created by driver. */ +#define MLX5_VXLAN_PORT_MIN 30000 +#define MLX5_VXLAN_PORT_MAX 60000 +#define MLX5_VXLAN_DEVICE_PFX "vmlx_" + +/** Tunnel action type, used for @p type in header structure. */ +enum flow_tcf_tunact_type { + FLOW_TCF_TUNACT_VXLAN_DECAP, + FLOW_TCF_TUNACT_VXLAN_ENCAP, +}; + +/** Flags used for @p mask in tunnel action encap descriptors. */ +#define FLOW_TCF_ENCAP_ETH_SRC (1u << 0) +#define FLOW_TCF_ENCAP_ETH_DST (1u << 1) +#define FLOW_TCF_ENCAP_IPV4_SRC (1u << 2) +#define FLOW_TCF_ENCAP_IPV4_DST (1u << 3) +#define FLOW_TCF_ENCAP_IPV6_SRC (1u << 4) +#define FLOW_TCF_ENCAP_IPV6_DST (1u << 5) +#define FLOW_TCF_ENCAP_UDP_SRC (1u << 6) +#define FLOW_TCF_ENCAP_UDP_DST (1u << 7) +#define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8) + /** * Structure for holding netlink context. * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE. @@ -254,6 +348,100 @@ struct mlx5_flow_tcf_context { uint8_t *buf; /* Message buffer. */ }; +/** + * Neigh rule structure. The neigh rule is applied via Netlink to + * outer tunnel iface in order to provide destination MAC address + * for the VXLAN encapsultion. The neigh rule is implicitly related + * to the Flow itself and can be shared by multiple Flows. + */ +struct tcf_neigh_rule { + LIST_ENTRY(tcf_neigh_rule) next; + uint32_t refcnt; + struct ether_addr eth; + uint16_t mask; + union { + struct { + rte_be32_t dst; + } ipv4; + struct { + uint8_t dst[IPV6_ADDR_LEN]; + } ipv6; + }; +}; + +/** + * Local rule structure. The local rule is applied via Netlink to + * outer tunnel iface in order to provide local and peer IP addresses + * of the VXLAN tunnel for encapsulation. The local rule is implicitly + * related to the Flow itself and can be shared by multiple Flows. + */ +struct tcf_local_rule { + LIST_ENTRY(tcf_local_rule) next; + uint32_t refcnt; + uint16_t mask; + union { + struct { + rte_be32_t dst; + rte_be32_t src; + } ipv4; + struct { + uint8_t dst[IPV6_ADDR_LEN]; + uint8_t src[IPV6_ADDR_LEN]; + } ipv6; + }; +}; + +/** VXLAN virtual netdev. */ +struct tcf_vtep { + LIST_ENTRY(tcf_vtep) next; + LIST_HEAD(, tcf_neigh_rule) neigh; + LIST_HEAD(, tcf_local_rule) local; + uint32_t refcnt; + unsigned int ifindex; /**< Own interface index. */ + unsigned int ifouter; /**< Index of device attached to. */ + uint16_t port; + uint8_t created; +}; + +/** Tunnel descriptor header, common for all tunnel types. */ +struct flow_tcf_tunnel_hdr { + uint32_t type; /**< Tunnel action type. */ + struct tcf_vtep *vtep; /**< Virtual tunnel endpoint device. */ + unsigned int ifindex_org; /**< Original dst/src interface */ + unsigned int *ifindex_ptr; /**< Interface ptr in message. */ +}; + +struct flow_tcf_vxlan_decap { + struct flow_tcf_tunnel_hdr hdr; + uint16_t udp_port; +}; + +struct flow_tcf_vxlan_encap { + struct flow_tcf_tunnel_hdr hdr; + uint32_t mask; + struct { + struct ether_addr dst; + struct ether_addr src; + } eth; + union { + struct { + rte_be32_t dst; + rte_be32_t src; + } ipv4; + struct { + uint8_t dst[IPV6_ADDR_LEN]; + uint8_t src[IPV6_ADDR_LEN]; + } ipv6; + }; +struct { + rte_be16_t src; + rte_be16_t dst; + } udp; + struct { + uint8_t vni[3]; + } vxlan; +}; + /** Structure used when extracting the values of a flow counters * from a netlink message. */ @@ -271,6 +459,7 @@ static const union { struct rte_flow_item_ipv6 ipv6; struct rte_flow_item_tcp tcp; struct rte_flow_item_udp udp; + struct rte_flow_item_vxlan vxlan; } flow_tcf_mask_empty; /** Supported masks for known item types. */ @@ -282,6 +471,7 @@ static const struct { struct rte_flow_item_ipv6 ipv6; struct rte_flow_item_tcp tcp; struct rte_flow_item_udp udp; + struct rte_flow_item_vxlan vxlan; } flow_tcf_mask_supported = { .port_id = { .id = 0xffffffff, @@ -319,6 +509,9 @@ static const struct { .src_port = RTE_BE16(0xffff), .dst_port = RTE_BE16(0xffff), }, + .vxlan = { + .vni = "\xff\xff\xff", + }, }; #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr)) @@ -337,7 +530,15 @@ struct flow_tcf_ptoi { /* Due to a limitation on driver/FW. */ #define MLX5_TCF_GROUP_ID_MAX 3 -#define MLX5_TCF_GROUP_PRIORITY_MAX 14 + +/* + * Due to a limitation on driver/FW, priority ranges from 1 to 16 in kernel. + * Priority in rte_flow attribute starts from 0 and is added by 1 in + * translation. This is subject to be changed to determine the max priority + * based on trial-and-error like Verbs driver once the restriction is lifted or + * the range is extended. + */ +#define MLX5_TCF_GROUP_PRIORITY_MAX 15 #define MLX5_TCF_FATE_ACTIONS \ (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \ @@ -347,6 +548,9 @@ struct flow_tcf_ptoi { (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \ MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP) +#define MLX5_TCF_VXLAN_ACTIONS \ + (MLX5_FLOW_ACTION_VXLAN_ENCAP | MLX5_FLOW_ACTION_VXLAN_DECAP) + #define MLX5_TCF_PEDIT_ACTIONS \ (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \ MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \ @@ -895,19 +1099,13 @@ flow_tcf_validate_attributes(const struct rte_flow_attr *attr, "group ID larger than " RTE_STR(MLX5_TCF_GROUP_ID_MAX) " isn't supported"); - else if (attr->group > 0 && - attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX) + else if (attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, attr, - "lowest priority level is " + "priority more than " RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX) - " when group is configured"); - else if (attr->priority > 0xfffe) - return rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, - attr, - "lowest priority level is 0xfffe"); + " is not supported"); if (!attr->ingress) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, @@ -920,6 +1118,665 @@ flow_tcf_validate_attributes(const struct rte_flow_attr *attr, } /** + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_ETH item for E-Switch. + * The routine checks the L2 fields to be used in encapsulation header. + * + * @param[in] item + * Pointer to the item structure. + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + **/ +static int +flow_tcf_validate_vxlan_encap_eth(const struct rte_flow_item *item, + struct rte_flow_error *error) +{ + const struct rte_flow_item_eth *spec = item->spec; + const struct rte_flow_item_eth *mask = item->mask; + + if (!spec) { + /* + * Specification for L2 addresses can be empty + * because these ones are optional and not + * required directly by tc rule. Kernel tries + * to resolve these ones on its own + */ + return 0; + } + if (!mask) { + /* If mask is not specified use the default one. */ + mask = &rte_flow_item_eth_mask; + } + if (memcmp(&mask->dst, + &flow_tcf_mask_empty.eth.dst, + sizeof(flow_tcf_mask_empty.eth.dst))) { + if (memcmp(&mask->dst, + &rte_flow_item_eth_mask.dst, + sizeof(rte_flow_item_eth_mask.dst))) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"eth.dst\" field"); + } + if (memcmp(&mask->src, + &flow_tcf_mask_empty.eth.src, + sizeof(flow_tcf_mask_empty.eth.src))) { + if (memcmp(&mask->src, + &rte_flow_item_eth_mask.src, + sizeof(rte_flow_item_eth_mask.src))) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"eth.src\" field"); + } + if (mask->type != RTE_BE16(0x0000)) { + if (mask->type != RTE_BE16(0xffff)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"eth.type\" field"); + DRV_LOG(WARNING, + "outer ethernet type field" + " cannot be forced for vxlan" + " encapsulation, parameter ignored"); + } + return 0; +} + +/** + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV4 item for E-Switch. + * The routine checks the IPv4 fields to be used in encapsulation header. + * + * @param[in] item + * Pointer to the item structure. + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + **/ +static int +flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item, + struct rte_flow_error *error) +{ + const struct rte_flow_item_ipv4 *spec = item->spec; + const struct rte_flow_item_ipv4 *mask = item->mask; + + if (!spec) { + /* + * Specification for IP addresses cannot be empty + * because it is required by tunnel_key parameter. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "NULL outer ipv4 address" + " specification for vxlan" + " encapsulation"); + } + if (!mask) + mask = &rte_flow_item_ipv4_mask; + if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) { + if (mask->hdr.dst_addr != RTE_BE32(0xffffffff)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"ipv4.hdr.dst_addr\" field" + " for vxlan encapsulation"); + /* More IPv4 address validations can be put here. */ + } else { + /* + * Kernel uses the destination IP address to determine + * the routing path and obtain the MAC destination + * address, so IP destination address must be + * specified in the tc rule. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "outer ipv4 destination address" + " must be specified for" + " vxlan encapsulation"); + } + if (mask->hdr.src_addr != RTE_BE32(0x00000000)) { + if (mask->hdr.src_addr != RTE_BE32(0xffffffff)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"ipv4.hdr.src_addr\" field" + " for vxlan encapsulation"); + /* More IPv4 address validations can be put here. */ + } else { + /* + * Kernel uses the source IP address to select the + * interface for egress encapsulated traffic, so + * it must be specified in the tc rule. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "outer ipv4 source address" + " must be specified for" + " vxlan encapsulation"); + } + return 0; +} + +/** + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV6 item for E-Switch. + * The routine checks the IPv6 fields to be used in encapsulation header. + * + * @param[in] item + * Pointer to the item structure. + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_ernno is set. + **/ +static int +flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item, + struct rte_flow_error *error) +{ + const struct rte_flow_item_ipv6 *spec = item->spec; + const struct rte_flow_item_ipv6 *mask = item->mask; + + if (!spec) { + /* + * Specification for IP addresses cannot be empty + * because it is required by tunnel_key parameter. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "NULL outer ipv6 address" + " specification for" + " vxlan encapsulation"); + } + if (!mask) + mask = &rte_flow_item_ipv6_mask; + if (memcmp(&mask->hdr.dst_addr, + &flow_tcf_mask_empty.ipv6.hdr.dst_addr, + IPV6_ADDR_LEN)) { + if (memcmp(&mask->hdr.dst_addr, + &rte_flow_item_ipv6_mask.hdr.dst_addr, + IPV6_ADDR_LEN)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"ipv6.hdr.dst_addr\" field" + " for vxlan encapsulation"); + /* More IPv6 address validations can be put here. */ + } else { + /* + * Kernel uses the destination IP address to determine + * the routing path and obtain the MAC destination + * address (heigh or gate), so IP destination address + * must be specified within the tc rule. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "outer ipv6 destination address" + " must be specified for" + " vxlan encapsulation"); + } + if (memcmp(&mask->hdr.src_addr, + &flow_tcf_mask_empty.ipv6.hdr.src_addr, + IPV6_ADDR_LEN)) { + if (memcmp(&mask->hdr.src_addr, + &rte_flow_item_ipv6_mask.hdr.src_addr, + IPV6_ADDR_LEN)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"ipv6.hdr.src_addr\" field" + " for vxlan encapsulation"); + /* More L3 address validation can be put here. */ + } else { + /* + * Kernel uses the source IP address to select the + * interface for egress encapsulated traffic, so + * it must be specified in the tc rule. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "outer L3 source address" + " must be specified for" + " vxlan encapsulation"); + } + return 0; +} + +/** + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_UDP item for E-Switch. + * The routine checks the UDP fields to be used in encapsulation header. + * + * @param[in] item + * Pointer to the item structure. + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_ernno is set. + **/ +static int +flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item, + struct rte_flow_error *error) +{ + const struct rte_flow_item_udp *spec = item->spec; + const struct rte_flow_item_udp *mask = item->mask; + + if (!spec) { + /* + * Specification for UDP ports cannot be empty + * because it is required by tunnel_key parameter. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "NULL UDP port specification " + " for vxlan encapsulation"); + } + if (!mask) + mask = &rte_flow_item_udp_mask; + if (mask->hdr.dst_port != RTE_BE16(0x0000)) { + if (mask->hdr.dst_port != RTE_BE16(0xffff)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"udp.hdr.dst_port\" field" + " for vxlan encapsulation"); + if (!spec->hdr.dst_port) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "outer UDP remote port cannot be" + " 0 for vxlan encapsulation"); + } else { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "outer UDP remote port" + " must be specified for" + " vxlan encapsulation"); + } + if (mask->hdr.src_port != RTE_BE16(0x0000)) { + if (mask->hdr.src_port != RTE_BE16(0xffff)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"udp.hdr.src_port\" field" + " for vxlan encapsulation"); + DRV_LOG(WARNING, + "outer UDP source port cannot be" + " forced for vxlan encapsulation," + " parameter ignored"); + } + return 0; +} + +/** + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_VXLAN item for E-Switch. + * The routine checks the VNIP fields to be used in encapsulation header. + * + * @param[in] item + * Pointer to the item structure. + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_ernno is set. + **/ +static int +flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item, + struct rte_flow_error *error) +{ + const struct rte_flow_item_vxlan *spec = item->spec; + const struct rte_flow_item_vxlan *mask = item->mask; + + if (!spec) { + /* Outer VNI is required by tunnel_key parameter. */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "NULL VNI specification" + " for vxlan encapsulation"); + } + if (!mask) + mask = &rte_flow_item_vxlan_mask; + if (!mask->vni[0] && !mask->vni[1] && !mask->vni[2]) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "outer VNI must be specified " + "for vxlan encapsulation"); + if (mask->vni[0] != 0xff || + mask->vni[1] != 0xff || + mask->vni[2] != 0xff) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"vxlan.vni\" field"); + + if (!spec->vni[0] && !spec->vni[1] && !spec->vni[2]) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "vxlan vni cannot be 0"); + return 0; +} + +/** + * Validate VXLAN_ENCAP action item list for E-Switch. + * The routine checks items to be used in encapsulation header. + * + * @param[in] action + * Pointer to the VXLAN_ENCAP action structure. + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_ernno is set. + **/ +static int +flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action, + struct rte_flow_error *error) +{ + const struct rte_flow_item *items; + int ret; + uint32_t item_flags = 0; + + if (!action->conf) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "Missing vxlan tunnel" + " action configuration"); + items = ((const struct rte_flow_action_vxlan_encap *) + action->conf)->definition; + if (!items) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "Missing vxlan tunnel" + " encapsulation parameters"); + for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { + switch (items->type) { + case RTE_FLOW_ITEM_TYPE_VOID: + break; + case RTE_FLOW_ITEM_TYPE_ETH: + ret = mlx5_flow_validate_item_eth(items, item_flags, + error); + if (ret < 0) + return ret; + ret = flow_tcf_validate_vxlan_encap_eth(items, error); + if (ret < 0) + return ret; + item_flags |= MLX5_FLOW_LAYER_OUTER_L2; + break; + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + ret = mlx5_flow_validate_item_ipv4(items, item_flags, + error); + if (ret < 0) + return ret; + ret = flow_tcf_validate_vxlan_encap_ipv4(items, error); + if (ret < 0) + return ret; + item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + ret = mlx5_flow_validate_item_ipv6(items, item_flags, + error); + if (ret < 0) + return ret; + ret = flow_tcf_validate_vxlan_encap_ipv6(items, error); + if (ret < 0) + return ret; + item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; + break; + case RTE_FLOW_ITEM_TYPE_UDP: + ret = mlx5_flow_validate_item_udp(items, item_flags, + 0xFF, error); + if (ret < 0) + return ret; + ret = flow_tcf_validate_vxlan_encap_udp(items, error); + if (ret < 0) + return ret; + item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP; + break; + case RTE_FLOW_ITEM_TYPE_VXLAN: + ret = mlx5_flow_validate_item_vxlan(items, + item_flags, error); + if (ret < 0) + return ret; + ret = flow_tcf_validate_vxlan_encap_vni(items, error); + if (ret < 0) + return ret; + item_flags |= MLX5_FLOW_LAYER_VXLAN; + break; + default: + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, items, + "vxlan encap item not supported"); + } + } + if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "no outer IP layer found" + " for vxlan encapsulation"); + if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "no outer UDP layer found" + " for vxlan encapsulation"); + if (!(item_flags & MLX5_FLOW_LAYER_VXLAN)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "no VXLAN VNI found" + " for vxlan encapsulation"); + return 0; +} + +/** + * Validate RTE_FLOW_ITEM_TYPE_IPV4 item if VXLAN_DECAP action + * is present in actions list. + * + * @param[in] ipv4 + * Outer IPv4 address item (if any, NULL otherwise). + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_ernno is set. + **/ +static int +flow_tcf_validate_vxlan_decap_ipv4(const struct rte_flow_item *ipv4, + struct rte_flow_error *error) +{ + const struct rte_flow_item_ipv4 *spec = ipv4->spec; + const struct rte_flow_item_ipv4 *mask = ipv4->mask; + + if (!spec) { + /* + * Specification for IP addresses cannot be empty + * because it is required as decap parameter. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, ipv4, + "NULL outer ipv4 address" + " specification for vxlan" + " for vxlan decapsulation"); + } + if (!mask) + mask = &rte_flow_item_ipv4_mask; + if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) { + if (mask->hdr.dst_addr != RTE_BE32(0xffffffff)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"ipv4.hdr.dst_addr\" field"); + /* More IP address validations can be put here. */ + } else { + /* + * Kernel uses the destination IP address + * to determine the ingress network interface + * for traffic being decapsulated. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, ipv4, + "outer ipv4 destination address" + " must be specified for" + " vxlan decapsulation"); + } + /* Source IP address is optional for decap. */ + if (mask->hdr.src_addr != RTE_BE32(0x00000000) && + mask->hdr.src_addr != RTE_BE32(0xffffffff)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"ipv4.hdr.src_addr\" field"); + return 0; +} + +/** + * Validate RTE_FLOW_ITEM_TYPE_IPV6 item if VXLAN_DECAP action + * is present in actions list. + * + * @param[in] ipv6 + * Outer IPv6 address item (if any, NULL otherwise). + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_ernno is set. + **/ +static int +flow_tcf_validate_vxlan_decap_ipv6(const struct rte_flow_item *ipv6, + struct rte_flow_error *error) +{ + const struct rte_flow_item_ipv6 *spec = ipv6->spec; + const struct rte_flow_item_ipv6 *mask = ipv6->mask; + + if (!spec) { + /* + * Specification for IP addresses cannot be empty + * because it is required as decap parameter. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, ipv6, + "NULL outer ipv6 address" + " specification for vxlan" + " decapsulation"); + } + if (!mask) + mask = &rte_flow_item_ipv6_mask; + if (memcmp(&mask->hdr.dst_addr, + &flow_tcf_mask_empty.ipv6.hdr.dst_addr, + IPV6_ADDR_LEN)) { + if (memcmp(&mask->hdr.dst_addr, + &rte_flow_item_ipv6_mask.hdr.dst_addr, + IPV6_ADDR_LEN)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"ipv6.hdr.dst_addr\" field"); + /* More IP address validations can be put here. */ + } else { + /* + * Kernel uses the destination IP address + * to determine the ingress network interface + * for traffic being decapsulated. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, ipv6, + "outer ipv6 destination address must be " + "specified for vxlan decapsulation"); + } + /* Source IP address is optional for decap. */ + if (memcmp(&mask->hdr.src_addr, + &flow_tcf_mask_empty.ipv6.hdr.src_addr, + IPV6_ADDR_LEN)) { + if (memcmp(&mask->hdr.src_addr, + &rte_flow_item_ipv6_mask.hdr.src_addr, + IPV6_ADDR_LEN)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"ipv6.hdr.src_addr\" field"); + } + return 0; +} + +/** + * Validate RTE_FLOW_ITEM_TYPE_UDP item if VXLAN_DECAP action + * is present in actions list. + * + * @param[in] udp + * Outer UDP layer item (if any, NULL otherwise). + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_ernno is set. + **/ +static int +flow_tcf_validate_vxlan_decap_udp(const struct rte_flow_item *udp, + struct rte_flow_error *error) +{ + const struct rte_flow_item_udp *spec = udp->spec; + const struct rte_flow_item_udp *mask = udp->mask; + + if (!spec) + /* + * Specification for UDP ports cannot be empty + * because it is required as decap parameter. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, udp, + "NULL UDP port specification" + " for VXLAN decapsulation"); + if (!mask) + mask = &rte_flow_item_udp_mask; + if (mask->hdr.dst_port != RTE_BE16(0x0000)) { + if (mask->hdr.dst_port != RTE_BE16(0xffff)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"udp.hdr.dst_port\" field"); + if (!spec->hdr.dst_port) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, udp, + "zero decap local UDP port"); + } else { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, udp, + "outer UDP destination port must be " + "specified for vxlan decapsulation"); + } + if (mask->hdr.src_port != RTE_BE16(0x0000)) { + if (mask->hdr.src_port != RTE_BE16(0xffff)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "no support for partial mask on" + " \"udp.hdr.src_port\" field"); + DRV_LOG(WARNING, + "outer UDP local port cannot be " + "forced for VXLAN encapsulation, " + "parameter ignored"); + } + return 0; +} + +/** * Validate flow for E-Switch. * * @param[in] priv @@ -951,6 +1808,7 @@ flow_tcf_validate(struct rte_eth_dev *dev, const struct rte_flow_item_ipv6 *ipv6; const struct rte_flow_item_tcp *tcp; const struct rte_flow_item_udp *udp; + const struct rte_flow_item_vxlan *vxlan; } spec, mask; union { const struct rte_flow_action_port_id *port_id; @@ -960,6 +1818,7 @@ flow_tcf_validate(struct rte_eth_dev *dev, of_set_vlan_vid; const struct rte_flow_action_of_set_vlan_pcp * of_set_vlan_pcp; + const struct rte_flow_action_vxlan_encap *vxlan_encap; const struct rte_flow_action_set_ipv4 *set_ipv4; const struct rte_flow_action_set_ipv6 *set_ipv6; } conf; @@ -978,9 +1837,170 @@ flow_tcf_validate(struct rte_eth_dev *dev, ret = flow_tcf_validate_attributes(attr, error); if (ret < 0) return ret; + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { + unsigned int i; + uint64_t current_action_flag = 0; + + switch (actions->type) { + case RTE_FLOW_ACTION_TYPE_VOID: + break; + case RTE_FLOW_ACTION_TYPE_PORT_ID: + current_action_flag = MLX5_FLOW_ACTION_PORT_ID; + if (!actions->conf) + break; + conf.port_id = actions->conf; + if (conf.port_id->original) + i = 0; + else + for (i = 0; ptoi[i].ifindex; ++i) + if (ptoi[i].port_id == conf.port_id->id) + break; + if (!ptoi[i].ifindex) + return rte_flow_error_set + (error, ENODEV, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + conf.port_id, + "missing data to convert port ID to" + " ifindex"); + port_id_dev = &rte_eth_devices[conf.port_id->id]; + break; + case RTE_FLOW_ACTION_TYPE_JUMP: + current_action_flag = MLX5_FLOW_ACTION_JUMP; + if (!actions->conf) + break; + conf.jump = actions->conf; + if (attr->group >= conf.jump->group) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "can jump only to a group forward"); + break; + case RTE_FLOW_ACTION_TYPE_DROP: + current_action_flag = MLX5_FLOW_ACTION_DROP; + break; + case RTE_FLOW_ACTION_TYPE_COUNT: + break; + case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN: + current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN; + break; + case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: + current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN; + break; + case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: + if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, actions, + "vlan modify is not supported," + " set action must follow push action"); + current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID; + break; + case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: + if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, actions, + "vlan modify is not supported," + " set action must follow push action"); + current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP; + break; + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: + current_action_flag = MLX5_FLOW_ACTION_VXLAN_DECAP; + break; + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: + ret = flow_tcf_validate_vxlan_encap(actions, error); + if (ret < 0) + return ret; + current_action_flag = MLX5_FLOW_ACTION_VXLAN_ENCAP; + break; + case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC: + current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC; + break; + case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST: + current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST; + break; + case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC: + current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC; + break; + case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST: + current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST; + break; + case RTE_FLOW_ACTION_TYPE_SET_TP_SRC: + current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC; + break; + case RTE_FLOW_ACTION_TYPE_SET_TP_DST: + current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST; + break; + case RTE_FLOW_ACTION_TYPE_SET_TTL: + current_action_flag = MLX5_FLOW_ACTION_SET_TTL; + break; + case RTE_FLOW_ACTION_TYPE_DEC_TTL: + current_action_flag = MLX5_FLOW_ACTION_DEC_TTL; + break; + case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC: + current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC; + break; + case RTE_FLOW_ACTION_TYPE_SET_MAC_DST: + current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST; + break; + default: + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "action not supported"); + } + if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) { + if (!actions->conf) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + actions, + "action configuration not set"); + } + if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) && + pedit_validated) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "set actions should be " + "listed successively"); + if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) && + (action_flags & MLX5_TCF_PEDIT_ACTIONS)) + pedit_validated = 1; + if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) && + (action_flags & MLX5_TCF_FATE_ACTIONS)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "can't have multiple fate" + " actions"); + if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) && + (action_flags & MLX5_TCF_VXLAN_ACTIONS)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "can't have multiple vxlan" + " actions"); + if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) && + (action_flags & MLX5_TCF_VLAN_ACTIONS)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "can't have vxlan and vlan" + " actions in the same rule"); + action_flags |= current_action_flag; + } for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { unsigned int i; + if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && + items->type != RTE_FLOW_ITEM_TYPE_ETH) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, + items, + "only L2 inner item" + " is supported"); switch (items->type) { case RTE_FLOW_ITEM_TYPE_VOID: break; @@ -1034,7 +2054,9 @@ flow_tcf_validate(struct rte_eth_dev *dev, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_LAYER_OUTER_L2; + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? + MLX5_FLOW_LAYER_INNER_L2 : + MLX5_FLOW_LAYER_OUTER_L2; /* TODO: * Redundant check due to different supported mask. * Same for the rest of items. @@ -1112,6 +2134,12 @@ flow_tcf_validate(struct rte_eth_dev *dev, next_protocol = ((const struct rte_flow_item_ipv4 *) (items->spec))->hdr.next_proto_id; + if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) { + ret = flow_tcf_validate_vxlan_decap_ipv4 + (items, error); + if (ret < 0) + return ret; + } break; case RTE_FLOW_ITEM_TYPE_IPV6: ret = mlx5_flow_validate_item_ipv6(items, item_flags, @@ -1139,6 +2167,12 @@ flow_tcf_validate(struct rte_eth_dev *dev, next_protocol = ((const struct rte_flow_item_ipv6 *) (items->spec))->hdr.proto; + if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) { + ret = flow_tcf_validate_vxlan_decap_ipv6 + (items, error); + if (ret < 0) + return ret; + } break; case RTE_FLOW_ITEM_TYPE_UDP: ret = mlx5_flow_validate_item_udp(items, item_flags, @@ -1154,6 +2188,12 @@ flow_tcf_validate(struct rte_eth_dev *dev, error); if (!mask.udp) return -rte_errno; + if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) { + ret = flow_tcf_validate_vxlan_decap_udp + (items, error); + if (ret < 0) + return ret; + } break; case RTE_FLOW_ITEM_TYPE_TCP: ret = mlx5_flow_validate_item_tcp @@ -1173,141 +2213,41 @@ flow_tcf_validate(struct rte_eth_dev *dev, if (!mask.tcp) return -rte_errno; break; - default: - return rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ITEM, - NULL, "item not supported"); - } - } - for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { - unsigned int i; - uint64_t current_action_flag = 0; - - switch (actions->type) { - case RTE_FLOW_ACTION_TYPE_VOID: - break; - case RTE_FLOW_ACTION_TYPE_PORT_ID: - current_action_flag = MLX5_FLOW_ACTION_PORT_ID; - if (!actions->conf) - break; - conf.port_id = actions->conf; - if (conf.port_id->original) - i = 0; - else - for (i = 0; ptoi[i].ifindex; ++i) - if (ptoi[i].port_id == conf.port_id->id) - break; - if (!ptoi[i].ifindex) - return rte_flow_error_set - (error, ENODEV, - RTE_FLOW_ERROR_TYPE_ACTION_CONF, - conf.port_id, - "missing data to convert port ID to" - " ifindex"); - port_id_dev = &rte_eth_devices[conf.port_id->id]; - break; - case RTE_FLOW_ACTION_TYPE_JUMP: - current_action_flag = MLX5_FLOW_ACTION_JUMP; - if (!actions->conf) - break; - conf.jump = actions->conf; - if (attr->group >= conf.jump->group) + case RTE_FLOW_ITEM_TYPE_VXLAN: + if (!(action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP)) return rte_flow_error_set (error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ACTION, - actions, - "can jump only to a group forward"); - break; - case RTE_FLOW_ACTION_TYPE_DROP: - current_action_flag = MLX5_FLOW_ACTION_DROP; - break; - case RTE_FLOW_ACTION_TYPE_COUNT: - break; - case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN: - current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN; - break; - case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: - current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN; - break; - case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: - if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN)) - return rte_flow_error_set - (error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ACTION, actions, - "vlan modify is not supported," - " set action must follow push action"); - current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID; - break; - case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: - if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN)) + RTE_FLOW_ERROR_TYPE_ITEM, + items, + "vni pattern should be followed by" + " vxlan decapsulation action"); + ret = mlx5_flow_validate_item_vxlan(items, + item_flags, error); + if (ret < 0) + return ret; + item_flags |= MLX5_FLOW_LAYER_VXLAN; + mask.vxlan = flow_tcf_item_mask + (items, &rte_flow_item_vxlan_mask, + &flow_tcf_mask_supported.vxlan, + &flow_tcf_mask_empty.vxlan, + sizeof(flow_tcf_mask_supported.vxlan), error); + if (!mask.vxlan) + return -rte_errno; + if (mask.vxlan->vni[0] != 0xff || + mask.vxlan->vni[1] != 0xff || + mask.vxlan->vni[2] != 0xff) return rte_flow_error_set (error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ACTION, actions, - "vlan modify is not supported," - " set action must follow push action"); - current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP; - break; - case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC: - current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC; - break; - case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST: - current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST; - break; - case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC: - current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC; - break; - case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST: - current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST; - break; - case RTE_FLOW_ACTION_TYPE_SET_TP_SRC: - current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC; - break; - case RTE_FLOW_ACTION_TYPE_SET_TP_DST: - current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST; - break; - case RTE_FLOW_ACTION_TYPE_SET_TTL: - current_action_flag = MLX5_FLOW_ACTION_SET_TTL; - break; - case RTE_FLOW_ACTION_TYPE_DEC_TTL: - current_action_flag = MLX5_FLOW_ACTION_DEC_TTL; - break; - case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC: - current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC; - break; - case RTE_FLOW_ACTION_TYPE_SET_MAC_DST: - current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST; + RTE_FLOW_ERROR_TYPE_ITEM_MASK, + mask.vxlan, + "no support for partial or " + "empty mask on \"vxlan.vni\" field"); break; default: return rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ACTION, - actions, - "action not supported"); - } - if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) { - if (!actions->conf) - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ACTION_CONF, - actions, - "action configuration not set"); + RTE_FLOW_ERROR_TYPE_ITEM, + items, "item not supported"); } - if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) && - pedit_validated) - return rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ACTION, - actions, - "set actions should be " - "listed successively"); - if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) && - (action_flags & MLX5_TCF_PEDIT_ACTIONS)) - pedit_validated = 1; - if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) && - (action_flags & MLX5_TCF_FATE_ACTIONS)) - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ACTION, - actions, - "can't have multiple fate" - " actions"); - action_flags |= current_action_flag; } if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) && (action_flags & MLX5_FLOW_ACTION_DROP)) @@ -1375,6 +2315,12 @@ flow_tcf_validate(struct rte_eth_dev *dev, RTE_FLOW_ERROR_TYPE_ACTION, actions, "vlan actions are supported" " only with port_id action"); + if ((action_flags & MLX5_TCF_VXLAN_ACTIONS) && + !(action_flags & MLX5_FLOW_ACTION_PORT_ID)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "vxlan actions are supported" + " only with port_id action"); if (!(action_flags & MLX5_TCF_FATE_ACTIONS)) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, actions, @@ -1398,28 +2344,47 @@ flow_tcf_validate(struct rte_eth_dev *dev, "no ethernet found in" " pattern"); } + if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) { + if (!(item_flags & + (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | + MLX5_FLOW_LAYER_OUTER_L3_IPV6))) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "no outer IP pattern found" + " for vxlan decap action"); + if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "no outer UDP pattern found" + " for vxlan decap action"); + if (!(item_flags & MLX5_FLOW_LAYER_VXLAN)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "no VNI pattern found" + " for vxlan decap action"); + } return 0; } /** - * Calculate maximum size of memory for flow items of Linux TC flower and - * extract specified items. + * Calculate maximum size of memory for flow items of Linux TC flower. * + * @param[in] attr + * Pointer to the flow attributes. * @param[in] items * Pointer to the list of items. - * @param[out] item_flags - * Pointer to the detected items. * * @return * Maximum size of memory for items. */ static int -flow_tcf_get_items_and_size(const struct rte_flow_attr *attr, - const struct rte_flow_item items[], - uint64_t *item_flags) +flow_tcf_get_items_size(const struct rte_flow_attr *attr, + const struct rte_flow_item items[]) { int size = 0; - uint64_t flags = 0; size += SZ_NLATTR_STRZ_OF("flower") + SZ_NLATTR_NEST + /* TCA_OPTIONS. */ @@ -1436,7 +2401,6 @@ flow_tcf_get_items_and_size(const struct rte_flow_attr *attr, size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */ SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4; /* dst/src MAC addr and mask. */ - flags |= MLX5_FLOW_LAYER_OUTER_L2; break; case RTE_FLOW_ITEM_TYPE_VLAN: size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */ @@ -1444,33 +2408,31 @@ flow_tcf_get_items_and_size(const struct rte_flow_attr *attr, /* VLAN Ether type. */ SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */ SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */ - flags |= MLX5_FLOW_LAYER_OUTER_VLAN; break; case RTE_FLOW_ITEM_TYPE_IPV4: size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */ SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */ SZ_NLATTR_TYPE_OF(uint32_t) * 4; /* dst/src IP addr and mask. */ - flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; break; case RTE_FLOW_ITEM_TYPE_IPV6: size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */ SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */ - SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4; + SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4; /* dst/src IP addr and mask. */ - flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; break; case RTE_FLOW_ITEM_TYPE_UDP: size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */ SZ_NLATTR_TYPE_OF(uint16_t) * 4; /* dst/src port and mask. */ - flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP; break; case RTE_FLOW_ITEM_TYPE_TCP: size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */ SZ_NLATTR_TYPE_OF(uint16_t) * 4; /* dst/src port and mask. */ - flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP; + break; + case RTE_FLOW_ITEM_TYPE_VXLAN: + size += SZ_NLATTR_TYPE_OF(uint32_t); break; default: DRV_LOG(WARNING, @@ -1480,7 +2442,69 @@ flow_tcf_get_items_and_size(const struct rte_flow_attr *attr, break; } } - *item_flags = flags; + return size; +} + +/** + * Calculate size of memory to store the VXLAN encapsultion + * related items in the Netlink message buffer. Items list + * is specified by RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action. + * The item list should be validated. + * + * @param[in] action + * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object. + * List of pattern items to scan data from. + * + * @return + * The size the part of Netlink message buffer to store the + * VXLAN encapsulation item attributes. + */ +static int +flow_tcf_vxlan_encap_size(const struct rte_flow_action *action) +{ + const struct rte_flow_item *items; + int size = 0; + + assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP); + assert(action->conf); + + items = ((const struct rte_flow_action_vxlan_encap *) + action->conf)->definition; + assert(items); + for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { + switch (items->type) { + case RTE_FLOW_ITEM_TYPE_VOID: + break; + case RTE_FLOW_ITEM_TYPE_ETH: + /* This item does not require message buffer. */ + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + size += SZ_NLATTR_DATA_OF(IPV4_ADDR_LEN) * 2; + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + size += SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 2; + break; + case RTE_FLOW_ITEM_TYPE_UDP: { + const struct rte_flow_item_udp *udp = items->mask; + + size += SZ_NLATTR_TYPE_OF(uint16_t); + if (!udp || udp->hdr.src_port != RTE_BE16(0x0000)) + size += SZ_NLATTR_TYPE_OF(uint16_t); + break; + } + case RTE_FLOW_ITEM_TYPE_VXLAN: + size += SZ_NLATTR_TYPE_OF(uint32_t); + break; + default: + assert(false); + DRV_LOG(WARNING, + "unsupported item %p type %d," + " items must be validated" + " before flow creation", + (const void *)items, items->type); + return 0; + } + } return size; } @@ -1553,6 +2577,29 @@ action_of_vlan: SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */ SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */ break; + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: + size += SZ_NLATTR_NEST + /* na_act_index. */ + SZ_NLATTR_STRZ_OF("tunnel_key") + + SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */ + SZ_NLATTR_TYPE_OF(uint8_t); + size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key); + size += flow_tcf_vxlan_encap_size(actions) + + RTE_ALIGN_CEIL /* preceding encap params. */ + (sizeof(struct flow_tcf_vxlan_encap), + MNL_ALIGNTO); + flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP; + break; + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: + size += SZ_NLATTR_NEST + /* na_act_index. */ + SZ_NLATTR_STRZ_OF("tunnel_key") + + SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */ + SZ_NLATTR_TYPE_OF(uint8_t); + size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key); + size += RTE_ALIGN_CEIL /* preceding decap params. */ + (sizeof(struct flow_tcf_vxlan_decap), + MNL_ALIGNTO); + flags |= MLX5_FLOW_ACTION_VXLAN_DECAP; + break; case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC: case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST: case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC: @@ -1610,10 +2657,6 @@ flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle) * Pointer to the list of items. * @param[in] actions * Pointer to the list of actions. - * @param[out] item_flags - * Pointer to bit mask of all items detected. - * @param[out] action_flags - * Pointer to bit mask of all actions detected. * @param[out] error * Pointer to the error structure. * @@ -1625,18 +2668,21 @@ static struct mlx5_flow * flow_tcf_prepare(const struct rte_flow_attr *attr, const struct rte_flow_item items[], const struct rte_flow_action actions[], - uint64_t *item_flags, uint64_t *action_flags, struct rte_flow_error *error) { - size_t size = sizeof(struct mlx5_flow) + + size_t size = RTE_ALIGN_CEIL + (sizeof(struct mlx5_flow), + alignof(struct flow_tcf_tunnel_hdr)) + MNL_ALIGN(sizeof(struct nlmsghdr)) + MNL_ALIGN(sizeof(struct tcmsg)); struct mlx5_flow *dev_flow; + uint64_t action_flags = 0; struct nlmsghdr *nlh; struct tcmsg *tcm; + uint8_t *sp, *tun = NULL; - size += flow_tcf_get_items_and_size(attr, items, item_flags); - size += flow_tcf_get_actions_and_size(actions, action_flags); + size += flow_tcf_get_items_size(attr, items); + size += flow_tcf_get_actions_and_size(actions, &action_flags); dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO); if (!dev_flow) { rte_flow_error_set(error, ENOMEM, @@ -1644,14 +2690,52 @@ flow_tcf_prepare(const struct rte_flow_attr *attr, "not enough memory to create E-Switch flow"); return NULL; } - nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1)); + sp = (uint8_t *)(dev_flow + 1); + if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) { + sp = RTE_PTR_ALIGN + (sp, alignof(struct flow_tcf_tunnel_hdr)); + tun = sp; + sp += RTE_ALIGN_CEIL + (sizeof(struct flow_tcf_vxlan_encap), + MNL_ALIGNTO); +#ifndef NDEBUG + size -= RTE_ALIGN_CEIL + (sizeof(struct flow_tcf_vxlan_encap), + MNL_ALIGNTO); +#endif + } else if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) { + sp = RTE_PTR_ALIGN + (sp, alignof(struct flow_tcf_tunnel_hdr)); + tun = sp; + sp += RTE_ALIGN_CEIL + (sizeof(struct flow_tcf_vxlan_decap), + MNL_ALIGNTO); +#ifndef NDEBUG + size -= RTE_ALIGN_CEIL + (sizeof(struct flow_tcf_vxlan_decap), + MNL_ALIGNTO); +#endif + } else { + sp = RTE_PTR_ALIGN(sp, MNL_ALIGNTO); + } + nlh = mnl_nlmsg_put_header(sp); tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm)); *dev_flow = (struct mlx5_flow){ .tcf = (struct mlx5_flow_tcf){ +#ifndef NDEBUG + .nlsize = size - RTE_ALIGN_CEIL + (sizeof(struct mlx5_flow), + alignof(struct flow_tcf_tunnel_hdr)), +#endif + .tunnel = (struct flow_tcf_tunnel_hdr *)tun, .nlh = nlh, .tcm = tcm, }, }; + if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) + dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_DECAP; + else if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) + dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_ENCAP; /* * Generate a reasonably unique handle based on the address of the * target buffer. @@ -1702,6 +2786,241 @@ flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused, } /** + * Convert VXLAN VNI to 32-bit integer. + * + * @param[in] vni + * VXLAN VNI in 24-bit wire format. + * + * @return + * VXLAN VNI as a 32-bit integer value in network endian. + */ +static inline rte_be32_t +vxlan_vni_as_be32(const uint8_t vni[3]) +{ + union { + uint8_t vni[4]; + rte_be32_t dword; + } ret = { + .vni = { 0, vni[0], vni[1], vni[2] }, + }; + return ret.dword; +} + +/** + * Helper function to process RTE_FLOW_ITEM_TYPE_ETH entry in configuration + * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the MAC address fields + * in the encapsulation parameters structure. The item must be prevalidated, + * no any validation checks performed by function. + * + * @param[in] spec + * RTE_FLOW_ITEM_TYPE_ETH entry specification. + * @param[in] mask + * RTE_FLOW_ITEM_TYPE_ETH entry mask. + * @param[out] encap + * Structure to fill the gathered MAC address data. + */ +static void +flow_tcf_parse_vxlan_encap_eth(const struct rte_flow_item_eth *spec, + const struct rte_flow_item_eth *mask, + struct flow_tcf_vxlan_encap *encap) +{ + /* Item must be validated before. No redundant checks. */ + assert(spec); + if (!mask || !memcmp(&mask->dst, + &rte_flow_item_eth_mask.dst, + sizeof(rte_flow_item_eth_mask.dst))) { + /* + * Ethernet addresses are not supported by + * tc as tunnel_key parameters. Destination + * address is needed to form encap packet + * header and retrieved by kernel from + * implicit sources (ARP table, etc), + * address masks are not supported at all. + */ + encap->eth.dst = spec->dst; + encap->mask |= FLOW_TCF_ENCAP_ETH_DST; + } + if (!mask || !memcmp(&mask->src, + &rte_flow_item_eth_mask.src, + sizeof(rte_flow_item_eth_mask.src))) { + /* + * Ethernet addresses are not supported by + * tc as tunnel_key parameters. Source ethernet + * address is ignored anyway. + */ + encap->eth.src = spec->src; + encap->mask |= FLOW_TCF_ENCAP_ETH_SRC; + } +} + +/** + * Helper function to process RTE_FLOW_ITEM_TYPE_IPV4 entry in configuration + * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the IPV4 address fields + * in the encapsulation parameters structure. The item must be prevalidated, + * no any validation checks performed by function. + * + * @param[in] spec + * RTE_FLOW_ITEM_TYPE_IPV4 entry specification. + * @param[out] encap + * Structure to fill the gathered IPV4 address data. + */ +static void +flow_tcf_parse_vxlan_encap_ipv4(const struct rte_flow_item_ipv4 *spec, + struct flow_tcf_vxlan_encap *encap) +{ + /* Item must be validated before. No redundant checks. */ + assert(spec); + encap->ipv4.dst = spec->hdr.dst_addr; + encap->ipv4.src = spec->hdr.src_addr; + encap->mask |= FLOW_TCF_ENCAP_IPV4_SRC | + FLOW_TCF_ENCAP_IPV4_DST; +} + +/** + * Helper function to process RTE_FLOW_ITEM_TYPE_IPV6 entry in configuration + * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the IPV6 address fields + * in the encapsulation parameters structure. The item must be prevalidated, + * no any validation checks performed by function. + * + * @param[in] spec + * RTE_FLOW_ITEM_TYPE_IPV6 entry specification. + * @param[out] encap + * Structure to fill the gathered IPV6 address data. + */ +static void +flow_tcf_parse_vxlan_encap_ipv6(const struct rte_flow_item_ipv6 *spec, + struct flow_tcf_vxlan_encap *encap) +{ + /* Item must be validated before. No redundant checks. */ + assert(spec); + memcpy(encap->ipv6.dst, spec->hdr.dst_addr, IPV6_ADDR_LEN); + memcpy(encap->ipv6.src, spec->hdr.src_addr, IPV6_ADDR_LEN); + encap->mask |= FLOW_TCF_ENCAP_IPV6_SRC | + FLOW_TCF_ENCAP_IPV6_DST; +} + +/** + * Helper function to process RTE_FLOW_ITEM_TYPE_UDP entry in configuration + * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the UDP port fields + * in the encapsulation parameters structure. The item must be prevalidated, + * no any validation checks performed by function. + * + * @param[in] spec + * RTE_FLOW_ITEM_TYPE_UDP entry specification. + * @param[in] mask + * RTE_FLOW_ITEM_TYPE_UDP entry mask. + * @param[out] encap + * Structure to fill the gathered UDP port data. + */ +static void +flow_tcf_parse_vxlan_encap_udp(const struct rte_flow_item_udp *spec, + const struct rte_flow_item_udp *mask, + struct flow_tcf_vxlan_encap *encap) +{ + assert(spec); + encap->udp.dst = spec->hdr.dst_port; + encap->mask |= FLOW_TCF_ENCAP_UDP_DST; + if (!mask || mask->hdr.src_port != RTE_BE16(0x0000)) { + encap->udp.src = spec->hdr.src_port; + encap->mask |= FLOW_TCF_ENCAP_IPV4_SRC; + } +} + +/** + * Helper function to process RTE_FLOW_ITEM_TYPE_VXLAN entry in configuration + * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the VNI fields + * in the encapsulation parameters structure. The item must be prevalidated, + * no any validation checks performed by function. + * + * @param[in] spec + * RTE_FLOW_ITEM_TYPE_VXLAN entry specification. + * @param[out] encap + * Structure to fill the gathered VNI address data. + */ +static void +flow_tcf_parse_vxlan_encap_vni(const struct rte_flow_item_vxlan *spec, + struct flow_tcf_vxlan_encap *encap) +{ + /* Item must be validated before. Do not redundant checks. */ + assert(spec); + memcpy(encap->vxlan.vni, spec->vni, sizeof(encap->vxlan.vni)); + encap->mask |= FLOW_TCF_ENCAP_VXLAN_VNI; +} + +/** + * Populate consolidated encapsulation object from list of pattern items. + * + * Helper function to process configuration of action such as + * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. The item list should be + * validated, there is no way to return an meaningful error. + * + * @param[in] action + * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object. + * List of pattern items to gather data from. + * @param[out] src + * Structure to fill gathered data. + */ +static void +flow_tcf_vxlan_encap_parse(const struct rte_flow_action *action, + struct flow_tcf_vxlan_encap *encap) +{ + union { + const struct rte_flow_item_eth *eth; + const struct rte_flow_item_ipv4 *ipv4; + const struct rte_flow_item_ipv6 *ipv6; + const struct rte_flow_item_udp *udp; + const struct rte_flow_item_vxlan *vxlan; + } spec, mask; + const struct rte_flow_item *items; + + assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP); + assert(action->conf); + + items = ((const struct rte_flow_action_vxlan_encap *) + action->conf)->definition; + assert(items); + for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { + switch (items->type) { + case RTE_FLOW_ITEM_TYPE_VOID: + break; + case RTE_FLOW_ITEM_TYPE_ETH: + mask.eth = items->mask; + spec.eth = items->spec; + flow_tcf_parse_vxlan_encap_eth(spec.eth, mask.eth, + encap); + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + spec.ipv4 = items->spec; + flow_tcf_parse_vxlan_encap_ipv4(spec.ipv4, encap); + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + spec.ipv6 = items->spec; + flow_tcf_parse_vxlan_encap_ipv6(spec.ipv6, encap); + break; + case RTE_FLOW_ITEM_TYPE_UDP: + mask.udp = items->mask; + spec.udp = items->spec; + flow_tcf_parse_vxlan_encap_udp(spec.udp, mask.udp, + encap); + break; + case RTE_FLOW_ITEM_TYPE_VXLAN: + spec.vxlan = items->spec; + flow_tcf_parse_vxlan_encap_vni(spec.vxlan, encap); + break; + default: + assert(false); + DRV_LOG(WARNING, + "unsupported item %p type %d," + " items must be validated" + " before flow creation", + (const void *)items, items->type); + encap->mask = 0; + return; + } + } +} + +/** * Translate flow for Linux TC flower and construct Netlink message. * * @param[in] priv @@ -1735,6 +3054,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, const struct rte_flow_item_ipv6 *ipv6; const struct rte_flow_item_tcp *tcp; const struct rte_flow_item_udp *udp; + const struct rte_flow_item_vxlan *vxlan; } spec, mask; union { const struct rte_flow_action_port_id *port_id; @@ -1745,6 +3065,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, const struct rte_flow_action_of_set_vlan_pcp * of_set_vlan_pcp; } conf; + union { + struct flow_tcf_tunnel_hdr *hdr; + struct flow_tcf_vxlan_decap *vxlan; + } decap = { + .hdr = NULL, + }; + union { + struct flow_tcf_tunnel_hdr *hdr; + struct flow_tcf_vxlan_encap *vxlan; + } encap = { + .hdr = NULL, + }; struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)]; struct nlmsghdr *nlh = dev_flow->tcf.nlh; struct tcmsg *tcm = dev_flow->tcf.tcm; @@ -1762,6 +3094,20 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi, PTOI_TABLE_SZ_MAX(dev))); + if (dev_flow->tcf.tunnel) { + switch (dev_flow->tcf.tunnel->type) { + case FLOW_TCF_TUNACT_VXLAN_DECAP: + decap.vxlan = dev_flow->tcf.vxlan_decap; + break; + case FLOW_TCF_TUNACT_VXLAN_ENCAP: + encap.vxlan = dev_flow->tcf.vxlan_encap; + break; + /* New tunnel actions can be added here. */ + default: + assert(false); + break; + } + } nlh = dev_flow->tcf.nlh; tcm = dev_flow->tcf.tcm; /* Prepare API must have been called beforehand. */ @@ -1779,7 +3125,6 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group); mnl_attr_put_strz(nlh, TCA_KIND, "flower"); na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS); - mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW); for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { unsigned int i; @@ -1807,7 +3152,9 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, tcm->tcm_ifindex = ptoi[i].ifindex; break; case RTE_FLOW_ITEM_TYPE_ETH: - item_flags |= MLX5_FLOW_LAYER_OUTER_L2; + item_flags |= (item_flags & MLX5_FLOW_LAYER_VXLAN) ? + MLX5_FLOW_LAYER_INNER_L2 : + MLX5_FLOW_LAYER_OUTER_L2; mask.eth = flow_tcf_item_mask (items, &rte_flow_item_eth_mask, &flow_tcf_mask_supported.eth, @@ -1818,6 +3165,14 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, if (mask.eth == &flow_tcf_mask_empty.eth) break; spec.eth = items->spec; + if (decap.vxlan && + !(item_flags & MLX5_FLOW_LAYER_VXLAN)) { + DRV_LOG(WARNING, + "outer L2 addresses cannot be forced" + " for vxlan decapsulation, parameter" + " ignored"); + break; + } if (mask.eth->type) { mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE, spec.eth->type); @@ -1839,8 +3194,11 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, ETHER_ADDR_LEN, mask.eth->src.addr_bytes); } + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; case RTE_FLOW_ITEM_TYPE_VLAN: + assert(!encap.hdr); + assert(!decap.hdr); item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN; mask.vlan = flow_tcf_item_mask (items, &rte_flow_item_vlan_mask, @@ -1872,6 +3230,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, rte_be_to_cpu_16 (spec.vlan->tci & RTE_BE16(0x0fff))); + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; case RTE_FLOW_ITEM_TYPE_IPV4: item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; @@ -1882,36 +3241,53 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, sizeof(flow_tcf_mask_supported.ipv4), error); assert(mask.ipv4); - if (!eth_type_set || !vlan_eth_type_set) - mnl_attr_put_u16(nlh, + spec.ipv4 = items->spec; + if (!decap.vxlan) { + if (!eth_type_set && !vlan_eth_type_set) + mnl_attr_put_u16 + (nlh, vlan_present ? TCA_FLOWER_KEY_VLAN_ETH_TYPE : TCA_FLOWER_KEY_ETH_TYPE, RTE_BE16(ETH_P_IP)); - eth_type_set = 1; - vlan_eth_type_set = 1; - if (mask.ipv4 == &flow_tcf_mask_empty.ipv4) - break; - spec.ipv4 = items->spec; - if (mask.ipv4->hdr.next_proto_id) { - mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO, - spec.ipv4->hdr.next_proto_id); - ip_proto_set = 1; + eth_type_set = 1; + vlan_eth_type_set = 1; + if (mask.ipv4 == &flow_tcf_mask_empty.ipv4) + break; + if (mask.ipv4->hdr.next_proto_id) { + mnl_attr_put_u8 + (nlh, TCA_FLOWER_KEY_IP_PROTO, + spec.ipv4->hdr.next_proto_id); + ip_proto_set = 1; + } + } else { + assert(mask.ipv4 != &flow_tcf_mask_empty.ipv4); } if (mask.ipv4->hdr.src_addr) { - mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC, - spec.ipv4->hdr.src_addr); - mnl_attr_put_u32(nlh, - TCA_FLOWER_KEY_IPV4_SRC_MASK, - mask.ipv4->hdr.src_addr); + mnl_attr_put_u32 + (nlh, decap.vxlan ? + TCA_FLOWER_KEY_ENC_IPV4_SRC : + TCA_FLOWER_KEY_IPV4_SRC, + spec.ipv4->hdr.src_addr); + mnl_attr_put_u32 + (nlh, decap.vxlan ? + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK : + TCA_FLOWER_KEY_IPV4_SRC_MASK, + mask.ipv4->hdr.src_addr); } if (mask.ipv4->hdr.dst_addr) { - mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST, - spec.ipv4->hdr.dst_addr); - mnl_attr_put_u32(nlh, - TCA_FLOWER_KEY_IPV4_DST_MASK, - mask.ipv4->hdr.dst_addr); + mnl_attr_put_u32 + (nlh, decap.vxlan ? + TCA_FLOWER_KEY_ENC_IPV4_DST : + TCA_FLOWER_KEY_IPV4_DST, + spec.ipv4->hdr.dst_addr); + mnl_attr_put_u32 + (nlh, decap.vxlan ? + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK : + TCA_FLOWER_KEY_IPV4_DST_MASK, + mask.ipv4->hdr.dst_addr); } + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; case RTE_FLOW_ITEM_TYPE_IPV6: item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; @@ -1922,38 +3298,54 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, sizeof(flow_tcf_mask_supported.ipv6), error); assert(mask.ipv6); - if (!eth_type_set || !vlan_eth_type_set) - mnl_attr_put_u16(nlh, + spec.ipv6 = items->spec; + if (!decap.vxlan) { + if (!eth_type_set || !vlan_eth_type_set) { + mnl_attr_put_u16 + (nlh, vlan_present ? TCA_FLOWER_KEY_VLAN_ETH_TYPE : TCA_FLOWER_KEY_ETH_TYPE, RTE_BE16(ETH_P_IPV6)); - eth_type_set = 1; - vlan_eth_type_set = 1; - if (mask.ipv6 == &flow_tcf_mask_empty.ipv6) - break; - spec.ipv6 = items->spec; - if (mask.ipv6->hdr.proto) { - mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO, - spec.ipv6->hdr.proto); - ip_proto_set = 1; + } + eth_type_set = 1; + vlan_eth_type_set = 1; + if (mask.ipv6 == &flow_tcf_mask_empty.ipv6) + break; + if (mask.ipv6->hdr.proto) { + mnl_attr_put_u8 + (nlh, TCA_FLOWER_KEY_IP_PROTO, + spec.ipv6->hdr.proto); + ip_proto_set = 1; + } + } else { + assert(mask.ipv6 != &flow_tcf_mask_empty.ipv6); } if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) { - mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC, - sizeof(spec.ipv6->hdr.src_addr), + mnl_attr_put(nlh, decap.vxlan ? + TCA_FLOWER_KEY_ENC_IPV6_SRC : + TCA_FLOWER_KEY_IPV6_SRC, + IPV6_ADDR_LEN, spec.ipv6->hdr.src_addr); - mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK, - sizeof(mask.ipv6->hdr.src_addr), + mnl_attr_put(nlh, decap.vxlan ? + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK : + TCA_FLOWER_KEY_IPV6_SRC_MASK, + IPV6_ADDR_LEN, mask.ipv6->hdr.src_addr); } if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) { - mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST, - sizeof(spec.ipv6->hdr.dst_addr), + mnl_attr_put(nlh, decap.vxlan ? + TCA_FLOWER_KEY_ENC_IPV6_DST : + TCA_FLOWER_KEY_IPV6_DST, + IPV6_ADDR_LEN, spec.ipv6->hdr.dst_addr); - mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK, - sizeof(mask.ipv6->hdr.dst_addr), + mnl_attr_put(nlh, decap.vxlan ? + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK : + TCA_FLOWER_KEY_IPV6_DST_MASK, + IPV6_ADDR_LEN, mask.ipv6->hdr.dst_addr); } + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; case RTE_FLOW_ITEM_TYPE_UDP: item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP; @@ -1964,26 +3356,45 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, sizeof(flow_tcf_mask_supported.udp), error); assert(mask.udp); - if (!ip_proto_set) - mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO, - IPPROTO_UDP); - if (mask.udp == &flow_tcf_mask_empty.udp) - break; spec.udp = items->spec; + if (!decap.vxlan) { + if (!ip_proto_set) + mnl_attr_put_u8 + (nlh, TCA_FLOWER_KEY_IP_PROTO, + IPPROTO_UDP); + if (mask.udp == &flow_tcf_mask_empty.udp) + break; + } else { + assert(mask.udp != &flow_tcf_mask_empty.udp); + decap.vxlan->udp_port = + rte_be_to_cpu_16 + (spec.udp->hdr.dst_port); + } if (mask.udp->hdr.src_port) { - mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC, - spec.udp->hdr.src_port); - mnl_attr_put_u16(nlh, - TCA_FLOWER_KEY_UDP_SRC_MASK, - mask.udp->hdr.src_port); + mnl_attr_put_u16 + (nlh, decap.vxlan ? + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT : + TCA_FLOWER_KEY_UDP_SRC, + spec.udp->hdr.src_port); + mnl_attr_put_u16 + (nlh, decap.vxlan ? + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK : + TCA_FLOWER_KEY_UDP_SRC_MASK, + mask.udp->hdr.src_port); } if (mask.udp->hdr.dst_port) { - mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST, - spec.udp->hdr.dst_port); - mnl_attr_put_u16(nlh, - TCA_FLOWER_KEY_UDP_DST_MASK, - mask.udp->hdr.dst_port); + mnl_attr_put_u16 + (nlh, decap.vxlan ? + TCA_FLOWER_KEY_ENC_UDP_DST_PORT : + TCA_FLOWER_KEY_UDP_DST, + spec.udp->hdr.dst_port); + mnl_attr_put_u16 + (nlh, decap.vxlan ? + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK : + TCA_FLOWER_KEY_UDP_DST_MASK, + mask.udp->hdr.dst_port); } + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; case RTE_FLOW_ITEM_TYPE_TCP: item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP; @@ -2026,6 +3437,16 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, rte_cpu_to_be_16 (mask.tcp->hdr.tcp_flags)); } + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); + break; + case RTE_FLOW_ITEM_TYPE_VXLAN: + assert(decap.vxlan); + item_flags |= MLX5_FLOW_LAYER_VXLAN; + spec.vxlan = items->spec; + mnl_attr_put_u32(nlh, + TCA_FLOWER_KEY_ENC_KEY_ID, + vxlan_vni_as_be32(spec.vxlan->vni)); + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; default: return rte_flow_error_set(error, ENOTSUP, @@ -2059,6 +3480,14 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred"); na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS); assert(na_act); + if (encap.hdr) { + assert(dev_flow->tcf.tunnel); + dev_flow->tcf.tunnel->ifindex_ptr = + &((struct tc_mirred *) + mnl_attr_get_payload + (mnl_nlmsg_get_payload_tail + (nlh)))->ifindex; + } mnl_attr_put(nlh, TCA_MIRRED_PARMS, sizeof(struct tc_mirred), &(struct tc_mirred){ @@ -2176,6 +3605,74 @@ override_na_vlan_priority: conf.of_set_vlan_pcp->vlan_pcp; } break; + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: + assert(decap.vxlan); + assert(dev_flow->tcf.tunnel); + dev_flow->tcf.tunnel->ifindex_ptr = + (unsigned int *)&tcm->tcm_ifindex; + na_act_index = + mnl_attr_nest_start(nlh, na_act_index_cur++); + assert(na_act_index); + mnl_attr_put_strz(nlh, TCA_ACT_KIND, "tunnel_key"); + na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS); + assert(na_act); + mnl_attr_put(nlh, TCA_TUNNEL_KEY_PARMS, + sizeof(struct tc_tunnel_key), + &(struct tc_tunnel_key){ + .action = TC_ACT_PIPE, + .t_action = TCA_TUNNEL_KEY_ACT_RELEASE, + }); + mnl_attr_nest_end(nlh, na_act); + mnl_attr_nest_end(nlh, na_act_index); + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); + break; + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: + assert(encap.vxlan); + flow_tcf_vxlan_encap_parse(actions, encap.vxlan); + na_act_index = + mnl_attr_nest_start(nlh, na_act_index_cur++); + assert(na_act_index); + mnl_attr_put_strz(nlh, TCA_ACT_KIND, "tunnel_key"); + na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS); + assert(na_act); + mnl_attr_put(nlh, TCA_TUNNEL_KEY_PARMS, + sizeof(struct tc_tunnel_key), + &(struct tc_tunnel_key){ + .action = TC_ACT_PIPE, + .t_action = TCA_TUNNEL_KEY_ACT_SET, + }); + if (encap.vxlan->mask & FLOW_TCF_ENCAP_UDP_DST) + mnl_attr_put_u16(nlh, + TCA_TUNNEL_KEY_ENC_DST_PORT, + encap.vxlan->udp.dst); + if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV4_SRC) + mnl_attr_put_u32(nlh, + TCA_TUNNEL_KEY_ENC_IPV4_SRC, + encap.vxlan->ipv4.src); + if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV4_DST) + mnl_attr_put_u32(nlh, + TCA_TUNNEL_KEY_ENC_IPV4_DST, + encap.vxlan->ipv4.dst); + if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV6_SRC) + mnl_attr_put(nlh, + TCA_TUNNEL_KEY_ENC_IPV6_SRC, + sizeof(encap.vxlan->ipv6.src), + &encap.vxlan->ipv6.src); + if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV6_DST) + mnl_attr_put(nlh, + TCA_TUNNEL_KEY_ENC_IPV6_DST, + sizeof(encap.vxlan->ipv6.dst), + &encap.vxlan->ipv6.dst); + if (encap.vxlan->mask & FLOW_TCF_ENCAP_VXLAN_VNI) + mnl_attr_put_u32(nlh, + TCA_TUNNEL_KEY_ENC_KEY_ID, + vxlan_vni_as_be32 + (encap.vxlan->vxlan.vni)); + mnl_attr_put_u8(nlh, TCA_TUNNEL_KEY_NO_CSUM, 0); + mnl_attr_nest_end(nlh, na_act); + mnl_attr_nest_end(nlh, na_act_index); + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); + break; case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC: case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST: case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC: @@ -2202,47 +3699,1381 @@ override_na_vlan_priority: assert(na_flower); assert(na_flower_act); mnl_attr_nest_end(nlh, na_flower_act); + mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, decap.vxlan ? + 0 : TCA_CLS_FLAGS_SKIP_SW); mnl_attr_nest_end(nlh, na_flower); + if (dev_flow->tcf.tunnel && dev_flow->tcf.tunnel->ifindex_ptr) + dev_flow->tcf.tunnel->ifindex_org = + *dev_flow->tcf.tunnel->ifindex_ptr; + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); return 0; } /** * Send Netlink message with acknowledgment. * - * @param ctx + * @param tcf * Flow context to use. * @param nlh * Message to send. This function always raises the NLM_F_ACK flag before * sending. + * @param[in] msglen + * Message length. Message buffer may contain multiple commands and + * nlmsg_len field not always corresponds to actual message length. + * If 0 specified the nlmsg_len field in header is used as message length. + * @param[in] cb + * Callback handler for received message. + * @param[in] arg + * Context pointer for callback handler. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh) +flow_tcf_nl_ack(struct mlx5_flow_tcf_context *tcf, + struct nlmsghdr *nlh, + uint32_t msglen, + mnl_cb_t cb, void *arg) { - alignas(struct nlmsghdr) - uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) + - nlh->nlmsg_len - sizeof(*nlh)]; - uint32_t seq = ctx->seq++; - struct mnl_socket *nl = ctx->nl; - int ret; + unsigned int portid = mnl_socket_get_portid(tcf->nl); + uint32_t seq = tcf->seq++; + int err, ret; - nlh->nlmsg_flags |= NLM_F_ACK; + assert(tcf->nl); + assert(tcf->buf); + if (!seq) + /* seq 0 is reserved for kernel event-driven notifications. */ + seq = tcf->seq++; nlh->nlmsg_seq = seq; - ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len); - if (ret != -1) - ret = mnl_socket_recvfrom(nl, ans, sizeof(ans)); - if (ret != -1) - ret = mnl_cb_run - (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL); + if (!msglen) { + msglen = nlh->nlmsg_len; + nlh->nlmsg_flags |= NLM_F_ACK; + } + ret = mnl_socket_sendto(tcf->nl, nlh, msglen); + err = (ret <= 0) ? errno : 0; + nlh = (struct nlmsghdr *)(tcf->buf); + /* + * The following loop postpones non-fatal errors until multipart + * messages are complete. + */ if (ret > 0) + while (true) { + ret = mnl_socket_recvfrom(tcf->nl, tcf->buf, + tcf->buf_size); + if (ret < 0) { + err = errno; + if (err != ENOSPC) + break; + } + if (!err) { + ret = mnl_cb_run(nlh, ret, seq, portid, + cb, arg); + if (ret < 0) { + err = errno; + break; + } + } + /* Will receive till end of multipart message */ + if (!(nlh->nlmsg_flags & NLM_F_MULTI) || + nlh->nlmsg_type == NLMSG_DONE) + break; + } + if (!err) return 0; - rte_errno = errno; - return -rte_errno; + rte_errno = err; + return -err; +} + +#define MNL_BUF_EXTRA_SPACE 16 +#define MNL_REQUEST_SIZE_MIN 256 +#define MNL_REQUEST_SIZE_MAX 2048 +#define MNL_REQUEST_SIZE RTE_MIN(RTE_MAX(sysconf(_SC_PAGESIZE), \ + MNL_REQUEST_SIZE_MIN), MNL_REQUEST_SIZE_MAX) + +/* Data structures used by flow_tcf_xxx_cb() routines. */ +struct tcf_nlcb_buf { + LIST_ENTRY(tcf_nlcb_buf) next; + uint32_t size; + alignas(struct nlmsghdr) + uint8_t msg[]; /**< Netlink message data. */ +}; + +struct tcf_nlcb_context { + unsigned int ifindex; /**< Base interface index. */ + uint32_t bufsize; + LIST_HEAD(, tcf_nlcb_buf) nlbuf; +}; + +/** + * Allocate space for netlink command in buffer list + * + * @param[in, out] ctx + * Pointer to callback context with command buffers list. + * @param[in] size + * Required size of data buffer to be allocated. + * + * @return + * Pointer to allocated memory, aligned as message header. + * NULL if some error occurred. + */ +static struct nlmsghdr * +flow_tcf_alloc_nlcmd(struct tcf_nlcb_context *ctx, uint32_t size) +{ + struct tcf_nlcb_buf *buf; + struct nlmsghdr *nlh; + + size = NLMSG_ALIGN(size); + buf = LIST_FIRST(&ctx->nlbuf); + if (buf && (buf->size + size) <= ctx->bufsize) { + nlh = (struct nlmsghdr *)&buf->msg[buf->size]; + buf->size += size; + return nlh; + } + if (size > ctx->bufsize) { + DRV_LOG(WARNING, "netlink: too long command buffer requested"); + return NULL; + } + buf = rte_malloc(__func__, + ctx->bufsize + sizeof(struct tcf_nlcb_buf), + alignof(struct tcf_nlcb_buf)); + if (!buf) { + DRV_LOG(WARNING, "netlink: no memory for command buffer"); + return NULL; + } + LIST_INSERT_HEAD(&ctx->nlbuf, buf, next); + buf->size = size; + nlh = (struct nlmsghdr *)&buf->msg[0]; + return nlh; +} + +/** + * Set NLM_F_ACK flags in the last netlink command in buffer. + * Only last command in the buffer will be acked by system. + * + * @param[in, out] buf + * Pointer to buffer with netlink commands. + */ +static void +flow_tcf_setack_nlcmd(struct tcf_nlcb_buf *buf) +{ + struct nlmsghdr *nlh; + uint32_t size = 0; + + assert(buf->size); + do { + nlh = (struct nlmsghdr *)&buf->msg[size]; + size += NLMSG_ALIGN(nlh->nlmsg_len); + if (size >= buf->size) { + nlh->nlmsg_flags |= NLM_F_ACK; + break; + } + } while (true); +} + +/** + * Send the buffers with prepared netlink commands. Scans the list and + * sends all found buffers. Buffers are sent and freed anyway in order + * to prevent memory leakage if some every message in received packet. + * + * @param[in] tcf + * Context object initialized by mlx5_flow_tcf_context_create(). + * @param[in, out] ctx + * Pointer to callback context with command buffers list. + * + * @return + * Zero value on success, negative errno value otherwise + * and rte_errno is set. + */ +static int +flow_tcf_send_nlcmd(struct mlx5_flow_tcf_context *tcf, + struct tcf_nlcb_context *ctx) +{ + struct tcf_nlcb_buf *bc, *bn; + struct nlmsghdr *nlh; + int ret = 0; + + bc = LIST_FIRST(&ctx->nlbuf); + while (bc) { + int rc; + + bn = LIST_NEXT(bc, next); + if (bc->size) { + flow_tcf_setack_nlcmd(bc); + nlh = (struct nlmsghdr *)&bc->msg; + rc = flow_tcf_nl_ack(tcf, nlh, bc->size, NULL, NULL); + if (rc && !ret) + ret = rc; + } + rte_free(bc); + bc = bn; + } + LIST_INIT(&ctx->nlbuf); + return ret; +} + +/** + * Collect local IP address rules with scope link attribute on specified + * network device. This is callback routine called by libmnl mnl_cb_run() + * in loop for every message in received packet. + * + * @param[in] nlh + * Pointer to reply header. + * @param[in, out] arg + * Opaque data pointer for this callback. + * + * @return + * A positive, nonzero value on success, negative errno value otherwise + * and rte_errno is set. + */ +static int +flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg) +{ + struct tcf_nlcb_context *ctx = arg; + struct nlmsghdr *cmd; + struct ifaddrmsg *ifa; + struct nlattr *na; + struct nlattr *na_local = NULL; + struct nlattr *na_peer = NULL; + unsigned char family; + + if (nlh->nlmsg_type != RTM_NEWADDR) { + rte_errno = EINVAL; + return -rte_errno; + } + ifa = mnl_nlmsg_get_payload(nlh); + family = ifa->ifa_family; + if (ifa->ifa_index != ctx->ifindex || + ifa->ifa_scope != RT_SCOPE_LINK || + !(ifa->ifa_flags & IFA_F_PERMANENT) || + (family != AF_INET && family != AF_INET6)) + return 1; + mnl_attr_for_each(na, nlh, sizeof(*ifa)) { + switch (mnl_attr_get_type(na)) { + case IFA_LOCAL: + na_local = na; + break; + case IFA_ADDRESS: + na_peer = na; + break; + } + if (na_local && na_peer) + break; + } + if (!na_local || !na_peer) + return 1; + /* Local rule found with scope link, permanent and assigned peer. */ + cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) + + MNL_ALIGN(sizeof(struct ifaddrmsg)) + + (family == AF_INET6 + ? 2 * SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) + : 2 * SZ_NLATTR_TYPE_OF(uint32_t))); + if (!cmd) { + rte_errno = ENOMEM; + return -rte_errno; + } + cmd = mnl_nlmsg_put_header(cmd); + cmd->nlmsg_type = RTM_DELADDR; + cmd->nlmsg_flags = NLM_F_REQUEST; + ifa = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifa)); + ifa->ifa_flags = IFA_F_PERMANENT; + ifa->ifa_scope = RT_SCOPE_LINK; + ifa->ifa_index = ctx->ifindex; + if (family == AF_INET) { + ifa->ifa_family = AF_INET; + ifa->ifa_prefixlen = 32; + mnl_attr_put_u32(cmd, IFA_LOCAL, mnl_attr_get_u32(na_local)); + mnl_attr_put_u32(cmd, IFA_ADDRESS, mnl_attr_get_u32(na_peer)); + } else { + ifa->ifa_family = AF_INET6; + ifa->ifa_prefixlen = 128; + mnl_attr_put(cmd, IFA_LOCAL, IPV6_ADDR_LEN, + mnl_attr_get_payload(na_local)); + mnl_attr_put(cmd, IFA_ADDRESS, IPV6_ADDR_LEN, + mnl_attr_get_payload(na_peer)); + } + return 1; +} + +/** + * Cleanup the local IP addresses on outer interface. + * + * @param[in] tcf + * Context object initialized by mlx5_flow_tcf_context_create(). + * @param[in] ifindex + * Network inferface index to perform cleanup. + */ +static void +flow_tcf_encap_local_cleanup(struct mlx5_flow_tcf_context *tcf, + unsigned int ifindex) +{ + struct nlmsghdr *nlh; + struct ifaddrmsg *ifa; + struct tcf_nlcb_context ctx = { + .ifindex = ifindex, + .bufsize = MNL_REQUEST_SIZE, + .nlbuf = LIST_HEAD_INITIALIZER(), + }; + int ret; + + assert(ifindex); + /* + * Seek and destroy leftovers of local IP addresses with + * matching properties "scope link". + */ + nlh = mnl_nlmsg_put_header(tcf->buf); + nlh->nlmsg_type = RTM_GETADDR; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + ifa = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifa)); + ifa->ifa_family = AF_UNSPEC; + ifa->ifa_index = ifindex; + ifa->ifa_scope = RT_SCOPE_LINK; + ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_local_cb, &ctx); + if (ret) + DRV_LOG(WARNING, "netlink: query device list error %d", ret); + ret = flow_tcf_send_nlcmd(tcf, &ctx); + if (ret) + DRV_LOG(WARNING, "netlink: device delete error %d", ret); +} + +/** + * Collect neigh permament rules on specified network device. + * This is callback routine called by libmnl mnl_cb_run() in loop for + * every message in received packet. + * + * @param[in] nlh + * Pointer to reply header. + * @param[in, out] arg + * Opaque data pointer for this callback. + * + * @return + * A positive, nonzero value on success, negative errno value otherwise + * and rte_errno is set. + */ +static int +flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg) +{ + struct tcf_nlcb_context *ctx = arg; + struct nlmsghdr *cmd; + struct ndmsg *ndm; + struct nlattr *na; + struct nlattr *na_ip = NULL; + struct nlattr *na_mac = NULL; + unsigned char family; + + if (nlh->nlmsg_type != RTM_NEWNEIGH) { + rte_errno = EINVAL; + return -rte_errno; + } + ndm = mnl_nlmsg_get_payload(nlh); + family = ndm->ndm_family; + if (ndm->ndm_ifindex != (int)ctx->ifindex || + !(ndm->ndm_state & NUD_PERMANENT) || + (family != AF_INET && family != AF_INET6)) + return 1; + mnl_attr_for_each(na, nlh, sizeof(*ndm)) { + switch (mnl_attr_get_type(na)) { + case NDA_DST: + na_ip = na; + break; + case NDA_LLADDR: + na_mac = na; + break; + } + if (na_mac && na_ip) + break; + } + if (!na_mac || !na_ip) + return 1; + /* Neigh rule with permenent attribute found. */ + cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) + + MNL_ALIGN(sizeof(struct ndmsg)) + + SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) + + (family == AF_INET6 + ? SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) + : SZ_NLATTR_TYPE_OF(uint32_t))); + if (!cmd) { + rte_errno = ENOMEM; + return -rte_errno; + } + cmd = mnl_nlmsg_put_header(cmd); + cmd->nlmsg_type = RTM_DELNEIGH; + cmd->nlmsg_flags = NLM_F_REQUEST; + ndm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ndm)); + ndm->ndm_ifindex = ctx->ifindex; + ndm->ndm_state = NUD_PERMANENT; + ndm->ndm_flags = 0; + ndm->ndm_type = 0; + if (family == AF_INET) { + ndm->ndm_family = AF_INET; + mnl_attr_put_u32(cmd, NDA_DST, mnl_attr_get_u32(na_ip)); + } else { + ndm->ndm_family = AF_INET6; + mnl_attr_put(cmd, NDA_DST, IPV6_ADDR_LEN, + mnl_attr_get_payload(na_ip)); + } + mnl_attr_put(cmd, NDA_LLADDR, ETHER_ADDR_LEN, + mnl_attr_get_payload(na_mac)); + return 1; +} + +/** + * Cleanup the neigh rules on outer interface. + * + * @param[in] tcf + * Context object initialized by mlx5_flow_tcf_context_create(). + * @param[in] ifindex + * Network inferface index to perform cleanup. + */ +static void +flow_tcf_encap_neigh_cleanup(struct mlx5_flow_tcf_context *tcf, + unsigned int ifindex) +{ + struct nlmsghdr *nlh; + struct ndmsg *ndm; + struct tcf_nlcb_context ctx = { + .ifindex = ifindex, + .bufsize = MNL_REQUEST_SIZE, + .nlbuf = LIST_HEAD_INITIALIZER(), + }; + int ret; + + assert(ifindex); + /* Seek and destroy leftovers of neigh rules. */ + nlh = mnl_nlmsg_put_header(tcf->buf); + nlh->nlmsg_type = RTM_GETNEIGH; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + ndm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ndm)); + ndm->ndm_family = AF_UNSPEC; + ndm->ndm_ifindex = ifindex; + ndm->ndm_state = NUD_PERMANENT; + ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_neigh_cb, &ctx); + if (ret) + DRV_LOG(WARNING, "netlink: query device list error %d", ret); + ret = flow_tcf_send_nlcmd(tcf, &ctx); + if (ret) + DRV_LOG(WARNING, "netlink: device delete error %d", ret); +} + +/** + * Collect indices of VXLAN encap/decap interfaces associated with device. + * This is callback routine called by libmnl mnl_cb_run() in loop for + * every message in received packet. + * + * @param[in] nlh + * Pointer to reply header. + * @param[in, out] arg + * Opaque data pointer for this callback. + * + * @return + * A positive, nonzero value on success, negative errno value otherwise + * and rte_errno is set. + */ +static int +flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg) +{ + struct tcf_nlcb_context *ctx = arg; + struct nlmsghdr *cmd; + struct ifinfomsg *ifm; + struct nlattr *na; + struct nlattr *na_info = NULL; + struct nlattr *na_vxlan = NULL; + bool found = false; + unsigned int vxindex; + + if (nlh->nlmsg_type != RTM_NEWLINK) { + rte_errno = EINVAL; + return -rte_errno; + } + ifm = mnl_nlmsg_get_payload(nlh); + if (!ifm->ifi_index) { + rte_errno = EINVAL; + return -rte_errno; + } + mnl_attr_for_each(na, nlh, sizeof(*ifm)) + if (mnl_attr_get_type(na) == IFLA_LINKINFO) { + na_info = na; + break; + } + if (!na_info) + return 1; + mnl_attr_for_each_nested(na, na_info) { + switch (mnl_attr_get_type(na)) { + case IFLA_INFO_KIND: + if (!strncmp("vxlan", mnl_attr_get_str(na), + mnl_attr_get_len(na))) + found = true; + break; + case IFLA_INFO_DATA: + na_vxlan = na; + break; + } + if (found && na_vxlan) + break; + } + if (!found || !na_vxlan) + return 1; + found = false; + mnl_attr_for_each_nested(na, na_vxlan) { + if (mnl_attr_get_type(na) == IFLA_VXLAN_LINK && + mnl_attr_get_u32(na) == ctx->ifindex) { + found = true; + break; + } + } + if (!found) + return 1; + /* Attached VXLAN device found, store the command to delete. */ + vxindex = ifm->ifi_index; + cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) + + MNL_ALIGN(sizeof(struct ifinfomsg))); + if (!nlh) { + rte_errno = ENOMEM; + return -rte_errno; + } + cmd = mnl_nlmsg_put_header(cmd); + cmd->nlmsg_type = RTM_DELLINK; + cmd->nlmsg_flags = NLM_F_REQUEST; + ifm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifm)); + ifm->ifi_family = AF_UNSPEC; + ifm->ifi_index = vxindex; + return 1; +} + +/** + * Cleanup the outer interface. Removes all found vxlan devices + * attached to specified index, flushes the meigh and local IP + * datavase. + * + * @param[in] tcf + * Context object initialized by mlx5_flow_tcf_context_create(). + * @param[in] ifindex + * Network inferface index to perform cleanup. + */ +static void +flow_tcf_encap_iface_cleanup(struct mlx5_flow_tcf_context *tcf, + unsigned int ifindex) +{ + struct nlmsghdr *nlh; + struct ifinfomsg *ifm; + struct tcf_nlcb_context ctx = { + .ifindex = ifindex, + .bufsize = MNL_REQUEST_SIZE, + .nlbuf = LIST_HEAD_INITIALIZER(), + }; + int ret; + + assert(ifindex); + /* + * Seek and destroy leftover VXLAN encap/decap interfaces with + * matching properties. + */ + nlh = mnl_nlmsg_put_header(tcf->buf); + nlh->nlmsg_type = RTM_GETLINK; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm)); + ifm->ifi_family = AF_UNSPEC; + ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_vxlan_cb, &ctx); + if (ret) + DRV_LOG(WARNING, "netlink: query device list error %d", ret); + ret = flow_tcf_send_nlcmd(tcf, &ctx); + if (ret) + DRV_LOG(WARNING, "netlink: device delete error %d", ret); +} + +/** + * Emit Netlink message to add/remove local address to the outer device. + * The address being added is visible within the link only (scope link). + * + * Note that an implicit route is maintained by the kernel due to the + * presence of a peer address (IFA_ADDRESS). + * + * These rules are used for encapsultion only and allow to assign + * the outer tunnel source IP address. + * + * @param[in] tcf + * Libmnl socket context object. + * @param[in] encap + * Encapsulation properties (source address and its peer). + * @param[in] ifindex + * Network interface to apply rule. + * @param[in] enable + * Toggle between add and remove. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_tcf_rule_local(struct mlx5_flow_tcf_context *tcf, + const struct flow_tcf_vxlan_encap *encap, + unsigned int ifindex, + bool enable, + struct rte_flow_error *error) +{ + struct nlmsghdr *nlh; + struct ifaddrmsg *ifa; + alignas(struct nlmsghdr) + uint8_t buf[mnl_nlmsg_size(sizeof(*ifa) + 128)]; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = enable ? RTM_NEWADDR : RTM_DELADDR; + nlh->nlmsg_flags = + NLM_F_REQUEST | (enable ? NLM_F_CREATE | NLM_F_REPLACE : 0); + nlh->nlmsg_seq = 0; + ifa = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifa)); + ifa->ifa_flags = IFA_F_PERMANENT; + ifa->ifa_scope = RT_SCOPE_LINK; + ifa->ifa_index = ifindex; + if (encap->mask & FLOW_TCF_ENCAP_IPV4_SRC) { + ifa->ifa_family = AF_INET; + ifa->ifa_prefixlen = 32; + mnl_attr_put_u32(nlh, IFA_LOCAL, encap->ipv4.src); + if (encap->mask & FLOW_TCF_ENCAP_IPV4_DST) + mnl_attr_put_u32(nlh, IFA_ADDRESS, + encap->ipv4.dst); + } else { + assert(encap->mask & FLOW_TCF_ENCAP_IPV6_SRC); + ifa->ifa_family = AF_INET6; + ifa->ifa_prefixlen = 128; + mnl_attr_put(nlh, IFA_LOCAL, + sizeof(encap->ipv6.src), + &encap->ipv6.src); + if (encap->mask & FLOW_TCF_ENCAP_IPV6_DST) + mnl_attr_put(nlh, IFA_ADDRESS, + sizeof(encap->ipv6.dst), + &encap->ipv6.dst); + } + if (!flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL)) + return 0; + return rte_flow_error_set(error, rte_errno, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "netlink: cannot complete IFA request" + " (ip addr add)"); +} + +/** + * Emit Netlink message to add/remove neighbor. + * + * @param[in] tcf + * Libmnl socket context object. + * @param[in] encap + * Encapsulation properties (destination address). + * @param[in] ifindex + * Network interface. + * @param[in] enable + * Toggle between add and remove. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_tcf_rule_neigh(struct mlx5_flow_tcf_context *tcf, + const struct flow_tcf_vxlan_encap *encap, + unsigned int ifindex, + bool enable, + struct rte_flow_error *error) +{ + struct nlmsghdr *nlh; + struct ndmsg *ndm; + alignas(struct nlmsghdr) + uint8_t buf[mnl_nlmsg_size(sizeof(*ndm) + 128)]; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = enable ? RTM_NEWNEIGH : RTM_DELNEIGH; + nlh->nlmsg_flags = + NLM_F_REQUEST | (enable ? NLM_F_CREATE | NLM_F_REPLACE : 0); + nlh->nlmsg_seq = 0; + ndm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ndm)); + ndm->ndm_ifindex = ifindex; + ndm->ndm_state = NUD_PERMANENT; + ndm->ndm_flags = 0; + ndm->ndm_type = 0; + if (encap->mask & FLOW_TCF_ENCAP_IPV4_DST) { + ndm->ndm_family = AF_INET; + mnl_attr_put_u32(nlh, NDA_DST, encap->ipv4.dst); + } else { + assert(encap->mask & FLOW_TCF_ENCAP_IPV6_DST); + ndm->ndm_family = AF_INET6; + mnl_attr_put(nlh, NDA_DST, sizeof(encap->ipv6.dst), + &encap->ipv6.dst); + } + if (encap->mask & FLOW_TCF_ENCAP_ETH_SRC && enable) + DRV_LOG(WARNING, + "outer ethernet source address cannot be " + "forced for VXLAN encapsulation"); + if (encap->mask & FLOW_TCF_ENCAP_ETH_DST) + mnl_attr_put(nlh, NDA_LLADDR, sizeof(encap->eth.dst), + &encap->eth.dst); + if (!flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL)) + return 0; + return rte_flow_error_set(error, rte_errno, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "netlink: cannot complete ND request" + " (ip neigh)"); +} + +/** + * Manage the local IP addresses and their peers IP addresses on the + * outer interface for encapsulation purposes. The kernel searches the + * appropriate device for tunnel egress traffic using the outer source + * IP, this IP should be assigned to the outer network device, otherwise + * kernel rejects the rule. + * + * Adds or removes the addresses using the Netlink command like this: + * ip addr add <src_ip> peer <dst_ip> scope link dev <ifouter> + * + * The addresses are local to the netdev ("scope link"), this reduces + * the risk of conflicts. Note that an implicit route is maintained by + * the kernel due to the presence of a peer address (IFA_ADDRESS). + * + * @param[in] tcf + * Libmnl socket context object. + * @param[in] vtep + * VTEP object, contains rule database and ifouter index. + * @param[in] dev_flow + * Flow object, contains the tunnel parameters (for encap only). + * @param[in] enable + * Toggle between add and remove. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_tcf_encap_local(struct mlx5_flow_tcf_context *tcf, + struct tcf_vtep *vtep, + struct mlx5_flow *dev_flow, + bool enable, + struct rte_flow_error *error) +{ + const struct flow_tcf_vxlan_encap *encap = dev_flow->tcf.vxlan_encap; + struct tcf_local_rule *rule; + bool found = false; + int ret; + + assert(encap); + assert(encap->hdr.type == FLOW_TCF_TUNACT_VXLAN_ENCAP); + if (encap->mask & FLOW_TCF_ENCAP_IPV4_SRC) { + assert(encap->mask & FLOW_TCF_ENCAP_IPV4_DST); + LIST_FOREACH(rule, &vtep->local, next) { + if (rule->mask & FLOW_TCF_ENCAP_IPV4_SRC && + encap->ipv4.src == rule->ipv4.src && + encap->ipv4.dst == rule->ipv4.dst) { + found = true; + break; + } + } + } else { + assert(encap->mask & FLOW_TCF_ENCAP_IPV6_SRC); + assert(encap->mask & FLOW_TCF_ENCAP_IPV6_DST); + LIST_FOREACH(rule, &vtep->local, next) { + if (rule->mask & FLOW_TCF_ENCAP_IPV6_SRC && + !memcmp(&encap->ipv6.src, &rule->ipv6.src, + sizeof(encap->ipv6.src)) && + !memcmp(&encap->ipv6.dst, &rule->ipv6.dst, + sizeof(encap->ipv6.dst))) { + found = true; + break; + } + } + } + if (found) { + if (enable) { + rule->refcnt++; + return 0; + } + if (!rule->refcnt || !--rule->refcnt) { + LIST_REMOVE(rule, next); + return flow_tcf_rule_local(tcf, encap, + vtep->ifouter, false, error); + } + return 0; + } + if (!enable) { + DRV_LOG(WARNING, "disabling not existing local rule"); + rte_flow_error_set(error, ENOENT, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "disabling not existing local rule"); + return -ENOENT; + } + rule = rte_zmalloc(__func__, sizeof(struct tcf_local_rule), + alignof(struct tcf_local_rule)); + if (!rule) { + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "unable to allocate memory for local rule"); + return -rte_errno; + } + *rule = (struct tcf_local_rule){.refcnt = 0, + .mask = 0, + }; + if (encap->mask & FLOW_TCF_ENCAP_IPV4_SRC) { + rule->mask = FLOW_TCF_ENCAP_IPV4_SRC + | FLOW_TCF_ENCAP_IPV4_DST; + rule->ipv4.src = encap->ipv4.src; + rule->ipv4.dst = encap->ipv4.dst; + } else { + rule->mask = FLOW_TCF_ENCAP_IPV6_SRC + | FLOW_TCF_ENCAP_IPV6_DST; + memcpy(&rule->ipv6.src, &encap->ipv6.src, IPV6_ADDR_LEN); + memcpy(&rule->ipv6.dst, &encap->ipv6.dst, IPV6_ADDR_LEN); + } + ret = flow_tcf_rule_local(tcf, encap, vtep->ifouter, true, error); + if (ret) { + rte_free(rule); + return ret; + } + rule->refcnt++; + LIST_INSERT_HEAD(&vtep->local, rule, next); + return 0; +} + +/** + * Manage the destination MAC/IP addresses neigh database, kernel uses + * this one to determine the destination MAC address within encapsulation + * header. Adds or removes the entries using the Netlink command like this: + * ip neigh add dev <ifouter> lladdr <dst_mac> to <dst_ip> nud permanent + * + * @param[in] tcf + * Libmnl socket context object. + * @param[in] vtep + * VTEP object, contains rule database and ifouter index. + * @param[in] dev_flow + * Flow object, contains the tunnel parameters (for encap only). + * @param[in] enable + * Toggle between add and remove. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_tcf_encap_neigh(struct mlx5_flow_tcf_context *tcf, + struct tcf_vtep *vtep, + struct mlx5_flow *dev_flow, + bool enable, + struct rte_flow_error *error) +{ + const struct flow_tcf_vxlan_encap *encap = dev_flow->tcf.vxlan_encap; + struct tcf_neigh_rule *rule; + bool found = false; + int ret; + + assert(encap); + assert(encap->hdr.type == FLOW_TCF_TUNACT_VXLAN_ENCAP); + if (encap->mask & FLOW_TCF_ENCAP_IPV4_DST) { + assert(encap->mask & FLOW_TCF_ENCAP_IPV4_SRC); + LIST_FOREACH(rule, &vtep->neigh, next) { + if (rule->mask & FLOW_TCF_ENCAP_IPV4_DST && + encap->ipv4.dst == rule->ipv4.dst) { + found = true; + break; + } + } + } else { + assert(encap->mask & FLOW_TCF_ENCAP_IPV6_SRC); + assert(encap->mask & FLOW_TCF_ENCAP_IPV6_DST); + LIST_FOREACH(rule, &vtep->neigh, next) { + if (rule->mask & FLOW_TCF_ENCAP_IPV6_DST && + !memcmp(&encap->ipv6.dst, &rule->ipv6.dst, + sizeof(encap->ipv6.dst))) { + found = true; + break; + } + } + } + if (found) { + if (memcmp(&encap->eth.dst, &rule->eth, + sizeof(encap->eth.dst))) { + DRV_LOG(WARNING, "Destination MAC differs" + " in neigh rule"); + rte_flow_error_set(error, EEXIST, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "Different MAC address" + " neigh rule for the same" + " destination IP"); + return -EEXIST; + } + if (enable) { + rule->refcnt++; + return 0; + } + if (!rule->refcnt || !--rule->refcnt) { + LIST_REMOVE(rule, next); + return flow_tcf_rule_neigh(tcf, encap, + vtep->ifouter, + false, error); + } + return 0; + } + if (!enable) { + DRV_LOG(WARNING, "Disabling not existing neigh rule"); + rte_flow_error_set(error, ENOENT, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "unable to allocate memory for neigh rule"); + return -ENOENT; + } + rule = rte_zmalloc(__func__, sizeof(struct tcf_neigh_rule), + alignof(struct tcf_neigh_rule)); + if (!rule) { + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "unable to allocate memory for neigh rule"); + return -rte_errno; + } + *rule = (struct tcf_neigh_rule){.refcnt = 0, + .mask = 0, + }; + if (encap->mask & FLOW_TCF_ENCAP_IPV4_DST) { + rule->mask = FLOW_TCF_ENCAP_IPV4_DST; + rule->ipv4.dst = encap->ipv4.dst; + } else { + rule->mask = FLOW_TCF_ENCAP_IPV6_DST; + memcpy(&rule->ipv6.dst, &encap->ipv6.dst, IPV6_ADDR_LEN); + } + memcpy(&rule->eth, &encap->eth.dst, sizeof(rule->eth)); + ret = flow_tcf_rule_neigh(tcf, encap, vtep->ifouter, true, error); + if (ret) { + rte_free(rule); + return ret; + } + rule->refcnt++; + LIST_INSERT_HEAD(&vtep->neigh, rule, next); + return 0; +} + +/* VTEP device list is shared between PMD port instances. */ +static LIST_HEAD(, tcf_vtep) vtep_list_vxlan = LIST_HEAD_INITIALIZER(); +static pthread_mutex_t vtep_list_mutex = PTHREAD_MUTEX_INITIALIZER; + +/** + * Deletes VTEP network device. + * + * @param[in] tcf + * Context object initialized by mlx5_flow_tcf_context_create(). + * @param[in] vtep + * Object represinting the network device to delete. Memory + * allocated for this object is freed by routine. + */ +static void +flow_tcf_vtep_delete(struct mlx5_flow_tcf_context *tcf, + struct tcf_vtep *vtep) +{ + struct nlmsghdr *nlh; + struct ifinfomsg *ifm; + alignas(struct nlmsghdr) + uint8_t buf[mnl_nlmsg_size(MNL_ALIGN(sizeof(*ifm))) + + MNL_BUF_EXTRA_SPACE]; + int ret; + + assert(!vtep->refcnt); + /* Delete only ifaces those we actually created. */ + if (vtep->created && vtep->ifindex) { + DRV_LOG(INFO, "VTEP delete (%d)", vtep->ifindex); + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = RTM_DELLINK; + nlh->nlmsg_flags = NLM_F_REQUEST; + ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm)); + ifm->ifi_family = AF_UNSPEC; + ifm->ifi_index = vtep->ifindex; + assert(sizeof(buf) >= nlh->nlmsg_len); + ret = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL); + if (ret) + DRV_LOG(WARNING, "netlink: error deleting vxlan" + " encap/decap ifindex %u", + ifm->ifi_index); + } + rte_free(vtep); +} + +/** + * Creates VTEP network device. + * + * @param[in] tcf + * Context object initialized by mlx5_flow_tcf_context_create(). + * @param[in] ifouter + * Outer interface to attach new-created VXLAN device + * If zero the VXLAN device will not be attached to any device. + * These VTEPs are used for decapsulation and can be precreated + * and shared between processes. + * @param[in] port + * UDP port of created VTEP device. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * Pointer to created device structure on success, + * NULL otherwise and rte_errno is set. + */ +#ifdef HAVE_IFLA_VXLAN_COLLECT_METADATA +static struct tcf_vtep* +flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf, + unsigned int ifouter, + uint16_t port, struct rte_flow_error *error) +{ + struct tcf_vtep *vtep; + struct nlmsghdr *nlh; + struct ifinfomsg *ifm; + char name[sizeof(MLX5_VXLAN_DEVICE_PFX) + 24]; + alignas(struct nlmsghdr) + uint8_t buf[mnl_nlmsg_size(sizeof(*ifm)) + + SZ_NLATTR_DATA_OF(sizeof(name)) + + SZ_NLATTR_NEST * 2 + + SZ_NLATTR_STRZ_OF("vxlan") + + SZ_NLATTR_DATA_OF(sizeof(uint32_t)) + + SZ_NLATTR_DATA_OF(sizeof(uint16_t)) + + SZ_NLATTR_DATA_OF(sizeof(uint8_t)) * 3 + + MNL_BUF_EXTRA_SPACE]; + struct nlattr *na_info; + struct nlattr *na_vxlan; + rte_be16_t vxlan_port = rte_cpu_to_be_16(port); + int ret; + + vtep = rte_zmalloc(__func__, sizeof(*vtep), alignof(struct tcf_vtep)); + if (!vtep) { + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "unable to allocate memory for VTEP"); + return NULL; + } + *vtep = (struct tcf_vtep){ + .port = port, + .local = LIST_HEAD_INITIALIZER(), + .neigh = LIST_HEAD_INITIALIZER(), + }; + memset(buf, 0, sizeof(buf)); + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = RTM_NEWLINK; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm)); + ifm->ifi_family = AF_UNSPEC; + ifm->ifi_type = 0; + ifm->ifi_index = 0; + ifm->ifi_flags = IFF_UP; + ifm->ifi_change = 0xffffffff; + snprintf(name, sizeof(name), "%s%u", MLX5_VXLAN_DEVICE_PFX, port); + mnl_attr_put_strz(nlh, IFLA_IFNAME, name); + na_info = mnl_attr_nest_start(nlh, IFLA_LINKINFO); + assert(na_info); + mnl_attr_put_strz(nlh, IFLA_INFO_KIND, "vxlan"); + na_vxlan = mnl_attr_nest_start(nlh, IFLA_INFO_DATA); + if (ifouter) + mnl_attr_put_u32(nlh, IFLA_VXLAN_LINK, ifouter); + assert(na_vxlan); + mnl_attr_put_u8(nlh, IFLA_VXLAN_COLLECT_METADATA, 1); + mnl_attr_put_u8(nlh, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, 1); + mnl_attr_put_u8(nlh, IFLA_VXLAN_LEARNING, 0); + mnl_attr_put_u16(nlh, IFLA_VXLAN_PORT, vxlan_port); + mnl_attr_nest_end(nlh, na_vxlan); + mnl_attr_nest_end(nlh, na_info); + assert(sizeof(buf) >= nlh->nlmsg_len); + ret = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL); + if (ret) { + DRV_LOG(WARNING, + "netlink: VTEP %s create failure (%d)", + name, rte_errno); + if (rte_errno != EEXIST || ifouter) + /* + * Some unhandled error occurred or device is + * for encapsulation and cannot be shared. + */ + goto error; + } else { + /* + * Mark device we actually created. + * We should explicitly delete + * when we do not need it anymore. + */ + vtep->created = 1; + } + /* Try to get ifindex of created of pre-existing device. */ + ret = if_nametoindex(name); + if (!ret) { + DRV_LOG(WARNING, + "VTEP %s failed to get index (%d)", name, errno); + rte_flow_error_set + (error, -errno, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "netlink: failed to retrieve VTEP ifindex"); + goto error; + } + vtep->ifindex = ret; + vtep->ifouter = ifouter; + memset(buf, 0, sizeof(buf)); + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = RTM_NEWLINK; + nlh->nlmsg_flags = NLM_F_REQUEST; + ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm)); + ifm->ifi_family = AF_UNSPEC; + ifm->ifi_type = 0; + ifm->ifi_index = vtep->ifindex; + ifm->ifi_flags = IFF_UP; + ifm->ifi_change = IFF_UP; + ret = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL); + if (ret) { + rte_flow_error_set(error, -errno, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "netlink: failed to set VTEP link up"); + DRV_LOG(WARNING, "netlink: VTEP %s set link up failure (%d)", + name, rte_errno); + goto clean; + } + ret = mlx5_flow_tcf_init(tcf, vtep->ifindex, error); + if (ret) { + DRV_LOG(WARNING, "VTEP %s init failure (%d)", name, rte_errno); + goto clean; + } + DRV_LOG(INFO, "VTEP create (%d, %d)", vtep->port, vtep->ifindex); + vtep->refcnt = 1; + return vtep; +clean: + flow_tcf_vtep_delete(tcf, vtep); + return NULL; +error: + rte_free(vtep); + return NULL; +} +#else +static struct tcf_vtep* +flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf __rte_unused, + unsigned int ifouter __rte_unused, + uint16_t port __rte_unused, + struct rte_flow_error *error) +{ + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "netlink: failed to create VTEP, " + "vxlan metadata are not supported by kernel"); + return NULL; +} +#endif /* HAVE_IFLA_VXLAN_COLLECT_METADATA */ + +/** + * Acquire target interface index for VXLAN tunneling decapsulation. + * In order to share the UDP port within the other interfaces the + * VXLAN device created as not attached to any interface (if created). + * + * @param[in] tcf + * Context object initialized by mlx5_flow_tcf_context_create(). + * @param[in] dev_flow + * Flow tcf object with tunnel structure pointer set. + * @param[out] error + * Perform verbose error reporting if not NULL. + * @return + * Interface descriptor pointer on success, + * NULL otherwise and rte_errno is set. + */ +static struct tcf_vtep* +flow_tcf_decap_vtep_acquire(struct mlx5_flow_tcf_context *tcf, + struct mlx5_flow *dev_flow, + struct rte_flow_error *error) +{ + struct tcf_vtep *vtep; + uint16_t port = dev_flow->tcf.vxlan_decap->udp_port; + + LIST_FOREACH(vtep, &vtep_list_vxlan, next) { + if (vtep->port == port) + break; + } + if (vtep && vtep->ifouter) { + rte_flow_error_set(error, -errno, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "Failed to create decap VTEP with specified" + " UDP port, atatched device exists"); + return NULL; + } + if (vtep) { + /* Device exists, just increment the reference counter. */ + vtep->refcnt++; + assert(vtep->ifindex); + return vtep; + } + /* No decapsulation device exists, try to create the new one. */ + vtep = flow_tcf_vtep_create(tcf, 0, port, error); + if (vtep) + LIST_INSERT_HEAD(&vtep_list_vxlan, vtep, next); + return vtep; +} + +/** + * Aqcuire target interface index for VXLAN tunneling encapsulation. + * + * @param[in] tcf + * Context object initialized by mlx5_flow_tcf_context_create(). + * @param[in] ifouter + * Network interface index to attach VXLAN encap device to. + * @param[in] dev_flow + * Flow tcf object with tunnel structure pointer set. + * @param[out] error + * Perform verbose error reporting if not NULL. + * @return + * Interface descriptor pointer on success, + * NULL otherwise and rte_errno is set. + */ +static struct tcf_vtep* +flow_tcf_encap_vtep_acquire(struct mlx5_flow_tcf_context *tcf, + unsigned int ifouter, + struct mlx5_flow *dev_flow __rte_unused, + struct rte_flow_error *error) +{ + static uint16_t encap_port = MLX5_VXLAN_PORT_MIN - 1; + struct tcf_vtep *vtep; + int ret; + + assert(ifouter); + /* Look whether the attached VTEP for encap is created. */ + LIST_FOREACH(vtep, &vtep_list_vxlan, next) { + if (vtep->ifouter == ifouter) + break; + } + if (vtep) { + /* VTEP already exists, just increment the reference. */ + vtep->refcnt++; + } else { + uint16_t pcnt; + + /* Not found, we should create the new attached VTEP. */ + flow_tcf_encap_iface_cleanup(tcf, ifouter); + flow_tcf_encap_local_cleanup(tcf, ifouter); + flow_tcf_encap_neigh_cleanup(tcf, ifouter); + for (pcnt = 0; pcnt <= (MLX5_VXLAN_PORT_MAX + - MLX5_VXLAN_PORT_MIN); pcnt++) { + encap_port++; + /* Wraparound the UDP port index. */ + if (encap_port < MLX5_VXLAN_PORT_MIN || + encap_port > MLX5_VXLAN_PORT_MAX) + encap_port = MLX5_VXLAN_PORT_MIN; + /* Check whether UDP port is in already in use. */ + LIST_FOREACH(vtep, &vtep_list_vxlan, next) { + if (vtep->port == encap_port) + break; + } + if (vtep) { + /* Port is in use, try the next one. */ + vtep = NULL; + continue; + } + vtep = flow_tcf_vtep_create(tcf, ifouter, + encap_port, error); + if (vtep) { + LIST_INSERT_HEAD(&vtep_list_vxlan, vtep, next); + break; + } + if (rte_errno != EEXIST) + break; + } + if (!vtep) + return NULL; + } + assert(vtep->ifouter == ifouter); + assert(vtep->ifindex); + /* Create local ipaddr with peer to specify the outer IPs. */ + ret = flow_tcf_encap_local(tcf, vtep, dev_flow, true, error); + if (!ret) { + /* Create neigh rule to specify outer destination MAC. */ + ret = flow_tcf_encap_neigh(tcf, vtep, dev_flow, true, error); + if (ret) + flow_tcf_encap_local(tcf, vtep, + dev_flow, false, error); + } + if (ret) { + if (--vtep->refcnt == 0) + flow_tcf_vtep_delete(tcf, vtep); + return NULL; + } + return vtep; } /** + * Acquires target interface index for tunneling of any type. + * Creates the new VTEP if needed. + * + * @param[in] tcf + * Context object initialized by mlx5_flow_tcf_context_create(). + * @param[in] ifouter + * Network interface index to attach VXLAN encap device to. + * @param[in] dev_flow + * Flow tcf object with tunnel structure pointer set. + * @param[out] error + * Perform verbose error reporting if not NULL. + * @return + * Interface descriptor pointer on success, + * NULL otherwise and rte_errno is set. + */ +static struct tcf_vtep* +flow_tcf_vtep_acquire(struct mlx5_flow_tcf_context *tcf, + unsigned int ifouter, + struct mlx5_flow *dev_flow, + struct rte_flow_error *error) +{ + struct tcf_vtep *vtep = NULL; + + assert(dev_flow->tcf.tunnel); + pthread_mutex_lock(&vtep_list_mutex); + switch (dev_flow->tcf.tunnel->type) { + case FLOW_TCF_TUNACT_VXLAN_ENCAP: + vtep = flow_tcf_encap_vtep_acquire(tcf, ifouter, + dev_flow, error); + break; + case FLOW_TCF_TUNACT_VXLAN_DECAP: + vtep = flow_tcf_decap_vtep_acquire(tcf, dev_flow, error); + break; + default: + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "unsupported tunnel type"); + break; + } + pthread_mutex_unlock(&vtep_list_mutex); + return vtep; +} + +/** + * Release tunneling interface by ifindex. Decrements reference + * counter and actually removes the device if counter is zero. + * + * @param[in] tcf + * Context object initialized by mlx5_flow_tcf_context_create(). + * @param[in] vtep + * VTEP device descriptor structure. + * @param[in] dev_flow + * Flow tcf object with tunnel structure pointer set. + */ +static void +flow_tcf_vtep_release(struct mlx5_flow_tcf_context *tcf, + struct tcf_vtep *vtep, + struct mlx5_flow *dev_flow) +{ + assert(dev_flow->tcf.tunnel); + pthread_mutex_lock(&vtep_list_mutex); + switch (dev_flow->tcf.tunnel->type) { + case FLOW_TCF_TUNACT_VXLAN_DECAP: + break; + case FLOW_TCF_TUNACT_VXLAN_ENCAP: + /* Remove the encap ancillary rules first. */ + flow_tcf_encap_neigh(tcf, vtep, dev_flow, false, NULL); + flow_tcf_encap_local(tcf, vtep, dev_flow, false, NULL); + break; + default: + assert(false); + DRV_LOG(WARNING, "Unsupported tunnel type"); + break; + } + assert(vtep->refcnt); + if (--vtep->refcnt == 0) { + LIST_REMOVE(vtep, next); + flow_tcf_vtep_delete(tcf, vtep); + } + pthread_mutex_unlock(&vtep_list_mutex); +} + + +/** * Apply flow to E-Switch by sending Netlink message. * * @param[in] dev @@ -2267,11 +5098,35 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow, dev_flow = LIST_FIRST(&flow->dev_flows); /* E-Switch flow can't be expanded. */ assert(!LIST_NEXT(dev_flow, next)); + if (dev_flow->tcf.applied) + return 0; nlh = dev_flow->tcf.nlh; nlh->nlmsg_type = RTM_NEWTFILTER; nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; - if (!flow_tcf_nl_ack(ctx, nlh)) + if (dev_flow->tcf.tunnel) { + /* + * Replace the interface index, target for + * encapsulation, source for decapsulation. + */ + assert(!dev_flow->tcf.tunnel->vtep); + assert(dev_flow->tcf.tunnel->ifindex_ptr); + /* Acquire actual VTEP device when rule is being applied. */ + dev_flow->tcf.tunnel->vtep = + flow_tcf_vtep_acquire(ctx, + dev_flow->tcf.tunnel->ifindex_org, + dev_flow, error); + if (!dev_flow->tcf.tunnel->vtep) + return -rte_errno; + DRV_LOG(INFO, "Replace ifindex: %d->%d", + dev_flow->tcf.tunnel->vtep->ifindex, + dev_flow->tcf.tunnel->ifindex_org); + *dev_flow->tcf.tunnel->ifindex_ptr = + dev_flow->tcf.tunnel->vtep->ifindex; + } + if (!flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL)) { + dev_flow->tcf.applied = 1; return 0; + } return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "netlink: failed to create TC flow rule"); @@ -2295,21 +5150,25 @@ flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow) if (!flow) return; - if (flow->counter) { - if (--flow->counter->ref_cnt == 0) { - rte_free(flow->counter); - flow->counter = NULL; - } - } dev_flow = LIST_FIRST(&flow->dev_flows); if (!dev_flow) return; /* E-Switch flow can't be expanded. */ assert(!LIST_NEXT(dev_flow, next)); - nlh = dev_flow->tcf.nlh; - nlh->nlmsg_type = RTM_DELTFILTER; - nlh->nlmsg_flags = NLM_F_REQUEST; - flow_tcf_nl_ack(ctx, nlh); + if (dev_flow->tcf.applied) { + nlh = dev_flow->tcf.nlh; + nlh->nlmsg_type = RTM_DELTFILTER; + nlh->nlmsg_flags = NLM_F_REQUEST; + flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL); + if (dev_flow->tcf.tunnel) { + assert(dev_flow->tcf.tunnel->vtep); + flow_tcf_vtep_release(ctx, + dev_flow->tcf.tunnel->vtep, + dev_flow); + dev_flow->tcf.tunnel->vtep = NULL; + } + dev_flow->tcf.applied = 0; + } } /** @@ -2328,6 +5187,12 @@ flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) if (!flow) return; flow_tcf_remove(dev, flow); + if (flow->counter) { + if (--flow->counter->ref_cnt == 0) { + rte_free(flow->counter); + flow->counter = NULL; + } + } dev_flow = LIST_FIRST(&flow->dev_flows); if (!dev_flow) return; @@ -2830,7 +5695,9 @@ mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh; struct tcmsg *tcm; alignas(struct nlmsghdr) - uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)]; + uint8_t buf[mnl_nlmsg_size(sizeof(*tcm)) + + SZ_NLATTR_STRZ_OF("ingress") + + MNL_BUF_EXTRA_SPACE]; /* Destroy existing ingress qdisc and everything attached to it. */ nlh = mnl_nlmsg_put_header(buf); @@ -2841,8 +5708,9 @@ mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx, tcm->tcm_ifindex = ifindex; tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0); tcm->tcm_parent = TC_H_INGRESS; + assert(sizeof(buf) >= nlh->nlmsg_len); /* Ignore errors when qdisc is already absent. */ - if (flow_tcf_nl_ack(ctx, nlh) && + if (flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL) && rte_errno != EINVAL && rte_errno != ENOENT) return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -2858,7 +5726,8 @@ mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx, tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0); tcm->tcm_parent = TC_H_INGRESS; mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress"); - if (flow_tcf_nl_ack(ctx, nlh)) + assert(sizeof(buf) >= nlh->nlmsg_len); + if (flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL)) return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "netlink: failed to create ingress" diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c index 81bc39f9..699cc88c 100644 --- a/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/drivers/net/mlx5/mlx5_flow_verbs.c @@ -33,6 +33,9 @@ #include "mlx5_glue.h" #include "mlx5_flow.h" +#define VERBS_SPEC_INNER(item_flags) \ + (!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0) + /** * Create Verbs flow counter with Verbs library. * @@ -231,27 +234,26 @@ flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused, } /** - * Add a verbs item specification into @p flow. + * Add a verbs item specification into @p verbs. * - * @param[in, out] flow - * Pointer to flow structure. + * @param[out] verbs + * Pointer to verbs structure. * @param[in] src * Create specification. * @param[in] size * Size in bytes of the specification to copy. */ static void -flow_verbs_spec_add(struct mlx5_flow *flow, void *src, unsigned int size) +flow_verbs_spec_add(struct mlx5_flow_verbs *verbs, void *src, unsigned int size) { - struct mlx5_flow_verbs *verbs = &flow->verbs; + void *dst; - if (verbs->specs) { - void *dst; - - dst = (void *)(verbs->specs + verbs->size); - memcpy(dst, src, size); - ++verbs->attr->num_of_specs; - } + if (!verbs) + return; + assert(verbs->specs); + dst = (void *)(verbs->specs + verbs->size); + memcpy(dst, src, size); + ++verbs->attr->num_of_specs; verbs->size += size; } @@ -260,24 +262,23 @@ flow_verbs_spec_add(struct mlx5_flow *flow, void *src, unsigned int size) * the input is valid and that there is space to insert the requested item * into the flow. * + * @param[in, out] dev_flow + * Pointer to dev_flow structure. * @param[in] item * Item specification. * @param[in] item_flags - * Bit field with all detected items. - * @param[in, out] dev_flow - * Pointer to dev_flow structure. + * Parsed item flags. */ static void -flow_verbs_translate_item_eth(const struct rte_flow_item *item, - uint64_t *item_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow, + const struct rte_flow_item *item, + uint64_t item_flags) { const struct rte_flow_item_eth *spec = item->spec; const struct rte_flow_item_eth *mask = item->mask; - const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL); const unsigned int size = sizeof(struct ibv_flow_spec_eth); struct ibv_flow_spec_eth eth = { - .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0), + .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags), .size = size, }; @@ -298,11 +299,8 @@ flow_verbs_translate_item_eth(const struct rte_flow_item *item, eth.val.src_mac[i] &= eth.mask.src_mac[i]; } eth.val.ether_type &= eth.mask.ether_type; - dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2; } - flow_verbs_spec_add(dev_flow, ð, size); - *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 : - MLX5_FLOW_LAYER_OUTER_L2; + flow_verbs_spec_add(&dev_flow->verbs, ð, size); } /** @@ -344,24 +342,24 @@ flow_verbs_item_vlan_update(struct ibv_flow_attr *attr, * the input is valid and that there is space to insert the requested item * into the flow. * - * @param[in] item - * Item specification. - * @param[in, out] item_flags - * Bit mask that holds all detected items. * @param[in, out] dev_flow * Pointer to dev_flow structure. + * @param[in] item + * Item specification. + * @param[in] item_flags + * Parsed item flags. */ static void -flow_verbs_translate_item_vlan(const struct rte_flow_item *item, - uint64_t *item_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow, + const struct rte_flow_item *item, + uint64_t item_flags) { const struct rte_flow_item_vlan *spec = item->spec; const struct rte_flow_item_vlan *mask = item->mask; unsigned int size = sizeof(struct ibv_flow_spec_eth); - const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL); + const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); struct ibv_flow_spec_eth eth = { - .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0), + .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags), .size = size, }; const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : @@ -377,16 +375,10 @@ flow_verbs_translate_item_vlan(const struct rte_flow_item *item, eth.mask.ether_type = mask->inner_type; eth.val.ether_type &= eth.mask.ether_type; } - if (!(*item_flags & l2m)) { - dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2; - flow_verbs_spec_add(dev_flow, ð, size); - } else { + if (!(item_flags & l2m)) + flow_verbs_spec_add(&dev_flow->verbs, ð, size); + else flow_verbs_item_vlan_update(dev_flow->verbs.attr, ð); - size = 0; /* Only an update is done in eth specification. */ - } - *item_flags |= tunnel ? - (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) : - (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN); } /** @@ -394,32 +386,28 @@ flow_verbs_translate_item_vlan(const struct rte_flow_item *item, * the input is valid and that there is space to insert the requested item * into the flow. * + * @param[in, out] dev_flow + * Pointer to dev_flow structure. * @param[in] item * Item specification. - * @param[in, out] item_flags - * Bit mask that marks all detected items. - * @param[in, out] dev_flow - * Pointer to sepacific flow structure. + * @param[in] item_flags + * Parsed item flags. */ static void -flow_verbs_translate_item_ipv4(const struct rte_flow_item *item, - uint64_t *item_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow, + const struct rte_flow_item *item, + uint64_t item_flags) { const struct rte_flow_item_ipv4 *spec = item->spec; const struct rte_flow_item_ipv4 *mask = item->mask; - const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL); unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext); struct ibv_flow_spec_ipv4_ext ipv4 = { - .type = IBV_FLOW_SPEC_IPV4_EXT | - (tunnel ? IBV_FLOW_SPEC_INNER : 0), + .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags), .size = size, }; if (!mask) mask = &rte_flow_item_ipv4_mask; - *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : - MLX5_FLOW_LAYER_OUTER_L3_IPV4; if (spec) { ipv4.val = (struct ibv_flow_ipv4_ext_filter){ .src_ip = spec->hdr.src_addr, @@ -439,12 +427,7 @@ flow_verbs_translate_item_ipv4(const struct rte_flow_item *item, ipv4.val.proto &= ipv4.mask.proto; ipv4.val.tos &= ipv4.mask.tos; } - dev_flow->verbs.hash_fields |= - mlx5_flow_hashfields_adjust(dev_flow, tunnel, - MLX5_IPV4_LAYER_TYPES, - MLX5_IPV4_IBV_RX_HASH); - dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3; - flow_verbs_spec_add(dev_flow, &ipv4, size); + flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size); } /** @@ -452,31 +435,28 @@ flow_verbs_translate_item_ipv4(const struct rte_flow_item *item, * the input is valid and that there is space to insert the requested item * into the flow. * + * @param[in, out] dev_flow + * Pointer to dev_flow structure. * @param[in] item * Item specification. - * @param[in, out] item_flags - * Bit mask that marks all detected items. - * @param[in, out] dev_flow - * Pointer to sepacific flow structure. + * @param[in] item_flags + * Parsed item flags. */ static void -flow_verbs_translate_item_ipv6(const struct rte_flow_item *item, - uint64_t *item_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow, + const struct rte_flow_item *item, + uint64_t item_flags) { const struct rte_flow_item_ipv6 *spec = item->spec; const struct rte_flow_item_ipv6 *mask = item->mask; - const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL); unsigned int size = sizeof(struct ibv_flow_spec_ipv6); struct ibv_flow_spec_ipv6 ipv6 = { - .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0), + .type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags), .size = size, }; if (!mask) mask = &rte_flow_item_ipv6_mask; - *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : - MLX5_FLOW_LAYER_OUTER_L3_IPV6; if (spec) { unsigned int i; uint32_t vtc_flow_val; @@ -516,12 +496,7 @@ flow_verbs_translate_item_ipv6(const struct rte_flow_item *item, ipv6.val.next_hdr &= ipv6.mask.next_hdr; ipv6.val.hop_limit &= ipv6.mask.hop_limit; } - dev_flow->verbs.hash_fields |= - mlx5_flow_hashfields_adjust(dev_flow, tunnel, - MLX5_IPV6_LAYER_TYPES, - MLX5_IPV6_IBV_RX_HASH); - dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3; - flow_verbs_spec_add(dev_flow, &ipv6, size); + flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size); } /** @@ -529,46 +504,38 @@ flow_verbs_translate_item_ipv6(const struct rte_flow_item *item, * the input is valid and that there is space to insert the requested item * into the flow. * + * @param[in, out] dev_flow + * Pointer to dev_flow structure. * @param[in] item * Item specification. - * @param[in, out] item_flags - * Bit mask that marks all detected items. - * @param[in, out] dev_flow - * Pointer to sepacific flow structure. + * @param[in] item_flags + * Parsed item flags. */ static void -flow_verbs_translate_item_udp(const struct rte_flow_item *item, - uint64_t *item_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow, + const struct rte_flow_item *item, + uint64_t item_flags __rte_unused) { - const struct rte_flow_item_udp *spec = item->spec; - const struct rte_flow_item_udp *mask = item->mask; - const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL); + const struct rte_flow_item_tcp *spec = item->spec; + const struct rte_flow_item_tcp *mask = item->mask; unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp); - struct ibv_flow_spec_tcp_udp udp = { - .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0), + struct ibv_flow_spec_tcp_udp tcp = { + .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags), .size = size, }; if (!mask) - mask = &rte_flow_item_udp_mask; - *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP : - MLX5_FLOW_LAYER_OUTER_L4_UDP; + mask = &rte_flow_item_tcp_mask; if (spec) { - udp.val.dst_port = spec->hdr.dst_port; - udp.val.src_port = spec->hdr.src_port; - udp.mask.dst_port = mask->hdr.dst_port; - udp.mask.src_port = mask->hdr.src_port; + tcp.val.dst_port = spec->hdr.dst_port; + tcp.val.src_port = spec->hdr.src_port; + tcp.mask.dst_port = mask->hdr.dst_port; + tcp.mask.src_port = mask->hdr.src_port; /* Remove unwanted bits from values. */ - udp.val.src_port &= udp.mask.src_port; - udp.val.dst_port &= udp.mask.dst_port; + tcp.val.src_port &= tcp.mask.src_port; + tcp.val.dst_port &= tcp.mask.dst_port; } - dev_flow->verbs.hash_fields |= - mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_UDP, - (IBV_RX_HASH_SRC_PORT_UDP | - IBV_RX_HASH_DST_PORT_UDP)); - dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4; - flow_verbs_spec_add(dev_flow, &udp, size); + flow_verbs_spec_add(&dev_flow->verbs, &tcp, size); } /** @@ -576,46 +543,38 @@ flow_verbs_translate_item_udp(const struct rte_flow_item *item, * the input is valid and that there is space to insert the requested item * into the flow. * + * @param[in, out] dev_flow + * Pointer to dev_flow structure. * @param[in] item * Item specification. - * @param[in, out] item_flags - * Bit mask that marks all detected items. - * @param[in, out] dev_flow - * Pointer to sepacific flow structure. + * @param[in] item_flags + * Parsed item flags. */ static void -flow_verbs_translate_item_tcp(const struct rte_flow_item *item, - uint64_t *item_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow, + const struct rte_flow_item *item, + uint64_t item_flags __rte_unused) { - const struct rte_flow_item_tcp *spec = item->spec; - const struct rte_flow_item_tcp *mask = item->mask; - const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL); + const struct rte_flow_item_udp *spec = item->spec; + const struct rte_flow_item_udp *mask = item->mask; unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp); - struct ibv_flow_spec_tcp_udp tcp = { - .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0), + struct ibv_flow_spec_tcp_udp udp = { + .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags), .size = size, }; if (!mask) - mask = &rte_flow_item_tcp_mask; - *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : - MLX5_FLOW_LAYER_OUTER_L4_TCP; + mask = &rte_flow_item_udp_mask; if (spec) { - tcp.val.dst_port = spec->hdr.dst_port; - tcp.val.src_port = spec->hdr.src_port; - tcp.mask.dst_port = mask->hdr.dst_port; - tcp.mask.src_port = mask->hdr.src_port; + udp.val.dst_port = spec->hdr.dst_port; + udp.val.src_port = spec->hdr.src_port; + udp.mask.dst_port = mask->hdr.dst_port; + udp.mask.src_port = mask->hdr.src_port; /* Remove unwanted bits from values. */ - tcp.val.src_port &= tcp.mask.src_port; - tcp.val.dst_port &= tcp.mask.dst_port; + udp.val.src_port &= udp.mask.src_port; + udp.val.dst_port &= udp.mask.dst_port; } - dev_flow->verbs.hash_fields |= - mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_TCP, - (IBV_RX_HASH_SRC_PORT_TCP | - IBV_RX_HASH_DST_PORT_TCP)); - dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4; - flow_verbs_spec_add(dev_flow, &tcp, size); + flow_verbs_spec_add(&dev_flow->verbs, &udp, size); } /** @@ -623,17 +582,17 @@ flow_verbs_translate_item_tcp(const struct rte_flow_item *item, * the input is valid and that there is space to insert the requested item * into the flow. * + * @param[in, out] dev_flow + * Pointer to dev_flow structure. * @param[in] item * Item specification. - * @param[in, out] item_flags - * Bit mask that marks all detected items. - * @param[in, out] dev_flow - * Pointer to sepacific flow structure. + * @param[in] item_flags + * Parsed item flags. */ static void -flow_verbs_translate_item_vxlan(const struct rte_flow_item *item, - uint64_t *item_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow, + const struct rte_flow_item *item, + uint64_t item_flags __rte_unused) { const struct rte_flow_item_vxlan *spec = item->spec; const struct rte_flow_item_vxlan *mask = item->mask; @@ -657,9 +616,7 @@ flow_verbs_translate_item_vxlan(const struct rte_flow_item *item, /* Remove unwanted bits from values. */ vxlan.val.tunnel_id &= vxlan.mask.tunnel_id; } - flow_verbs_spec_add(dev_flow, &vxlan, size); - dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2; - *item_flags |= MLX5_FLOW_LAYER_VXLAN; + flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size); } /** @@ -667,17 +624,17 @@ flow_verbs_translate_item_vxlan(const struct rte_flow_item *item, * the input is valid and that there is space to insert the requested item * into the flow. * + * @param[in, out] dev_flow + * Pointer to dev_flow structure. * @param[in] item * Item specification. - * @param[in, out] item_flags - * Bit mask that marks all detected items. - * @param[in, out] dev_flow - * Pointer to sepacific flow structure. + * @param[in] item_flags + * Parsed item flags. */ static void -flow_verbs_translate_item_vxlan_gpe(const struct rte_flow_item *item, - uint64_t *item_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow, + const struct rte_flow_item *item, + uint64_t item_flags __rte_unused) { const struct rte_flow_item_vxlan_gpe *spec = item->spec; const struct rte_flow_item_vxlan_gpe *mask = item->mask; @@ -701,9 +658,7 @@ flow_verbs_translate_item_vxlan_gpe(const struct rte_flow_item *item, /* Remove unwanted bits from values. */ vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id; } - flow_verbs_spec_add(dev_flow, &vxlan_gpe, size); - dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2; - *item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE; + flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size); } /** @@ -763,17 +718,17 @@ flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr, * the input is valid and that there is space to insert the requested item * into the flow. * + * @param[in, out] dev_flow + * Pointer to dev_flow structure. * @param[in] item * Item specification. - * @param[in, out] item_flags - * Bit mask that marks all detected items. - * @param[in, out] dev_flow - * Pointer to sepacific flow structure. + * @param[in] item_flags + * Parsed item flags. */ static void -flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused, - uint64_t *item_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow, + const struct rte_flow_item *item __rte_unused, + uint64_t item_flags) { struct mlx5_flow_verbs *verbs = &dev_flow->verbs; #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT @@ -804,7 +759,7 @@ flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused, tunnel.val.key &= tunnel.mask.key; } #endif - if (*item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) + if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) flow_verbs_item_gre_ip_protocol_update(verbs->attr, IBV_FLOW_SPEC_IPV4_EXT, IPPROTO_GRE); @@ -812,9 +767,7 @@ flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused, flow_verbs_item_gre_ip_protocol_update(verbs->attr, IBV_FLOW_SPEC_IPV6, IPPROTO_GRE); - flow_verbs_spec_add(dev_flow, &tunnel, size); - verbs->attr->priority = MLX5_PRIORITY_MAP_L2; - *item_flags |= MLX5_FLOW_LAYER_GRE; + flow_verbs_spec_add(verbs, &tunnel, size); } /** @@ -822,17 +775,17 @@ flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused, * the input is valid and that there is space to insert the requested action * into the flow. This function also return the action that was added. * + * @param[in, out] dev_flow + * Pointer to dev_flow structure. * @param[in] item * Item specification. - * @param[in, out] item_flags - * Bit mask that marks all detected items. - * @param[in, out] dev_flow - * Pointer to sepacific flow structure. + * @param[in] item_flags + * Parsed item flags. */ static void -flow_verbs_translate_item_mpls(const struct rte_flow_item *item __rte_unused, - uint64_t *action_flags __rte_unused, - struct mlx5_flow *dev_flow __rte_unused) +flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused, + const struct rte_flow_item *item __rte_unused, + uint64_t item_flags __rte_unused) { #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT const struct rte_flow_item_mpls *spec = item->spec; @@ -851,25 +804,24 @@ flow_verbs_translate_item_mpls(const struct rte_flow_item *item __rte_unused, /* Remove unwanted bits from values. */ mpls.val.label &= mpls.mask.label; } - flow_verbs_spec_add(dev_flow, &mpls, size); - dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2; - *action_flags |= MLX5_FLOW_LAYER_MPLS; + flow_verbs_spec_add(&dev_flow->verbs, &mpls, size); #endif } /** * Convert the @p action into a Verbs specification. This function assumes that * the input is valid and that there is space to insert the requested action - * into the flow. This function also return the action that was added. + * into the flow. * - * @param[in, out] action_flags - * Pointer to the detected actions. * @param[in] dev_flow * Pointer to mlx5_flow. + * @param[in] action + * Action configuration. */ static void -flow_verbs_translate_action_drop(uint64_t *action_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_action_drop + (struct mlx5_flow *dev_flow, + const struct rte_flow_action *action __rte_unused) { unsigned int size = sizeof(struct ibv_flow_spec_action_drop); struct ibv_flow_spec_action_drop drop = { @@ -877,26 +829,22 @@ flow_verbs_translate_action_drop(uint64_t *action_flags, .size = size, }; - flow_verbs_spec_add(dev_flow, &drop, size); - *action_flags |= MLX5_FLOW_ACTION_DROP; + flow_verbs_spec_add(&dev_flow->verbs, &drop, size); } /** * Convert the @p action into a Verbs specification. This function assumes that * the input is valid and that there is space to insert the requested action - * into the flow. This function also return the action that was added. + * into the flow. * - * @param[in] action - * Action configuration. - * @param[in, out] action_flags - * Pointer to the detected actions. * @param[in] dev_flow * Pointer to mlx5_flow. + * @param[in] action + * Action configuration. */ static void -flow_verbs_translate_action_queue(const struct rte_flow_action *action, - uint64_t *action_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_action_queue(struct mlx5_flow *dev_flow, + const struct rte_flow_action *action) { const struct rte_flow_action_queue *queue = action->conf; struct rte_flow *flow = dev_flow->flow; @@ -904,13 +852,12 @@ flow_verbs_translate_action_queue(const struct rte_flow_action *action, if (flow->queue) (*flow->queue)[0] = queue->index; flow->rss.queue_num = 1; - *action_flags |= MLX5_FLOW_ACTION_QUEUE; } /** * Convert the @p action into a Verbs specification. This function assumes that * the input is valid and that there is space to insert the requested action - * into the flow. This function also return the action that was added. + * into the flow. * * @param[in] action * Action configuration. @@ -920,40 +867,39 @@ flow_verbs_translate_action_queue(const struct rte_flow_action *action, * Pointer to mlx5_flow. */ static void -flow_verbs_translate_action_rss(const struct rte_flow_action *action, - uint64_t *action_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_action_rss(struct mlx5_flow *dev_flow, + const struct rte_flow_action *action) { const struct rte_flow_action_rss *rss = action->conf; + const uint8_t *rss_key; struct rte_flow *flow = dev_flow->flow; if (flow->queue) memcpy((*flow->queue), rss->queue, rss->queue_num * sizeof(uint16_t)); flow->rss.queue_num = rss->queue_num; - memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN); - flow->rss.types = rss->types; + /* NULL RSS key indicates default RSS key. */ + rss_key = !rss->key ? rss_hash_default_key : rss->key; + memcpy(flow->key, rss_key, MLX5_RSS_HASH_KEY_LEN); + /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */ + flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types; flow->rss.level = rss->level; - *action_flags |= MLX5_FLOW_ACTION_RSS; } /** * Convert the @p action into a Verbs specification. This function assumes that * the input is valid and that there is space to insert the requested action - * into the flow. This function also return the action that was added. + * into the flow. * - * @param[in] action - * Action configuration. - * @param[in, out] action_flags - * Pointer to the detected actions. * @param[in] dev_flow * Pointer to mlx5_flow. + * @param[in] action + * Action configuration. */ static void flow_verbs_translate_action_flag - (const struct rte_flow_action *action __rte_unused, - uint64_t *action_flags, - struct mlx5_flow *dev_flow) + (struct mlx5_flow *dev_flow, + const struct rte_flow_action *action __rte_unused) { unsigned int size = sizeof(struct ibv_flow_spec_action_tag); struct ibv_flow_spec_action_tag tag = { @@ -961,87 +907,44 @@ flow_verbs_translate_action_flag .size = size, .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT), }; - *action_flags |= MLX5_FLOW_ACTION_MARK; - flow_verbs_spec_add(dev_flow, &tag, size); -} -/** - * Update verbs specification to modify the flag to mark. - * - * @param[in, out] verbs - * Pointer to the mlx5_flow_verbs structure. - * @param[in] mark_id - * Mark identifier to replace the flag. - */ -static void -flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id) -{ - struct ibv_spec_header *hdr; - int i; - - if (!verbs) - return; - /* Update Verbs specification. */ - hdr = (struct ibv_spec_header *)verbs->specs; - if (!hdr) - return; - for (i = 0; i != verbs->attr->num_of_specs; ++i) { - if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) { - struct ibv_flow_spec_action_tag *t = - (struct ibv_flow_spec_action_tag *)hdr; - - t->tag_id = mlx5_flow_mark_set(mark_id); - } - hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size); - } + flow_verbs_spec_add(&dev_flow->verbs, &tag, size); } /** * Convert the @p action into a Verbs specification. This function assumes that * the input is valid and that there is space to insert the requested action - * into the flow. This function also return the action that was added. + * into the flow. * - * @param[in] action - * Action configuration. - * @param[in, out] action_flags - * Pointer to the detected actions. * @param[in] dev_flow * Pointer to mlx5_flow. + * @param[in] action + * Action configuration. */ static void -flow_verbs_translate_action_mark(const struct rte_flow_action *action, - uint64_t *action_flags, - struct mlx5_flow *dev_flow) +flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow, + const struct rte_flow_action *action) { const struct rte_flow_action_mark *mark = action->conf; unsigned int size = sizeof(struct ibv_flow_spec_action_tag); struct ibv_flow_spec_action_tag tag = { .type = IBV_FLOW_SPEC_ACTION_TAG, .size = size, + .tag_id = mlx5_flow_mark_set(mark->id), }; - struct mlx5_flow_verbs *verbs = &dev_flow->verbs; - if (*action_flags & MLX5_FLOW_ACTION_FLAG) { - flow_verbs_mark_update(verbs, mark->id); - size = 0; - } else { - tag.tag_id = mlx5_flow_mark_set(mark->id); - flow_verbs_spec_add(dev_flow, &tag, size); - } - *action_flags |= MLX5_FLOW_ACTION_MARK; + flow_verbs_spec_add(&dev_flow->verbs, &tag, size); } /** * Convert the @p action into a Verbs specification. This function assumes that * the input is valid and that there is space to insert the requested action - * into the flow. This function also return the action that was added. + * into the flow. * * @param[in] dev * Pointer to the Ethernet device structure. * @param[in] action * Action configuration. - * @param[in, out] action_flags - * Pointer to the detected actions. * @param[in] dev_flow * Pointer to mlx5_flow. * @param[out] error @@ -1051,10 +954,9 @@ flow_verbs_translate_action_mark(const struct rte_flow_action *action, * 0 On success else a negative errno value is returned and rte_errno is set. */ static int -flow_verbs_translate_action_count(struct rte_eth_dev *dev, +flow_verbs_translate_action_count(struct mlx5_flow *dev_flow, const struct rte_flow_action *action, - uint64_t *action_flags, - struct mlx5_flow *dev_flow, + struct rte_eth_dev *dev, struct rte_flow_error *error) { const struct rte_flow_action_count *count = action->conf; @@ -1078,13 +980,12 @@ flow_verbs_translate_action_count(struct rte_eth_dev *dev, "cannot get counter" " context."); } - *action_flags |= MLX5_FLOW_ACTION_COUNT; #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) counter.counter_set_handle = flow->counter->cs->handle; - flow_verbs_spec_add(dev_flow, &counter, size); + flow_verbs_spec_add(&dev_flow->verbs, &counter, size); #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45) counter.counters = flow->counter->cs; - flow_verbs_spec_add(dev_flow, &counter, size); + flow_verbs_spec_add(&dev_flow->verbs, &counter, size); #endif return 0; } @@ -1116,7 +1017,6 @@ flow_verbs_validate(struct rte_eth_dev *dev, int ret; uint64_t action_flags = 0; uint64_t item_flags = 0; - int tunnel = 0; uint8_t next_protocol = 0xff; if (items == NULL) @@ -1125,9 +1025,9 @@ flow_verbs_validate(struct rte_eth_dev *dev, if (ret < 0) return ret; for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { + int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); int ret = 0; - tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); switch (items->type) { case RTE_FLOW_ITEM_TYPE_VOID: break; @@ -1144,8 +1044,10 @@ flow_verbs_validate(struct rte_eth_dev *dev, error); if (ret < 0) return ret; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : - MLX5_FLOW_LAYER_OUTER_VLAN; + item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 | + MLX5_FLOW_LAYER_INNER_VLAN) : + (MLX5_FLOW_LAYER_OUTER_L2 | + MLX5_FLOW_LAYER_OUTER_VLAN); break; case RTE_FLOW_ITEM_TYPE_IPV4: ret = mlx5_flow_validate_item_ipv4(items, item_flags, @@ -1307,23 +1209,18 @@ flow_verbs_validate(struct rte_eth_dev *dev, /** * Calculate the required bytes that are needed for the action part of the verbs - * flow, in addtion returns bit-fields with all the detected action, in order to - * avoid another interation over the actions. + * flow. * * @param[in] actions * Pointer to the list of actions. - * @param[out] action_flags - * Pointer to the detected actions. * * @return * The size of the memory needed for all actions. */ static int -flow_verbs_get_actions_and_size(const struct rte_flow_action actions[], - uint64_t *action_flags) +flow_verbs_get_actions_size(const struct rte_flow_action actions[]) { int size = 0; - uint64_t detected_actions = 0; for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { switch (actions->type) { @@ -1331,125 +1228,89 @@ flow_verbs_get_actions_and_size(const struct rte_flow_action actions[], break; case RTE_FLOW_ACTION_TYPE_FLAG: size += sizeof(struct ibv_flow_spec_action_tag); - detected_actions |= MLX5_FLOW_ACTION_FLAG; break; case RTE_FLOW_ACTION_TYPE_MARK: size += sizeof(struct ibv_flow_spec_action_tag); - detected_actions |= MLX5_FLOW_ACTION_MARK; break; case RTE_FLOW_ACTION_TYPE_DROP: size += sizeof(struct ibv_flow_spec_action_drop); - detected_actions |= MLX5_FLOW_ACTION_DROP; break; case RTE_FLOW_ACTION_TYPE_QUEUE: - detected_actions |= MLX5_FLOW_ACTION_QUEUE; break; case RTE_FLOW_ACTION_TYPE_RSS: - detected_actions |= MLX5_FLOW_ACTION_RSS; break; case RTE_FLOW_ACTION_TYPE_COUNT: #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \ defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45) size += sizeof(struct ibv_flow_spec_counter_action); #endif - detected_actions |= MLX5_FLOW_ACTION_COUNT; break; default: break; } } - *action_flags = detected_actions; return size; } /** * Calculate the required bytes that are needed for the item part of the verbs - * flow, in addtion returns bit-fields with all the detected action, in order to - * avoid another interation over the actions. + * flow. * - * @param[in] actions + * @param[in] items * Pointer to the list of items. - * @param[in, out] item_flags - * Pointer to the detected items. * * @return * The size of the memory needed for all items. */ static int -flow_verbs_get_items_and_size(const struct rte_flow_item items[], - uint64_t *item_flags) +flow_verbs_get_items_size(const struct rte_flow_item items[]) { int size = 0; - uint64_t detected_items = 0; for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { - int tunnel = !!(detected_items & MLX5_FLOW_LAYER_TUNNEL); - switch (items->type) { case RTE_FLOW_ITEM_TYPE_VOID: break; case RTE_FLOW_ITEM_TYPE_ETH: size += sizeof(struct ibv_flow_spec_eth); - detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 : - MLX5_FLOW_LAYER_OUTER_L2; break; case RTE_FLOW_ITEM_TYPE_VLAN: size += sizeof(struct ibv_flow_spec_eth); - detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : - MLX5_FLOW_LAYER_OUTER_VLAN; break; case RTE_FLOW_ITEM_TYPE_IPV4: size += sizeof(struct ibv_flow_spec_ipv4_ext); - detected_items |= tunnel ? - MLX5_FLOW_LAYER_INNER_L3_IPV4 : - MLX5_FLOW_LAYER_OUTER_L3_IPV4; break; case RTE_FLOW_ITEM_TYPE_IPV6: size += sizeof(struct ibv_flow_spec_ipv6); - detected_items |= tunnel ? - MLX5_FLOW_LAYER_INNER_L3_IPV6 : - MLX5_FLOW_LAYER_OUTER_L3_IPV6; break; case RTE_FLOW_ITEM_TYPE_UDP: size += sizeof(struct ibv_flow_spec_tcp_udp); - detected_items |= tunnel ? - MLX5_FLOW_LAYER_INNER_L4_UDP : - MLX5_FLOW_LAYER_OUTER_L4_UDP; break; case RTE_FLOW_ITEM_TYPE_TCP: size += sizeof(struct ibv_flow_spec_tcp_udp); - detected_items |= tunnel ? - MLX5_FLOW_LAYER_INNER_L4_TCP : - MLX5_FLOW_LAYER_OUTER_L4_TCP; break; case RTE_FLOW_ITEM_TYPE_VXLAN: size += sizeof(struct ibv_flow_spec_tunnel); - detected_items |= MLX5_FLOW_LAYER_VXLAN; break; case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: size += sizeof(struct ibv_flow_spec_tunnel); - detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE; break; #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT case RTE_FLOW_ITEM_TYPE_GRE: size += sizeof(struct ibv_flow_spec_gre); - detected_items |= MLX5_FLOW_LAYER_GRE; break; case RTE_FLOW_ITEM_TYPE_MPLS: size += sizeof(struct ibv_flow_spec_mpls); - detected_items |= MLX5_FLOW_LAYER_MPLS; break; #else case RTE_FLOW_ITEM_TYPE_GRE: size += sizeof(struct ibv_flow_spec_tunnel); - detected_items |= MLX5_FLOW_LAYER_TUNNEL; break; #endif default: break; } } - *item_flags = detected_items; return size; } @@ -1464,10 +1325,6 @@ flow_verbs_get_items_and_size(const struct rte_flow_item items[], * Pointer to the list of items. * @param[in] actions * Pointer to the list of actions. - * @param[out] item_flags - * Pointer to bit mask of all items detected. - * @param[out] action_flags - * Pointer to bit mask of all actions detected. * @param[out] error * Pointer to the error structure. * @@ -1479,15 +1336,13 @@ static struct mlx5_flow * flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused, const struct rte_flow_item items[], const struct rte_flow_action actions[], - uint64_t *item_flags, - uint64_t *action_flags, struct rte_flow_error *error) { uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr); struct mlx5_flow *flow; - size += flow_verbs_get_actions_and_size(actions, action_flags); - size += flow_verbs_get_items_and_size(items, item_flags); + size += flow_verbs_get_actions_size(actions); + size += flow_verbs_get_items_size(items); flow = rte_calloc(__func__, 1, size, 0); if (!flow) { rte_flow_error_set(error, ENOMEM, @@ -1528,50 +1383,48 @@ flow_verbs_translate(struct rte_eth_dev *dev, const struct rte_flow_action actions[], struct rte_flow_error *error) { - uint64_t action_flags = 0; + struct rte_flow *flow = dev_flow->flow; uint64_t item_flags = 0; + uint64_t action_flags = 0; uint64_t priority = attr->priority; + uint32_t subpriority = 0; struct priv *priv = dev->data->dev_private; if (priority == MLX5_FLOW_PRIO_RSVD) priority = priv->config.flow_prio - 1; for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { int ret; + switch (actions->type) { case RTE_FLOW_ACTION_TYPE_VOID: break; case RTE_FLOW_ACTION_TYPE_FLAG: - flow_verbs_translate_action_flag(actions, - &action_flags, - dev_flow); + flow_verbs_translate_action_flag(dev_flow, actions); + action_flags |= MLX5_FLOW_ACTION_FLAG; break; case RTE_FLOW_ACTION_TYPE_MARK: - flow_verbs_translate_action_mark(actions, - &action_flags, - dev_flow); + flow_verbs_translate_action_mark(dev_flow, actions); + action_flags |= MLX5_FLOW_ACTION_MARK; break; case RTE_FLOW_ACTION_TYPE_DROP: - flow_verbs_translate_action_drop(&action_flags, - dev_flow); + flow_verbs_translate_action_drop(dev_flow, actions); + action_flags |= MLX5_FLOW_ACTION_DROP; break; case RTE_FLOW_ACTION_TYPE_QUEUE: - flow_verbs_translate_action_queue(actions, - &action_flags, - dev_flow); + flow_verbs_translate_action_queue(dev_flow, actions); + action_flags |= MLX5_FLOW_ACTION_QUEUE; break; case RTE_FLOW_ACTION_TYPE_RSS: - flow_verbs_translate_action_rss(actions, - &action_flags, - dev_flow); + flow_verbs_translate_action_rss(dev_flow, actions); + action_flags |= MLX5_FLOW_ACTION_RSS; break; case RTE_FLOW_ACTION_TYPE_COUNT: - ret = flow_verbs_translate_action_count(dev, + ret = flow_verbs_translate_action_count(dev_flow, actions, - &action_flags, - dev_flow, - error); + dev, error); if (ret < 0) return ret; + action_flags |= MLX5_FLOW_ACTION_COUNT; break; default: return rte_flow_error_set(error, ENOTSUP, @@ -1580,51 +1433,100 @@ flow_verbs_translate(struct rte_eth_dev *dev, "action not supported"); } } - /* Device flow should have action flags by flow_drv_prepare(). */ - assert(dev_flow->flow->actions == action_flags); + flow->actions = action_flags; for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { + int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + switch (items->type) { case RTE_FLOW_ITEM_TYPE_VOID: break; case RTE_FLOW_ITEM_TYPE_ETH: - flow_verbs_translate_item_eth(items, &item_flags, - dev_flow); + flow_verbs_translate_item_eth(dev_flow, items, + item_flags); + subpriority = MLX5_PRIORITY_MAP_L2; + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 : + MLX5_FLOW_LAYER_OUTER_L2; break; case RTE_FLOW_ITEM_TYPE_VLAN: - flow_verbs_translate_item_vlan(items, &item_flags, - dev_flow); + flow_verbs_translate_item_vlan(dev_flow, items, + item_flags); + subpriority = MLX5_PRIORITY_MAP_L2; + item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 | + MLX5_FLOW_LAYER_INNER_VLAN) : + (MLX5_FLOW_LAYER_OUTER_L2 | + MLX5_FLOW_LAYER_OUTER_VLAN); break; case RTE_FLOW_ITEM_TYPE_IPV4: - flow_verbs_translate_item_ipv4(items, &item_flags, - dev_flow); + flow_verbs_translate_item_ipv4(dev_flow, items, + item_flags); + subpriority = MLX5_PRIORITY_MAP_L3; + dev_flow->verbs.hash_fields |= + mlx5_flow_hashfields_adjust + (dev_flow, tunnel, + MLX5_IPV4_LAYER_TYPES, + MLX5_IPV4_IBV_RX_HASH); + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : + MLX5_FLOW_LAYER_OUTER_L3_IPV4; break; case RTE_FLOW_ITEM_TYPE_IPV6: - flow_verbs_translate_item_ipv6(items, &item_flags, - dev_flow); - break; - case RTE_FLOW_ITEM_TYPE_UDP: - flow_verbs_translate_item_udp(items, &item_flags, - dev_flow); + flow_verbs_translate_item_ipv6(dev_flow, items, + item_flags); + subpriority = MLX5_PRIORITY_MAP_L3; + dev_flow->verbs.hash_fields |= + mlx5_flow_hashfields_adjust + (dev_flow, tunnel, + MLX5_IPV6_LAYER_TYPES, + MLX5_IPV6_IBV_RX_HASH); + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : + MLX5_FLOW_LAYER_OUTER_L3_IPV6; break; case RTE_FLOW_ITEM_TYPE_TCP: - flow_verbs_translate_item_tcp(items, &item_flags, - dev_flow); + flow_verbs_translate_item_tcp(dev_flow, items, + item_flags); + subpriority = MLX5_PRIORITY_MAP_L4; + dev_flow->verbs.hash_fields |= + mlx5_flow_hashfields_adjust + (dev_flow, tunnel, ETH_RSS_TCP, + (IBV_RX_HASH_SRC_PORT_TCP | + IBV_RX_HASH_DST_PORT_TCP)); + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : + MLX5_FLOW_LAYER_OUTER_L4_TCP; + break; + case RTE_FLOW_ITEM_TYPE_UDP: + flow_verbs_translate_item_udp(dev_flow, items, + item_flags); + subpriority = MLX5_PRIORITY_MAP_L4; + dev_flow->verbs.hash_fields |= + mlx5_flow_hashfields_adjust + (dev_flow, tunnel, ETH_RSS_UDP, + (IBV_RX_HASH_SRC_PORT_UDP | + IBV_RX_HASH_DST_PORT_UDP)); + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP : + MLX5_FLOW_LAYER_OUTER_L4_UDP; break; case RTE_FLOW_ITEM_TYPE_VXLAN: - flow_verbs_translate_item_vxlan(items, &item_flags, - dev_flow); + flow_verbs_translate_item_vxlan(dev_flow, items, + item_flags); + subpriority = MLX5_PRIORITY_MAP_L2; + item_flags |= MLX5_FLOW_LAYER_VXLAN; break; case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: - flow_verbs_translate_item_vxlan_gpe(items, &item_flags, - dev_flow); + flow_verbs_translate_item_vxlan_gpe(dev_flow, items, + item_flags); + subpriority = MLX5_PRIORITY_MAP_L2; + item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE; break; case RTE_FLOW_ITEM_TYPE_GRE: - flow_verbs_translate_item_gre(items, &item_flags, - dev_flow); + flow_verbs_translate_item_gre(dev_flow, items, + item_flags); + subpriority = MLX5_PRIORITY_MAP_L2; + item_flags |= MLX5_FLOW_LAYER_GRE; break; case RTE_FLOW_ITEM_TYPE_MPLS: - flow_verbs_translate_item_mpls(items, &item_flags, - dev_flow); + flow_verbs_translate_item_mpls(dev_flow, items, + item_flags); + subpriority = MLX5_PRIORITY_MAP_L2; + item_flags |= MLX5_FLOW_LAYER_MPLS; break; default: return rte_flow_error_set(error, ENOTSUP, @@ -1633,9 +1535,9 @@ flow_verbs_translate(struct rte_eth_dev *dev, "item not supported"); } } + dev_flow->layers = item_flags; dev_flow->verbs.attr->priority = - mlx5_flow_adjust_priority(dev, priority, - dev_flow->verbs.attr->priority); + mlx5_flow_adjust_priority(dev, priority, subpriority); return 0; } @@ -1669,10 +1571,6 @@ flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow) verbs->hrxq = NULL; } } - if (flow->counter) { - flow_verbs_counter_release(flow->counter); - flow->counter = NULL; - } } /** @@ -1696,6 +1594,10 @@ flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) LIST_REMOVE(dev_flow, next); rte_free(dev_flow); } + if (flow->counter) { + flow_verbs_counter_release(flow->counter); + flow->counter = NULL; + } } /** diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c index 1afb114f..dd10ad6d 100644 --- a/drivers/net/mlx5/mlx5_glue.c +++ b/drivers/net/mlx5/mlx5_glue.c @@ -174,6 +174,17 @@ mlx5_glue_destroy_flow(struct ibv_flow *flow_id) return ibv_destroy_flow(flow_id); } +static int +mlx5_glue_destroy_flow_action(struct ibv_flow_action *action) +{ +#ifdef HAVE_IBV_FLOW_DV_SUPPORT + return ibv_destroy_flow_action(action); +#else + (void)action; + return ENOTSUP; +#endif +} + static struct ibv_qp * mlx5_glue_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr) { @@ -444,6 +455,30 @@ mlx5_glue_dv_destroy_flow_matcher(struct mlx5dv_flow_matcher *matcher) #endif } +static struct ibv_flow_action * +mlx5_glue_dv_create_flow_action_packet_reformat + (struct ibv_context *ctx, + size_t data_sz, + void *data, + enum mlx5dv_flow_action_packet_reformat_type reformat_type, + enum mlx5dv_flow_table_type ft_type) +{ +#ifdef HAVE_IBV_FLOW_DV_SUPPORT + return mlx5dv_create_flow_action_packet_reformat(ctx, + data_sz, + data, + reformat_type, + ft_type); +#else + (void)ctx; + (void)data_sz; + (void)data; + (void)reformat_type; + (void)ft_type; + return NULL; +#endif +} + alignas(RTE_CACHE_LINE_SIZE) const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){ .version = MLX5_GLUE_VERSION, @@ -470,6 +505,7 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){ .modify_wq = mlx5_glue_modify_wq, .create_flow = mlx5_glue_create_flow, .destroy_flow = mlx5_glue_destroy_flow, + .destroy_flow_action = mlx5_glue_destroy_flow_action, .create_qp = mlx5_glue_create_qp, .create_qp_ex = mlx5_glue_create_qp_ex, .destroy_qp = mlx5_glue_destroy_qp, @@ -497,4 +533,6 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){ .dv_create_flow_matcher = mlx5_glue_dv_create_flow_matcher, .dv_destroy_flow_matcher = mlx5_glue_dv_destroy_flow_matcher, .dv_create_flow = mlx5_glue_dv_create_flow, + .dv_create_flow_action_packet_reformat = + mlx5_glue_dv_create_flow_action_packet_reformat, }; diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h index 44bfefed..2d92ba8b 100644 --- a/drivers/net/mlx5/mlx5_glue.h +++ b/drivers/net/mlx5/mlx5_glue.h @@ -50,6 +50,9 @@ struct mlx5dv_flow_matcher; struct mlx5dv_flow_matcher_attr; struct mlx5dv_flow_action_attr; struct mlx5dv_flow_match_parameters; +struct ibv_flow_action; +enum mlx5dv_flow_action_packet_reformat_type { packet_reformat_type = 0, }; +enum mlx5dv_flow_table_type { flow_table_type = 0, }; #endif /* LIB_GLUE_VERSION must be updated every time this structure is modified. */ @@ -91,6 +94,7 @@ struct mlx5_glue { struct ibv_flow *(*create_flow)(struct ibv_qp *qp, struct ibv_flow_attr *flow); int (*destroy_flow)(struct ibv_flow *flow_id); + int (*destroy_flow_action)(struct ibv_flow_action *action); struct ibv_qp *(*create_qp)(struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr); struct ibv_qp *(*create_qp_ex) @@ -154,6 +158,12 @@ struct mlx5_glue { struct mlx5dv_flow_match_parameters *match_value, size_t num_actions, struct mlx5dv_flow_action_attr *actions_attr); + struct ibv_flow_action *(*dv_create_flow_action_packet_reformat) + (struct ibv_context *ctx, + size_t data_sz, + void *data, + enum mlx5dv_flow_action_packet_reformat_type reformat_type, + enum mlx5dv_flow_table_type ft_type); }; const struct mlx5_glue *mlx5_glue; diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index ed993ea6..eef48502 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -841,6 +841,12 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx) " timestamp", dev->data->port_id); } +#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD + if (config->cqe_pad) { + attr.cq.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS; + attr.cq.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD; + } +#endif tmpl->cq = mlx5_glue->cq_ex_to_cq (mlx5_glue->dv_create_cq(priv->ctx, &attr.cq.ibv, &attr.cq.mlx5)); @@ -1758,6 +1764,8 @@ mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev) * first queue index will be taken for the indirection table. * @param queues_n * Number of queues. + * @param tunnel + * Tunnel type. * * @return * The Verbs object initialised, NULL otherwise and rte_errno is set. @@ -1773,6 +1781,9 @@ mlx5_hrxq_new(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq; struct mlx5_ind_table_ibv *ind_tbl; struct ibv_qp *qp; +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT + struct mlx5dv_qp_init_attr qp_init_attr = {0}; +#endif int err; queues_n = hash_fields ? queues_n : 1; @@ -1783,11 +1794,21 @@ mlx5_hrxq_new(struct rte_eth_dev *dev, rte_errno = ENOMEM; return NULL; } - if (!rss_key_len) { - rss_key_len = MLX5_RSS_HASH_KEY_LEN; - rss_key = rss_hash_default_key; - } #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT + if (tunnel) { + qp_init_attr.comp_mask = + MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; + qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS; + } +#ifdef HAVE_IBV_FLOW_DV_SUPPORT + if (dev->data->dev_conf.lpbk_mode) { + /* Allow packet sent from NIC loop back w/o source MAC check. */ + qp_init_attr.comp_mask |= + MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; + qp_init_attr.create_flags |= + MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC; + } +#endif qp = mlx5_glue->dv_create_qp (priv->ctx, &(struct ibv_qp_init_attr_ex){ @@ -1798,21 +1819,14 @@ mlx5_hrxq_new(struct rte_eth_dev *dev, IBV_QP_INIT_ATTR_RX_HASH, .rx_hash_conf = (struct ibv_rx_hash_conf){ .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, - .rx_hash_key_len = rss_key_len ? rss_key_len : - MLX5_RSS_HASH_KEY_LEN, - .rx_hash_key = rss_key ? - (void *)(uintptr_t)rss_key : - rss_hash_default_key, + .rx_hash_key_len = rss_key_len, + .rx_hash_key = (void *)(uintptr_t)rss_key, .rx_hash_fields_mask = hash_fields, }, .rwq_ind_tbl = ind_tbl->ind_table, .pd = priv->pd, }, - &(struct mlx5dv_qp_init_attr){ - .comp_mask = tunnel ? - MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS : 0, - .create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS, - }); + &qp_init_attr); #else qp = mlx5_glue->create_qp_ex (priv->ctx, @@ -1824,11 +1838,8 @@ mlx5_hrxq_new(struct rte_eth_dev *dev, IBV_QP_INIT_ATTR_RX_HASH, .rx_hash_conf = (struct ibv_rx_hash_conf){ .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, - .rx_hash_key_len = rss_key_len ? rss_key_len : - MLX5_RSS_HASH_KEY_LEN, - .rx_hash_key = rss_key ? - (void *)(uintptr_t)rss_key : - rss_hash_default_key, + .rx_hash_key_len = rss_key_len, + .rx_hash_key = (void *)(uintptr_t)rss_key, .rx_hash_fields_mask = hash_fields, }, .rwq_ind_tbl = ind_tbl->ind_table, diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 24a054d5..6eceea5f 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -417,20 +417,17 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) } /** - * DPDK callback to check the status of a rx descriptor. + * Internal function to compute the number of used descriptors in an RX queue * - * @param rx_queue - * The rx queue. - * @param[in] offset - * The index of the descriptor in the ring. + * @param rxq + * The Rx queue. * * @return - * The status of the tx descriptor. + * The number of used rx descriptor. */ -int -mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) +static uint32_t +rx_queue_count(struct mlx5_rxq_data *rxq) { - struct mlx5_rxq_data *rxq = rx_queue; struct rxq_zip *zip = &rxq->zip; volatile struct mlx5_cqe *cqe; const unsigned int cqe_n = (1 << rxq->cqe_n); @@ -461,12 +458,73 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; } used = RTE_MIN(used, (1U << rxq->elts_n) - 1); - if (offset < used) + return used; +} + +/** + * DPDK callback to check the status of a rx descriptor. + * + * @param rx_queue + * The Rx queue. + * @param[in] offset + * The index of the descriptor in the ring. + * + * @return + * The status of the tx descriptor. + */ +int +mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) +{ + struct mlx5_rxq_data *rxq = rx_queue; + struct mlx5_rxq_ctrl *rxq_ctrl = + container_of(rxq, struct mlx5_rxq_ctrl, rxq); + struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv); + + if (dev->rx_pkt_burst != mlx5_rx_burst) { + rte_errno = ENOTSUP; + return -rte_errno; + } + if (offset >= (1 << rxq->elts_n)) { + rte_errno = EINVAL; + return -rte_errno; + } + if (offset < rx_queue_count(rxq)) return RTE_ETH_RX_DESC_DONE; return RTE_ETH_RX_DESC_AVAIL; } /** + * DPDK callback to get the number of used descriptors in a RX queue + * + * @param dev + * Pointer to the device structure. + * + * @param rx_queue_id + * The Rx queue. + * + * @return + * The number of used rx descriptor. + * -EINVAL if the queue is invalid + */ +uint32_t +mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) +{ + struct priv *priv = dev->data->dev_private; + struct mlx5_rxq_data *rxq; + + if (dev->rx_pkt_burst != mlx5_rx_burst) { + rte_errno = ENOTSUP; + return -rte_errno; + } + rxq = (*priv->rxqs)[rx_queue_id]; + if (!rxq) { + rte_errno = EINVAL; + return -rte_errno; + } + return rx_queue_count(rxq); +} + +/** * DPDK callback for TX. * * @param dpdk_txq diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index 1db468c3..1b6200f6 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -345,6 +345,7 @@ uint16_t removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n); int mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset); int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); +uint32_t mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id); /* Vectorized version of mlx5_rxtx.c */ int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev); diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c index 1453f4ff..340292ad 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec.c +++ b/drivers/net/mlx5/mlx5_rxtx_vec.c @@ -277,7 +277,7 @@ mlx5_check_vec_tx_support(struct rte_eth_dev *dev) uint64_t offloads = dev->data->dev_conf.txmode.offloads; if (!priv->config.tx_vec_en || - priv->txqs_n > MLX5_VPMD_MIN_TXQS || + priv->txqs_n > (unsigned int)priv->config.txqs_vec || priv->config.mps != MLX5_MPW_ENHANCED || offloads & ~MLX5_VEC_TX_OFFLOAD_CAP) return -ENOTSUP; diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c index aa38ee7a..b330bf3d 100644 --- a/drivers/net/netvsc/hn_ethdev.c +++ b/drivers/net/netvsc/hn_ethdev.c @@ -879,9 +879,7 @@ static struct rte_vmbus_driver rte_netvsc_pmd = { RTE_PMD_REGISTER_VMBUS(net_netvsc, rte_netvsc_pmd); RTE_PMD_REGISTER_KMOD_DEP(net_netvsc, "* uio_hv_generic"); -RTE_INIT(hn_init_log); -static void -hn_init_log(void) +RTE_INIT(hn_init_log) { hn_logtype_init = rte_log_register("pmd.net.netvsc.init"); if (hn_logtype_init >= 0) diff --git a/drivers/net/netvsc/hn_vf.c b/drivers/net/netvsc/hn_vf.c index 7a84ad8c..3f714ec9 100644 --- a/drivers/net/netvsc/hn_vf.c +++ b/drivers/net/netvsc/hn_vf.c @@ -223,7 +223,7 @@ int hn_vf_link_update(struct rte_eth_dev *dev, rte_spinlock_lock(&hv->vf_lock); vf_dev = hv->vf_dev; if (vf_dev && vf_dev->dev_ops->link_update) - ret = (*vf_dev->dev_ops->link_update)(dev, wait_to_complete); + ret = (*vf_dev->dev_ops->link_update)(vf_dev, wait_to_complete); rte_spinlock_unlock(&hv->vf_lock); return ret; diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c index bab1f68e..54c6da92 100644 --- a/drivers/net/nfp/nfp_net.c +++ b/drivers/net/nfp/nfp_net.c @@ -2703,7 +2703,7 @@ nfp_net_init(struct rte_eth_dev *eth_dev) pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); /* NFP can not handle DMA addresses requiring more than 40 bits */ - if (rte_eal_check_dma_mask(40)) { + if (rte_mem_check_dma_mask(40)) { RTE_LOG(ERR, PMD, "device %s can not be used:", pci_dev->device.name); RTE_LOG(ERR, PMD, "\trestricted dma mask to 40 bits!\n"); diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c index cf454b19..d7e1d7b3 100644 --- a/drivers/net/qede/base/ecore_dev.c +++ b/drivers/net/qede/base/ecore_dev.c @@ -3429,6 +3429,14 @@ ecore_hw_init_pf(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt, if (rc != ECORE_SUCCESS) return rc; + /* Use the leading hwfn since in CMT only NIG #0 is operational */ + if (IS_LEAD_HWFN(p_hwfn)) { + rc = ecore_llh_hw_init_pf(p_hwfn, p_ptt, + p_params->avoid_eng_affin); + if (rc) + return rc; + } + if (p_params->b_hw_start) { /* enable interrupts */ rc = ecore_int_igu_enable(p_hwfn, p_ptt, p_params->int_mode); diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c index df83666f..ec6190b1 100644 --- a/drivers/net/qede/qede_main.c +++ b/drivers/net/qede/qede_main.c @@ -288,6 +288,7 @@ static int qed_slowpath_start(struct ecore_dev *edev, drv_load_params.mfw_timeout_val = ECORE_LOAD_REQ_LOCK_TO_DEFAULT; drv_load_params.avoid_eng_reset = false; drv_load_params.override_force_load = ECORE_OVERRIDE_FORCE_LOAD_ALWAYS; + hw_init_params.avoid_eng_affin = false; hw_init_params.p_drv_load_params = &drv_load_params; rc = ecore_hw_init(edev, &hw_init_params); diff --git a/drivers/net/softnic/rte_eth_softnic_cli.c b/drivers/net/softnic/rte_eth_softnic_cli.c index c6640d65..57b62337 100644 --- a/drivers/net/softnic/rte_eth_softnic_cli.c +++ b/drivers/net/softnic/rte_eth_softnic_cli.c @@ -1867,7 +1867,7 @@ cmd_pipeline_port_in(struct pmd_internals *softnic, p.type = PORT_IN_RXQ; - strcpy(p.dev_name, tokens[t0 + 1]); + strlcpy(p.dev_name, tokens[t0 + 1], sizeof(p.dev_name)); if (strcmp(tokens[t0 + 2], "rxq") != 0) { snprintf(out, out_size, MSG_ARG_NOT_FOUND, "rxq"); @@ -1890,7 +1890,7 @@ cmd_pipeline_port_in(struct pmd_internals *softnic, p.type = PORT_IN_SWQ; - strcpy(p.dev_name, tokens[t0 + 1]); + strlcpy(p.dev_name, tokens[t0 + 1], sizeof(p.dev_name)); t0 += 2; } else if (strcmp(tokens[t0], "tmgr") == 0) { @@ -1902,7 +1902,7 @@ cmd_pipeline_port_in(struct pmd_internals *softnic, p.type = PORT_IN_TMGR; - strcpy(p.dev_name, tokens[t0 + 1]); + strlcpy(p.dev_name, tokens[t0 + 1], sizeof(p.dev_name)); t0 += 2; } else if (strcmp(tokens[t0], "tap") == 0) { @@ -1914,7 +1914,7 @@ cmd_pipeline_port_in(struct pmd_internals *softnic, p.type = PORT_IN_TAP; - strcpy(p.dev_name, tokens[t0 + 1]); + strlcpy(p.dev_name, tokens[t0 + 1], sizeof(p.dev_name)); if (strcmp(tokens[t0 + 2], "mempool") != 0) { snprintf(out, out_size, MSG_ARG_NOT_FOUND, @@ -2009,7 +2009,8 @@ cmd_pipeline_port_in(struct pmd_internals *softnic, return; } - strcpy(p.action_profile_name, tokens[t0 + 1]); + strlcpy(p.action_profile_name, tokens[t0 + 1], + sizeof(p.action_profile_name)); t0 += 2; } @@ -2096,7 +2097,7 @@ cmd_pipeline_port_out(struct pmd_internals *softnic, p.type = PORT_OUT_TXQ; - strcpy(p.dev_name, tokens[7]); + strlcpy(p.dev_name, tokens[7], sizeof(p.dev_name)); if (strcmp(tokens[8], "txq") != 0) { snprintf(out, out_size, MSG_ARG_NOT_FOUND, "txq"); @@ -2117,7 +2118,7 @@ cmd_pipeline_port_out(struct pmd_internals *softnic, p.type = PORT_OUT_SWQ; - strcpy(p.dev_name, tokens[7]); + strlcpy(p.dev_name, tokens[7], sizeof(p.dev_name)); } else if (strcmp(tokens[6], "tmgr") == 0) { if (n_tokens != 8) { snprintf(out, out_size, MSG_ARG_MISMATCH, @@ -2127,7 +2128,7 @@ cmd_pipeline_port_out(struct pmd_internals *softnic, p.type = PORT_OUT_TMGR; - strcpy(p.dev_name, tokens[7]); + strlcpy(p.dev_name, tokens[7], sizeof(p.dev_name)); } else if (strcmp(tokens[6], "tap") == 0) { if (n_tokens != 8) { snprintf(out, out_size, MSG_ARG_MISMATCH, @@ -2137,7 +2138,7 @@ cmd_pipeline_port_out(struct pmd_internals *softnic, p.type = PORT_OUT_TAP; - strcpy(p.dev_name, tokens[7]); + strlcpy(p.dev_name, tokens[7], sizeof(p.dev_name)); } else if (strcmp(tokens[6], "sink") == 0) { if ((n_tokens != 7) && (n_tokens != 11)) { snprintf(out, out_size, MSG_ARG_MISMATCH, @@ -2485,7 +2486,8 @@ cmd_pipeline_table(struct pmd_internals *softnic, return; } - strcpy(p.action_profile_name, tokens[t0 + 1]); + strlcpy(p.action_profile_name, tokens[t0 + 1], + sizeof(p.action_profile_name)); t0 += 2; } diff --git a/drivers/net/softnic/rte_eth_softnic_flow.c b/drivers/net/softnic/rte_eth_softnic_flow.c index 285af462..21e75300 100644 --- a/drivers/net/softnic/rte_eth_softnic_flow.c +++ b/drivers/net/softnic/rte_eth_softnic_flow.c @@ -56,7 +56,7 @@ flow_attr_map_set(struct pmd_internals *softnic, map = (ingress) ? &softnic->flow.ingress_map[group_id] : &softnic->flow.egress_map[group_id]; - strcpy(map->pipeline_name, pipeline_name); + strlcpy(map->pipeline_name, pipeline_name, sizeof(map->pipeline_name)); map->table_id = table_id; map->valid = 1; @@ -1624,11 +1624,11 @@ flow_rule_action_get(struct pmd_internals *softnic, /* RTE_TABLE_ACTION_METER */ rule_action->mtr.mtr[0].meter_profile_id = meter_profile_id; rule_action->mtr.mtr[0].policer[e_RTE_METER_GREEN] = - (enum rte_table_action_policer)m->params.action[RTE_MTR_GREEN]; + softnic_table_action_policer(m->params.action[RTE_MTR_GREEN]); rule_action->mtr.mtr[0].policer[e_RTE_METER_YELLOW] = - (enum rte_table_action_policer)m->params.action[RTE_MTR_YELLOW]; + softnic_table_action_policer(m->params.action[RTE_MTR_YELLOW]); rule_action->mtr.mtr[0].policer[e_RTE_METER_RED] = - (enum rte_table_action_policer)m->params.action[RTE_MTR_RED]; + softnic_table_action_policer(m->params.action[RTE_MTR_RED]); rule_action->mtr.tc_mask = 1; rule_action->action_mask |= 1 << RTE_TABLE_ACTION_MTR; break; diff --git a/drivers/net/softnic/rte_eth_softnic_internals.h b/drivers/net/softnic/rte_eth_softnic_internals.h index e12b8ae4..31698b9f 100644 --- a/drivers/net/softnic/rte_eth_softnic_internals.h +++ b/drivers/net/softnic/rte_eth_softnic_internals.h @@ -828,6 +828,9 @@ softnic_table_action_profile_create(struct pmd_internals *p, const char *name, struct softnic_table_action_profile_params *params); +enum rte_table_action_policer +softnic_table_action_policer(enum rte_mtr_policer_action action); + /** * Pipeline */ diff --git a/drivers/net/softnic/rte_eth_softnic_meter.c b/drivers/net/softnic/rte_eth_softnic_meter.c index 73ecf3b1..7b747ba5 100644 --- a/drivers/net/softnic/rte_eth_softnic_meter.c +++ b/drivers/net/softnic/rte_eth_softnic_meter.c @@ -65,6 +65,27 @@ softnic_mtr_meter_profile_find(struct pmd_internals *p, return NULL; } +enum rte_table_action_policer +softnic_table_action_policer(enum rte_mtr_policer_action action) +{ + switch (action) { + case MTR_POLICER_ACTION_COLOR_GREEN: + return RTE_TABLE_ACTION_POLICER_COLOR_GREEN; + + /* FALLTHROUGH */ + case MTR_POLICER_ACTION_COLOR_YELLOW: + return RTE_TABLE_ACTION_POLICER_COLOR_YELLOW; + + /* FALLTHROUGH */ + case MTR_POLICER_ACTION_COLOR_RED: + return RTE_TABLE_ACTION_POLICER_COLOR_RED; + + /* FALLTHROUGH */ + default: + return RTE_TABLE_ACTION_POLICER_DROP; + } +} + static int meter_profile_check(struct rte_eth_dev *dev, uint32_t meter_profile_id, @@ -542,7 +563,7 @@ pmd_mtr_policer_actions_update(struct rte_eth_dev *dev, for (i = 0; i < RTE_MTR_COLORS; i++) if (action_mask & (1 << i)) action.mtr.mtr[0].policer[i] = - (enum rte_table_action_policer)actions[i]; + softnic_table_action_policer(actions[i]); /* Re-add the rule */ status = softnic_pipeline_table_rule_add(p, diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c index b38a4b6b..42bdfcbd 100644 --- a/drivers/net/vhost/rte_eth_vhost.c +++ b/drivers/net/vhost/rte_eth_vhost.c @@ -1467,7 +1467,11 @@ RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv); RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost); RTE_PMD_REGISTER_PARAM_STRING(net_vhost, "iface=<ifc> " - "queues=<int>"); + "queues=<int> " + "client=<0|1> " + "dequeue-zero-copy=<0|1> " + "iommu-support=<0|1> " + "postcopy-support=<0|1>"); RTE_INIT(vhost_init_log) { diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 10a7e3fc..e1fe36a2 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -588,6 +588,10 @@ virtio_dev_close(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "virtio_dev_close"); + if (!hw->opened) + return; + hw->opened = false; + /* reset the NIC */ if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR); @@ -1288,6 +1292,7 @@ virtio_interrupt_handler(void *param) struct rte_eth_dev *dev = param; struct virtio_hw *hw = dev->data->dev_private; uint8_t isr; + uint16_t status; /* Read interrupt status which clears interrupt */ isr = vtpci_isr(hw); @@ -1301,12 +1306,17 @@ virtio_interrupt_handler(void *param) _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); - } - if (isr & VIRTIO_NET_S_ANNOUNCE) { - virtio_notify_peers(dev); - if (hw->cvq) - virtio_ack_link_announce(dev); + if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, status), + &status, sizeof(status)); + if (status & VIRTIO_NET_S_ANNOUNCE) { + virtio_notify_peers(dev); + if (hw->cvq) + virtio_ack_link_announce(dev); + } + } } } @@ -1679,11 +1689,6 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) if (ret < 0) goto out; - /* Setup interrupt callback */ - if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - rte_intr_callback_register(eth_dev->intr_handle, - virtio_interrupt_handler, eth_dev); - return 0; out: @@ -1706,11 +1711,6 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) eth_dev->tx_pkt_burst = NULL; eth_dev->rx_pkt_burst = NULL; - /* reset interrupt callback */ - if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - rte_intr_callback_unregister(eth_dev->intr_handle, - virtio_interrupt_handler, - eth_dev); if (eth_dev->device) rte_pci_unmap_device(RTE_ETH_DEV_TO_PCI(eth_dev)); @@ -1928,6 +1928,8 @@ virtio_dev_configure(struct rte_eth_dev *dev) DEV_RX_OFFLOAD_VLAN_STRIP)) hw->use_simple_rx = 0; + hw->opened = true; + return 0; } @@ -1969,6 +1971,12 @@ virtio_dev_start(struct rte_eth_dev *dev) dev->data->dev_conf.intr_conf.rxq) { virtio_intr_disable(dev); + /* Setup interrupt callback */ + if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) + rte_intr_callback_register(dev->intr_handle, + virtio_interrupt_handler, + dev); + if (virtio_intr_enable(dev) < 0) { PMD_DRV_LOG(ERR, "interrupt enable failed"); return -EIO; @@ -2012,7 +2020,7 @@ virtio_dev_start(struct rte_eth_dev *dev) } set_rxtx_funcs(dev); - hw->started = 1; + hw->started = true; /* Initialize Link state */ virtio_dev_link_update(dev, 0); @@ -2078,12 +2086,24 @@ virtio_dev_stop(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "stop"); rte_spinlock_lock(&hw->state_lock); - if (intr_conf->lsc || intr_conf->rxq) + if (!hw->started) + goto out_unlock; + hw->started = false; + + if (intr_conf->lsc || intr_conf->rxq) { virtio_intr_disable(dev); - hw->started = 0; + /* Reset interrupt callback */ + if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) { + rte_intr_callback_unregister(dev->intr_handle, + virtio_interrupt_handler, + dev); + } + } + memset(&link, 0, sizeof(link)); rte_eth_linkstatus_set(dev, &link); +out_unlock: rte_spinlock_unlock(&hw->state_lock); } @@ -2099,7 +2119,7 @@ virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complet link.link_speed = ETH_SPEED_NUM_10G; link.link_autoneg = ETH_LINK_FIXED; - if (hw->started == 0) { + if (!hw->started) { link.link_status = ETH_LINK_DOWN; } else if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { PMD_INIT_LOG(DEBUG, "Get link status from hw"); diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c index b6a3c80b..21110cd6 100644 --- a/drivers/net/virtio/virtio_pci.c +++ b/drivers/net/virtio/virtio_pci.c @@ -166,12 +166,6 @@ legacy_set_status(struct virtio_hw *hw, uint8_t status) rte_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS); } -static void -legacy_reset(struct virtio_hw *hw) -{ - legacy_set_status(hw, VIRTIO_CONFIG_STATUS_RESET); -} - static uint8_t legacy_get_isr(struct virtio_hw *hw) { @@ -250,7 +244,6 @@ legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) const struct virtio_pci_ops legacy_ops = { .read_dev_cfg = legacy_read_dev_config, .write_dev_cfg = legacy_write_dev_config, - .reset = legacy_reset, .get_status = legacy_get_status, .set_status = legacy_set_status, .get_features = legacy_get_features, @@ -339,13 +332,6 @@ modern_set_status(struct virtio_hw *hw, uint8_t status) rte_write8(status, &hw->common_cfg->device_status); } -static void -modern_reset(struct virtio_hw *hw) -{ - modern_set_status(hw, VIRTIO_CONFIG_STATUS_RESET); - modern_get_status(hw); -} - static uint8_t modern_get_isr(struct virtio_hw *hw) { @@ -438,7 +424,6 @@ modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq) const struct virtio_pci_ops modern_ops = { .read_dev_cfg = modern_read_dev_config, .write_dev_cfg = modern_write_dev_config, - .reset = modern_reset, .get_status = modern_get_status, .set_status = modern_set_status, .get_features = modern_get_features, diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index 58fdd3d4..e961a58c 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -204,7 +204,6 @@ struct virtio_pci_ops { void *dst, int len); void (*write_dev_cfg)(struct virtio_hw *hw, size_t offset, const void *src, int len); - void (*reset)(struct virtio_hw *hw); uint8_t (*get_status)(struct virtio_hw *hw); void (*set_status)(struct virtio_hw *hw, uint8_t status); @@ -232,7 +231,7 @@ struct virtio_hw { uint64_t req_guest_features; uint64_t guest_features; uint32_t max_queue_pairs; - uint16_t started; + bool started; uint16_t max_mtu; uint16_t vtnet_hdr_size; uint8_t vlan_strip; @@ -258,6 +257,7 @@ struct virtio_hw { */ rte_spinlock_t state_lock; struct rte_mbuf **inject_pkts; + bool opened; struct virtqueue **vqs; }; diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index b4997ee3..20816c93 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -134,9 +134,6 @@ virtio_user_start_device(struct virtio_user_dev *dev) if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0) goto error; - /* Do not check return as already done in init, or reset in stop */ - dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL); - /* Step 0: tell vhost to create queues */ if (virtio_user_queue_setup(dev, virtio_user_create_queue) < 0) goto error; @@ -181,21 +178,34 @@ error: int virtio_user_stop_device(struct virtio_user_dev *dev) { + struct vhost_vring_state state; uint32_t i; + int error = 0; pthread_mutex_lock(&dev->mutex); + if (!dev->started) + goto out; + for (i = 0; i < dev->max_queue_pairs; ++i) dev->ops->enable_qp(dev, i, 0); - if (dev->ops->send_request(dev, VHOST_USER_RESET_OWNER, NULL) < 0) { - PMD_DRV_LOG(INFO, "Failed to reset the device\n"); - pthread_mutex_unlock(&dev->mutex); - return -1; + /* Stop the backend. */ + for (i = 0; i < dev->max_queue_pairs * 2; ++i) { + state.index = i; + if (dev->ops->send_request(dev, VHOST_USER_GET_VRING_BASE, + &state) < 0) { + PMD_DRV_LOG(ERR, "get_vring_base failed, index=%u\n", + i); + error = -1; + goto out; + } } + dev->started = false; +out: pthread_mutex_unlock(&dev->mutex); - return 0; + return error; } static inline void @@ -411,7 +421,8 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, dev->queue_pairs = 1; /* mq disabled by default */ dev->queue_size = queue_size; dev->mac_specified = 0; - dev->unsupported_features = 0; + dev->frontend_features = 0; + dev->unsupported_features = ~VIRTIO_USER_SUPPORTED_FEATURES; parse_mac(dev, mac); if (*ifname) { @@ -447,37 +458,25 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, dev->device_features = VIRTIO_USER_SUPPORTED_FEATURES; } - if (!mrg_rxbuf) { - dev->device_features &= ~(1ull << VIRTIO_NET_F_MRG_RXBUF); + if (!mrg_rxbuf) dev->unsupported_features |= (1ull << VIRTIO_NET_F_MRG_RXBUF); - } - if (!in_order) { - dev->device_features &= ~(1ull << VIRTIO_F_IN_ORDER); + if (!in_order) dev->unsupported_features |= (1ull << VIRTIO_F_IN_ORDER); - } - if (dev->mac_specified) { - dev->device_features |= (1ull << VIRTIO_NET_F_MAC); - } else { - dev->device_features &= ~(1ull << VIRTIO_NET_F_MAC); + if (dev->mac_specified) + dev->frontend_features |= (1ull << VIRTIO_NET_F_MAC); + else dev->unsupported_features |= (1ull << VIRTIO_NET_F_MAC); - } if (cq) { /* device does not really need to know anything about CQ, * so if necessary, we just claim to support CQ */ - dev->device_features |= (1ull << VIRTIO_NET_F_CTRL_VQ); + dev->frontend_features |= (1ull << VIRTIO_NET_F_CTRL_VQ); } else { - dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); - /* Also disable features depends on VIRTIO_NET_F_CTRL_VQ */ - dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_RX); - dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN); - dev->device_features &= ~(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE); - dev->device_features &= ~(1ull << VIRTIO_NET_F_MQ); - dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR); dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VQ); + /* Also disable features that depend on VIRTIO_NET_F_CTRL_VQ */ dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_RX); dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VLAN); dev->unsupported_features |= @@ -489,10 +488,14 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, /* The backend will not report this feature, we add it explicitly */ if (is_vhost_user_by_type(dev->path)) - dev->device_features |= (1ull << VIRTIO_NET_F_STATUS); + dev->frontend_features |= (1ull << VIRTIO_NET_F_STATUS); - dev->device_features &= VIRTIO_USER_SUPPORTED_FEATURES; - dev->unsupported_features |= ~VIRTIO_USER_SUPPORTED_FEATURES; + /* + * Device features = + * (frontend_features | backend_features) & ~unsupported_features; + */ + dev->device_features |= dev->frontend_features; + dev->device_features &= ~dev->unsupported_features; if (rte_mem_event_callback_register(VIRTIO_USER_MEM_EVENT_CLB_NAME, virtio_user_mem_event_cb, dev)) { diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h index d6e0e137..c42ce5d4 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h @@ -33,6 +33,7 @@ struct virtio_user_dev { * and will be sync with device */ uint64_t device_features; /* supported features by device */ + uint64_t frontend_features; /* enabled frontend features */ uint64_t unsupported_features; /* unsupported features mask */ uint8_t status; uint16_t port_id; diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index b51cbc85..61b7c0a3 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -28,7 +28,6 @@ static int virtio_user_server_reconnect(struct virtio_user_dev *dev) { int ret; - int flag; int connectfd; struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id]; @@ -44,14 +43,13 @@ virtio_user_server_reconnect(struct virtio_user_dev *dev) return -1; } + dev->device_features |= dev->frontend_features; + /* umask vhost-user unsupported features */ dev->device_features &= ~(dev->unsupported_features); dev->features &= dev->device_features; - flag = fcntl(connectfd, F_GETFD); - fcntl(connectfd, F_SETFL, flag | O_NONBLOCK); - ret = virtio_user_start_device(dev); if (ret < 0) return -1; @@ -331,7 +329,6 @@ virtio_user_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) const struct virtio_pci_ops virtio_user_ops = { .read_dev_cfg = virtio_user_read_dev_config, .write_dev_cfg = virtio_user_write_dev_config, - .reset = virtio_user_reset, .get_status = virtio_user_get_status, .set_status = virtio_user_set_status, .get_features = virtio_user_get_features, diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c index 41bcd450..84acd9db 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -360,8 +360,10 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev) if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; - if (hw->adapter_stopped == 0) - vmxnet3_dev_close(eth_dev); + if (hw->adapter_stopped == 0) { + PMD_INIT_LOG(DEBUG, "Device has not been closed."); + return -EBUSY; + } eth_dev->dev_ops = NULL; eth_dev->rx_pkt_burst = NULL; @@ -805,7 +807,7 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); if (hw->adapter_stopped == 1) { - PMD_INIT_LOG(DEBUG, "Device already closed."); + PMD_INIT_LOG(DEBUG, "Device already stopped."); return; } @@ -829,7 +831,6 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev) /* reset the device */ VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV); PMD_INIT_LOG(DEBUG, "Device reset."); - hw->adapter_stopped = 0; vmxnet3_dev_clear_queues(dev); @@ -839,6 +840,30 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev) link.link_speed = ETH_SPEED_NUM_10G; link.link_autoneg = ETH_LINK_FIXED; rte_eth_linkstatus_set(dev, &link); + + hw->adapter_stopped = 1; +} + +static void +vmxnet3_free_queues(struct rte_eth_dev *dev) +{ + int i; + + PMD_INIT_FUNC_TRACE(); + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + void *rxq = dev->data->rx_queues[i]; + + vmxnet3_dev_rx_queue_release(rxq); + } + dev->data->nb_rx_queues = 0; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + void *txq = dev->data->tx_queues[i]; + + vmxnet3_dev_tx_queue_release(txq); + } + dev->data->nb_tx_queues = 0; } /* @@ -847,12 +872,16 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev) static void vmxnet3_dev_close(struct rte_eth_dev *dev) { - struct vmxnet3_hw *hw = dev->data->dev_private; - PMD_INIT_FUNC_TRACE(); vmxnet3_dev_stop(dev); - hw->adapter_stopped = 1; + vmxnet3_free_queues(dev); + + /* + * flag to rte_eth_dev_close() that it should release the port resources + * (calling rte_eth_dev_release_port()) in addition to closing it. + */ + dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; } static void diff --git a/examples/Makefile b/examples/Makefile index 356fcb1c..33fe0e58 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -17,6 +17,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += distributor DIRS-y += ethtool DIRS-y += exception_path DIRS-$(CONFIG_RTE_LIBRTE_EFD) += server_node_efd +DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += fips_validation DIRS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += flow_classify DIRS-y += flow_filtering DIRS-y += helloworld diff --git a/examples/fips_validation/Makefile b/examples/fips_validation/Makefile new file mode 100644 index 00000000..7b1fe34a --- /dev/null +++ b/examples/fips_validation/Makefile @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +# binary name +APP = fips_validation + +# all source are stored in SRCS-y +SRCS-y := fips_validation.c +SRCS-y += fips_validation_aes.c +SRCS-y += fips_validation_hmac.c +SRCS-y += fips_validation_tdes.c +SRCS-y += fips_validation_gcm.c +SRCS-y += fips_validation_cmac.c +SRCS-y += fips_validation_ccm.c +SRCS-y += main.c + +# Build using pkg-config variables if possible +$(shell pkg-config --exists libdpdk) +ifeq ($(.SHELLSTATUS),0) + +all: shared +.PHONY: shared static +shared: build/$(APP)-shared + ln -sf $(APP)-shared build/$(APP) +static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +PC_FILE := $(shell pkg-config --path libdpdk) +CFLAGS += -O3 $(shell pkg-config --cflags libdpdk) +LDFLAGS_SHARED = $(shell pkg-config --libs libdpdk) +LDFLAGS_STATIC = -Wl,-Bstatic $(shell pkg-config --static --libs libdpdk) + +build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build + $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED) + +build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build + $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC) + +build: + @mkdir -p $@ + +.PHONY: clean +clean: + rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared + rmdir --ignore-fail-on-non-empty build + +else + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +INC += $(sort $(wildcard *.h)) + +include $(RTE_SDK)/mk/rte.vars.mk + +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +CFLAGS += -DALLOW_EXPERIMENTAL_API +CFLAGS += -I$(SRCDIR) +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk + +endif diff --git a/examples/fips_validation/fips_validation.c b/examples/fips_validation/fips_validation.c new file mode 100644 index 00000000..a835cc3f --- /dev/null +++ b/examples/fips_validation/fips_validation.c @@ -0,0 +1,595 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <stdio.h> +#include <string.h> + +#include <rte_string_fns.h> +#include <rte_cryptodev.h> +#include <rte_malloc.h> + +#include "fips_validation.h" + +#define skip_white_spaces(pos) \ +({ \ + __typeof__(pos) _p = (pos); \ + for ( ; isspace(*_p); _p++) \ + ; \ + _p; \ +}) + +static int +get_file_line(void) +{ + FILE *fp = info.fp_rd; + char *line = info.one_line_text; + int ret; + uint32_t loc = 0; + + memset(line, 0, MAX_LINE_CHAR); + while ((ret = fgetc(fp)) != EOF) { + char c = (char)ret; + + if (loc >= MAX_LINE_CHAR - 1) + return -ENOMEM; + if (c == '\n') + break; + line[loc++] = c; + } + + if (ret == EOF) + return -EOF; + + return 0; +} + +int +fips_test_fetch_one_block(void) +{ + size_t size; + int ret = 0; + uint32_t i; + + for (i = 0; i < info.nb_vec_lines; i++) { + free(info.vec[i]); + info.vec[i] = NULL; + } + + i = 0; + do { + if (i >= MAX_LINE_PER_VECTOR) { + ret = -ENOMEM; + goto error_exit; + } + + ret = get_file_line(); + size = strlen(info.one_line_text); + if (size == 0) + break; + + info.vec[i] = calloc(1, size + 5); + if (info.vec[i] == NULL) + goto error_exit; + + strlcpy(info.vec[i], info.one_line_text, size + 1); + i++; + } while (ret == 0); + + info.nb_vec_lines = i; + + return ret; + +error_exit: + for (i = 0; i < MAX_LINE_PER_VECTOR; i++) + if (info.vec[i] != NULL) { + free(info.vec[i]); + info.vec[i] = NULL; + } + + info.nb_vec_lines = 0; + + return -ENOMEM; +} + +static int +fips_test_parse_header(void) +{ + uint32_t i; + char *tmp; + int ret; + time_t t = time(NULL); + struct tm *tm_now = localtime(&t); + + ret = fips_test_fetch_one_block(); + if (ret < 0) + return ret; + + for (i = 0; i < info.nb_vec_lines; i++) { + if (strstr(info.vec[i], "AESVS")) { + info.algo = FIPS_TEST_ALGO_AES; + ret = parse_test_aes_init(); + if (ret < 0) + return ret; + } else if (strstr(info.vec[i], "GCM")) { + info.algo = FIPS_TEST_ALGO_AES_GCM; + ret = parse_test_gcm_init(); + if (ret < 0) + return ret; + } else if (strstr(info.vec[i], "CMAC")) { + info.algo = FIPS_TEST_ALGO_AES_CMAC; + ret = parse_test_cmac_init(); + if (ret < 0) + return 0; + } else if (strstr(info.vec[i], "CCM")) { + info.algo = FIPS_TEST_ALGO_AES_CCM; + ret = parse_test_ccm_init(); + if (ret < 0) + return 0; + } else if (strstr(info.vec[i], "HMAC")) { + info.algo = FIPS_TEST_ALGO_HMAC; + ret = parse_test_hmac_init(); + if (ret < 0) + return ret; + } else if (strstr(info.vec[i], "TDES")) { + info.algo = FIPS_TEST_ALGO_TDES; + ret = parse_test_tdes_init(); + if (ret < 0) + return 0; + } + + tmp = strstr(info.vec[i], "# Config info for "); + if (tmp != NULL) { + fprintf(info.fp_wr, "%s%s\n", "# Config info for DPDK Cryptodev ", + info.device_name); + continue; + } + + tmp = strstr(info.vec[i], "# HMAC information for "); + if (tmp != NULL) { + fprintf(info.fp_wr, "%s%s\n", "# HMAC information for " + "DPDK Cryptodev ", + info.device_name); + continue; + } + + tmp = strstr(info.vec[i], "# Config Info for : "); + if (tmp != NULL) { + + fprintf(info.fp_wr, "%s%s\n", "# Config Info for DPDK Cryptodev : ", + info.device_name); + continue; + } + + tmp = strstr(info.vec[i], "# information for "); + if (tmp != NULL) { + + char tmp_output[128] = {0}; + + strlcpy(tmp_output, info.vec[i], tmp - info.vec[i] + 1); + + fprintf(info.fp_wr, "%s%s%s\n", tmp_output, + "information for DPDK Cryptodev ", + info.device_name); + continue; + } + + tmp = strstr(info.vec[i], " test information for "); + if (tmp != NULL) { + char tmp_output[128] = {0}; + + strlcpy(tmp_output, info.vec[i], tmp - info.vec[i] + 1); + + fprintf(info.fp_wr, "%s%s%s\n", tmp_output, + "test information for DPDK Cryptodev ", + info.device_name); + continue; + } + + if (i == info.nb_vec_lines - 1) { + /** update the time as current time, write to file */ + fprintf(info.fp_wr, "%s%s\n", "# Generated on ", + asctime(tm_now)); + continue; + } + + /* to this point, no field need to update, + * only copy to rsp file + */ + fprintf(info.fp_wr, "%s\n", info.vec[i]); + } + + return 0; +} + +static int +parse_file_type(const char *path) +{ + const char *tmp = path + strlen(path) - 3; + + if (strstr(tmp, REQ_FILE_PERFIX)) + info.file_type = FIPS_TYPE_REQ; + else if (strstr(tmp, RSP_FILE_PERFIX)) + info.file_type = FIPS_TYPE_RSP; + else if (strstr(path, FAX_FILE_PERFIX)) + info.file_type = FIPS_TYPE_FAX; + else + return -EINVAL; + + return 0; +} + +int +fips_test_init(const char *req_file_path, const char *rsp_file_path, + const char *device_name) +{ + if (strcmp(req_file_path, rsp_file_path) == 0) { + RTE_LOG(ERR, USER1, "File paths cannot be the same\n"); + return -EINVAL; + } + + fips_test_clear(); + + info.algo = FIPS_TEST_ALGO_MAX; + if (parse_file_type(req_file_path) < 0) { + RTE_LOG(ERR, USER1, "File %s type not supported\n", + req_file_path); + return -EINVAL; + } + + info.fp_rd = fopen(req_file_path, "r"); + if (!info.fp_rd) { + RTE_LOG(ERR, USER1, "Cannot open file %s\n", req_file_path); + return -EINVAL; + } + + info.fp_wr = fopen(rsp_file_path, "w"); + if (!info.fp_wr) { + RTE_LOG(ERR, USER1, "Cannot open file %s\n", rsp_file_path); + return -EINVAL; + } + + info.one_line_text = calloc(1, MAX_LINE_CHAR); + if (!info.one_line_text) { + RTE_LOG(ERR, USER1, "Insufficient memory\n"); + return -ENOMEM; + } + + strlcpy(info.device_name, device_name, sizeof(info.device_name)); + + if (fips_test_parse_header() < 0) { + RTE_LOG(ERR, USER1, "Failed parsing header\n"); + return -1; + } + + return 0; +} + +void +fips_test_clear(void) +{ + if (info.fp_rd) + fclose(info.fp_rd); + if (info.fp_wr) + fclose(info.fp_wr); + if (info.one_line_text) + free(info.one_line_text); + if (info.nb_vec_lines) { + uint32_t i; + + for (i = 0; i < info.nb_vec_lines; i++) + free(info.vec[i]); + } + + memset(&info, 0, sizeof(info)); +} + +int +fips_test_parse_one_case(void) +{ + uint32_t i, j = 0; + uint32_t is_interim = 0; + int ret; + + if (info.interim_callbacks) { + for (i = 0; i < info.nb_vec_lines; i++) { + for (j = 0; info.interim_callbacks[j].key != NULL; j++) + if (strstr(info.vec[i], + info.interim_callbacks[j].key)) { + is_interim = 1; + + ret = info.interim_callbacks[j].cb( + info.interim_callbacks[j].key, + info.vec[i], + info.interim_callbacks[j].val); + if (ret < 0) + return ret; + } + } + } + + if (is_interim) { + for (i = 0; i < info.nb_vec_lines; i++) + fprintf(info.fp_wr, "%s\n", info.vec[i]); + fprintf(info.fp_wr, "\n"); + return 1; + } + + for (i = 0; i < info.nb_vec_lines; i++) { + for (j = 0; info.callbacks[j].key != NULL; j++) + if (strstr(info.vec[i], info.callbacks[j].key)) { + ret = info.callbacks[j].cb( + info.callbacks[j].key, + info.vec[i], info.callbacks[j].val); + if (ret < 0) + return ret; + break; + } + } + + return 0; +} + +void +fips_test_write_one_case(void) +{ + uint32_t i; + + for (i = 0; i < info.nb_vec_lines; i++) + fprintf(info.fp_wr, "%s\n", info.vec[i]); +} + +static int +parser_read_uint64_hex(uint64_t *value, const char *p) +{ + char *next; + uint64_t val; + + p = skip_white_spaces(p); + + val = strtoul(p, &next, 16); + if (p == next) + return -EINVAL; + + p = skip_white_spaces(next); + if (*p != '\0') + return -EINVAL; + + *value = val; + return 0; +} + +int +parser_read_uint8_hex(uint8_t *value, const char *p) +{ + uint64_t val = 0; + int ret = parser_read_uint64_hex(&val, p); + + if (ret < 0) + return ret; + + if (val > UINT8_MAX) + return -ERANGE; + + *value = val; + return 0; +} + +int +parse_uint8_known_len_hex_str(const char *key, char *src, struct fips_val *val) +{ + struct fips_val tmp_val = {0}; + uint32_t len = val->len; + int ret; + + if (len == 0) { + if (val->val != NULL) { + rte_free(val->val); + val->val = NULL; + } + + return 0; + } + + ret = parse_uint8_hex_str(key, src, &tmp_val); + if (ret < 0) + return ret; + + if (tmp_val.len == val->len) { + val->val = tmp_val.val; + return 0; + } + + if (tmp_val.len < val->len) { + rte_free(tmp_val.val); + return -EINVAL; + } + + val->val = rte_zmalloc(NULL, val->len, 0); + if (!val->val) { + rte_free(tmp_val.val); + memset(val, 0, sizeof(*val)); + return -ENOMEM; + } + + memcpy(val->val, tmp_val.val, val->len); + rte_free(tmp_val.val); + + return 0; +} + +int +parse_uint8_hex_str(const char *key, char *src, struct fips_val *val) +{ + uint32_t len, j; + + src += strlen(key); + + len = strlen(src) / 2; + + if (val->val) { + rte_free(val->val); + val->val = NULL; + } + + val->val = rte_zmalloc(NULL, len, 0); + if (!val->val) + return -ENOMEM; + + for (j = 0; j < len; j++) { + char byte[3] = {src[j * 2], src[j * 2 + 1], '\0'}; + + if (parser_read_uint8_hex(&val->val[j], byte) < 0) { + rte_free(val->val); + memset(val, 0, sizeof(*val)); + return -EINVAL; + } + } + + val->len = len; + + return 0; +} + +int +parser_read_uint32_val(const char *key, char *src, struct fips_val *val) +{ + char *data = src + strlen(key); + size_t data_len = strlen(data); + int ret; + + if (data[data_len - 1] == ']') { + char *tmp_data = calloc(1, data_len + 1); + + if (tmp_data == NULL) + return -ENOMEM; + + strlcpy(tmp_data, data, data_len); + + ret = parser_read_uint32(&val->len, tmp_data); + + free(tmp_data); + } else + ret = parser_read_uint32(&val->len, data); + + return ret; +} + +int +parser_read_uint32_bit_val(const char *key, char *src, struct fips_val *val) +{ + int ret; + + ret = parser_read_uint32_val(key, src, val); + + if (ret < 0) + return ret; + + val->len /= 8; + + return 0; +} + +int +writeback_hex_str(const char *key, char *dst, struct fips_val *val) +{ + char *str = dst; + uint32_t len; + + str += strlen(key); + + for (len = 0; len < val->len; len++) + snprintf(str + len * 2, 255, "%02x", val->val[len]); + + return 0; +} + +static int +parser_read_uint64(uint64_t *value, const char *p) +{ + char *next; + uint64_t val; + + p = skip_white_spaces(p); + if (!isdigit(*p)) + return -EINVAL; + + val = strtoul(p, &next, 10); + if (p == next) + return -EINVAL; + + p = next; + switch (*p) { + case 'T': + val *= 1024ULL; + /* fall through */ + case 'G': + val *= 1024ULL; + /* fall through */ + case 'M': + val *= 1024ULL; + /* fall through */ + case 'k': + case 'K': + val *= 1024ULL; + p++; + break; + } + + p = skip_white_spaces(p); + if (*p != '\0') + return -EINVAL; + + *value = val; + return 0; +} + +int +parser_read_uint32(uint32_t *value, char *p) +{ + uint64_t val = 0; + int ret = parser_read_uint64(&val, p); + + if (ret < 0) + return ret; + + if (val > UINT32_MAX) + return -EINVAL; + + *value = val; + return 0; +} + +void +parse_write_hex_str(struct fips_val *src) +{ + writeback_hex_str("", info.one_line_text, src); + + fprintf(info.fp_wr, "%s\n", info.one_line_text); +} + +int +update_info_vec(uint32_t count) +{ + const struct fips_test_callback *cb; + uint32_t i, j; + + if (!info.writeback_callbacks) + return -1; + + cb = &info.writeback_callbacks[0]; + + snprintf(info.vec[0], strlen(info.vec[0]) + 4, "%s%u", cb->key, count); + + for (i = 1; i < info.nb_vec_lines; i++) { + for (j = 1; info.writeback_callbacks[j].key != NULL; j++) { + cb = &info.writeback_callbacks[j]; + if (strstr(info.vec[i], cb->key)) { + cb->cb(cb->key, info.vec[i], cb->val); + break; + } + } + } + + return 0; +} diff --git a/examples/fips_validation/fips_validation.h b/examples/fips_validation/fips_validation.h new file mode 100644 index 00000000..3e291bc3 --- /dev/null +++ b/examples/fips_validation/fips_validation.h @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _FIPS_VALIDATION_H_ +#define _FIPS_VALIDATION_H_ + +#define FIPS_PARSE_ERR(fmt, args) \ + RTE_LOG(ERR, USER1, "FIPS parse error" ## fmt ## "\n", ## args) + +#define ERR_MSG_SIZE 128 +#define MAX_CASE_LINE 15 +#define MAX_LINE_CHAR 204800 /*< max number of characters per line */ +#define MAX_NB_TESTS 10240 +#define MAX_BUF_SIZE 2048 +#define MAX_STRING_SIZE 64 + +#define POSITIVE_TEST 0 +#define NEGATIVE_TEST -1 + +#define REQ_FILE_PERFIX "req" +#define RSP_FILE_PERFIX "rsp" +#define FAX_FILE_PERFIX "fax" + +enum fips_test_algorithms { + FIPS_TEST_ALGO_AES = 0, + FIPS_TEST_ALGO_AES_GCM, + FIPS_TEST_ALGO_AES_CMAC, + FIPS_TEST_ALGO_AES_CCM, + FIPS_TEST_ALGO_HMAC, + FIPS_TEST_ALGO_TDES, + FIPS_TEST_ALGO_MAX +}; + +enum file_types { + FIPS_TYPE_REQ = 1, + FIPS_TYPE_FAX, + FIPS_TYPE_RSP +}; + +enum fips_test_op { + FIPS_TEST_ENC_AUTH_GEN = 1, + FIPS_TEST_DEC_AUTH_VERIF, +}; + +#define MAX_LINE_PER_VECTOR 16 + +struct fips_val { + uint8_t *val; + uint32_t len; +}; + +struct fips_test_vector { + union { + struct { + struct fips_val key; + struct fips_val digest; + struct fips_val auth_aad; + struct fips_val aad; + } cipher_auth; + struct { + struct fips_val key; + struct fips_val digest; + struct fips_val aad; + } aead; + }; + + struct fips_val pt; + struct fips_val ct; + struct fips_val iv; + + enum rte_crypto_op_status status; +}; + +typedef int (*post_prcess_t)(struct fips_val *val); + +typedef int (*parse_callback_t)(const char *key, char *text, + struct fips_val *val); + +struct fips_test_callback { + const char *key; + parse_callback_t cb; + struct fips_val *val; +}; + +enum fips_aesavs_test_types { + AESAVS_TYPE_GFXBOX = 1, + AESAVS_TYPE_KEYSBOX, + AESAVS_TYPE_VARKEY, + AESAVS_TYPE_VARTXT, + AESAVS_TYPE_MMT, + AESAVS_TYPE_MCT, +}; + +enum fips_tdes_test_types { + TDES_INVERSE_PERMUTATION = 0, + TDES_PERMUTATION, + TDES_SUBSTITUTION_TABLE, + TDES_VARIABLE_KEY, + TDES_VARIABLE_TEXT, + TDES_KAT, + TDES_MCT, /* Monte Carlo (Modes) Test */ + TDES_MMT /* Multi block Message Test */ +}; + +enum fips_ccm_test_types { + CCM_VADT = 1, /* Variable Associated Data Test */ + CCM_VPT, /* Variable Payload Test */ + CCM_VNT, /* Variable Nonce Test */ + CCM_VTT, /* Variable Tag Test */ + CCM_DVPT, /* Decryption-Verification Process Test */ +}; + +struct aesavs_interim_data { + enum fips_aesavs_test_types test_type; + uint32_t cipher_algo; + uint32_t key_len; +}; + +struct hmac_interim_data { + enum rte_crypto_auth_algorithm algo; +}; + +struct tdes_interim_data { + enum fips_tdes_test_types test_type; + uint32_t nb_keys; +}; + +struct ccm_interim_data { + enum fips_ccm_test_types test_type; + uint32_t aad_len; + uint32_t pt_len; + uint32_t digest_len; + uint32_t key_len; + uint32_t iv_len; +}; + +struct fips_test_interim_info { + FILE *fp_rd; + FILE *fp_wr; + enum file_types file_type; + enum fips_test_algorithms algo; + char *one_line_text; + char *vec[MAX_LINE_PER_VECTOR]; + uint32_t nb_vec_lines; + char device_name[MAX_STRING_SIZE]; + + union { + struct aesavs_interim_data aes_data; + struct hmac_interim_data hmac_data; + struct tdes_interim_data tdes_data; + struct ccm_interim_data ccm_data; + + } interim_info; + + enum fips_test_op op; + + const struct fips_test_callback *callbacks; + const struct fips_test_callback *interim_callbacks; + const struct fips_test_callback *writeback_callbacks; + + post_prcess_t parse_writeback; + post_prcess_t kat_check; +}; + +extern struct fips_test_vector vec; +extern struct fips_test_interim_info info; + +int +fips_test_init(const char *req_file_path, const char *rsp_file_path, + const char *device_name); + +void +fips_test_clear(void); + +int +fips_test_fetch_one_block(void); + +int +fips_test_parse_one_case(void); + +void +fips_test_write_one_case(void); + +int +parse_test_aes_init(void); + +int +parse_test_tdes_init(void); + +int +parse_test_hmac_init(void); + +int +parse_test_gcm_init(void); + +int +parse_test_cmac_init(void); + +int +parse_test_ccm_init(void); + +int +parser_read_uint8_hex(uint8_t *value, const char *p); + +int +parse_uint8_hex_str(const char *key, char *src, struct fips_val *val); + +int +parse_uint8_known_len_hex_str(const char *key, char *src, struct fips_val *val); + +int +parser_read_uint32_val(const char *key, char *src, struct fips_val *val); + +int +parser_read_uint32_bit_val(const char *key, char *src, struct fips_val *val); + +int +parser_read_uint32(uint32_t *value, char *p); + +int +parser_read_uint32_val(const char *key, char *src, struct fips_val *val); + +int +writeback_hex_str(const char *key, char *dst, struct fips_val *val); + +void +parse_write_hex_str(struct fips_val *src); + +int +update_info_vec(uint32_t count); + +#endif diff --git a/examples/fips_validation/fips_validation_aes.c b/examples/fips_validation/fips_validation_aes.c new file mode 100644 index 00000000..8cbc158e --- /dev/null +++ b/examples/fips_validation/fips_validation_aes.c @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <string.h> +#include <time.h> +#include <stdio.h> + +#include <rte_cryptodev.h> + +#include "fips_validation.h" + +#define MODE_STR "AESVS" +#define ALGO_STR "test data for " +#define OP_STR "State" +#define KEY_SIZE_STR "Key Length : " + + +#define COUNT_STR "COUNT = " +#define KEY_STR "KEY = " +#define IV_STR "IV = " +#define PT_STR "PLAINTEXT = " +#define CT_STR "CIPHERTEXT = " + +#define OP_ENC_STR "ENCRYPT" +#define OP_DEC_STR "DECRYPT" + +struct { + uint32_t type; + const char *desc; +} aes_test_types[] = { + {AESAVS_TYPE_GFXBOX, "GFSbox"}, + {AESAVS_TYPE_KEYSBOX, "KeySbox"}, + {AESAVS_TYPE_VARKEY, "VarKey"}, + {AESAVS_TYPE_VARTXT, "VarTxt"}, + {TDES_VARIABLE_TEXT, "VARIABLE PLAINTEXT/CIPHERTEXT"}, + {TDES_VARIABLE_TEXT, "KAT"}, + {AESAVS_TYPE_MMT, "MMT"}, + {AESAVS_TYPE_MCT, "MCT"}, +}; + +struct aes_test_algo { + const char *name; + enum rte_crypto_cipher_algorithm algo; +} const algo_con[] = { + {"CBC", RTE_CRYPTO_CIPHER_AES_CBC}, +}; + +static int +parse_interim_enc_dec(const char *key, + __attribute__((__unused__)) char *text, + __attribute__((__unused__)) struct fips_val *val) +{ + if (strcmp(key, OP_ENC_STR) == 0) + info.op = FIPS_TEST_ENC_AUTH_GEN; + else if (strcmp(key, OP_DEC_STR) == 0) + info.op = FIPS_TEST_DEC_AUTH_VERIF; + else + return -1; + + return 0; +} + +struct fips_test_callback aes_tests_interim[] = { + {OP_ENC_STR, parse_interim_enc_dec, NULL}, + {OP_DEC_STR, parse_interim_enc_dec, NULL}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback aes_tests_vectors[] = { + {KEY_STR, parse_uint8_hex_str, &vec.cipher_auth.key}, + {IV_STR, parse_uint8_hex_str, &vec.iv}, + {PT_STR, parse_uint8_hex_str, &vec.pt}, + {CT_STR, parse_uint8_hex_str, &vec.ct}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback aes_tests_interim_vectors[] = { + {OP_ENC_STR, parse_interim_enc_dec, NULL}, + {OP_DEC_STR, parse_interim_enc_dec, NULL}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback aes_writeback_callbacks[] = { + /** First element is used to pass COUNT string */ + {COUNT_STR, NULL, NULL}, + {IV_STR, writeback_hex_str, &vec.iv}, + {KEY_STR, writeback_hex_str, &vec.cipher_auth.key}, + {PT_STR, writeback_hex_str, &vec.pt}, + {CT_STR, writeback_hex_str, &vec.ct}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +static int +parse_test_aes_writeback(struct fips_val *val) +{ + if (info.op == FIPS_TEST_ENC_AUTH_GEN) + fprintf(info.fp_wr, "%s", CT_STR); + else + fprintf(info.fp_wr, "%s", PT_STR); + + parse_write_hex_str(val); + + return 0; +} + +static int +rsp_test_aes_check(struct fips_val *val) +{ + struct fips_val *data; + + if (info.op == FIPS_TEST_ENC_AUTH_GEN) + data = &vec.ct; + else + data = &vec.pt; + + if (memcmp(val->val, data->val, val->len) == 0) + fprintf(info.fp_wr, "Success\n"); + else + fprintf(info.fp_wr, "Failed\n"); + + return 0; +} + +int +parse_test_aes_init(void) +{ + char *tmp; + uint32_t i, j; + + for (i = 0; i < info.nb_vec_lines; i++) { + char *line = info.vec[i]; + + tmp = strstr(line, MODE_STR); + if (tmp) { + for (j = 0; j < RTE_DIM(aes_test_types); j++) + if (strstr(line, aes_test_types[j].desc)) { + info.interim_info.aes_data.test_type = + aes_test_types[j].type; + break; + } + + if (j >= RTE_DIM(aes_test_types)) + return -EINVAL; + + tmp = strstr(line, ALGO_STR); + if (!tmp) + return -EINVAL; + + tmp += strlen(ALGO_STR); + for (j = 0; j < RTE_DIM(algo_con); j++) + if (strcmp(algo_con[j].name, tmp) == 0) { + info.interim_info.aes_data.cipher_algo = + (uint32_t)algo_con[j].algo; + break; + } + if (j >= RTE_DIM(algo_con)) + return -EINVAL; + + continue; + } + + tmp = strstr(line, OP_STR); + if (tmp) + continue; + + tmp = strstr(line, KEY_SIZE_STR); + if (tmp) { + tmp += strlen(KEY_SIZE_STR); + if (parser_read_uint32 + (&info.interim_info.aes_data.key_len, + tmp) < 0) + return -EINVAL; + + info.interim_info.aes_data.key_len /= 8; + + continue; + } + } + + info.parse_writeback = parse_test_aes_writeback; + info.callbacks = aes_tests_vectors; + info.interim_callbacks = aes_tests_interim_vectors; + info.writeback_callbacks = aes_writeback_callbacks; + info.kat_check = rsp_test_aes_check; + + return 0; +} diff --git a/examples/fips_validation/fips_validation_ccm.c b/examples/fips_validation/fips_validation_ccm.c new file mode 100644 index 00000000..632999c1 --- /dev/null +++ b/examples/fips_validation/fips_validation_ccm.c @@ -0,0 +1,272 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <stdio.h> +#include <string.h> + +#include <rte_string_fns.h> +#include <rte_cryptodev.h> +#include <rte_malloc.h> + +#include "fips_validation.h" + +#define DVPT_STR "CCM-DVPT" +#define VADT_STR "CCM-VADT" +#define VPT_STR "CCM-VPT" +#define VNT_STR "CCM-VNT" +#define VTT_STR "CCM-VTT" + +#define PARAM_PREFIX "[" +#define ALEN_PREFIX "Alen = " +#define PLEN_PREFIX "Plen = " +#define IVLEN_PREFIX "Nlen = " +#define DIGESTL_PREFIX "Tlen = " + +#define COUNT_STR "Count = " +#define KEY_STR "Key = " +#define IV_STR "Nonce = " +#define PT_STR "Payload = " +#define CT_STR "CT = " +#define AAD_STR "Adata = " +#define POS_NEG_STR "Result = " + +#define POS_KEYWORD "Pass" +#define NEG_KEYWORD "Fail" + +static int +parser_dvpt_interim(const char *key, char *src, struct fips_val *val) +{ + char *tmp, c, value[10]; + char num_pattern[] = "0123456789"; + int i = 0; + + memset(value, 0, 10); + + tmp = strstr(src, key); + if (!tmp) + return -1; + + tmp += strlen(key); + + c = tmp[0]; + + while (strchr(num_pattern, c) && i < 10) { + value[i++] = c; + c = tmp[i]; + } + + return parser_read_uint32_val("", value, val); +} + +static int +parse_dvpt_ct_hex_str(const char *key, char *src, struct fips_val *val) +{ + int ret; + + val->len = vec.pt.len; + + ret = parse_uint8_known_len_hex_str(key, src, val); + if (ret < 0) + return ret; + + src += strlen(key) + val->len * 2; + + ret = parse_uint8_known_len_hex_str("", src, &vec.aead.digest); + if (ret < 0) { + rte_free(val->val); + memset(val, 0, sizeof(*val)); + return ret; + } + + return 0; +} + +static int +parse_uint8_ccm_aad_str(const char *key, char *src, struct fips_val *val) +{ + uint32_t len = val->len, j; + + src += strlen(key); + + /* CCM aad requires 18 bytes padding before the real content */ + val->val = rte_zmalloc(NULL, len + 18, 0); + if (!val->val) + return -1; + + for (j = 0; j < len; j++) { + char byte[3] = {src[j * 2], src[j * 2 + 1], '\0'}; + + if (parser_read_uint8_hex(&val->val[j + 18], byte) < 0) { + rte_free(val->val); + memset(val, 0, sizeof(*val)); + return -EINVAL; + } + } + + return 0; +} + +struct fips_test_callback ccm_vnt_vec[] = { + {IV_STR, parse_uint8_known_len_hex_str, &vec.iv}, + {AAD_STR, parse_uint8_ccm_aad_str, &vec.aead.aad}, + {PT_STR, parse_uint8_known_len_hex_str, &vec.pt}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback ccm_vnt_interim_vec[] = { + {ALEN_PREFIX, parser_read_uint32_val, &vec.aead.aad}, + {PLEN_PREFIX, parser_read_uint32_val, &vec.pt}, + {DIGESTL_PREFIX, parser_read_uint32_val, &vec.aead.digest}, + {IVLEN_PREFIX, parser_read_uint32_val, &vec.iv}, + {KEY_STR, parse_uint8_hex_str, &vec.aead.key}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback ccm_vtt_vec[] = { + {AAD_STR, parse_uint8_ccm_aad_str, &vec.aead.aad}, + {PT_STR, parse_uint8_known_len_hex_str, &vec.pt}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback ccm_vtt_interim_vec[] = { + {ALEN_PREFIX, parser_read_uint32_val, &vec.aead.aad}, + {PLEN_PREFIX, parser_read_uint32_val, &vec.pt}, + {IVLEN_PREFIX, parser_read_uint32_val, &vec.iv}, + {DIGESTL_PREFIX, parser_read_uint32_val, &vec.aead.digest}, + {KEY_STR, parse_uint8_hex_str, &vec.aead.key}, + {IV_STR, parse_uint8_known_len_hex_str, &vec.iv}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback ccm_vadt_vec[] = { + {AAD_STR, parse_uint8_ccm_aad_str, &vec.aead.aad}, + {PT_STR, parse_uint8_known_len_hex_str, &vec.pt}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback ccm_vadt_interim_vec[] = { + {PLEN_PREFIX, parser_read_uint32_val, &vec.pt}, + {IVLEN_PREFIX, parser_read_uint32_val, &vec.iv}, + {ALEN_PREFIX, parser_read_uint32_val, &vec.aead.aad}, + {DIGESTL_PREFIX, parser_read_uint32_val, &vec.aead.digest}, + {KEY_STR, parse_uint8_hex_str, &vec.aead.key}, + {IV_STR, parse_uint8_known_len_hex_str, &vec.iv}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback ccm_vpt_vec[] = { + {AAD_STR, parse_uint8_ccm_aad_str, &vec.aead.aad}, + {PT_STR, parse_uint8_known_len_hex_str, &vec.pt}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback ccm_vpt_interim_vec[] = { + {ALEN_PREFIX, parser_read_uint32_val, &vec.aead.aad}, + {IVLEN_PREFIX, parser_read_uint32_val, &vec.iv}, + {DIGESTL_PREFIX, parser_read_uint32_val, &vec.aead.digest}, + {PLEN_PREFIX, parser_read_uint32_val, &vec.pt}, + {KEY_STR, parse_uint8_hex_str, &vec.aead.key}, + {IV_STR, parse_uint8_known_len_hex_str, &vec.iv}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback ccm_dvpt_vec[] = { + {IV_STR, parse_uint8_known_len_hex_str, &vec.iv}, + {AAD_STR, parse_uint8_ccm_aad_str, &vec.aead.aad}, + {CT_STR, parse_dvpt_ct_hex_str, &vec.ct}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback ccm_dvpt_interim_vec[] = { + {ALEN_PREFIX, parser_dvpt_interim, &vec.aead.aad}, + {PLEN_PREFIX, parser_dvpt_interim, &vec.pt}, + {IVLEN_PREFIX, parser_dvpt_interim, &vec.iv}, + {DIGESTL_PREFIX, parser_dvpt_interim, &vec.aead.digest}, + {KEY_STR, parse_uint8_hex_str, &vec.aead.key}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct ccm_test_types { + const char *str; + uint32_t type; + const struct fips_test_callback *cb; + const struct fips_test_callback *cb_interim; + enum fips_test_op op; +} ctt[] = { + {DVPT_STR, CCM_DVPT, ccm_dvpt_vec, ccm_dvpt_interim_vec, + FIPS_TEST_DEC_AUTH_VERIF}, + {VPT_STR, CCM_VPT, ccm_vpt_vec, ccm_vpt_interim_vec, + FIPS_TEST_ENC_AUTH_GEN}, + {VADT_STR, CCM_VADT, ccm_vadt_vec, ccm_vadt_interim_vec, + FIPS_TEST_ENC_AUTH_GEN}, + {VNT_STR, CCM_VNT, ccm_vnt_vec, ccm_vnt_interim_vec, + FIPS_TEST_ENC_AUTH_GEN}, + {VTT_STR, CCM_VTT, ccm_vtt_vec, ccm_vtt_interim_vec, + FIPS_TEST_ENC_AUTH_GEN}, +}; + +static int +parse_test_ccm_writeback(struct fips_val *val) +{ + struct fips_val tmp_val; + + switch (info.interim_info.ccm_data.test_type) { + case CCM_DVPT: + fprintf(info.fp_wr, "%s", POS_NEG_STR); + if (vec.status == RTE_CRYPTO_OP_STATUS_SUCCESS) { + fprintf(info.fp_wr, "%s\n", POS_KEYWORD); + fprintf(info.fp_wr, "%s", PT_STR); + + tmp_val.val = val->val; + tmp_val.len = vec.pt.len; + + if (tmp_val.len == 0) + fprintf(info.fp_wr, "00\n"); + else + parse_write_hex_str(&tmp_val); + } else + fprintf(info.fp_wr, "%s\n", NEG_KEYWORD); + + break; + + case CCM_VADT: + case CCM_VNT: + case CCM_VPT: + case CCM_VTT: + fprintf(info.fp_wr, "%s", CT_STR); + + parse_write_hex_str(val); + + break; + + } + + return 0; +} + +int +parse_test_ccm_init(void) +{ + + uint32_t i; + + for (i = 0; i < info.nb_vec_lines; i++) { + char *line = info.vec[i]; + uint32_t j; + + for (j = 0; j < RTE_DIM(ctt); j++) + if (strstr(line, ctt[j].str)) { + info.interim_info.ccm_data.test_type = + ctt[j].type; + info.callbacks = ctt[j].cb; + info.interim_callbacks = ctt[j].cb_interim; + info.op = ctt[j].op; + break; + } + } + + info.parse_writeback = parse_test_ccm_writeback; + + return 0; +} diff --git a/examples/fips_validation/fips_validation_cmac.c b/examples/fips_validation/fips_validation_cmac.c new file mode 100644 index 00000000..54c951ef --- /dev/null +++ b/examples/fips_validation/fips_validation_cmac.c @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <string.h> +#include <time.h> +#include <stdio.h> +#include <rte_string_fns.h> + +#include <rte_cryptodev.h> + +#include "fips_validation.h" + +#define NEW_LINE_STR "#" +#define OP_STR "CMAC" + +#define ALGO_STR "Alg = " +#define MODE_STR "Mode = " + +#define COUNT_STR "Count = " +#define KLEN_STR "Klen = " +#define PTLEN_STR "Mlen = " +#define TAGLEN_STR "Tlen = " +#define KEY_STR "Key = " +#define PT_STR "Msg = " +#define TAG_STR "Mac = " + +#define GEN_STR "Generate" +#define VERIF_STR "Verify" + +#define POS_NEG_STR "Result = " +#define PASS_STR "P" +#define FAIL_STR "F" + +struct hash_algo_conversion { + const char *str; + enum fips_test_algorithms algo; +} cmac_algo[] = { + {"AES", FIPS_TEST_ALGO_AES_CMAC}, +}; + +static int +parse_test_cmac_writeback(struct fips_val *val) +{ + if (info.op == FIPS_TEST_ENC_AUTH_GEN) { + struct fips_val tmp_val = {val->val + vec.pt.len, + vec.cipher_auth.digest.len}; + + fprintf(info.fp_wr, "%s", TAG_STR); + parse_write_hex_str(&tmp_val); + } else { + fprintf(info.fp_wr, "%s", POS_NEG_STR); + + if (vec.status == RTE_CRYPTO_OP_STATUS_SUCCESS) + fprintf(info.fp_wr, "%s\n", PASS_STR); + else if (vec.status == RTE_CRYPTO_OP_STATUS_AUTH_FAILED) + fprintf(info.fp_wr, "%s\n", FAIL_STR); + else + fprintf(info.fp_wr, "Error\n"); + } + + return 0; +} + +struct fips_test_callback cmac_tests_vectors[] = { + {KLEN_STR, parser_read_uint32_val, &vec.cipher_auth.key}, + {PTLEN_STR, parser_read_uint32_val, &vec.pt}, + {TAGLEN_STR, parser_read_uint32_val, &vec.cipher_auth.digest}, + {KEY_STR, parse_uint8_hex_str, &vec.cipher_auth.key}, + {PT_STR, parse_uint8_known_len_hex_str, &vec.pt}, + {TAG_STR, parse_uint8_known_len_hex_str, + &vec.cipher_auth.digest}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +int +parse_test_cmac_init(void) +{ + char *tmp; + uint32_t i, j; + + for (i = 0; i < info.nb_vec_lines; i++) { + char *line = info.vec[i]; + + tmp = strstr(line, ALGO_STR); + if (!tmp) + continue; + + for (j = 0; j < RTE_DIM(cmac_algo); j++) { + if (!strstr(line, cmac_algo[j].str)) + continue; + + info.algo = cmac_algo[j].algo; + break; + } + + if (j == RTE_DIM(cmac_algo)) + return -EINVAL; + + tmp = strstr(line, MODE_STR); + if (!tmp) + return -1; + + if (strstr(tmp, GEN_STR)) + info.op = FIPS_TEST_ENC_AUTH_GEN; + else if (strstr(tmp, VERIF_STR)) + info.op = FIPS_TEST_DEC_AUTH_VERIF; + else + return -EINVAL; + } + + info.parse_writeback = parse_test_cmac_writeback; + info.callbacks = cmac_tests_vectors; + + return 0; +} diff --git a/examples/fips_validation/fips_validation_gcm.c b/examples/fips_validation/fips_validation_gcm.c new file mode 100644 index 00000000..0509b101 --- /dev/null +++ b/examples/fips_validation/fips_validation_gcm.c @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <string.h> +#include <time.h> +#include <stdio.h> + +#include <rte_cryptodev.h> + +#include "fips_validation.h" + +#define NEW_LINE_STR "#" +#define OP_STR "GCM " + +#define PARAM_PREFIX "[" +#define KEYLEN_STR "Keylen = " +#define IVLEN_STR "IVlen = " +#define PTLEN_STR "PTlen = " +#define AADLEN_STR "AADlen = " +#define TAGLEN_STR "Taglen = " + +#define COUNT_STR "Count = " +#define KEY_STR "Key = " +#define IV_STR "IV = " +#define PT_STR "PT = " +#define CT_STR "CT = " +#define TAG_STR "Tag = " +#define AAD_STR "AAD = " + +#define OP_ENC_STR "Encrypt" +#define OP_DEC_STR "Decrypt" + +#define NEG_TEST_STR "FAIL" + +struct fips_test_callback gcm_dec_vectors[] = { + {KEY_STR, parse_uint8_known_len_hex_str, &vec.cipher_auth.key}, + {IV_STR, parse_uint8_known_len_hex_str, &vec.iv}, + {CT_STR, parse_uint8_known_len_hex_str, &vec.ct}, + {AAD_STR, parse_uint8_known_len_hex_str, &vec.cipher_auth.aad}, + {TAG_STR, parse_uint8_known_len_hex_str, + &vec.cipher_auth.digest}, + {NULL, NULL, NULL} /**< end pointer */ +}; +struct fips_test_callback gcm_interim_vectors[] = { + {KEYLEN_STR, parser_read_uint32_bit_val, &vec.cipher_auth.key}, + {IVLEN_STR, parser_read_uint32_bit_val, &vec.iv}, + {PTLEN_STR, parser_read_uint32_bit_val, &vec.pt}, + {AADLEN_STR, parser_read_uint32_bit_val, &vec.cipher_auth.aad}, + {TAGLEN_STR, parser_read_uint32_bit_val, + &vec.cipher_auth.digest}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback gcm_enc_vectors[] = { + {KEY_STR, parse_uint8_known_len_hex_str, &vec.cipher_auth.key}, + {IV_STR, parse_uint8_known_len_hex_str, &vec.iv}, + {PT_STR, parse_uint8_known_len_hex_str, &vec.pt}, + {AAD_STR, parse_uint8_known_len_hex_str, &vec.cipher_auth.aad}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +static int +parse_test_gcm_writeback(struct fips_val *val) +{ + struct fips_val tmp_val; + + if (info.op == FIPS_TEST_ENC_AUTH_GEN) { + fprintf(info.fp_wr, "%s", CT_STR); + + tmp_val.val = val->val; + tmp_val.len = vec.pt.len; + + parse_write_hex_str(&tmp_val); + + fprintf(info.fp_wr, "%s", TAG_STR); + + tmp_val.val = val->val + vec.pt.len; + tmp_val.len = val->len - vec.pt.len; + + parse_write_hex_str(&tmp_val); + } else { + if (vec.status == RTE_CRYPTO_OP_STATUS_SUCCESS) { + fprintf(info.fp_wr, "%s", PT_STR); + + tmp_val.val = val->val; + tmp_val.len = vec.pt.len; + + parse_write_hex_str(&tmp_val); + } else + fprintf(info.fp_wr, "%s\n", NEG_TEST_STR); + } + + return 0; +} + +int +parse_test_gcm_init(void) +{ + char *tmp; + uint32_t i; + + + for (i = 0; i < info.nb_vec_lines; i++) { + char *line = info.vec[i]; + + + tmp = strstr(line, OP_STR); + if (tmp) { + if (strstr(line, OP_ENC_STR)) { + info.op = FIPS_TEST_ENC_AUTH_GEN; + info.callbacks = gcm_enc_vectors; + } else if (strstr(line, OP_DEC_STR)) { + info.op = FIPS_TEST_DEC_AUTH_VERIF; + info.callbacks = gcm_dec_vectors; + } else + return -EINVAL; + } + } + + info.interim_callbacks = gcm_interim_vectors; + info.parse_writeback = parse_test_gcm_writeback; + + return 0; +} diff --git a/examples/fips_validation/fips_validation_hmac.c b/examples/fips_validation/fips_validation_hmac.c new file mode 100644 index 00000000..97ac7186 --- /dev/null +++ b/examples/fips_validation/fips_validation_hmac.c @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <string.h> +#include <time.h> +#include <stdio.h> + +#include <rte_cryptodev.h> + +#include "fips_validation.h" + +#define ALGO_PREFIX "[L=" +#define KEYLEN_STR "Klen = " +#define TAGLEN_STR "Tlen = " + +#define COUNT_STR "Count = " +#define KEY_STR "Key = " +#define PT_STR "Msg = " +#define TAG_STR "Mac = " + +struct hash_size_conversion { + const char *str; + enum rte_crypto_auth_algorithm algo; +} hsc[] = { + {"20", RTE_CRYPTO_AUTH_SHA1_HMAC}, + {"28", RTE_CRYPTO_AUTH_SHA224_HMAC}, + {"32", RTE_CRYPTO_AUTH_SHA256_HMAC}, + {"48", RTE_CRYPTO_AUTH_SHA384_HMAC}, + {"64", RTE_CRYPTO_AUTH_SHA512_HMAC}, +}; + +static int +parse_interim_algo(__attribute__((__unused__)) const char *key, + char *text, + __attribute__((__unused__)) struct fips_val *val) +{ + + uint32_t i; + + for (i = 0; i < RTE_DIM(hsc); i++) { + if (strstr(text, hsc[i].str)) { + info.interim_info.hmac_data.algo = hsc[i].algo; + break; + } + } + + if (i == RTE_DIM(hsc)) + return -1; + + return 0; +} + +struct fips_test_callback hmac_tests_vectors[] = { + {KEYLEN_STR, parser_read_uint32_val, &vec.cipher_auth.key}, + {TAGLEN_STR, parser_read_uint32_val, &vec.cipher_auth.digest}, + {KEY_STR, parse_uint8_hex_str, &vec.cipher_auth.key}, + {PT_STR, parse_uint8_hex_str, &vec.pt}, + {TAG_STR, parse_uint8_hex_str, &vec.cipher_auth.digest}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback hmac_tests_interim_vectors[] = { + {ALGO_PREFIX, parse_interim_algo, NULL}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +static int +parse_test_hmac_writeback(struct fips_val *val) +{ + struct fips_val val_local; + + fprintf(info.fp_wr, "%s", TAG_STR); + + val_local.val = val->val + vec.pt.len; + val_local.len = vec.cipher_auth.digest.len; + + parse_write_hex_str(&val_local); + return 0; +} + +static int +rsp_test_hmac_check(struct fips_val *val) +{ + if (memcmp(val->val + vec.pt.len, vec.cipher_auth.digest.val, + vec.cipher_auth.digest.len) == 0) + fprintf(info.fp_wr, "Success\n"); + else + fprintf(info.fp_wr, "Failed\n"); + + return 0; +} + +int +parse_test_hmac_init(void) +{ + info.op = FIPS_TEST_ENC_AUTH_GEN; + info.parse_writeback = parse_test_hmac_writeback; + info.callbacks = hmac_tests_vectors; + info.interim_callbacks = hmac_tests_interim_vectors; + info.writeback_callbacks = NULL; + info.kat_check = rsp_test_hmac_check; + + return 0; +} diff --git a/examples/fips_validation/fips_validation_tdes.c b/examples/fips_validation/fips_validation_tdes.c new file mode 100644 index 00000000..5064ff3b --- /dev/null +++ b/examples/fips_validation/fips_validation_tdes.c @@ -0,0 +1,264 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <string.h> +#include <stdio.h> + +#include <rte_malloc.h> +#include <rte_cryptodev.h> + +#include "fips_validation.h" + +#define NEW_LINE_STR "#" +#define TEST_TYPE_KEY " for CBC" +#define TEST_CBCI_KEY " for CBCI" + +#define ENC_STR "[ENCRYPT]" +#define DEC_STR "[DECRYPT]" + +#define COUNT_STR "COUNT = " +#define KEY1_STR "KEY1 = " +#define KEY2_STR "KEY2 = " +#define KEY3_STR "KEY3 = " + +#define KEYS_STR "KEYs = " +#define IV_STR "IV = " +#define PT_STR "PLAINTEXT = " +#define CT_STR "CIPHERTEXT = " +#define NK_STR "NumKeys = " + +#define SET_STR " = " + +#define PLAIN_TEXT 0 +#define CIPHER_TEXT 1 +#define KEY_TEXT 2 +#define IV_TEXT 3 + +#define DEVICE_STR "# Config Info for : " + +struct { + uint32_t type; + const char *desc; +} test_types[] = { + {TDES_INVERSE_PERMUTATION, "INVERSE PERMUTATION"}, + {TDES_PERMUTATION, "PERMUTATION OPERATION"}, + {TDES_SUBSTITUTION_TABLE, "SUBSTITUTION TABLE"}, + {TDES_VARIABLE_KEY, "VARIABLE KEY"}, + {TDES_VARIABLE_TEXT, "VARIABLE PLAINTEXT/CIPHERTEXT"}, + {TDES_VARIABLE_TEXT, "KAT"}, + {TDES_MCT, "Monte Carlo (Modes) Test"}, + {TDES_MMT, "Multi block Message Test"}, +}; + +static int +writeback_tdes_hex_str(const char *key, char *dst, struct fips_val *val); + +static int +parse_tdes_uint8_hex_str(const char *key, char *src, struct fips_val *val); + +static int +parse_tdes_interim(const char *key, + __attribute__((__unused__)) char *text, + struct fips_val *val); + +struct fips_test_callback tdes_tests_vectors[] = { + {KEYS_STR, parse_tdes_uint8_hex_str, &vec.cipher_auth.key}, + {KEY1_STR, parse_tdes_uint8_hex_str, &vec.cipher_auth.key}, + {KEY2_STR, parse_tdes_uint8_hex_str, &vec.cipher_auth.key}, + {KEY3_STR, parse_tdes_uint8_hex_str, &vec.cipher_auth.key}, + {IV_STR, parse_uint8_hex_str, &vec.iv}, + {PT_STR, parse_uint8_hex_str, &vec.pt}, + {CT_STR, parse_uint8_hex_str, &vec.ct}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback tdes_tests_interim_vectors[] = { + {ENC_STR, parse_tdes_interim, NULL}, + {DEC_STR, parse_tdes_interim, NULL}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +struct fips_test_callback tdes_writeback_callbacks[] = { + /** First element is used to pass COUNT string */ + {COUNT_STR, NULL, NULL}, + {IV_STR, writeback_hex_str, &vec.iv}, + {KEY1_STR, writeback_tdes_hex_str, &vec.cipher_auth.key}, + {KEY2_STR, writeback_tdes_hex_str, &vec.cipher_auth.key}, + {KEY3_STR, writeback_tdes_hex_str, &vec.cipher_auth.key}, + {KEYS_STR, writeback_tdes_hex_str, &vec.cipher_auth.key}, + {PT_STR, writeback_hex_str, &vec.pt}, + {CT_STR, writeback_hex_str, &vec.ct}, + {NULL, NULL, NULL} /**< end pointer */ +}; + +static int +parse_tdes_interim(const char *key, + __attribute__((__unused__)) char *text, + __attribute__((__unused__)) struct fips_val *val) +{ + if (strstr(key, ENC_STR)) + info.op = FIPS_TEST_ENC_AUTH_GEN; + else if (strstr(key, DEC_STR)) + info.op = FIPS_TEST_DEC_AUTH_VERIF; + else if (strstr(NK_STR, "NumKeys = 1")) + info.interim_info.tdes_data.nb_keys = 1; + else if (strstr(NK_STR, "NumKeys = 2")) + info.interim_info.tdes_data.nb_keys = 2; + else if (strstr(NK_STR, "NumKeys = 3")) + info.interim_info.tdes_data.nb_keys = 3; + else + return -EINVAL; + + return 0; +} + +static int +parse_tdes_uint8_hex_str(const char *key, char *src, struct fips_val *val) +{ + uint8_t tmp_key[24] = {0}; + uint32_t len, i; + + src += strlen(key); + + len = strlen(src) / 2; + + if (val->val) { + memcpy(tmp_key, val->val, val->len); + rte_free(val->val); + } + + val->val = rte_zmalloc(NULL, 24, 0); + if (!val->val) + return -1; + + memcpy(val->val, tmp_key, 24); + + if (strstr(key, KEYS_STR)) { + for (i = 0; i < len; i++) { + char byte[3] = {src[i * 2], src[i * 2 + 1], '\0'}; + + if (parser_read_uint8_hex(&val->val[i], byte) < 0) + goto error_exit; + } + + memcpy(val->val + 8, val->val, 8); + memcpy(val->val + 16, val->val, 8); + + } else if (strstr(key, KEY1_STR)) { + for (i = 0; i < len; i++) { + char byte[3] = {src[i * 2], src[i * 2 + 1], '\0'}; + + if (parser_read_uint8_hex(&val->val[i], byte) < 0) + goto error_exit; + } + + if (info.interim_info.tdes_data.nb_keys == 2) + memcpy(val->val + 16, val->val, 8); + + } else if (strstr(key, KEY2_STR)) { + for (i = 0; i < len; i++) { + char byte[3] = {src[i * 2], src[i * 2 + 1], '\0'}; + + if (parser_read_uint8_hex(&val->val[i + 8], byte) < 0) + goto error_exit; + } + + } else if (strstr(key, KEY3_STR)) { + for (i = 0; i < len; i++) { + char byte[3] = {src[i * 2], src[i * 2 + 1], '\0'}; + + if (parser_read_uint8_hex(&val->val[i + 16], byte) < 0) + goto error_exit; + } + } else + return -EINVAL; + + val->len = 24; + + return 0; + +error_exit: + rte_free(val->val); + memset(val, 0, sizeof(*val)); + return -EINVAL; +} + +static int +parse_test_tdes_writeback(struct fips_val *val) +{ + + if (info.op == FIPS_TEST_ENC_AUTH_GEN) + fprintf(info.fp_wr, "%s", CT_STR); + else + fprintf(info.fp_wr, "%s", PT_STR); + + parse_write_hex_str(val); + + return 0; + +} + +static int +writeback_tdes_hex_str(const char *key, char *dst, struct fips_val *val) +{ + struct fips_val tmp_val; + + tmp_val.len = 8; + + if (strstr(key, KEY1_STR)) + tmp_val.val = val->val; + else if (strstr(key, KEY2_STR)) + tmp_val.val = val->val + 8; + else if (strstr(key, KEY3_STR)) + tmp_val.val = val->val + 16; + + return writeback_hex_str(key, dst, &tmp_val); +} + +static int +rsp_test_tdes_check(struct fips_val *val) +{ + struct fips_val *data; + + if (info.op == FIPS_TEST_ENC_AUTH_GEN) + data = &vec.ct; + else + data = &vec.pt; + + if (memcmp(val->val, data->val, val->len) == 0) + fprintf(info.fp_wr, "Success\n"); + else + fprintf(info.fp_wr, "Failed\n"); + + return 0; +} + +int +parse_test_tdes_init(void) +{ + uint32_t i; + + for (i = 0; i < info.nb_vec_lines; i++) { + char *line = info.vec[i]; + uint32_t j; + + if (strstr(line, TEST_CBCI_KEY)) + return -EPERM; + + for (j = 0; j < RTE_DIM(test_types); j++) + if (strstr(line, test_types[j].desc)) { + info.interim_info.tdes_data.test_type = + test_types[j].type; + break; + } + } + + info.parse_writeback = parse_test_tdes_writeback; + info.callbacks = tdes_tests_vectors; + info.interim_callbacks = tdes_tests_interim_vectors; + info.writeback_callbacks = tdes_writeback_callbacks; + info.kat_check = rsp_test_tdes_check; + + return 0; +} diff --git a/examples/fips_validation/main.c b/examples/fips_validation/main.c new file mode 100644 index 00000000..85f54cbf --- /dev/null +++ b/examples/fips_validation/main.c @@ -0,0 +1,1225 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <sys/stat.h> +#include <getopt.h> +#include <dirent.h> + +#include <rte_cryptodev.h> +#include <rte_cryptodev_pmd.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_string_fns.h> + +#include "fips_validation.h" + +#define REQ_FILE_PATH_KEYWORD "req-file" +#define RSP_FILE_PATH_KEYWORD "rsp-file" +#define FOLDER_KEYWORD "path-is-folder" +#define CRYPTODEV_KEYWORD "cryptodev" +#define CRYPTODEV_ID_KEYWORD "cryptodev-id" + +struct fips_test_vector vec; +struct fips_test_interim_info info; + +struct cryptodev_fips_validate_env { + const char *req_path; + const char *rsp_path; + uint32_t is_path_folder; + uint32_t dev_id; + struct rte_mempool *mpool; + struct rte_mempool *op_pool; + struct rte_mbuf *mbuf; + struct rte_crypto_op *op; + struct rte_cryptodev_sym_session *sess; +} env; + +static int +cryptodev_fips_validate_app_int(void) +{ + struct rte_cryptodev_config conf = {rte_socket_id(), 1}; + struct rte_cryptodev_qp_conf qp_conf = {128}; + int ret; + + ret = rte_cryptodev_configure(env.dev_id, &conf); + if (ret < 0) + return ret; + + env.mpool = rte_pktmbuf_pool_create("FIPS_MEMPOOL", 128, 0, 0, + UINT16_MAX, rte_socket_id()); + if (!env.mpool) + return ret; + + ret = rte_cryptodev_queue_pair_setup(env.dev_id, 0, &qp_conf, + rte_socket_id(), env.mpool); + if (ret < 0) + return ret; + + ret = -ENOMEM; + + env.op_pool = rte_crypto_op_pool_create( + "FIPS_OP_POOL", + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + 1, 0, + 16, + rte_socket_id()); + if (!env.op_pool) + goto error_exit; + + env.mbuf = rte_pktmbuf_alloc(env.mpool); + if (!env.mbuf) + goto error_exit; + + env.op = rte_crypto_op_alloc(env.op_pool, RTE_CRYPTO_OP_TYPE_SYMMETRIC); + if (!env.op) + goto error_exit; + + return 0; + +error_exit: + rte_mempool_free(env.mpool); + if (env.op_pool) + rte_mempool_free(env.op_pool); + + return ret; +} + +static void +cryptodev_fips_validate_app_uninit(void) +{ + rte_pktmbuf_free(env.mbuf); + rte_crypto_op_free(env.op); + rte_cryptodev_sym_session_clear(env.dev_id, env.sess); + rte_cryptodev_sym_session_free(env.sess); + rte_mempool_free(env.mpool); + rte_mempool_free(env.op_pool); +} + +static int +fips_test_one_file(void); + +static int +parse_cryptodev_arg(char *arg) +{ + int id = rte_cryptodev_get_dev_id(arg); + + if (id < 0) { + RTE_LOG(ERR, USER1, "Error %i: invalid cryptodev name %s\n", + id, arg); + return id; + } + + env.dev_id = (uint32_t)id; + + return 0; +} + +static int +parse_cryptodev_id_arg(char *arg) +{ + uint32_t cryptodev_id; + + if (parser_read_uint32(&cryptodev_id, arg) < 0) { + RTE_LOG(ERR, USER1, "Error %i: invalid cryptodev id %s\n", + -EINVAL, arg); + return -1; + } + + + if (!rte_cryptodev_pmd_is_valid_dev(cryptodev_id)) { + RTE_LOG(ERR, USER1, "Error %i: invalid cryptodev id %s\n", + cryptodev_id, arg); + return -1; + } + + env.dev_id = (uint32_t)cryptodev_id; + + return 0; +} + +static void +cryptodev_fips_validate_usage(const char *prgname) +{ + printf("%s [EAL options] --\n" + " --%s: REQUEST-FILE-PATH\n" + " --%s: RESPONSE-FILE-PATH\n" + " --%s: indicating both paths are folders\n" + " --%s: CRYPTODEV-NAME\n" + " --%s: CRYPTODEV-ID-NAME\n", + prgname, REQ_FILE_PATH_KEYWORD, RSP_FILE_PATH_KEYWORD, + FOLDER_KEYWORD, CRYPTODEV_KEYWORD, CRYPTODEV_ID_KEYWORD); +} + +static int +cryptodev_fips_validate_parse_args(int argc, char **argv) +{ + int opt, ret; + char *prgname = argv[0]; + char **argvopt; + int option_index; + struct option lgopts[] = { + {REQ_FILE_PATH_KEYWORD, required_argument, 0, 0}, + {RSP_FILE_PATH_KEYWORD, required_argument, 0, 0}, + {FOLDER_KEYWORD, no_argument, 0, 0}, + {CRYPTODEV_KEYWORD, required_argument, 0, 0}, + {CRYPTODEV_ID_KEYWORD, required_argument, 0, 0}, + {NULL, 0, 0, 0} + }; + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "s:", + lgopts, &option_index)) != EOF) { + + switch (opt) { + case 0: + if (strcmp(lgopts[option_index].name, + REQ_FILE_PATH_KEYWORD) == 0) + env.req_path = optarg; + else if (strcmp(lgopts[option_index].name, + RSP_FILE_PATH_KEYWORD) == 0) + env.rsp_path = optarg; + else if (strcmp(lgopts[option_index].name, + FOLDER_KEYWORD) == 0) + env.is_path_folder = 1; + else if (strcmp(lgopts[option_index].name, + CRYPTODEV_KEYWORD) == 0) { + ret = parse_cryptodev_arg(optarg); + if (ret < 0) { + cryptodev_fips_validate_usage(prgname); + return -EINVAL; + } + } else if (strcmp(lgopts[option_index].name, + CRYPTODEV_ID_KEYWORD) == 0) { + ret = parse_cryptodev_id_arg(optarg); + if (ret < 0) { + cryptodev_fips_validate_usage(prgname); + return -EINVAL; + } + } else { + cryptodev_fips_validate_usage(prgname); + return -EINVAL; + } + break; + default: + return -1; + } + } + + if (env.req_path == NULL || env.rsp_path == NULL || + env.dev_id == UINT32_MAX) { + cryptodev_fips_validate_usage(prgname); + return -EINVAL; + } + + return 0; +} + +int +main(int argc, char *argv[]) +{ + int ret; + + ret = rte_eal_init(argc, argv); + if (ret < 0) { + RTE_LOG(ERR, USER1, "Error %i: Failed init\n", ret); + return -1; + } + + argc -= ret; + argv += ret; + + ret = cryptodev_fips_validate_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Failed to parse arguments!\n"); + + ret = cryptodev_fips_validate_app_int(); + if (ret < 0) { + RTE_LOG(ERR, USER1, "Error %i: Failed init\n", ret); + return -1; + } + + if (!env.is_path_folder) { + printf("Processing file %s... ", env.req_path); + + ret = fips_test_init(env.req_path, env.rsp_path, + rte_cryptodev_name_get(env.dev_id)); + if (ret < 0) { + RTE_LOG(ERR, USER1, "Error %i: Failed test %s\n", + ret, env.req_path); + goto exit; + } + + + ret = fips_test_one_file(); + if (ret < 0) { + RTE_LOG(ERR, USER1, "Error %i: Failed test %s\n", + ret, env.req_path); + goto exit; + } + + printf("Done\n"); + + } else { + struct dirent *dir; + DIR *d_req, *d_rsp; + char req_path[1024]; + char rsp_path[1024]; + + d_req = opendir(env.req_path); + if (!d_req) { + RTE_LOG(ERR, USER1, "Error %i: Path %s not exist\n", + -EINVAL, env.req_path); + goto exit; + } + + d_rsp = opendir(env.rsp_path); + if (!d_rsp) { + ret = mkdir(env.rsp_path, 0700); + if (ret == 0) + d_rsp = opendir(env.rsp_path); + else { + RTE_LOG(ERR, USER1, "Error %i: Invalid %s\n", + -EINVAL, env.rsp_path); + goto exit; + } + } + closedir(d_rsp); + + while ((dir = readdir(d_req)) != NULL) { + if (strstr(dir->d_name, "req") == NULL) + continue; + + snprintf(req_path, 1023, "%s/%s", env.req_path, + dir->d_name); + snprintf(rsp_path, 1023, "%s/%s", env.rsp_path, + dir->d_name); + strlcpy(strstr(rsp_path, "req"), "rsp", 4); + + printf("Processing file %s... ", req_path); + + ret = fips_test_init(req_path, rsp_path, + rte_cryptodev_name_get(env.dev_id)); + if (ret < 0) { + RTE_LOG(ERR, USER1, "Error %i: Failed test %s\n", + ret, req_path); + break; + } + + ret = fips_test_one_file(); + if (ret < 0) { + RTE_LOG(ERR, USER1, "Error %i: Failed test %s\n", + ret, req_path); + break; + } + + printf("Done\n"); + } + + closedir(d_req); + } + + +exit: + fips_test_clear(); + cryptodev_fips_validate_app_uninit(); + + return ret; + +} + +#define IV_OFF (sizeof(struct rte_crypto_op) + sizeof(struct rte_crypto_sym_op)) +#define CRYPTODEV_FIPS_MAX_RETRIES 16 + +typedef int (*fips_test_one_case_t)(void); +typedef int (*fips_prepare_op_t)(void); +typedef int (*fips_prepare_xform_t)(struct rte_crypto_sym_xform *); + +struct fips_test_ops { + fips_prepare_xform_t prepare_xform; + fips_prepare_op_t prepare_op; + fips_test_one_case_t test; +} test_ops; + +static int +prepare_cipher_op(void) +{ + struct rte_crypto_sym_op *sym = env.op->sym; + uint8_t *iv = rte_crypto_op_ctod_offset(env.op, uint8_t *, IV_OFF); + + __rte_crypto_op_reset(env.op, RTE_CRYPTO_OP_TYPE_SYMMETRIC); + rte_pktmbuf_reset(env.mbuf); + + sym->m_src = env.mbuf; + sym->cipher.data.offset = 0; + + memcpy(iv, vec.iv.val, vec.iv.len); + + if (info.op == FIPS_TEST_ENC_AUTH_GEN) { + uint8_t *pt; + + if (vec.pt.len > RTE_MBUF_MAX_NB_SEGS) { + RTE_LOG(ERR, USER1, "PT len %u\n", vec.pt.len); + return -EPERM; + } + + pt = (uint8_t *)rte_pktmbuf_append(env.mbuf, vec.pt.len); + + if (!pt) { + RTE_LOG(ERR, USER1, "Error %i: MBUF too small\n", + -ENOMEM); + return -ENOMEM; + } + + memcpy(pt, vec.pt.val, vec.pt.len); + sym->cipher.data.length = vec.pt.len; + + } else { + uint8_t *ct; + + if (vec.ct.len > RTE_MBUF_MAX_NB_SEGS) { + RTE_LOG(ERR, USER1, "CT len %u\n", vec.ct.len); + return -EPERM; + } + + ct = (uint8_t *)rte_pktmbuf_append(env.mbuf, vec.ct.len); + + if (!ct) { + RTE_LOG(ERR, USER1, "Error %i: MBUF too small\n", + -ENOMEM); + return -ENOMEM; + } + + memcpy(ct, vec.ct.val, vec.ct.len); + sym->cipher.data.length = vec.ct.len; + } + + rte_crypto_op_attach_sym_session(env.op, env.sess); + + return 0; +} + +static int +prepare_auth_op(void) +{ + struct rte_crypto_sym_op *sym = env.op->sym; + + __rte_crypto_op_reset(env.op, RTE_CRYPTO_OP_TYPE_SYMMETRIC); + rte_pktmbuf_reset(env.mbuf); + + sym->m_src = env.mbuf; + sym->auth.data.offset = 0; + + if (info.op == FIPS_TEST_ENC_AUTH_GEN) { + uint8_t *pt; + + if (vec.pt.len > RTE_MBUF_MAX_NB_SEGS) { + RTE_LOG(ERR, USER1, "PT len %u\n", vec.pt.len); + return -EPERM; + } + + pt = (uint8_t *)rte_pktmbuf_append(env.mbuf, vec.pt.len + + vec.cipher_auth.digest.len); + + if (!pt) { + RTE_LOG(ERR, USER1, "Error %i: MBUF too small\n", + -ENOMEM); + return -ENOMEM; + } + + memcpy(pt, vec.pt.val, vec.pt.len); + sym->auth.data.length = vec.pt.len; + sym->auth.digest.data = pt + vec.pt.len; + sym->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset( + env.mbuf, vec.pt.len); + + } else { + uint8_t *ct; + + if (vec.ct.len > RTE_MBUF_MAX_NB_SEGS) { + RTE_LOG(ERR, USER1, "CT len %u\n", vec.ct.len); + return -EPERM; + } + + ct = (uint8_t *)rte_pktmbuf_append(env.mbuf, + vec.ct.len + vec.cipher_auth.digest.len); + + if (!ct) { + RTE_LOG(ERR, USER1, "Error %i: MBUF too small\n", + -ENOMEM); + return -ENOMEM; + } + + memcpy(ct, vec.ct.val, vec.ct.len); + sym->auth.data.length = vec.ct.len; + sym->auth.digest.data = vec.cipher_auth.digest.val; + sym->auth.digest.phys_addr = rte_malloc_virt2iova( + sym->auth.digest.data); + } + + rte_crypto_op_attach_sym_session(env.op, env.sess); + + return 0; +} + +static int +prepare_aead_op(void) +{ + struct rte_crypto_sym_op *sym = env.op->sym; + uint8_t *iv = rte_crypto_op_ctod_offset(env.op, uint8_t *, IV_OFF); + + __rte_crypto_op_reset(env.op, RTE_CRYPTO_OP_TYPE_SYMMETRIC); + rte_pktmbuf_reset(env.mbuf); + + if (info.algo == FIPS_TEST_ALGO_AES_CCM) + memcpy(iv + 1, vec.iv.val, vec.iv.len); + else + memcpy(iv, vec.iv.val, vec.iv.len); + + sym->m_src = env.mbuf; + sym->aead.data.offset = 0; + sym->aead.aad.data = vec.aead.aad.val; + sym->aead.aad.phys_addr = rte_malloc_virt2iova(sym->aead.aad.data); + + if (info.op == FIPS_TEST_ENC_AUTH_GEN) { + uint8_t *pt; + + if (vec.pt.len > RTE_MBUF_MAX_NB_SEGS) { + RTE_LOG(ERR, USER1, "PT len %u\n", vec.pt.len); + return -EPERM; + } + + pt = (uint8_t *)rte_pktmbuf_append(env.mbuf, + vec.pt.len + vec.aead.digest.len); + + if (!pt) { + RTE_LOG(ERR, USER1, "Error %i: MBUF too small\n", + -ENOMEM); + return -ENOMEM; + } + + memcpy(pt, vec.pt.val, vec.pt.len); + sym->aead.data.length = vec.pt.len; + sym->aead.digest.data = pt + vec.pt.len; + sym->aead.digest.phys_addr = rte_pktmbuf_mtophys_offset( + env.mbuf, vec.pt.len); + } else { + uint8_t *ct; + + if (vec.ct.len > RTE_MBUF_MAX_NB_SEGS) { + RTE_LOG(ERR, USER1, "CT len %u\n", vec.ct.len); + return -EPERM; + } + + ct = (uint8_t *)rte_pktmbuf_append(env.mbuf, vec.ct.len); + + if (!ct) { + RTE_LOG(ERR, USER1, "Error %i: MBUF too small\n", + -ENOMEM); + return -ENOMEM; + } + + memcpy(ct, vec.ct.val, vec.ct.len); + sym->aead.data.length = vec.ct.len; + sym->aead.digest.data = vec.aead.digest.val; + sym->aead.digest.phys_addr = rte_malloc_virt2iova( + sym->aead.digest.data); + } + + rte_crypto_op_attach_sym_session(env.op, env.sess); + + return 0; +} + +static int +prepare_aes_xform(struct rte_crypto_sym_xform *xform) +{ + const struct rte_cryptodev_symmetric_capability *cap; + struct rte_cryptodev_sym_capability_idx cap_idx; + struct rte_crypto_cipher_xform *cipher_xform = &xform->cipher; + + xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + + cipher_xform->algo = RTE_CRYPTO_CIPHER_AES_CBC; + cipher_xform->op = (info.op == FIPS_TEST_ENC_AUTH_GEN) ? + RTE_CRYPTO_CIPHER_OP_ENCRYPT : + RTE_CRYPTO_CIPHER_OP_DECRYPT; + cipher_xform->key.data = vec.cipher_auth.key.val; + cipher_xform->key.length = vec.cipher_auth.key.len; + cipher_xform->iv.length = vec.iv.len; + cipher_xform->iv.offset = IV_OFF; + + cap_idx.algo.cipher = RTE_CRYPTO_CIPHER_AES_CBC; + cap_idx.type = RTE_CRYPTO_SYM_XFORM_CIPHER; + + cap = rte_cryptodev_sym_capability_get(env.dev_id, &cap_idx); + if (!cap) { + RTE_LOG(ERR, USER1, "Failed to get capability for cdev %u\n", + env.dev_id); + return -EINVAL; + } + + if (rte_cryptodev_sym_capability_check_cipher(cap, + cipher_xform->key.length, + cipher_xform->iv.length) != 0) { + RTE_LOG(ERR, USER1, "PMD %s key length %u IV length %u\n", + info.device_name, cipher_xform->key.length, + cipher_xform->iv.length); + return -EPERM; + } + + return 0; +} + +static int +prepare_tdes_xform(struct rte_crypto_sym_xform *xform) +{ + const struct rte_cryptodev_symmetric_capability *cap; + struct rte_cryptodev_sym_capability_idx cap_idx; + struct rte_crypto_cipher_xform *cipher_xform = &xform->cipher; + + xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + + cipher_xform->algo = RTE_CRYPTO_CIPHER_3DES_CBC; + cipher_xform->op = (info.op == FIPS_TEST_ENC_AUTH_GEN) ? + RTE_CRYPTO_CIPHER_OP_ENCRYPT : + RTE_CRYPTO_CIPHER_OP_DECRYPT; + cipher_xform->key.data = vec.cipher_auth.key.val; + cipher_xform->key.length = vec.cipher_auth.key.len; + cipher_xform->iv.length = vec.iv.len; + cipher_xform->iv.offset = IV_OFF; + + cap_idx.algo.cipher = RTE_CRYPTO_CIPHER_3DES_CBC; + cap_idx.type = RTE_CRYPTO_SYM_XFORM_CIPHER; + + cap = rte_cryptodev_sym_capability_get(env.dev_id, &cap_idx); + if (!cap) { + RTE_LOG(ERR, USER1, "Failed to get capability for cdev %u\n", + env.dev_id); + return -EINVAL; + } + + if (rte_cryptodev_sym_capability_check_cipher(cap, + cipher_xform->key.length, + cipher_xform->iv.length) != 0) { + RTE_LOG(ERR, USER1, "PMD %s key length %u IV length %u\n", + info.device_name, cipher_xform->key.length, + cipher_xform->iv.length); + return -EPERM; + } + + return 0; +} + +static int +prepare_hmac_xform(struct rte_crypto_sym_xform *xform) +{ + const struct rte_cryptodev_symmetric_capability *cap; + struct rte_cryptodev_sym_capability_idx cap_idx; + struct rte_crypto_auth_xform *auth_xform = &xform->auth; + + xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; + + auth_xform->algo = info.interim_info.hmac_data.algo; + auth_xform->op = RTE_CRYPTO_AUTH_OP_GENERATE; + auth_xform->digest_length = vec.cipher_auth.digest.len; + auth_xform->key.data = vec.cipher_auth.key.val; + auth_xform->key.length = vec.cipher_auth.key.len; + + cap_idx.algo.auth = auth_xform->algo; + cap_idx.type = RTE_CRYPTO_SYM_XFORM_AUTH; + + cap = rte_cryptodev_sym_capability_get(env.dev_id, &cap_idx); + if (!cap) { + RTE_LOG(ERR, USER1, "Failed to get capability for cdev %u\n", + env.dev_id); + return -EINVAL; + } + + if (rte_cryptodev_sym_capability_check_auth(cap, + auth_xform->key.length, + auth_xform->digest_length, 0) != 0) { + RTE_LOG(ERR, USER1, "PMD %s key length %u IV length %u\n", + info.device_name, auth_xform->key.length, + auth_xform->digest_length); + return -EPERM; + } + + return 0; +} + +static int +prepare_gcm_xform(struct rte_crypto_sym_xform *xform) +{ + const struct rte_cryptodev_symmetric_capability *cap; + struct rte_cryptodev_sym_capability_idx cap_idx; + struct rte_crypto_aead_xform *aead_xform = &xform->aead; + + xform->type = RTE_CRYPTO_SYM_XFORM_AEAD; + + aead_xform->algo = RTE_CRYPTO_AEAD_AES_GCM; + aead_xform->aad_length = vec.aead.aad.len; + aead_xform->digest_length = vec.aead.digest.len; + aead_xform->iv.offset = IV_OFF; + aead_xform->iv.length = vec.iv.len; + aead_xform->key.data = vec.aead.key.val; + aead_xform->key.length = vec.aead.key.len; + aead_xform->op = (info.op == FIPS_TEST_ENC_AUTH_GEN) ? + RTE_CRYPTO_AEAD_OP_ENCRYPT : + RTE_CRYPTO_AEAD_OP_DECRYPT; + + cap_idx.algo.aead = aead_xform->algo; + cap_idx.type = RTE_CRYPTO_SYM_XFORM_AEAD; + + cap = rte_cryptodev_sym_capability_get(env.dev_id, &cap_idx); + if (!cap) { + RTE_LOG(ERR, USER1, "Failed to get capability for cdev %u\n", + env.dev_id); + return -EINVAL; + } + + if (rte_cryptodev_sym_capability_check_aead(cap, + aead_xform->key.length, + aead_xform->digest_length, aead_xform->aad_length, + aead_xform->iv.length) != 0) { + RTE_LOG(ERR, USER1, + "PMD %s key_len %u tag_len %u aad_len %u iv_len %u\n", + info.device_name, aead_xform->key.length, + aead_xform->digest_length, + aead_xform->aad_length, + aead_xform->iv.length); + return -EPERM; + } + + return 0; +} + +static int +prepare_cmac_xform(struct rte_crypto_sym_xform *xform) +{ + const struct rte_cryptodev_symmetric_capability *cap; + struct rte_cryptodev_sym_capability_idx cap_idx; + struct rte_crypto_auth_xform *auth_xform = &xform->auth; + + xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; + + auth_xform->algo = RTE_CRYPTO_AUTH_AES_CMAC; + auth_xform->op = (info.op == FIPS_TEST_ENC_AUTH_GEN) ? + RTE_CRYPTO_AUTH_OP_GENERATE : RTE_CRYPTO_AUTH_OP_VERIFY; + auth_xform->digest_length = vec.cipher_auth.digest.len; + auth_xform->key.data = vec.cipher_auth.key.val; + auth_xform->key.length = vec.cipher_auth.key.len; + + cap_idx.algo.auth = auth_xform->algo; + cap_idx.type = RTE_CRYPTO_SYM_XFORM_AUTH; + + cap = rte_cryptodev_sym_capability_get(env.dev_id, &cap_idx); + if (!cap) { + RTE_LOG(ERR, USER1, "Failed to get capability for cdev %u\n", + env.dev_id); + return -EINVAL; + } + + if (rte_cryptodev_sym_capability_check_auth(cap, + auth_xform->key.length, + auth_xform->digest_length, 0) != 0) { + RTE_LOG(ERR, USER1, "PMD %s key length %u IV length %u\n", + info.device_name, auth_xform->key.length, + auth_xform->digest_length); + return -EPERM; + } + + return 0; +} + +static int +prepare_ccm_xform(struct rte_crypto_sym_xform *xform) +{ + const struct rte_cryptodev_symmetric_capability *cap; + struct rte_cryptodev_sym_capability_idx cap_idx; + struct rte_crypto_aead_xform *aead_xform = &xform->aead; + + xform->type = RTE_CRYPTO_SYM_XFORM_AEAD; + + aead_xform->algo = RTE_CRYPTO_AEAD_AES_CCM; + aead_xform->aad_length = vec.aead.aad.len; + aead_xform->digest_length = vec.aead.digest.len; + aead_xform->iv.offset = IV_OFF; + aead_xform->iv.length = vec.iv.len; + aead_xform->key.data = vec.aead.key.val; + aead_xform->key.length = vec.aead.key.len; + aead_xform->op = (info.op == FIPS_TEST_ENC_AUTH_GEN) ? + RTE_CRYPTO_AEAD_OP_ENCRYPT : + RTE_CRYPTO_AEAD_OP_DECRYPT; + + cap_idx.algo.aead = aead_xform->algo; + cap_idx.type = RTE_CRYPTO_SYM_XFORM_AEAD; + + cap = rte_cryptodev_sym_capability_get(env.dev_id, &cap_idx); + if (!cap) { + RTE_LOG(ERR, USER1, "Failed to get capability for cdev %u\n", + env.dev_id); + return -EINVAL; + } + + if (rte_cryptodev_sym_capability_check_aead(cap, + aead_xform->key.length, + aead_xform->digest_length, aead_xform->aad_length, + aead_xform->iv.length) != 0) { + RTE_LOG(ERR, USER1, + "PMD %s key_len %u tag_len %u aad_len %u iv_len %u\n", + info.device_name, aead_xform->key.length, + aead_xform->digest_length, + aead_xform->aad_length, + aead_xform->iv.length); + return -EPERM; + } + + return 0; +} + +static void +get_writeback_data(struct fips_val *val) +{ + val->val = rte_pktmbuf_mtod(env.mbuf, uint8_t *); + val->len = rte_pktmbuf_pkt_len(env.mbuf); +} + +static int +fips_run_test(void) +{ + struct rte_crypto_sym_xform xform = {0}; + uint16_t n_deqd; + int ret; + + ret = test_ops.prepare_xform(&xform); + if (ret < 0) + return ret; + + env.sess = rte_cryptodev_sym_session_create(env.mpool); + if (!env.sess) + return -ENOMEM; + + ret = rte_cryptodev_sym_session_init(env.dev_id, + env.sess, &xform, env.mpool); + if (ret < 0) { + RTE_LOG(ERR, USER1, "Error %i: Init session\n", + ret); + return ret; + } + + ret = test_ops.prepare_op(); + if (ret < 0) { + RTE_LOG(ERR, USER1, "Error %i: Prepare op\n", + ret); + return ret; + } + + if (rte_cryptodev_enqueue_burst(env.dev_id, 0, &env.op, 1) < 1) { + RTE_LOG(ERR, USER1, "Error: Failed enqueue\n"); + return ret; + } + + do { + struct rte_crypto_op *deqd_op; + + n_deqd = rte_cryptodev_dequeue_burst(env.dev_id, 0, &deqd_op, + 1); + } while (n_deqd == 0); + + vec.status = env.op->status; + + rte_cryptodev_sym_session_clear(env.dev_id, env.sess); + rte_cryptodev_sym_session_free(env.sess); + env.sess = NULL; + + return ret; +} + +static int +fips_generic_test(void) +{ + struct fips_val val; + int ret; + + fips_test_write_one_case(); + + ret = fips_run_test(); + if (ret < 0) { + if (ret == -EPERM) { + fprintf(info.fp_wr, "Bypass\n\n"); + return 0; + } + + return ret; + } + + get_writeback_data(&val); + + switch (info.file_type) { + case FIPS_TYPE_REQ: + case FIPS_TYPE_RSP: + if (info.parse_writeback == NULL) + return -EPERM; + ret = info.parse_writeback(&val); + if (ret < 0) + return ret; + break; + case FIPS_TYPE_FAX: + if (info.kat_check == NULL) + return -EPERM; + ret = info.kat_check(&val); + if (ret < 0) + return ret; + break; + } + + fprintf(info.fp_wr, "\n"); + + return 0; +} + +static int +fips_mct_tdes_test(void) +{ +#define TDES_BLOCK_SIZE 8 +#define TDES_EXTERN_ITER 400 +#define TDES_INTERN_ITER 10000 + struct fips_val val, val_key; + uint8_t prev_out[TDES_BLOCK_SIZE]; + uint8_t prev_prev_out[TDES_BLOCK_SIZE]; + uint8_t prev_in[TDES_BLOCK_SIZE]; + uint32_t i, j, k; + int ret; + + for (i = 0; i < TDES_EXTERN_ITER; i++) { + if (i != 0) + update_info_vec(i); + + fips_test_write_one_case(); + + for (j = 0; j < TDES_INTERN_ITER; j++) { + ret = fips_run_test(); + if (ret < 0) { + if (ret == -EPERM) { + fprintf(info.fp_wr, "Bypass\n"); + return 0; + } + + return ret; + } + + get_writeback_data(&val); + + if (info.op == FIPS_TEST_DEC_AUTH_VERIF) + memcpy(prev_in, vec.ct.val, TDES_BLOCK_SIZE); + + if (j == 0) { + memcpy(prev_out, val.val, TDES_BLOCK_SIZE); + + if (info.op == FIPS_TEST_ENC_AUTH_GEN) { + memcpy(vec.pt.val, vec.iv.val, + TDES_BLOCK_SIZE); + memcpy(vec.iv.val, val.val, + TDES_BLOCK_SIZE); + } else { + memcpy(vec.iv.val, vec.ct.val, + TDES_BLOCK_SIZE); + memcpy(vec.ct.val, val.val, + TDES_BLOCK_SIZE); + } + continue; + } + + if (info.op == FIPS_TEST_ENC_AUTH_GEN) { + memcpy(vec.iv.val, val.val, TDES_BLOCK_SIZE); + memcpy(vec.pt.val, prev_out, TDES_BLOCK_SIZE); + } else { + memcpy(vec.iv.val, vec.ct.val, TDES_BLOCK_SIZE); + memcpy(vec.ct.val, val.val, TDES_BLOCK_SIZE); + } + + if (j == TDES_INTERN_ITER - 1) + continue; + + memcpy(prev_out, val.val, TDES_BLOCK_SIZE); + + if (j == TDES_INTERN_ITER - 3) + memcpy(prev_prev_out, val.val, TDES_BLOCK_SIZE); + } + + info.parse_writeback(&val); + fprintf(info.fp_wr, "\n"); + + if (i == TDES_EXTERN_ITER - 1) + continue; + + /** update key */ + memcpy(&val_key, &vec.cipher_auth.key, sizeof(val_key)); + + if (info.interim_info.tdes_data.nb_keys == 0) { + if (memcmp(val_key.val, val_key.val + 8, 8) == 0) + info.interim_info.tdes_data.nb_keys = 1; + else if (memcmp(val_key.val, val_key.val + 16, 8) == 0) + info.interim_info.tdes_data.nb_keys = 2; + else + info.interim_info.tdes_data.nb_keys = 3; + + } + + for (k = 0; k < TDES_BLOCK_SIZE; k++) { + + switch (info.interim_info.tdes_data.nb_keys) { + case 3: + val_key.val[k] ^= val.val[k]; + val_key.val[k + 8] ^= prev_out[k]; + val_key.val[k + 16] ^= prev_prev_out[k]; + break; + case 2: + val_key.val[k] ^= val.val[k]; + val_key.val[k + 8] ^= prev_out[k]; + val_key.val[k + 16] ^= val.val[k]; + break; + default: /* case 1 */ + val_key.val[k] ^= val.val[k]; + val_key.val[k + 8] ^= val.val[k]; + val_key.val[k + 16] ^= val.val[k]; + break; + } + + } + + for (k = 0; k < 24; k++) + val_key.val[k] = (__builtin_popcount(val_key.val[k]) & + 0x1) ? + val_key.val[k] : (val_key.val[k] ^ 0x1); + + if (info.op == FIPS_TEST_ENC_AUTH_GEN) { + memcpy(vec.iv.val, val.val, TDES_BLOCK_SIZE); + memcpy(vec.pt.val, prev_out, TDES_BLOCK_SIZE); + } else { + memcpy(vec.iv.val, prev_out, TDES_BLOCK_SIZE); + memcpy(vec.ct.val, val.val, TDES_BLOCK_SIZE); + } + } + + return 0; +} + +static int +fips_mct_aes_test(void) +{ +#define AES_BLOCK_SIZE 16 +#define AES_EXTERN_ITER 100 +#define AES_INTERN_ITER 1000 + struct fips_val val, val_key; + uint8_t prev_out[AES_BLOCK_SIZE] = {0}; + uint8_t prev_in[AES_BLOCK_SIZE] = {0}; + uint32_t i, j, k; + int ret; + + for (i = 0; i < AES_EXTERN_ITER; i++) { + if (i != 0) + update_info_vec(i); + + fips_test_write_one_case(); + + for (j = 0; j < AES_INTERN_ITER; j++) { + ret = fips_run_test(); + if (ret < 0) { + if (ret == -EPERM) { + fprintf(info.fp_wr, "Bypass\n"); + return 0; + } + + return ret; + } + + get_writeback_data(&val); + + if (info.op == FIPS_TEST_DEC_AUTH_VERIF) + memcpy(prev_in, vec.ct.val, AES_BLOCK_SIZE); + + if (j == 0) { + memcpy(prev_out, val.val, AES_BLOCK_SIZE); + + if (info.op == FIPS_TEST_ENC_AUTH_GEN) { + memcpy(vec.pt.val, vec.iv.val, + AES_BLOCK_SIZE); + memcpy(vec.iv.val, val.val, + AES_BLOCK_SIZE); + } else { + memcpy(vec.ct.val, vec.iv.val, + AES_BLOCK_SIZE); + memcpy(vec.iv.val, prev_in, + AES_BLOCK_SIZE); + } + continue; + } + + if (info.op == FIPS_TEST_ENC_AUTH_GEN) { + memcpy(vec.iv.val, val.val, AES_BLOCK_SIZE); + memcpy(vec.pt.val, prev_out, AES_BLOCK_SIZE); + } else { + memcpy(vec.iv.val, prev_in, AES_BLOCK_SIZE); + memcpy(vec.ct.val, prev_out, AES_BLOCK_SIZE); + } + + if (j == AES_INTERN_ITER - 1) + continue; + + memcpy(prev_out, val.val, AES_BLOCK_SIZE); + } + + info.parse_writeback(&val); + fprintf(info.fp_wr, "\n"); + + if (i == AES_EXTERN_ITER - 1) + continue; + + /** update key */ + memcpy(&val_key, &vec.cipher_auth.key, sizeof(val_key)); + for (k = 0; k < vec.cipher_auth.key.len; k++) { + switch (vec.cipher_auth.key.len) { + case 16: + val_key.val[k] ^= val.val[k]; + break; + case 24: + if (k < 8) + val_key.val[k] ^= prev_out[k + 8]; + else + val_key.val[k] ^= val.val[k - 8]; + break; + case 32: + if (k < 16) + val_key.val[k] ^= prev_out[k]; + else + val_key.val[k] ^= val.val[k - 16]; + break; + default: + return -1; + } + } + + if (info.op == FIPS_TEST_DEC_AUTH_VERIF) + memcpy(vec.iv.val, val.val, AES_BLOCK_SIZE); + } + + return 0; +} + +static int +init_test_ops(void) +{ + switch (info.algo) { + case FIPS_TEST_ALGO_AES: + test_ops.prepare_op = prepare_cipher_op; + test_ops.prepare_xform = prepare_aes_xform; + if (info.interim_info.aes_data.test_type == AESAVS_TYPE_MCT) + test_ops.test = fips_mct_aes_test; + else + test_ops.test = fips_generic_test; + break; + case FIPS_TEST_ALGO_HMAC: + test_ops.prepare_op = prepare_auth_op; + test_ops.prepare_xform = prepare_hmac_xform; + test_ops.test = fips_generic_test; + break; + case FIPS_TEST_ALGO_TDES: + test_ops.prepare_op = prepare_cipher_op; + test_ops.prepare_xform = prepare_tdes_xform; + if (info.interim_info.tdes_data.test_type == TDES_MCT) + test_ops.test = fips_mct_tdes_test; + else + test_ops.test = fips_generic_test; + break; + case FIPS_TEST_ALGO_AES_GCM: + test_ops.prepare_op = prepare_aead_op; + test_ops.prepare_xform = prepare_gcm_xform; + test_ops.test = fips_generic_test; + break; + case FIPS_TEST_ALGO_AES_CMAC: + test_ops.prepare_op = prepare_auth_op; + test_ops.prepare_xform = prepare_cmac_xform; + test_ops.test = fips_generic_test; + break; + case FIPS_TEST_ALGO_AES_CCM: + test_ops.prepare_op = prepare_aead_op; + test_ops.prepare_xform = prepare_ccm_xform; + test_ops.test = fips_generic_test; + break; + default: + return -1; + } + + return 0; +} + +static void +print_test_block(void) +{ + uint32_t i; + + for (i = 0; i < info.nb_vec_lines; i++) + printf("%s\n", info.vec[i]); + + printf("\n"); +} + +static int +fips_test_one_file(void) +{ + int fetch_ret = 0, ret; + + + ret = init_test_ops(); + if (ret < 0) { + RTE_LOG(ERR, USER1, "Error %i: Init test op\n", ret); + return ret; + } + + while (ret >= 0 && fetch_ret == 0) { + fetch_ret = fips_test_fetch_one_block(); + if (fetch_ret < 0) { + RTE_LOG(ERR, USER1, "Error %i: Fetch block\n", + fetch_ret); + ret = fetch_ret; + goto error_one_case; + } + + if (info.nb_vec_lines == 0) { + if (fetch_ret == -EOF) + break; + + fprintf(info.fp_wr, "\n"); + continue; + } + + ret = fips_test_parse_one_case(); + switch (ret) { + case 0: + ret = test_ops.test(); + if (ret == 0) + break; + RTE_LOG(ERR, USER1, "Error %i: test block\n", + ret); + goto error_one_case; + case 1: + break; + default: + RTE_LOG(ERR, USER1, "Error %i: Parse block\n", + ret); + goto error_one_case; + } + + continue; +error_one_case: + print_test_block(); + } + + fips_test_clear(); + + return ret; + +} diff --git a/examples/fips_validation/meson.build b/examples/fips_validation/meson.build new file mode 100644 index 00000000..498c9ba9 --- /dev/null +++ b/examples/fips_validation/meson.build @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +# meson file, for building this example as part of a main DPDK build. +# +# To build this example as a standalone application with an already-installed +# DPDK instance, use 'make' + +deps += ['cryptodev'] +allow_experimental_apis = true +sources = files( + 'fips_validation_aes.c', + 'fips_validation.c', + 'fips_validation_hmac.c', + 'fips_validation_tdes.c', + 'fips_validation_gcm.c', + 'fips_validation_cmac.c', + 'fips_validation_ccm.c', + 'main.c' +) diff --git a/examples/flow_filtering/flow_blocks.c b/examples/flow_filtering/flow_blocks.c index 4da45928..bae71169 100644 --- a/examples/flow_filtering/flow_blocks.c +++ b/examples/flow_filtering/flow_blocks.c @@ -46,8 +46,6 @@ generate_ipv4_flow(uint16_t port_id, uint16_t rx_q, struct rte_flow_action_queue queue = { .index = rx_q }; struct rte_flow_item_eth eth_spec; struct rte_flow_item_eth eth_mask; - struct rte_flow_item_vlan vlan_spec; - struct rte_flow_item_vlan vlan_mask; struct rte_flow_item_ipv4 ip_spec; struct rte_flow_item_ipv4 ip_mask; int res; @@ -85,17 +83,6 @@ generate_ipv4_flow(uint16_t port_id, uint16_t rx_q, pattern[0].mask = ð_mask; /* - * setting the second level of the pattern (vlan). - * since in this example we just want to get the - * ipv4 we also set this level to allow all. - */ - memset(&vlan_spec, 0, sizeof(struct rte_flow_item_vlan)); - memset(&vlan_mask, 0, sizeof(struct rte_flow_item_vlan)); - pattern[1].type = RTE_FLOW_ITEM_TYPE_VLAN; - pattern[1].spec = &vlan_spec; - pattern[1].mask = &vlan_mask; - - /* * setting the third level of the pattern (ip). * in this example this is the level we care about * so we set it according to the parameters. @@ -106,12 +93,12 @@ generate_ipv4_flow(uint16_t port_id, uint16_t rx_q, ip_mask.hdr.dst_addr = dest_mask; ip_spec.hdr.src_addr = htonl(src_ip); ip_mask.hdr.src_addr = src_mask; - pattern[2].type = RTE_FLOW_ITEM_TYPE_IPV4; - pattern[2].spec = &ip_spec; - pattern[2].mask = &ip_mask; + pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4; + pattern[1].spec = &ip_spec; + pattern[1].mask = &ip_mask; /* the final level must be always type end */ - pattern[3].type = RTE_FLOW_ITEM_TYPE_END; + pattern[2].type = RTE_FLOW_ITEM_TYPE_END; res = rte_flow_validate(port_id, &attr, pattern, action, error); if (!res) diff --git a/examples/flow_filtering/main.c b/examples/flow_filtering/main.c index a73d120e..27e287ae 100644 --- a/examples/flow_filtering/main.c +++ b/examples/flow_filtering/main.c @@ -136,6 +136,8 @@ init_port(void) struct rte_eth_rxconf rxq_conf; struct rte_eth_dev_info dev_info; + rte_eth_dev_info_get(port_id, &dev_info); + port_conf.txmode.offloads &= dev_info.rx_offload_capa; printf(":: initializing port: %d\n", port_id); ret = rte_eth_dev_configure(port_id, nr_queues, nr_queues, &port_conf); @@ -145,7 +147,6 @@ init_port(void) ret, port_id); } - rte_eth_dev_info_get(port_id, &dev_info); rxq_conf = dev_info.default_rxconf; rxq_conf.offloads = port_conf.rxmode.offloads; /* only set Rx queues: something we care only so far */ diff --git a/examples/ip_pipeline/cli.c b/examples/ip_pipeline/cli.c index d1e55407..3de62068 100644 --- a/examples/ip_pipeline/cli.c +++ b/examples/ip_pipeline/cli.c @@ -4321,7 +4321,6 @@ cmd_pipeline_table_rule_add(char **tokens, struct table_rule_match m; struct table_rule_action a; char *pipeline_name; - void *data; uint32_t table_id, t0, n_tokens_parsed; int status; @@ -4379,8 +4378,7 @@ cmd_pipeline_table_rule_add(char **tokens, return; } - status = pipeline_table_rule_add(pipeline_name, table_id, - &m, &a, &data); + status = pipeline_table_rule_add(pipeline_name, table_id, &m, &a); if (status) { snprintf(out, out_size, MSG_CMD_FAIL, tokens[0]); return; @@ -4409,7 +4407,6 @@ cmd_pipeline_table_rule_add_default(char **tokens, size_t out_size) { struct table_rule_action action; - void *data; char *pipeline_name; uint32_t table_id; int status; @@ -4515,8 +4512,7 @@ cmd_pipeline_table_rule_add_default(char **tokens, status = pipeline_table_rule_add_default(pipeline_name, table_id, - &action, - &data); + &action); if (status) { snprintf(out, out_size, MSG_CMD_FAIL, tokens[0]); return; @@ -4525,7 +4521,7 @@ cmd_pipeline_table_rule_add_default(char **tokens, static const char cmd_pipeline_table_rule_add_bulk_help[] = -"pipeline <pipeline_name> table <table_id> rule add bulk <file_name> <n_rules>\n" +"pipeline <pipeline_name> table <table_id> rule add bulk <file_name>\n" "\n" " File <file_name>:\n" " - line format: match <match> action <action>\n"; @@ -4533,8 +4529,7 @@ static const char cmd_pipeline_table_rule_add_bulk_help[] = static int cli_rule_file_process(const char *file_name, size_t line_len_max, - struct table_rule_match *m, - struct table_rule_action *a, + struct table_rule_list **rule_list, uint32_t *n_rules, uint32_t *line_number, char *out, @@ -4546,14 +4541,12 @@ cmd_pipeline_table_rule_add_bulk(char **tokens, char *out, size_t out_size) { - struct table_rule_match *match; - struct table_rule_action *action; - void **data; + struct table_rule_list *list = NULL; char *pipeline_name, *file_name; - uint32_t table_id, n_rules, n_rules_parsed, line_number; + uint32_t table_id, n_rules, n_rules_added, n_rules_not_added, line_number; int status; - if (n_tokens != 9) { + if (n_tokens != 8) { snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]); return; } @@ -4587,68 +4580,33 @@ cmd_pipeline_table_rule_add_bulk(char **tokens, file_name = tokens[7]; - if ((parser_read_uint32(&n_rules, tokens[8]) != 0) || - (n_rules == 0)) { - snprintf(out, out_size, MSG_ARG_INVALID, "n_rules"); - return; - } - - /* Memory allocation. */ - match = calloc(n_rules, sizeof(struct table_rule_match)); - action = calloc(n_rules, sizeof(struct table_rule_action)); - data = calloc(n_rules, sizeof(void *)); - if ((match == NULL) || (action == NULL) || (data == NULL)) { - snprintf(out, out_size, MSG_OUT_OF_MEMORY); - free(data); - free(action); - free(match); - return; - } - - /* Load rule file */ - n_rules_parsed = n_rules; + /* Load rules from file. */ status = cli_rule_file_process(file_name, 1024, - match, - action, - &n_rules_parsed, + &list, + &n_rules, &line_number, out, out_size); if (status) { snprintf(out, out_size, MSG_FILE_ERR, file_name, line_number); - free(data); - free(action); - free(match); - return; - } - if (n_rules_parsed != n_rules) { - snprintf(out, out_size, MSG_FILE_NOT_ENOUGH, file_name); - free(data); - free(action); - free(match); return; } /* Rule bulk add */ status = pipeline_table_rule_add_bulk(pipeline_name, table_id, - match, - action, - data, - &n_rules); + list, + &n_rules_added, + &n_rules_not_added); if (status) { snprintf(out, out_size, MSG_CMD_FAIL, tokens[0]); - free(data); - free(action); - free(match); return; } - /* Memory free */ - free(data); - free(action); - free(match); + snprintf(out, out_size, "Added %u rules out of %u.\n", + n_rules_added, + n_rules); } @@ -4781,19 +4739,530 @@ cmd_pipeline_table_rule_delete_default(char **tokens, } } +static void +ether_addr_show(FILE *f, struct ether_addr *addr) +{ + fprintf(f, "%02x:%02x:%02x:%02x:%02x:%02x", + (uint32_t)addr->addr_bytes[0], (uint32_t)addr->addr_bytes[1], + (uint32_t)addr->addr_bytes[2], (uint32_t)addr->addr_bytes[3], + (uint32_t)addr->addr_bytes[4], (uint32_t)addr->addr_bytes[5]); +} + +static void +ipv4_addr_show(FILE *f, uint32_t addr) +{ + fprintf(f, "%u.%u.%u.%u", + addr >> 24, + (addr >> 16) & 0xFF, + (addr >> 8) & 0xFF, + addr & 0xFF); +} + +static void +ipv6_addr_show(FILE *f, uint8_t *addr) +{ + fprintf(f, "%02x%02x:%02x%02x:%02x%02x:%02x%02x:" + "%02x%02x:%02x%02x:%02x%02x:%02x%02x:", + (uint32_t)addr[0], (uint32_t)addr[1], + (uint32_t)addr[2], (uint32_t)addr[3], + (uint32_t)addr[4], (uint32_t)addr[5], + (uint32_t)addr[6], (uint32_t)addr[7], + (uint32_t)addr[8], (uint32_t)addr[9], + (uint32_t)addr[10], (uint32_t)addr[11], + (uint32_t)addr[12], (uint32_t)addr[13], + (uint32_t)addr[14], (uint32_t)addr[15]); +} + +static const char * +policer_action_string(enum rte_table_action_policer action) { + switch (action) { + case RTE_TABLE_ACTION_POLICER_COLOR_GREEN: return "G"; + case RTE_TABLE_ACTION_POLICER_COLOR_YELLOW: return "Y"; + case RTE_TABLE_ACTION_POLICER_COLOR_RED: return "R"; + case RTE_TABLE_ACTION_POLICER_DROP: return "D"; + default: return "?"; + } +} + +static int +table_rule_show(const char *pipeline_name, + uint32_t table_id, + const char *file_name) +{ + struct pipeline *p; + struct table *table; + struct table_rule *rule; + FILE *f = NULL; + uint32_t i; + + /* Check input params. */ + if ((pipeline_name == NULL) || + (file_name == NULL)) + return -1; + + p = pipeline_find(pipeline_name); + if ((p == NULL) || + (table_id >= p->n_tables)) + return -1; + + table = &p->table[table_id]; + + /* Open file. */ + f = fopen(file_name, "w"); + if (f == NULL) + return -1; + + /* Write table rules to file. */ + TAILQ_FOREACH(rule, &table->rules, node) { + struct table_rule_match *m = &rule->match; + struct table_rule_action *a = &rule->action; + + fprintf(f, "match "); + switch (m->match_type) { + case TABLE_ACL: + fprintf(f, "acl priority %u ", + m->match.acl.priority); + + fprintf(f, m->match.acl.ip_version ? "ipv4 " : "ipv6 "); + + if (m->match.acl.ip_version) + ipv4_addr_show(f, m->match.acl.ipv4.sa); + else + ipv6_addr_show(f, m->match.acl.ipv6.sa); + + fprintf(f, "%u", m->match.acl.sa_depth); + + if (m->match.acl.ip_version) + ipv4_addr_show(f, m->match.acl.ipv4.da); + else + ipv6_addr_show(f, m->match.acl.ipv6.da); + + fprintf(f, "%u", m->match.acl.da_depth); + + fprintf(f, "%u %u %u %u %u ", + (uint32_t)m->match.acl.sp0, + (uint32_t)m->match.acl.sp1, + (uint32_t)m->match.acl.dp0, + (uint32_t)m->match.acl.dp1, + (uint32_t)m->match.acl.proto); + break; + + case TABLE_ARRAY: + fprintf(f, "array %u ", + m->match.array.pos); + break; + + case TABLE_HASH: + fprintf(f, "hash raw "); + for (i = 0; i < table->params.match.hash.key_size; i++) + fprintf(f, "%02x", m->match.hash.key[i]); + fprintf(f, " "); + break; + + case TABLE_LPM: + fprintf(f, "lpm "); + + fprintf(f, m->match.lpm.ip_version ? "ipv4 " : "ipv6 "); + + if (m->match.acl.ip_version) + ipv4_addr_show(f, m->match.lpm.ipv4); + else + ipv6_addr_show(f, m->match.lpm.ipv6); + + fprintf(f, "%u ", + (uint32_t)m->match.lpm.depth); + break; + + default: + fprintf(f, "unknown "); + } + + fprintf(f, "action "); + if (a->action_mask & (1LLU << RTE_TABLE_ACTION_FWD)) { + fprintf(f, "fwd "); + switch (a->fwd.action) { + case RTE_PIPELINE_ACTION_DROP: + fprintf(f, "drop "); + break; + + case RTE_PIPELINE_ACTION_PORT: + fprintf(f, "port %u ", a->fwd.id); + break; + + case RTE_PIPELINE_ACTION_PORT_META: + fprintf(f, "meta "); + break; + + case RTE_PIPELINE_ACTION_TABLE: + default: + fprintf(f, "table %u ", a->fwd.id); + } + } + + if (a->action_mask & (1LLU << RTE_TABLE_ACTION_LB)) { + fprintf(f, "balance "); + for (i = 0; i < RTE_DIM(a->lb.out); i++) + fprintf(f, "%u ", a->lb.out[i]); + } + + if (a->action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) { + fprintf(f, "mtr "); + for (i = 0; i < RTE_TABLE_ACTION_TC_MAX; i++) + if (a->mtr.tc_mask & (1 << i)) { + struct rte_table_action_mtr_tc_params *p = + &a->mtr.mtr[i]; + enum rte_table_action_policer ga = + p->policer[e_RTE_METER_GREEN]; + enum rte_table_action_policer ya = + p->policer[e_RTE_METER_YELLOW]; + enum rte_table_action_policer ra = + p->policer[e_RTE_METER_RED]; + + fprintf(f, "tc%u meter %u policer g %s y %s r %s ", + i, + a->mtr.mtr[i].meter_profile_id, + policer_action_string(ga), + policer_action_string(ya), + policer_action_string(ra)); + } + } + + if (a->action_mask & (1LLU << RTE_TABLE_ACTION_TM)) + fprintf(f, "tm subport %u pipe %u ", + a->tm.subport_id, + a->tm.pipe_id); + + if (a->action_mask & (1LLU << RTE_TABLE_ACTION_ENCAP)) { + fprintf(f, "encap "); + switch (a->encap.type) { + case RTE_TABLE_ACTION_ENCAP_ETHER: + fprintf(f, "ether "); + ether_addr_show(f, &a->encap.ether.ether.da); + fprintf(f, " "); + ether_addr_show(f, &a->encap.ether.ether.sa); + fprintf(f, " "); + break; + + case RTE_TABLE_ACTION_ENCAP_VLAN: + fprintf(f, "vlan "); + ether_addr_show(f, &a->encap.vlan.ether.da); + fprintf(f, " "); + ether_addr_show(f, &a->encap.vlan.ether.sa); + fprintf(f, " pcp %u dei %u vid %u ", + a->encap.vlan.vlan.pcp, + a->encap.vlan.vlan.dei, + a->encap.vlan.vlan.vid); + break; + + case RTE_TABLE_ACTION_ENCAP_QINQ: + fprintf(f, "qinq "); + ether_addr_show(f, &a->encap.qinq.ether.da); + fprintf(f, " "); + ether_addr_show(f, &a->encap.qinq.ether.sa); + fprintf(f, " pcp %u dei %u vid %u pcp %u dei %u vid %u ", + a->encap.qinq.svlan.pcp, + a->encap.qinq.svlan.dei, + a->encap.qinq.svlan.vid, + a->encap.qinq.cvlan.pcp, + a->encap.qinq.cvlan.dei, + a->encap.qinq.cvlan.vid); + break; + + case RTE_TABLE_ACTION_ENCAP_MPLS: + fprintf(f, "mpls %s ", (a->encap.mpls.unicast) ? + "unicast " : "multicast "); + ether_addr_show(f, &a->encap.mpls.ether.da); + fprintf(f, " "); + ether_addr_show(f, &a->encap.mpls.ether.sa); + fprintf(f, " "); + for (i = 0; i < a->encap.mpls.mpls_count; i++) { + struct rte_table_action_mpls_hdr *l = + &a->encap.mpls.mpls[i]; + + fprintf(f, "label%u %u %u %u ", + i, + l->label, + l->tc, + l->ttl); + } + break; + + case RTE_TABLE_ACTION_ENCAP_PPPOE: + fprintf(f, "pppoe "); + ether_addr_show(f, &a->encap.pppoe.ether.da); + fprintf(f, " "); + ether_addr_show(f, &a->encap.pppoe.ether.sa); + fprintf(f, " %u ", a->encap.pppoe.pppoe.session_id); + break; + + case RTE_TABLE_ACTION_ENCAP_VXLAN: + fprintf(f, "vxlan ether "); + ether_addr_show(f, &a->encap.vxlan.ether.da); + fprintf(f, " "); + ether_addr_show(f, &a->encap.vxlan.ether.sa); + if (table->ap->params.encap.vxlan.vlan) + fprintf(f, " vlan pcp %u dei %u vid %u ", + a->encap.vxlan.vlan.pcp, + a->encap.vxlan.vlan.dei, + a->encap.vxlan.vlan.vid); + if (table->ap->params.encap.vxlan.ip_version) { + fprintf(f, " ipv4 "); + ipv4_addr_show(f, a->encap.vxlan.ipv4.sa); + fprintf(f, " "); + ipv4_addr_show(f, a->encap.vxlan.ipv4.da); + fprintf(f, " %u %u ", + (uint32_t)a->encap.vxlan.ipv4.dscp, + (uint32_t)a->encap.vxlan.ipv4.ttl); + } else { + fprintf(f, " ipv6 "); + ipv6_addr_show(f, a->encap.vxlan.ipv6.sa); + fprintf(f, " "); + ipv6_addr_show(f, a->encap.vxlan.ipv6.da); + fprintf(f, " %u %u %u ", + a->encap.vxlan.ipv6.flow_label, + (uint32_t)a->encap.vxlan.ipv6.dscp, + (uint32_t)a->encap.vxlan.ipv6.hop_limit); + fprintf(f, " udp %u %u vxlan %u ", + a->encap.vxlan.udp.sp, + a->encap.vxlan.udp.dp, + a->encap.vxlan.vxlan.vni); + } + break; + + default: + fprintf(f, "unknown "); + } + } + + if (a->action_mask & (1LLU << RTE_TABLE_ACTION_NAT)) { + fprintf(f, "nat %s ", (a->nat.ip_version) ? "ipv4 " : "ipv6 "); + if (a->nat.ip_version) + ipv4_addr_show(f, a->nat.addr.ipv4); + else + ipv6_addr_show(f, a->nat.addr.ipv6); + fprintf(f, " %u ", (uint32_t)(a->nat.port)); + } + + if (a->action_mask & (1LLU << RTE_TABLE_ACTION_TTL)) + fprintf(f, "ttl %s ", (a->ttl.decrement) ? "dec" : "keep"); + + if (a->action_mask & (1LLU << RTE_TABLE_ACTION_STATS)) + fprintf(f, "stats "); + + if (a->action_mask & (1LLU << RTE_TABLE_ACTION_TIME)) + fprintf(f, "time "); + + if (a->action_mask & (1LLU << RTE_TABLE_ACTION_SYM_CRYPTO)) + fprintf(f, "sym_crypto "); + + if (a->action_mask & (1LLU << RTE_TABLE_ACTION_TAG)) + fprintf(f, "tag %u ", a->tag.tag); + + if (a->action_mask & (1LLU << RTE_TABLE_ACTION_DECAP)) + fprintf(f, "decap %u ", a->decap.n); + + /* end */ + fprintf(f, "\n"); + } + + /* Write table default rule to file. */ + if (table->rule_default) { + struct table_rule_action *a = &table->rule_default->action; + + fprintf(f, "# match default action fwd "); + + switch (a->fwd.action) { + case RTE_PIPELINE_ACTION_DROP: + fprintf(f, "drop "); + break; + + case RTE_PIPELINE_ACTION_PORT: + fprintf(f, "port %u ", a->fwd.id); + break; + + case RTE_PIPELINE_ACTION_PORT_META: + fprintf(f, "meta "); + break; + + case RTE_PIPELINE_ACTION_TABLE: + default: + fprintf(f, "table %u ", a->fwd.id); + } + } else + fprintf(f, "# match default action fwd drop "); + + fprintf(f, "\n"); + + /* Close file. */ + fclose(f); + + return 0; +} + +static const char cmd_pipeline_table_rule_show_help[] = +"pipeline <pipeline_name> table <table_id> rule show\n" +" file <file_name>\n"; + +static void +cmd_pipeline_table_rule_show(char **tokens, + uint32_t n_tokens, + char *out, + size_t out_size) +{ + char *file_name = NULL, *pipeline_name; + uint32_t table_id; + int status; + + if (n_tokens != 8) { + snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]); + return; + } + + pipeline_name = tokens[1]; + + if (strcmp(tokens[2], "table") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "table"); + return; + } + + if (parser_read_uint32(&table_id, tokens[3]) != 0) { + snprintf(out, out_size, MSG_ARG_INVALID, "table_id"); + return; + } + + if (strcmp(tokens[4], "rule") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "rule"); + return; + } + + if (strcmp(tokens[5], "show") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "show"); + return; + } + + if (strcmp(tokens[6], "file") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "file"); + return; + } + + file_name = tokens[7]; + + status = table_rule_show(pipeline_name, table_id, file_name); + if (status) { + snprintf(out, out_size, MSG_CMD_FAIL, tokens[0]); + return; + } +} static const char cmd_pipeline_table_rule_stats_read_help[] = -"pipeline <pipeline_name> table <table_id> rule read stats [clear]\n"; +"pipeline <pipeline_name> table <table_id> rule read stats [clear]\n" +" match <match>\n"; static void cmd_pipeline_table_rule_stats_read(char **tokens, - uint32_t n_tokens __rte_unused, + uint32_t n_tokens, char *out, size_t out_size) { - snprintf(out, out_size, MSG_CMD_UNIMPLEM, tokens[0]); -} + struct table_rule_match m; + struct rte_table_action_stats_counters stats; + char *pipeline_name; + uint32_t table_id, n_tokens_parsed; + int clear = 0, status; + + if (n_tokens < 7) { + snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]); + return; + } + + pipeline_name = tokens[1]; + + if (strcmp(tokens[2], "table") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "table"); + return; + } + + if (parser_read_uint32(&table_id, tokens[3]) != 0) { + snprintf(out, out_size, MSG_ARG_INVALID, "table_id"); + return; + } + + if (strcmp(tokens[4], "rule") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "rule"); + return; + } + + if (strcmp(tokens[5], "read") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "read"); + return; + } + + if (strcmp(tokens[6], "stats") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "stats"); + return; + } + + n_tokens -= 7; + tokens += 7; + + /* clear */ + if (n_tokens && (strcmp(tokens[0], "clear") == 0)) { + clear = 1; + + n_tokens--; + tokens++; + } + + /* match */ + if ((n_tokens == 0) || strcmp(tokens[0], "match")) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "match"); + return; + } + + n_tokens_parsed = parse_match(tokens, + n_tokens, + out, + out_size, + &m); + if (n_tokens_parsed == 0) + return; + n_tokens -= n_tokens_parsed; + tokens += n_tokens_parsed; + + /* end */ + if (n_tokens) { + snprintf(out, out_size, MSG_ARG_INVALID, tokens[0]); + return; + } + + /* Read table rule stats. */ + status = pipeline_table_rule_stats_read(pipeline_name, + table_id, + &m, + &stats, + clear); + if (status) { + snprintf(out, out_size, MSG_CMD_FAIL, tokens[0]); + return; + } + /* Print stats. */ + if (stats.n_packets_valid && stats.n_bytes_valid) + snprintf(out, out_size, "Packets: %" PRIu64 "; Bytes: %" PRIu64 "\n", + stats.n_packets, + stats.n_bytes); + + if (stats.n_packets_valid && !stats.n_bytes_valid) + snprintf(out, out_size, "Packets: %" PRIu64 "; Bytes: N/A\n", + stats.n_packets); + + if (!stats.n_packets_valid && stats.n_bytes_valid) + snprintf(out, out_size, "Packets: N/A; Bytes: %" PRIu64 "\n", + stats.n_bytes); + + if (!stats.n_packets_valid && !stats.n_bytes_valid) + snprintf(out, out_size, "Packets: N/A ; Bytes: N/A\n"); +} static const char cmd_pipeline_table_meter_profile_add_help[] = "pipeline <pipeline_name> table <table_id> meter profile <meter_profile_id>\n" @@ -5010,15 +5479,98 @@ cmd_pipeline_table_meter_profile_delete(char **tokens, static const char cmd_pipeline_table_rule_meter_read_help[] = -"pipeline <pipeline_name> table <table_id> rule read meter [clear]\n"; +"pipeline <pipeline_name> table <table_id> rule read meter [clear]\n" +" match <match>\n"; static void cmd_pipeline_table_rule_meter_read(char **tokens, - uint32_t n_tokens __rte_unused, + uint32_t n_tokens, char *out, size_t out_size) { - snprintf(out, out_size, MSG_CMD_UNIMPLEM, tokens[0]); + struct table_rule_match m; + struct rte_table_action_mtr_counters stats; + char *pipeline_name; + uint32_t table_id, n_tokens_parsed; + int clear = 0, status; + + if (n_tokens < 7) { + snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]); + return; + } + + pipeline_name = tokens[1]; + + if (strcmp(tokens[2], "table") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "table"); + return; + } + + if (parser_read_uint32(&table_id, tokens[3]) != 0) { + snprintf(out, out_size, MSG_ARG_INVALID, "table_id"); + return; + } + + if (strcmp(tokens[4], "rule") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "rule"); + return; + } + + if (strcmp(tokens[5], "read") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "read"); + return; + } + + if (strcmp(tokens[6], "meter") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "meter"); + return; + } + + n_tokens -= 7; + tokens += 7; + + /* clear */ + if (n_tokens && (strcmp(tokens[0], "clear") == 0)) { + clear = 1; + + n_tokens--; + tokens++; + } + + /* match */ + if ((n_tokens == 0) || strcmp(tokens[0], "match")) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "match"); + return; + } + + n_tokens_parsed = parse_match(tokens, + n_tokens, + out, + out_size, + &m); + if (n_tokens_parsed == 0) + return; + n_tokens -= n_tokens_parsed; + tokens += n_tokens_parsed; + + /* end */ + if (n_tokens) { + snprintf(out, out_size, MSG_ARG_INVALID, tokens[0]); + return; + } + + /* Read table rule meter stats. */ + status = pipeline_table_rule_mtr_read(pipeline_name, + table_id, + &m, + &stats, + clear); + if (status) { + snprintf(out, out_size, MSG_CMD_FAIL, tokens[0]); + return; + } + + /* Print stats. */ } @@ -5173,17 +5725,188 @@ cmd_pipeline_table_dscp(char **tokens, static const char cmd_pipeline_table_rule_ttl_read_help[] = -"pipeline <pipeline_name> table <table_id> rule read ttl [clear]\n"; +"pipeline <pipeline_name> table <table_id> rule read ttl [clear]\n" +" match <match>\n"; static void cmd_pipeline_table_rule_ttl_read(char **tokens, - uint32_t n_tokens __rte_unused, + uint32_t n_tokens, char *out, size_t out_size) { - snprintf(out, out_size, MSG_CMD_UNIMPLEM, tokens[0]); + struct table_rule_match m; + struct rte_table_action_ttl_counters stats; + char *pipeline_name; + uint32_t table_id, n_tokens_parsed; + int clear = 0, status; + + if (n_tokens < 7) { + snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]); + return; + } + + pipeline_name = tokens[1]; + + if (strcmp(tokens[2], "table") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "table"); + return; + } + + if (parser_read_uint32(&table_id, tokens[3]) != 0) { + snprintf(out, out_size, MSG_ARG_INVALID, "table_id"); + return; + } + + if (strcmp(tokens[4], "rule") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "rule"); + return; + } + + if (strcmp(tokens[5], "read") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "read"); + return; + } + + if (strcmp(tokens[6], "ttl") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "ttl"); + return; + } + + n_tokens -= 7; + tokens += 7; + + /* clear */ + if (n_tokens && (strcmp(tokens[0], "clear") == 0)) { + clear = 1; + + n_tokens--; + tokens++; + } + + /* match */ + if ((n_tokens == 0) || strcmp(tokens[0], "match")) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "match"); + return; + } + + n_tokens_parsed = parse_match(tokens, + n_tokens, + out, + out_size, + &m); + if (n_tokens_parsed == 0) + return; + n_tokens -= n_tokens_parsed; + tokens += n_tokens_parsed; + + /* end */ + if (n_tokens) { + snprintf(out, out_size, MSG_ARG_INVALID, tokens[0]); + return; + } + + /* Read table rule TTL stats. */ + status = pipeline_table_rule_ttl_read(pipeline_name, + table_id, + &m, + &stats, + clear); + if (status) { + snprintf(out, out_size, MSG_CMD_FAIL, tokens[0]); + return; + } + + /* Print stats. */ + snprintf(out, out_size, "Packets: %" PRIu64 "\n", + stats.n_packets); } +static const char cmd_pipeline_table_rule_time_read_help[] = +"pipeline <pipeline_name> table <table_id> rule read time\n" +" match <match>\n"; + +static void +cmd_pipeline_table_rule_time_read(char **tokens, + uint32_t n_tokens, + char *out, + size_t out_size) +{ + struct table_rule_match m; + char *pipeline_name; + uint64_t timestamp; + uint32_t table_id, n_tokens_parsed; + int status; + + if (n_tokens < 7) { + snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]); + return; + } + + pipeline_name = tokens[1]; + + if (strcmp(tokens[2], "table") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "table"); + return; + } + + if (parser_read_uint32(&table_id, tokens[3]) != 0) { + snprintf(out, out_size, MSG_ARG_INVALID, "table_id"); + return; + } + + if (strcmp(tokens[4], "rule") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "rule"); + return; + } + + if (strcmp(tokens[5], "read") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "read"); + return; + } + + if (strcmp(tokens[6], "time") != 0) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "time"); + return; + } + + n_tokens -= 7; + tokens += 7; + + /* match */ + if ((n_tokens == 0) || strcmp(tokens[0], "match")) { + snprintf(out, out_size, MSG_ARG_NOT_FOUND, "match"); + return; + } + + n_tokens_parsed = parse_match(tokens, + n_tokens, + out, + out_size, + &m); + if (n_tokens_parsed == 0) + return; + n_tokens -= n_tokens_parsed; + tokens += n_tokens_parsed; + + /* end */ + if (n_tokens) { + snprintf(out, out_size, MSG_ARG_INVALID, tokens[0]); + return; + } + + /* Read table rule timestamp. */ + status = pipeline_table_rule_time_read(pipeline_name, + table_id, + &m, + ×tamp); + if (status) { + snprintf(out, out_size, MSG_CMD_FAIL, tokens[0]); + return; + } + + /* Print stats. */ + snprintf(out, out_size, "Packets: %" PRIu64 "\n", timestamp); +} static const char cmd_thread_pipeline_enable_help[] = "thread <thread_id> pipeline <pipeline_name> enable\n"; @@ -5308,12 +6031,14 @@ cmd_help(char **tokens, uint32_t n_tokens, char *out, size_t out_size) "\tpipeline table rule add bulk\n" "\tpipeline table rule delete\n" "\tpipeline table rule delete default\n" + "\tpipeline table rule show\n" "\tpipeline table rule stats read\n" "\tpipeline table meter profile add\n" "\tpipeline table meter profile delete\n" "\tpipeline table rule meter read\n" "\tpipeline table dscp\n" "\tpipeline table rule ttl read\n" + "\tpipeline table rule time read\n" "\tthread pipeline enable\n" "\tthread pipeline disable\n\n"); return; @@ -5521,6 +6246,14 @@ cmd_help(char **tokens, uint32_t n_tokens, char *out, size_t out_size) return; } + if ((n_tokens == 4) && + (strcmp(tokens[2], "rule") == 0) && + (strcmp(tokens[3], "show") == 0)) { + snprintf(out, out_size, "\n%s\n", + cmd_pipeline_table_rule_show_help); + return; + } + if ((n_tokens == 5) && (strcmp(tokens[2], "rule") == 0) && (strcmp(tokens[3], "stats") == 0) && @@ -5565,6 +6298,15 @@ cmd_help(char **tokens, uint32_t n_tokens, char *out, size_t out_size) cmd_pipeline_table_rule_ttl_read_help); return; } + + if ((n_tokens == 5) && + (strcmp(tokens[2], "rule") == 0) && + (strcmp(tokens[3], "time") == 0) && + (strcmp(tokens[4], "read") == 0)) { + snprintf(out, out_size, "\n%s\n", + cmd_pipeline_table_rule_time_read_help); + return; + } } if ((n_tokens == 3) && @@ -5816,6 +6558,15 @@ cli_process(char *in, char *out, size_t out_size) return; } + if ((n_tokens >= 6) && + (strcmp(tokens[2], "table") == 0) && + (strcmp(tokens[4], "rule") == 0) && + (strcmp(tokens[5], "show") == 0)) { + cmd_pipeline_table_rule_show(tokens, n_tokens, + out, out_size); + return; + } + if ((n_tokens >= 7) && (strcmp(tokens[2], "table") == 0) && (strcmp(tokens[4], "rule") == 0) && @@ -5873,6 +6624,16 @@ cli_process(char *in, char *out, size_t out_size) out, out_size); return; } + + if ((n_tokens >= 7) && + (strcmp(tokens[2], "table") == 0) && + (strcmp(tokens[4], "rule") == 0) && + (strcmp(tokens[5], "read") == 0) && + (strcmp(tokens[6], "time") == 0)) { + cmd_pipeline_table_rule_time_read(tokens, n_tokens, + out, out_size); + return; + } } if (strcmp(tokens[0], "thread") == 0) { @@ -5952,44 +6713,56 @@ cli_script_process(const char *file_name, static int cli_rule_file_process(const char *file_name, size_t line_len_max, - struct table_rule_match *m, - struct table_rule_action *a, + struct table_rule_list **rule_list, uint32_t *n_rules, uint32_t *line_number, char *out, size_t out_size) { - FILE *f = NULL; + struct table_rule_list *list = NULL; char *line = NULL; - uint32_t rule_id, line_id; + FILE *f = NULL; + uint32_t rule_id = 0, line_id = 0; int status = 0; /* Check input arguments */ if ((file_name == NULL) || (strlen(file_name) == 0) || - (line_len_max == 0)) { - *line_number = 0; - return -EINVAL; + (line_len_max == 0) || + (rule_list == NULL) || + (n_rules == NULL) || + (line_number == NULL) || + (out == NULL)) { + status = -EINVAL; + goto cli_rule_file_process_free; } /* Memory allocation */ + list = malloc(sizeof(struct table_rule_list)); + if (list == NULL) { + status = -ENOMEM; + goto cli_rule_file_process_free; + } + + TAILQ_INIT(list); + line = malloc(line_len_max + 1); if (line == NULL) { - *line_number = 0; - return -ENOMEM; + status = -ENOMEM; + goto cli_rule_file_process_free; } /* Open file */ f = fopen(file_name, "r"); if (f == NULL) { - *line_number = 0; - free(line); - return -EIO; + status = -EIO; + goto cli_rule_file_process_free; } /* Read file */ - for (line_id = 1, rule_id = 0; rule_id < *n_rules; line_id++) { + for (line_id = 1, rule_id = 0; ; line_id++) { char *tokens[CMD_MAX_TOKENS]; + struct table_rule *rule = NULL; uint32_t n_tokens, n_tokens_parsed, t0; /* Read next line from file. */ @@ -6005,7 +6778,7 @@ cli_rule_file_process(const char *file_name, status = parse_tokenize_string(line, tokens, &n_tokens); if (status) { status = -EINVAL; - break; + goto cli_rule_file_process_free; } /* Empty line. */ @@ -6013,15 +6786,24 @@ cli_rule_file_process(const char *file_name, continue; t0 = 0; + /* Rule alloc and insert. */ + rule = calloc(1, sizeof(struct table_rule)); + if (rule == NULL) { + status = -ENOMEM; + goto cli_rule_file_process_free; + } + + TAILQ_INSERT_TAIL(list, rule, node); + /* Rule match. */ n_tokens_parsed = parse_match(tokens + t0, n_tokens - t0, out, out_size, - &m[rule_id]); + &rule->match); if (n_tokens_parsed == 0) { status = -EINVAL; - break; + goto cli_rule_file_process_free; } t0 += n_tokens_parsed; @@ -6030,17 +6812,17 @@ cli_rule_file_process(const char *file_name, n_tokens - t0, out, out_size, - &a[rule_id]); + &rule->action); if (n_tokens_parsed == 0) { status = -EINVAL; - break; + goto cli_rule_file_process_free; } t0 += n_tokens_parsed; /* Line completed. */ if (t0 < n_tokens) { status = -EINVAL; - break; + goto cli_rule_file_process_free; } /* Increment rule count */ @@ -6053,7 +6835,31 @@ cli_rule_file_process(const char *file_name, /* Memory free */ free(line); + *rule_list = list; *n_rules = rule_id; *line_number = line_id; + return 0; + +cli_rule_file_process_free: + *rule_list = NULL; + *n_rules = rule_id; + *line_number = line_id; + + for ( ; ; ) { + struct table_rule *rule; + + rule = TAILQ_FIRST(list); + if (rule == NULL) + break; + + TAILQ_REMOVE(list, rule, node); + free(rule); + } + + if (f) + fclose(f); + free(line); + free(list); + return status; } diff --git a/examples/ip_pipeline/pipeline.c b/examples/ip_pipeline/pipeline.c index b23d6c09..78d590d7 100644 --- a/examples/ip_pipeline/pipeline.c +++ b/examples/ip_pipeline/pipeline.c @@ -1041,7 +1041,95 @@ pipeline_table_create(const char *pipeline_name, memcpy(&table->params, params, sizeof(*params)); table->ap = ap; table->a = action; + TAILQ_INIT(&table->rules); + table->rule_default = NULL; + pipeline->n_tables++; return 0; } + +struct table_rule * +table_rule_find(struct table *table, + struct table_rule_match *match) +{ + struct table_rule *rule; + + TAILQ_FOREACH(rule, &table->rules, node) + if (memcmp(&rule->match, match, sizeof(*match)) == 0) + return rule; + + return NULL; +} + +void +table_rule_add(struct table *table, + struct table_rule *new_rule) +{ + struct table_rule *existing_rule; + + existing_rule = table_rule_find(table, &new_rule->match); + if (existing_rule == NULL) + TAILQ_INSERT_TAIL(&table->rules, new_rule, node); + else { + TAILQ_INSERT_AFTER(&table->rules, existing_rule, new_rule, node); + TAILQ_REMOVE(&table->rules, existing_rule, node); + free(existing_rule); + } +} + +void +table_rule_add_bulk(struct table *table, + struct table_rule_list *list, + uint32_t n_rules) +{ + uint32_t i; + + for (i = 0; i < n_rules; i++) { + struct table_rule *existing_rule, *new_rule; + + new_rule = TAILQ_FIRST(list); + if (new_rule == NULL) + break; + + TAILQ_REMOVE(list, new_rule, node); + + existing_rule = table_rule_find(table, &new_rule->match); + if (existing_rule == NULL) + TAILQ_INSERT_TAIL(&table->rules, new_rule, node); + else { + TAILQ_INSERT_AFTER(&table->rules, existing_rule, new_rule, node); + TAILQ_REMOVE(&table->rules, existing_rule, node); + free(existing_rule); + } + } +} + +void +table_rule_delete(struct table *table, + struct table_rule_match *match) +{ + struct table_rule *rule; + + rule = table_rule_find(table, match); + if (rule == NULL) + return; + + TAILQ_REMOVE(&table->rules, rule, node); + free(rule); +} + +void +table_rule_default_add(struct table *table, + struct table_rule *rule) +{ + free(table->rule_default); + table->rule_default = rule; +} + +void +table_rule_default_delete(struct table *table) +{ + free(table->rule_default); + table->rule_default = NULL; +} diff --git a/examples/ip_pipeline/pipeline.h b/examples/ip_pipeline/pipeline.h index e5b1d5d0..278775c2 100644 --- a/examples/ip_pipeline/pipeline.h +++ b/examples/ip_pipeline/pipeline.h @@ -143,6 +143,10 @@ struct table_params { const char *action_profile_name; }; +struct table_rule; + +TAILQ_HEAD(table_rule_list, table_rule); + struct port_in { struct port_in_params params; struct port_in_action_profile *ap; @@ -153,6 +157,8 @@ struct table { struct table_params params; struct table_action_profile *ap; struct rte_table_action *a; + struct table_rule_list rules; + struct table_rule *rule_default; }; struct pipeline { @@ -286,6 +292,13 @@ struct table_rule_action { struct rte_table_action_decap_params decap; }; +struct table_rule { + TAILQ_ENTRY(table_rule) node; + struct table_rule_match match; + struct table_rule_action action; + void *data; +}; + int pipeline_port_in_stats_read(const char *pipeline_name, uint32_t port_id, @@ -316,22 +329,19 @@ int pipeline_table_rule_add(const char *pipeline_name, uint32_t table_id, struct table_rule_match *match, - struct table_rule_action *action, - void **data); + struct table_rule_action *action); int pipeline_table_rule_add_bulk(const char *pipeline_name, uint32_t table_id, - struct table_rule_match *match, - struct table_rule_action *action, - void **data, - uint32_t *n_rules); + struct table_rule_list *list, + uint32_t *n_rules_added, + uint32_t *n_rules_not_added); int pipeline_table_rule_add_default(const char *pipeline_name, uint32_t table_id, - struct table_rule_action *action, - void **data); + struct table_rule_action *action); int pipeline_table_rule_delete(const char *pipeline_name, @@ -345,7 +355,7 @@ pipeline_table_rule_delete_default(const char *pipeline_name, int pipeline_table_rule_stats_read(const char *pipeline_name, uint32_t table_id, - void *data, + struct table_rule_match *match, struct rte_table_action_stats_counters *stats, int clear); @@ -363,8 +373,7 @@ pipeline_table_mtr_profile_delete(const char *pipeline_name, int pipeline_table_rule_mtr_read(const char *pipeline_name, uint32_t table_id, - void *data, - uint32_t tc_mask, + struct table_rule_match *match, struct rte_table_action_mtr_counters *stats, int clear); @@ -377,8 +386,38 @@ pipeline_table_dscp_table_update(const char *pipeline_name, int pipeline_table_rule_ttl_read(const char *pipeline_name, uint32_t table_id, - void *data, + struct table_rule_match *match, struct rte_table_action_ttl_counters *stats, int clear); +int +pipeline_table_rule_time_read(const char *pipeline_name, + uint32_t table_id, + struct table_rule_match *match, + uint64_t *timestamp); + +struct table_rule * +table_rule_find(struct table *table, + struct table_rule_match *match); + +void +table_rule_add(struct table *table, + struct table_rule *rule); + +void +table_rule_add_bulk(struct table *table, + struct table_rule_list *list, + uint32_t n_rules); + +void +table_rule_delete(struct table *table, + struct table_rule_match *match); + +void +table_rule_default_add(struct table *table, + struct table_rule *rule); + +void +table_rule_default_delete(struct table *table); + #endif /* _INCLUDE_PIPELINE_H_ */ diff --git a/examples/ip_pipeline/thread.c b/examples/ip_pipeline/thread.c index 4bd971fd..272fbbee 100644 --- a/examples/ip_pipeline/thread.c +++ b/examples/ip_pipeline/thread.c @@ -584,6 +584,7 @@ enum pipeline_req_type { PIPELINE_REQ_TABLE_RULE_MTR_READ, PIPELINE_REQ_TABLE_DSCP_TABLE_UPDATE, PIPELINE_REQ_TABLE_RULE_TTL_READ, + PIPELINE_REQ_TABLE_RULE_TIME_READ, PIPELINE_REQ_MAX }; @@ -609,10 +610,7 @@ struct pipeline_msg_req_table_rule_add_default { }; struct pipeline_msg_req_table_rule_add_bulk { - struct table_rule_match *match; - struct table_rule_action *action; - void **data; - uint32_t n_rules; + struct table_rule_list *list; int bulk; }; @@ -650,6 +648,10 @@ struct pipeline_msg_req_table_rule_ttl_read { int clear; }; +struct pipeline_msg_req_table_rule_time_read { + void *data; +}; + struct pipeline_msg_req { enum pipeline_req_type type; uint32_t id; /* Port IN, port OUT or table ID */ @@ -669,6 +671,7 @@ struct pipeline_msg_req { struct pipeline_msg_req_table_rule_mtr_read table_rule_mtr_read; struct pipeline_msg_req_table_dscp_table_update table_dscp_table_update; struct pipeline_msg_req_table_rule_ttl_read table_rule_ttl_read; + struct pipeline_msg_req_table_rule_time_read table_rule_time_read; }; }; @@ -708,6 +711,10 @@ struct pipeline_msg_rsp_table_rule_ttl_read { struct rte_table_action_ttl_counters stats; }; +struct pipeline_msg_rsp_table_rule_time_read { + uint64_t timestamp; +}; + struct pipeline_msg_rsp { int status; @@ -722,6 +729,7 @@ struct pipeline_msg_rsp { struct pipeline_msg_rsp_table_rule_stats_read table_rule_stats_read; struct pipeline_msg_rsp_table_rule_mtr_read table_rule_mtr_read; struct pipeline_msg_rsp_table_rule_ttl_read table_rule_ttl_read; + struct pipeline_msg_rsp_table_rule_time_read table_rule_time_read; }; }; @@ -812,7 +820,7 @@ pipeline_port_in_stats_read(const char *pipeline_name, /* Read response */ status = rsp->status; - if (status) + if (status == 0) memcpy(stats, &rsp->port_in_stats_read.stats, sizeof(*stats)); /* Free response */ @@ -960,7 +968,7 @@ pipeline_port_out_stats_read(const char *pipeline_name, /* Read response */ status = rsp->status; - if (status) + if (status == 0) memcpy(stats, &rsp->port_out_stats_read.stats, sizeof(*stats)); /* Free response */ @@ -1016,7 +1024,7 @@ pipeline_table_stats_read(const char *pipeline_name, /* Read response */ status = rsp->status; - if (status) + if (status == 0) memcpy(stats, &rsp->table_stats_read.stats, sizeof(*stats)); /* Free response */ @@ -1206,23 +1214,145 @@ action_convert(struct rte_table_action *a, struct table_rule_action *action, struct rte_pipeline_table_entry *data); +struct table_ll { + struct rte_pipeline *p; + int table_id; + struct rte_table_action *a; + int bulk_supported; +}; + +static int +table_rule_add_bulk_ll(struct table_ll *table, + struct table_rule_list *list, + uint32_t *n_rules) +{ + union table_rule_match_low_level *match_ll = NULL; + uint8_t *action_ll = NULL; + void **match_ll_ptr = NULL; + struct rte_pipeline_table_entry **action_ll_ptr = NULL; + struct rte_pipeline_table_entry **entries_ptr = NULL; + int *found = NULL; + struct table_rule *rule; + uint32_t n, i; + int status = 0; + + n = 0; + TAILQ_FOREACH(rule, list, node) + n++; + + /* Memory allocation */ + match_ll = calloc(n, sizeof(union table_rule_match_low_level)); + action_ll = calloc(n, TABLE_RULE_ACTION_SIZE_MAX); + + match_ll_ptr = calloc(n, sizeof(void *)); + action_ll_ptr = calloc(n, sizeof(struct rte_pipeline_table_entry *)); + + entries_ptr = calloc(n, sizeof(struct rte_pipeline_table_entry *)); + found = calloc(n, sizeof(int)); + + if (match_ll == NULL || + action_ll == NULL || + match_ll_ptr == NULL || + action_ll_ptr == NULL || + entries_ptr == NULL || + found == NULL) { + status = -ENOMEM; + goto table_rule_add_bulk_ll_free; + } + + /* Init */ + for (i = 0; i < n; i++) { + match_ll_ptr[i] = (void *)&match_ll[i]; + action_ll_ptr[i] = (struct rte_pipeline_table_entry *) + &action_ll[i * TABLE_RULE_ACTION_SIZE_MAX]; + } + + /* Rule (match, action) conversion */ + i = 0; + TAILQ_FOREACH(rule, list, node) { + status = match_convert(&rule->match, match_ll_ptr[i], 1); + if (status) + goto table_rule_add_bulk_ll_free; + + status = action_convert(table->a, &rule->action, action_ll_ptr[i]); + if (status) + goto table_rule_add_bulk_ll_free; + + i++; + } + + /* Add rule (match, action) to table */ + if (table->bulk_supported) { + status = rte_pipeline_table_entry_add_bulk(table->p, + table->table_id, + match_ll_ptr, + action_ll_ptr, + n, + found, + entries_ptr); + if (status) + goto table_rule_add_bulk_ll_free; + } else + for (i = 0; i < n; i++) { + status = rte_pipeline_table_entry_add(table->p, + table->table_id, + match_ll_ptr[i], + action_ll_ptr[i], + &found[i], + &entries_ptr[i]); + if (status) { + if (i == 0) + goto table_rule_add_bulk_ll_free; + + /* No roll-back. */ + status = 0; + n = i; + break; + } + } + + /* Write back to the rule list. */ + i = 0; + TAILQ_FOREACH(rule, list, node) { + if (i >= n) + break; + + rule->data = entries_ptr[i]; + + i++; + } + + *n_rules = n; + + /* Free */ +table_rule_add_bulk_ll_free: + free(found); + free(entries_ptr); + free(action_ll_ptr); + free(match_ll_ptr); + free(action_ll); + free(match_ll); + + return status; +} + int pipeline_table_rule_add(const char *pipeline_name, uint32_t table_id, struct table_rule_match *match, - struct table_rule_action *action, - void **data) + struct table_rule_action *action) { struct pipeline *p; + struct table *table; struct pipeline_msg_req *req; struct pipeline_msg_rsp *rsp; + struct table_rule *rule; int status; /* Check input params */ if ((pipeline_name == NULL) || (match == NULL) || - (action == NULL) || - (data == NULL)) + (action == NULL)) return -1; p = pipeline_find(pipeline_name); @@ -1232,16 +1362,26 @@ pipeline_table_rule_add(const char *pipeline_name, action_check(action, p, table_id)) return -1; + table = &p->table[table_id]; + + rule = calloc(1, sizeof(struct table_rule)); + if (rule == NULL) + return -1; + + memcpy(&rule->match, match, sizeof(*match)); + memcpy(&rule->action, action, sizeof(*action)); + if (!pipeline_is_running(p)) { - struct rte_table_action *a = p->table[table_id].a; union table_rule_match_low_level match_ll; struct rte_pipeline_table_entry *data_in, *data_out; int key_found; uint8_t *buffer; buffer = calloc(TABLE_RULE_ACTION_SIZE_MAX, sizeof(uint8_t)); - if (buffer == NULL) + if (buffer == NULL) { + free(rule); return -1; + } /* Table match-action rule conversion */ data_in = (struct rte_pipeline_table_entry *)buffer; @@ -1249,12 +1389,14 @@ pipeline_table_rule_add(const char *pipeline_name, status = match_convert(match, &match_ll, 1); if (status) { free(buffer); + free(rule); return -1; } - status = action_convert(a, action, data_in); + status = action_convert(table->a, action, data_in); if (status) { free(buffer); + free(rule); return -1; } @@ -1267,11 +1409,13 @@ pipeline_table_rule_add(const char *pipeline_name, &data_out); if (status) { free(buffer); + free(rule); return -1; } /* Write Response */ - *data = data_out; + rule->data = data_out; + table_rule_add(table, rule); free(buffer); return 0; @@ -1279,8 +1423,10 @@ pipeline_table_rule_add(const char *pipeline_name, /* Allocate request */ req = pipeline_msg_alloc(); - if (req == NULL) + if (req == NULL) { + free(rule); return -1; + } /* Write request */ req->type = PIPELINE_REQ_TABLE_RULE_ADD; @@ -1290,13 +1436,18 @@ pipeline_table_rule_add(const char *pipeline_name, /* Send request and wait for response */ rsp = pipeline_msg_send_recv(p, req); - if (rsp == NULL) + if (rsp == NULL) { + free(rule); return -1; + } /* Read response */ status = rsp->status; - if (status == 0) - *data = rsp->table_rule_add.data; + if (status == 0) { + rule->data = rsp->table_rule_add.data; + table_rule_add(table, rule); + } else + free(rule); /* Free response */ pipeline_msg_free(rsp); @@ -1307,18 +1458,18 @@ pipeline_table_rule_add(const char *pipeline_name, int pipeline_table_rule_add_default(const char *pipeline_name, uint32_t table_id, - struct table_rule_action *action, - void **data) + struct table_rule_action *action) { struct pipeline *p; + struct table *table; struct pipeline_msg_req *req; struct pipeline_msg_rsp *rsp; + struct table_rule *rule; int status; /* Check input params */ if ((pipeline_name == NULL) || - (action == NULL) || - (data == NULL)) + (action == NULL)) return -1; p = pipeline_find(pipeline_name); @@ -1327,13 +1478,23 @@ pipeline_table_rule_add_default(const char *pipeline_name, action_default_check(action, p, table_id)) return -1; + table = &p->table[table_id]; + + rule = calloc(1, sizeof(struct table_rule)); + if (rule == NULL) + return -1; + + memcpy(&rule->action, action, sizeof(*action)); + if (!pipeline_is_running(p)) { struct rte_pipeline_table_entry *data_in, *data_out; uint8_t *buffer; buffer = calloc(TABLE_RULE_ACTION_SIZE_MAX, sizeof(uint8_t)); - if (buffer == NULL) + if (buffer == NULL) { + free(rule); return -1; + } /* Apply actions */ data_in = (struct rte_pipeline_table_entry *)buffer; @@ -1351,11 +1512,13 @@ pipeline_table_rule_add_default(const char *pipeline_name, &data_out); if (status) { free(buffer); + free(rule); return -1; } /* Write Response */ - *data = data_out; + rule->data = data_out; + table_rule_default_add(table, rule); free(buffer); return 0; @@ -1363,8 +1526,10 @@ pipeline_table_rule_add_default(const char *pipeline_name, /* Allocate request */ req = pipeline_msg_alloc(); - if (req == NULL) + if (req == NULL) { + free(rule); return -1; + } /* Write request */ req->type = PIPELINE_REQ_TABLE_RULE_ADD_DEFAULT; @@ -1373,13 +1538,18 @@ pipeline_table_rule_add_default(const char *pipeline_name, /* Send request and wait for response */ rsp = pipeline_msg_send_recv(p, req); - if (rsp == NULL) + if (rsp == NULL) { + free(rule); return -1; + } /* Read response */ status = rsp->status; - if (status == 0) - *data = rsp->table_rule_add_default.data; + if (status == 0) { + rule->data = rsp->table_rule_add_default.data; + table_rule_default_add(table, rule); + } else + free(rule); /* Free response */ pipeline_msg_free(rsp); @@ -1387,156 +1557,119 @@ pipeline_table_rule_add_default(const char *pipeline_name, return status; } +static uint32_t +table_rule_list_free(struct table_rule_list *list) +{ + uint32_t n = 0; + + if (!list) + return 0; + + for ( ; ; ) { + struct table_rule *rule; + + rule = TAILQ_FIRST(list); + if (rule == NULL) + break; + + TAILQ_REMOVE(list, rule, node); + free(rule); + n++; + } + + free(list); + return n; +} + int pipeline_table_rule_add_bulk(const char *pipeline_name, uint32_t table_id, - struct table_rule_match *match, - struct table_rule_action *action, - void **data, - uint32_t *n_rules) + struct table_rule_list *list, + uint32_t *n_rules_added, + uint32_t *n_rules_not_added) { struct pipeline *p; + struct table *table; struct pipeline_msg_req *req; struct pipeline_msg_rsp *rsp; - uint32_t i; - int status; + struct table_rule *rule; + int status = 0; /* Check input params */ if ((pipeline_name == NULL) || - (match == NULL) || - (action == NULL) || - (data == NULL) || - (n_rules == NULL) || - (*n_rules == 0)) - return -1; + (list == NULL) || + TAILQ_EMPTY(list) || + (n_rules_added == NULL) || + (n_rules_not_added == NULL)) { + table_rule_list_free(list); + return -EINVAL; + } p = pipeline_find(pipeline_name); if ((p == NULL) || - (table_id >= p->n_tables)) - return -1; - - for (i = 0; i < *n_rules; i++) - if (match_check(match, p, table_id) || - action_check(action, p, table_id)) - return -1; - - if (!pipeline_is_running(p)) { - struct rte_table_action *a = p->table[table_id].a; - union table_rule_match_low_level *match_ll; - uint8_t *action_ll; - void **match_ll_ptr; - struct rte_pipeline_table_entry **action_ll_ptr; - struct rte_pipeline_table_entry **entries_ptr = - (struct rte_pipeline_table_entry **)data; - uint32_t bulk = - (p->table[table_id].params.match_type == TABLE_ACL) ? 1 : 0; - int *found; - - /* Memory allocation */ - match_ll = calloc(*n_rules, sizeof(union table_rule_match_low_level)); - action_ll = calloc(*n_rules, TABLE_RULE_ACTION_SIZE_MAX); - match_ll_ptr = calloc(*n_rules, sizeof(void *)); - action_ll_ptr = - calloc(*n_rules, sizeof(struct rte_pipeline_table_entry *)); - found = calloc(*n_rules, sizeof(int)); - - if (match_ll == NULL || - action_ll == NULL || - match_ll_ptr == NULL || - action_ll_ptr == NULL || - found == NULL) - goto fail; - - for (i = 0; i < *n_rules; i++) { - match_ll_ptr[i] = (void *)&match_ll[i]; - action_ll_ptr[i] = - (struct rte_pipeline_table_entry *)&action_ll[i * TABLE_RULE_ACTION_SIZE_MAX]; - } + (table_id >= p->n_tables)) { + table_rule_list_free(list); + return -EINVAL; + } - /* Rule match conversion */ - for (i = 0; i < *n_rules; i++) { - status = match_convert(&match[i], match_ll_ptr[i], 1); - if (status) - goto fail; - } + table = &p->table[table_id]; - /* Rule action conversion */ - for (i = 0; i < *n_rules; i++) { - status = action_convert(a, &action[i], action_ll_ptr[i]); - if (status) - goto fail; + TAILQ_FOREACH(rule, list, node) + if (match_check(&rule->match, p, table_id) || + action_check(&rule->action, p, table_id)) { + table_rule_list_free(list); + return -EINVAL; } - /* Add rule (match, action) to table */ - if (bulk) { - status = rte_pipeline_table_entry_add_bulk(p->p, - table_id, - match_ll_ptr, - action_ll_ptr, - *n_rules, - found, - entries_ptr); - if (status) - *n_rules = 0; - } else { - for (i = 0; i < *n_rules; i++) { - status = rte_pipeline_table_entry_add(p->p, - table_id, - match_ll_ptr[i], - action_ll_ptr[i], - &found[i], - &entries_ptr[i]); - if (status) { - *n_rules = i; - break; - } - } + if (!pipeline_is_running(p)) { + struct table_ll table_ll = { + .p = p->p, + .table_id = table_id, + .a = table->a, + .bulk_supported = table->params.match_type == TABLE_ACL, + }; + + status = table_rule_add_bulk_ll(&table_ll, list, n_rules_added); + if (status) { + table_rule_list_free(list); + return status; } - /* Free */ - free(found); - free(action_ll_ptr); - free(match_ll_ptr); - free(action_ll); - free(match_ll); - - return status; - -fail: - free(found); - free(action_ll_ptr); - free(match_ll_ptr); - free(action_ll); - free(match_ll); - - *n_rules = 0; - return -1; + table_rule_add_bulk(table, list, *n_rules_added); + *n_rules_not_added = table_rule_list_free(list); + return 0; } /* Allocate request */ req = pipeline_msg_alloc(); - if (req == NULL) - return -1; + if (req == NULL) { + table_rule_list_free(list); + return -ENOMEM; + } /* Write request */ req->type = PIPELINE_REQ_TABLE_RULE_ADD_BULK; req->id = table_id; - req->table_rule_add_bulk.match = match; - req->table_rule_add_bulk.action = action; - req->table_rule_add_bulk.data = data; - req->table_rule_add_bulk.n_rules = *n_rules; - req->table_rule_add_bulk.bulk = - (p->table[table_id].params.match_type == TABLE_ACL) ? 1 : 0; + req->table_rule_add_bulk.list = list; + req->table_rule_add_bulk.bulk = table->params.match_type == TABLE_ACL; /* Send request and wait for response */ rsp = pipeline_msg_send_recv(p, req); - if (rsp == NULL) - return -1; + if (rsp == NULL) { + table_rule_list_free(list); + return -ENOMEM; + } /* Read response */ status = rsp->status; - if (status == 0) - *n_rules = rsp->table_rule_add_bulk.n_rules; + if (status == 0) { + *n_rules_added = rsp->table_rule_add_bulk.n_rules; + + table_rule_add_bulk(table, list, *n_rules_added); + *n_rules_not_added = table_rule_list_free(list); + } else + table_rule_list_free(list); + /* Free response */ pipeline_msg_free(rsp); @@ -1550,6 +1683,7 @@ pipeline_table_rule_delete(const char *pipeline_name, struct table_rule_match *match) { struct pipeline *p; + struct table *table; struct pipeline_msg_req *req; struct pipeline_msg_rsp *rsp; int status; @@ -1565,6 +1699,8 @@ pipeline_table_rule_delete(const char *pipeline_name, match_check(match, p, table_id)) return -1; + table = &p->table[table_id]; + if (!pipeline_is_running(p)) { union table_rule_match_low_level match_ll; int key_found; @@ -1579,6 +1715,9 @@ pipeline_table_rule_delete(const char *pipeline_name, &key_found, NULL); + if (status == 0) + table_rule_delete(table, match); + return status; } @@ -1599,6 +1738,8 @@ pipeline_table_rule_delete(const char *pipeline_name, /* Read response */ status = rsp->status; + if (status == 0) + table_rule_delete(table, match); /* Free response */ pipeline_msg_free(rsp); @@ -1611,6 +1752,7 @@ pipeline_table_rule_delete_default(const char *pipeline_name, uint32_t table_id) { struct pipeline *p; + struct table *table; struct pipeline_msg_req *req; struct pipeline_msg_rsp *rsp; int status; @@ -1624,11 +1766,16 @@ pipeline_table_rule_delete_default(const char *pipeline_name, (table_id >= p->n_tables)) return -1; + table = &p->table[table_id]; + if (!pipeline_is_running(p)) { status = rte_pipeline_table_default_entry_delete(p->p, table_id, NULL); + if (status == 0) + table_rule_default_delete(table); + return status; } @@ -1648,6 +1795,8 @@ pipeline_table_rule_delete_default(const char *pipeline_name, /* Read response */ status = rsp->status; + if (status == 0) + table_rule_default_delete(table); /* Free response */ pipeline_msg_free(rsp); @@ -1658,31 +1807,37 @@ pipeline_table_rule_delete_default(const char *pipeline_name, int pipeline_table_rule_stats_read(const char *pipeline_name, uint32_t table_id, - void *data, + struct table_rule_match *match, struct rte_table_action_stats_counters *stats, int clear) { struct pipeline *p; + struct table *table; struct pipeline_msg_req *req; struct pipeline_msg_rsp *rsp; + struct table_rule *rule; int status; /* Check input params */ if ((pipeline_name == NULL) || - (data == NULL) || + (match == NULL) || (stats == NULL)) return -1; p = pipeline_find(pipeline_name); if ((p == NULL) || - (table_id >= p->n_tables)) + (table_id >= p->n_tables) || + match_check(match, p, table_id)) return -1; - if (!pipeline_is_running(p)) { - struct rte_table_action *a = p->table[table_id].a; + table = &p->table[table_id]; + rule = table_rule_find(table, match); + if (rule == NULL) + return -1; - status = rte_table_action_stats_read(a, - data, + if (!pipeline_is_running(p)) { + status = rte_table_action_stats_read(table->a, + rule->data, stats, clear); @@ -1697,7 +1852,7 @@ pipeline_table_rule_stats_read(const char *pipeline_name, /* Write request */ req->type = PIPELINE_REQ_TABLE_RULE_STATS_READ; req->id = table_id; - req->table_rule_stats_read.data = data; + req->table_rule_stats_read.data = rule->data; req->table_rule_stats_read.clear = clear; /* Send request and wait for response */ @@ -1707,7 +1862,7 @@ pipeline_table_rule_stats_read(const char *pipeline_name, /* Read response */ status = rsp->status; - if (status) + if (status == 0) memcpy(stats, &rsp->table_rule_stats_read.stats, sizeof(*stats)); /* Free response */ @@ -1827,32 +1982,40 @@ pipeline_table_mtr_profile_delete(const char *pipeline_name, int pipeline_table_rule_mtr_read(const char *pipeline_name, uint32_t table_id, - void *data, - uint32_t tc_mask, + struct table_rule_match *match, struct rte_table_action_mtr_counters *stats, int clear) { struct pipeline *p; + struct table *table; struct pipeline_msg_req *req; struct pipeline_msg_rsp *rsp; + struct table_rule *rule; + uint32_t tc_mask; int status; /* Check input params */ if ((pipeline_name == NULL) || - (data == NULL) || + (match == NULL) || (stats == NULL)) return -1; p = pipeline_find(pipeline_name); if ((p == NULL) || - (table_id >= p->n_tables)) + (table_id >= p->n_tables) || + match_check(match, p, table_id)) return -1; - if (!pipeline_is_running(p)) { - struct rte_table_action *a = p->table[table_id].a; + table = &p->table[table_id]; + tc_mask = (1 << table->ap->params.mtr.n_tc) - 1; + + rule = table_rule_find(table, match); + if (rule == NULL) + return -1; - status = rte_table_action_meter_read(a, - data, + if (!pipeline_is_running(p)) { + status = rte_table_action_meter_read(table->a, + rule->data, tc_mask, stats, clear); @@ -1868,7 +2031,7 @@ pipeline_table_rule_mtr_read(const char *pipeline_name, /* Write request */ req->type = PIPELINE_REQ_TABLE_RULE_MTR_READ; req->id = table_id; - req->table_rule_mtr_read.data = data; + req->table_rule_mtr_read.data = rule->data; req->table_rule_mtr_read.tc_mask = tc_mask; req->table_rule_mtr_read.clear = clear; @@ -1879,7 +2042,7 @@ pipeline_table_rule_mtr_read(const char *pipeline_name, /* Read response */ status = rsp->status; - if (status) + if (status == 0) memcpy(stats, &rsp->table_rule_mtr_read.stats, sizeof(*stats)); /* Free response */ @@ -1948,31 +2111,40 @@ pipeline_table_dscp_table_update(const char *pipeline_name, int pipeline_table_rule_ttl_read(const char *pipeline_name, uint32_t table_id, - void *data, + struct table_rule_match *match, struct rte_table_action_ttl_counters *stats, int clear) { struct pipeline *p; + struct table *table; struct pipeline_msg_req *req; struct pipeline_msg_rsp *rsp; + struct table_rule *rule; int status; /* Check input params */ if ((pipeline_name == NULL) || - (data == NULL) || + (match == NULL) || (stats == NULL)) return -1; p = pipeline_find(pipeline_name); if ((p == NULL) || - (table_id >= p->n_tables)) + (table_id >= p->n_tables) || + match_check(match, p, table_id)) return -1; - if (!pipeline_is_running(p)) { - struct rte_table_action *a = p->table[table_id].a; + table = &p->table[table_id]; + if (!table->ap->params.ttl.n_packets_enabled) + return -1; - status = rte_table_action_ttl_read(a, - data, + rule = table_rule_find(table, match); + if (rule == NULL) + return -1; + + if (!pipeline_is_running(p)) { + status = rte_table_action_ttl_read(table->a, + rule->data, stats, clear); @@ -1987,7 +2159,7 @@ pipeline_table_rule_ttl_read(const char *pipeline_name, /* Write request */ req->type = PIPELINE_REQ_TABLE_RULE_TTL_READ; req->id = table_id; - req->table_rule_ttl_read.data = data; + req->table_rule_ttl_read.data = rule->data; req->table_rule_ttl_read.clear = clear; /* Send request and wait for response */ @@ -1997,7 +2169,7 @@ pipeline_table_rule_ttl_read(const char *pipeline_name, /* Read response */ status = rsp->status; - if (status) + if (status == 0) memcpy(stats, &rsp->table_rule_ttl_read.stats, sizeof(*stats)); /* Free response */ @@ -2006,6 +2178,71 @@ pipeline_table_rule_ttl_read(const char *pipeline_name, return status; } +int +pipeline_table_rule_time_read(const char *pipeline_name, + uint32_t table_id, + struct table_rule_match *match, + uint64_t *timestamp) +{ + struct pipeline *p; + struct table *table; + struct pipeline_msg_req *req; + struct pipeline_msg_rsp *rsp; + struct table_rule *rule; + int status; + + /* Check input params */ + if ((pipeline_name == NULL) || + (match == NULL) || + (timestamp == NULL)) + return -1; + + p = pipeline_find(pipeline_name); + if ((p == NULL) || + (table_id >= p->n_tables) || + match_check(match, p, table_id)) + return -1; + + table = &p->table[table_id]; + + rule = table_rule_find(table, match); + if (rule == NULL) + return -1; + + if (!pipeline_is_running(p)) { + status = rte_table_action_time_read(table->a, + rule->data, + timestamp); + + return status; + } + + /* Allocate request */ + req = pipeline_msg_alloc(); + if (req == NULL) + return -1; + + /* Write request */ + req->type = PIPELINE_REQ_TABLE_RULE_TIME_READ; + req->id = table_id; + req->table_rule_time_read.data = rule->data; + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); + if (rsp == NULL) + return -1; + + /* Read response */ + status = rsp->status; + if (status == 0) + *timestamp = rsp->table_rule_time_read.timestamp; + + /* Free response */ + pipeline_msg_free(rsp); + + return status; +} + /** * Data plane threads: message handling */ @@ -2605,107 +2842,32 @@ static struct pipeline_msg_rsp * pipeline_msg_handle_table_rule_add_bulk(struct pipeline_data *p, struct pipeline_msg_req *req) { - struct pipeline_msg_rsp *rsp = (struct pipeline_msg_rsp *) req; uint32_t table_id = req->id; - struct table_rule_match *match = req->table_rule_add_bulk.match; - struct table_rule_action *action = req->table_rule_add_bulk.action; - struct rte_pipeline_table_entry **data = - (struct rte_pipeline_table_entry **)req->table_rule_add_bulk.data; - uint32_t n_rules = req->table_rule_add_bulk.n_rules; + struct table_rule_list *list = req->table_rule_add_bulk.list; uint32_t bulk = req->table_rule_add_bulk.bulk; - struct rte_table_action *a = p->table_data[table_id].a; - union table_rule_match_low_level *match_ll; - uint8_t *action_ll; - void **match_ll_ptr; - struct rte_pipeline_table_entry **action_ll_ptr; - int *found, status; - uint32_t i; - - /* Memory allocation */ - match_ll = calloc(n_rules, sizeof(union table_rule_match_low_level)); - action_ll = calloc(n_rules, TABLE_RULE_ACTION_SIZE_MAX); - match_ll_ptr = calloc(n_rules, sizeof(void *)); - action_ll_ptr = - calloc(n_rules, sizeof(struct rte_pipeline_table_entry *)); - found = calloc(n_rules, sizeof(int)); - - if ((match_ll == NULL) || - (action_ll == NULL) || - (match_ll_ptr == NULL) || - (action_ll_ptr == NULL) || - (found == NULL)) - goto fail; - - for (i = 0; i < n_rules; i++) { - match_ll_ptr[i] = (void *)&match_ll[i]; - action_ll_ptr[i] = - (struct rte_pipeline_table_entry *)&action_ll[i * TABLE_RULE_ACTION_SIZE_MAX]; - } + uint32_t n_rules_added; + int status; - /* Rule match conversion */ - for (i = 0; i < n_rules; i++) { - status = match_convert(&match[i], match_ll_ptr[i], 1); - if (status) - goto fail; - } + struct table_ll table_ll = { + .p = p->p, + .table_id = table_id, + .a = p->table_data[table_id].a, + .bulk_supported = bulk, + }; - /* Rule action conversion */ - for (i = 0; i < n_rules; i++) { - status = action_convert(a, &action[i], action_ll_ptr[i]); - if (status) - goto fail; + status = table_rule_add_bulk_ll(&table_ll, list, &n_rules_added); + if (status) { + rsp->status = -1; + rsp->table_rule_add_bulk.n_rules = 0; + return rsp; } - /* Add rule (match, action) to table */ - if (bulk) { - status = rte_pipeline_table_entry_add_bulk(p->p, - table_id, - match_ll_ptr, - action_ll_ptr, - n_rules, - found, - data); - if (status) - n_rules = 0; - } else - for (i = 0; i < n_rules; i++) { - status = rte_pipeline_table_entry_add(p->p, - table_id, - match_ll_ptr[i], - action_ll_ptr[i], - &found[i], - &data[i]); - if (status) { - n_rules = i; - break; - } - } - /* Write response */ rsp->status = 0; - rsp->table_rule_add_bulk.n_rules = n_rules; - - /* Free */ - free(found); - free(action_ll_ptr); - free(match_ll_ptr); - free(action_ll); - free(match_ll); - - return rsp; - -fail: - free(found); - free(action_ll_ptr); - free(match_ll_ptr); - free(action_ll); - free(match_ll); - - rsp->status = -1; - rsp->table_rule_add_bulk.n_rules = 0; + rsp->table_rule_add_bulk.n_rules = n_rules_added; return rsp; } @@ -2856,6 +3018,22 @@ pipeline_msg_handle_table_rule_ttl_read(struct pipeline_data *p, return rsp; } +static struct pipeline_msg_rsp * +pipeline_msg_handle_table_rule_time_read(struct pipeline_data *p, + struct pipeline_msg_req *req) +{ + struct pipeline_msg_rsp *rsp = (struct pipeline_msg_rsp *) req; + uint32_t table_id = req->id; + void *data = req->table_rule_time_read.data; + struct rte_table_action *a = p->table_data[table_id].a; + + rsp->status = rte_table_action_time_read(a, + data, + &rsp->table_rule_time_read.timestamp); + + return rsp; +} + static void pipeline_msg_handle(struct pipeline_data *p) { @@ -2932,6 +3110,10 @@ pipeline_msg_handle(struct pipeline_data *p) rsp = pipeline_msg_handle_table_rule_ttl_read(p, req); break; + case PIPELINE_REQ_TABLE_RULE_TIME_READ: + rsp = pipeline_msg_handle_table_rule_time_read(p, req); + break; + default: rsp = (struct pipeline_msg_rsp *) req; rsp->status = -1; diff --git a/examples/multi_process/client_server_mp/mp_server/main.c b/examples/multi_process/client_server_mp/mp_server/main.c index 93a9a089..0ddc63e9 100644 --- a/examples/multi_process/client_server_mp/mp_server/main.c +++ b/examples/multi_process/client_server_mp/mp_server/main.c @@ -12,6 +12,7 @@ #include <sys/queue.h> #include <errno.h> #include <netinet/ip.h> +#include <signal.h> #include <rte_common.h> #include <rte_memory.h> @@ -264,9 +265,23 @@ do_packet_forwarding(void) } } +static void +signal_handler(int signal) +{ + uint16_t port_id; + + if (signal == SIGINT) + RTE_ETH_FOREACH_DEV(port_id) { + rte_eth_dev_stop(port_id); + rte_eth_dev_close(port_id); + } + exit(0); +} + int main(int argc, char *argv[]) { + signal(SIGINT, signal_handler); /* initialise the system */ if (init(argc, argv) < 0 ) return -1; diff --git a/examples/vm_power_manager/power_manager.c b/examples/vm_power_manager/power_manager.c index b7769c3c..f9e8c0ab 100644 --- a/examples/vm_power_manager/power_manager.c +++ b/examples/vm_power_manager/power_manager.c @@ -95,6 +95,7 @@ power_manager_init(void) unsigned int i, num_cpus = 0, num_freqs = 0; int ret = 0; struct core_info *ci; + unsigned int max_core_num; rte_power_set_env(PM_ENV_ACPI_CPUFREQ); @@ -105,7 +106,12 @@ power_manager_init(void) return -1; } - for (i = 0; i < ci->core_count; i++) { + if (ci->core_count > POWER_MGR_MAX_CPUS) + max_core_num = POWER_MGR_MAX_CPUS; + else + max_core_num = ci->core_count; + + for (i = 0; i < max_core_num; i++) { if (ci->cd[i].global_enabled_cpus) { if (rte_power_init(i) < 0) RTE_LOG(ERR, POWER_MANAGER, @@ -165,6 +171,7 @@ power_manager_exit(void) unsigned int i; int ret = 0; struct core_info *ci; + unsigned int max_core_num; ci = get_core_info(); if (!ci) { @@ -173,7 +180,12 @@ power_manager_exit(void) return -1; } - for (i = 0; i < ci->core_count; i++) { + if (ci->core_count > POWER_MGR_MAX_CPUS) + max_core_num = POWER_MGR_MAX_CPUS; + else + max_core_num = ci->core_count; + + for (i = 0; i < max_core_num; i++) { if (ci->cd[i].global_enabled_cpus) { if (rte_power_exit(i) < 0) { RTE_LOG(ERR, POWER_MANAGER, "Unable to shutdown power manager " diff --git a/lib/librte_acl/acl_gen.c b/lib/librte_acl/acl_gen.c index bed66be0..35a0140b 100644 --- a/lib/librte_acl/acl_gen.c +++ b/lib/librte_acl/acl_gen.c @@ -163,7 +163,7 @@ acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one) for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) { if (bits->bits[n / (sizeof(bits_t) * 8)] & - (1 << (n % (sizeof(bits_t) * 8)))) { + (1U << (n % (sizeof(bits_t) * 8)))) { if (zero_one == 1 && last_bit != 1) ranges++; last_bit = 1; diff --git a/lib/librte_compressdev/rte_comp.c b/lib/librte_compressdev/rte_comp.c index c663be59..4634c127 100644 --- a/lib/librte_compressdev/rte_comp.c +++ b/lib/librte_compressdev/rte_comp.c @@ -174,7 +174,7 @@ rte_comp_op_alloc(struct rte_mempool *mempool) int retval; retval = rte_comp_op_raw_bulk_alloc(mempool, &op, 1); - if (unlikely(retval < 0)) + if (unlikely(retval != 1)) return NULL; rte_comp_op_reset(op); @@ -186,12 +186,12 @@ int __rte_experimental rte_comp_op_bulk_alloc(struct rte_mempool *mempool, struct rte_comp_op **ops, uint16_t nb_ops) { - int ret; + int retval; uint16_t i; - ret = rte_comp_op_raw_bulk_alloc(mempool, ops, nb_ops); - if (unlikely(ret < nb_ops)) - return ret; + retval = rte_comp_op_raw_bulk_alloc(mempool, ops, nb_ops); + if (unlikely(retval != nb_ops)) + return 0; for (i = 0; i < nb_ops; i++) rte_comp_op_reset(ops[i]); diff --git a/lib/librte_compressdev/rte_compressdev.h b/lib/librte_compressdev/rte_compressdev.h index 5b4fca4d..7b68170a 100644 --- a/lib/librte_compressdev/rte_compressdev.h +++ b/lib/librte_compressdev/rte_compressdev.h @@ -408,6 +408,13 @@ rte_compressdev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, * @note All compression operations are Out-of-place (OOP) operations, * as the size of the output data is different to the size of the input data. * + * @note The rte_comp_op contains both input and output parameters and is the + * vehicle for the application to pass data into and out of the PMD. While an + * op is inflight, i.e. once it has been enqueued, the private_xform or stream + * attached to it and any mbufs or memory referenced by it should not be altered + * or freed by the application. The PMD may use or change some of this data at + * any time until it has been returned in a dequeue operation. + * * @note The flush flag only applies to operations which return SUCCESS. * In OUT_OF_SPACE cases whether STATEFUL or STATELESS, data in dest buffer * is as if flush flag was FLUSH_NONE. diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c index 62e9ed47..5759ec2d 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -186,7 +186,7 @@ err_devarg: return ret; } -int __rte_experimental +int rte_dev_probe(const char *devargs) { struct eal_dev_mp_req req; @@ -322,7 +322,7 @@ local_dev_remove(struct rte_device *dev) return 0; } -int __rte_experimental +int rte_dev_remove(struct rte_device *dev) { struct eal_dev_mp_req req; diff --git a/lib/librte_eal/common/eal_common_errno.c b/lib/librte_eal/common/eal_common_errno.c index 56b492f5..c63a943b 100644 --- a/lib/librte_eal/common/eal_common_errno.c +++ b/lib/librte_eal/common/eal_common_errno.c @@ -2,6 +2,9 @@ * Copyright(c) 2010-2014 Intel Corporation */ +/* Use XSI-compliant portable version of strerror_r() */ +#undef _GNU_SOURCE + #include <stdint.h> #include <stdio.h> #include <string.h> diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 12dcedf5..87fd9921 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -49,7 +49,7 @@ static uint64_t system_page_sz; * Current known limitations are 39 or 40 bits. Setting the starting address * at 4GB implies there are 508GB or 1020GB for mapping the available * hugepages. This is likely enough for most systems, although a device with - * addressing limitations should call rte_eal_check_dma_mask for ensuring all + * addressing limitations should call rte_mem_check_dma_mask for ensuring all * memory is within supported range. */ static uint64_t baseaddr = 0x100000000; @@ -446,11 +446,12 @@ check_iova(const struct rte_memseg_list *msl __rte_unused, #endif /* check memseg iovas are within the required range based on dma mask */ -int __rte_experimental -rte_eal_check_dma_mask(uint8_t maskbits) +static int __rte_experimental +check_dma_mask(uint8_t maskbits, bool thread_unsafe) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; uint64_t mask; + int ret; /* sanity check */ if (maskbits > MAX_DMA_MASK_BITS) { @@ -462,7 +463,12 @@ rte_eal_check_dma_mask(uint8_t maskbits) /* create dma mask */ mask = ~((1ULL << maskbits) - 1); - if (rte_memseg_walk(check_iova, &mask)) + if (thread_unsafe) + ret = rte_memseg_walk_thread_unsafe(check_iova, &mask); + else + ret = rte_memseg_walk(check_iova, &mask); + + if (ret) /* * Dma mask precludes hugepage usage. * This device can not be used and we do not need to keep @@ -480,6 +486,34 @@ rte_eal_check_dma_mask(uint8_t maskbits) return 0; } +int __rte_experimental +rte_mem_check_dma_mask(uint8_t maskbits) +{ + return check_dma_mask(maskbits, false); +} + +int __rte_experimental +rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits) +{ + return check_dma_mask(maskbits, true); +} + +/* + * Set dma mask to use when memory initialization is done. + * + * This function should ONLY be used by code executed before the memory + * initialization. PMDs should use rte_mem_check_dma_mask if addressing + * limitations by the device. + */ +void __rte_experimental +rte_mem_set_dma_mask(uint8_t maskbits) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits : + RTE_MIN(mcfg->dma_maskbits, maskbits); +} + /* return the number of memory channels */ unsigned rte_memory_get_nchannel(void) { diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index b82f3ddd..e31eca5c 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -222,7 +222,7 @@ eal_plugin_add(const char *path) return -1; } memset(solib, 0, sizeof(*solib)); - strncpy(solib->name, path, PATH_MAX-1); + strlcpy(solib->name, path, PATH_MAX-1); solib->name[PATH_MAX-1] = 0; TAILQ_INSERT_TAIL(&solib_list, solib, next); diff --git a/lib/librte_eal/common/hotplug_mp.c b/lib/librte_eal/common/hotplug_mp.c index 84f59d95..7c9fcc46 100644 --- a/lib/librte_eal/common/hotplug_mp.c +++ b/lib/librte_eal/common/hotplug_mp.c @@ -243,7 +243,7 @@ static void __handle_primary_request(void *param) da = calloc(1, sizeof(*da)); if (da == NULL) { ret = -ENOMEM; - goto quit; + break; } ret = rte_devargs_parse(da, req->devargs); @@ -266,6 +266,8 @@ static void __handle_primary_request(void *param) ret = local_dev_remove(dev); quit: + free(da->args); + free(da); break; default: ret = -EINVAL; @@ -355,6 +357,7 @@ int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req) resp = (struct eal_dev_mp_req *)mp_reply.msgs[0].param; req->result = resp->result; + free(mp_reply.msgs); return ret; } @@ -379,6 +382,7 @@ int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req) if (mp_reply.nb_sent != mp_reply.nb_received) { RTE_LOG(ERR, EAL, "not all secondary reply\n"); + free(mp_reply.msgs); return -1; } @@ -397,6 +401,7 @@ int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req) } } + free(mp_reply.msgs); return 0; } diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index cd6c187c..a9724dc9 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -196,9 +196,6 @@ int rte_eal_hotplug_add(const char *busname, const char *devname, const char *drvargs); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Add matching devices. * * In multi-process, it will request other processes to add the same device. @@ -209,7 +206,7 @@ int rte_eal_hotplug_add(const char *busname, const char *devname, * @return * 0 on success, negative on error. */ -int __rte_experimental rte_dev_probe(const char *devargs); +int rte_dev_probe(const char *devargs); /** * Hotplug remove a given device from a specific bus. @@ -227,9 +224,6 @@ int __rte_experimental rte_dev_probe(const char *devargs); int rte_eal_hotplug_remove(const char *busname, const char *devname); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Remove one device. * * In multi-process, it will request other processes to remove the same device. @@ -240,7 +234,7 @@ int rte_eal_hotplug_remove(const char *busname, const char *devname); * @return * 0 on success, negative on error. */ -int __rte_experimental rte_dev_remove(struct rte_device *dev); +int rte_dev_remove(struct rte_device *dev); /** * Device comparison function. diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index ce937058..d970825d 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -463,8 +463,45 @@ unsigned rte_memory_get_nchannel(void); */ unsigned rte_memory_get_nrank(void); -/* check memsegs iovas are within a range based on dma mask */ -int __rte_experimental rte_eal_check_dma_mask(uint8_t maskbits); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Check if all currently allocated memory segments are compliant with + * supplied DMA address width. + * + * @param maskbits + * Address width to check against. + */ +int __rte_experimental rte_mem_check_dma_mask(uint8_t maskbits); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Check if all currently allocated memory segments are compliant with + * supplied DMA address width. This function will use + * rte_memseg_walk_thread_unsafe instead of rte_memseg_walk implying + * memory_hotplug_lock will not be acquired avoiding deadlock during + * memory initialization. + * + * This function is just for EAL core memory internal use. Drivers should + * use the previous rte_mem_check_dma_mask. + * + * @param maskbits + * Address width to check against. + */ +int __rte_experimental rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Set dma mask to use once memory initialization is done. Previous functions + * rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe can not be + * used safely until memory has been initialized. + */ +void __rte_experimental rte_mem_set_dma_mask(uint8_t maskbits); /** * Drivers based on uio will not load unless physical diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index 412ed2db..80c516d3 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -49,7 +49,7 @@ extern "C" { * 0-15 = release candidates * 16 = release */ -#define RTE_VER_RELEASE 1 +#define RTE_VER_RELEASE 2 /** * Macro to compute a version number usable for comparisons diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index 1973b6e6..c6a6d4f6 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -294,7 +294,6 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, size_t alloc_sz; int allocd_pages; void *ret, *map_addr; - uint64_t mask; alloc_sz = (size_t)pg_sz * n_segs; @@ -322,14 +321,44 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, goto fail; } - if (mcfg->dma_maskbits) { - mask = ~((1ULL << mcfg->dma_maskbits) - 1); - if (rte_eal_check_dma_mask(mask)) { + /* + * Once we have all the memseg lists configured, if there is a dma mask + * set, check iova addresses are not out of range. Otherwise the device + * setting the dma mask could have problems with the mapped memory. + * + * There are two situations when this can happen: + * 1) memory initialization + * 2) dynamic memory allocation + * + * For 1), an error when checking dma mask implies app can not be + * executed. For 2) implies the new memory can not be added. + */ + if (mcfg->dma_maskbits && + rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) { + /* + * Currently this can only happen if IOMMU is enabled + * and the address width supported by the IOMMU hw is + * not enough for using the memory mapped IOVAs. + * + * If IOVA is VA, advice to try with '--iova-mode pa' + * which could solve some situations when IOVA VA is not + * really needed. + */ + RTE_LOG(ERR, EAL, + "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask\n", + __func__); + + /* + * If IOVA is VA and it is possible to run with IOVA PA, + * because user is root, give and advice for solving the + * problem. + */ + if ((rte_eal_iova_mode() == RTE_IOVA_VA) && + rte_eal_using_phys_addrs()) RTE_LOG(ERR, EAL, - "%s(): couldn't allocate memory due to DMA mask\n", + "%s(): Please try initializing EAL with --iova-mode=pa parameter\n", __func__); - goto fail; - } + goto fail; } /* add newly minted memsegs to malloc heap */ diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index 9e61dc41..0da5ad5e 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -349,8 +349,7 @@ rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len, strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == RTE_HEAP_NAME_MAX_LEN) { rte_errno = EINVAL; - ret = -1; - goto unlock; + return -1; } rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c index 8767c722..0f3695c4 100644 --- a/lib/librte_eal/common/rte_service.c +++ b/lib/librte_eal/common/rte_service.c @@ -795,6 +795,9 @@ rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s, return; } + if (f == NULL) + return; + fprintf(f, " %s: stats %d\tcalls %"PRIu64"\tcycles %" PRIu64"\tavg: %"PRIu64"\n", s->spec.name, service_stats_enabled(s), s->calls, diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 39252a88..cbac451e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -700,7 +700,7 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) bool call = false; int n, bytes_read; struct rte_intr_source *src; - struct rte_intr_callback *cb; + struct rte_intr_callback *cb, *next; union rte_intr_read_buffer buf; struct rte_intr_callback active_cb; @@ -780,6 +780,23 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) "descriptor %d: %s\n", events[n].data.fd, strerror(errno)); + /* + * The device is unplugged or buggy, remove + * it as an interrupt source and return to + * force the wait list to be rebuilt. + */ + rte_spinlock_lock(&intr_lock); + TAILQ_REMOVE(&intr_sources, src, next); + rte_spinlock_unlock(&intr_lock); + + for (cb = TAILQ_FIRST(&src->callbacks); cb; + cb = next) { + next = TAILQ_NEXT(cb, next); + TAILQ_REMOVE(&src->callbacks, cb, next); + free(cb); + } + free(src); + return -1; } else if (bytes_read == 0) RTE_LOG(ERR, EAL, "Read nothing from file " "descriptor %d\n", events[n].data.fd); diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index fce86fda..c1b5e079 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -1393,6 +1393,18 @@ eal_legacy_hugepage_init(void) addr = RTE_PTR_ADD(addr, (size_t)page_sz); } + if (mcfg->dma_maskbits && + rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) { + RTE_LOG(ERR, EAL, + "%s(): couldnt allocate memory due to IOVA exceeding limits of current DMA mask.\n", + __func__); + if (rte_eal_iova_mode() == RTE_IOVA_VA && + rte_eal_using_phys_addrs()) + RTE_LOG(ERR, EAL, + "%s(): Please try initializing EAL with --iova-mode=pa parameter.\n", + __func__); + goto fail; + } return 0; } @@ -1628,6 +1640,14 @@ eal_legacy_hugepage_init(void) rte_fbarray_destroy(&msl->memseg_arr); } + if (mcfg->dma_maskbits && + rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) { + RTE_LOG(ERR, EAL, + "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask.\n", + __func__); + goto fail; + } + return 0; fail: diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 04f62424..3fe78260 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -259,6 +259,8 @@ DPDK_18.08 { DPDK_18.11 { global: + rte_dev_probe; + rte_dev_remove; rte_eal_get_runtime_dir; rte_eal_hotplug_add; rte_eal_hotplug_remove; @@ -285,8 +287,6 @@ EXPERIMENTAL { rte_dev_is_probed; rte_dev_iterator_init; rte_dev_iterator_next; - rte_dev_probe; - rte_dev_remove; rte_devargs_add; rte_devargs_dump; rte_devargs_insert; @@ -295,7 +295,6 @@ EXPERIMENTAL { rte_devargs_parsef; rte_devargs_remove; rte_devargs_type_count; - rte_eal_check_dma_mask; rte_eal_cleanup; rte_fbarray_attach; rte_fbarray_destroy; @@ -331,9 +330,12 @@ EXPERIMENTAL { rte_malloc_heap_socket_is_external; rte_mem_alloc_validator_register; rte_mem_alloc_validator_unregister; + rte_mem_check_dma_mask; + rte_mem_check_dma_mask_thread_unsafe; rte_mem_event_callback_register; rte_mem_event_callback_unregister; rte_mem_iova2virt; + rte_mem_set_dma_mask; rte_mem_virt2memseg; rte_mem_virt2memseg_list; rte_memseg_contig_walk; diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c index 9d348138..8eaa5fcc 100644 --- a/lib/librte_ethdev/rte_ethdev.c +++ b/lib/librte_ethdev/rte_ethdev.c @@ -187,7 +187,7 @@ enum { STAT_QMAP_RX }; -int __rte_experimental +int rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs_str) { int ret; @@ -288,7 +288,7 @@ error: return ret; } -uint16_t __rte_experimental +uint16_t rte_eth_iterator_next(struct rte_dev_iterator *iter) { if (iter->cls == NULL) /* invalid ethdev iterator */ @@ -317,7 +317,7 @@ rte_eth_iterator_next(struct rte_dev_iterator *iter) return RTE_MAX_ETHPORTS; } -void __rte_experimental +void rte_eth_iterator_cleanup(struct rte_dev_iterator *iter) { if (iter->bus_str == NULL) @@ -3647,11 +3647,10 @@ rte_eth_dev_destroy(struct rte_eth_dev *ethdev, return -ENODEV; RTE_FUNC_PTR_OR_ERR_RET(*ethdev_uninit, -EINVAL); - if (ethdev_uninit) { - ret = ethdev_uninit(ethdev); - if (ret) - return ret; - } + + ret = ethdev_uninit(ethdev); + if (ret) + return ret; return rte_eth_dev_release_port(ethdev); } diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h index 769a6943..8a92d91e 100644 --- a/lib/librte_ethdev/rte_ethdev.h +++ b/lib/librte_ethdev/rte_ethdev.h @@ -167,9 +167,6 @@ extern int rte_eth_dev_logtype; struct rte_mbuf; /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * * Initializes a device iterator. * * This iterator allows accessing a list of devices matching some devargs. @@ -185,13 +182,9 @@ struct rte_mbuf; * @return * 0 on successful initialization, negative otherwise. */ -__rte_experimental int rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * * Iterates on devices with devargs filter. * The ownership is not checked. * @@ -205,13 +198,9 @@ int rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs); * @return * A port id if found, RTE_MAX_ETHPORTS otherwise. */ -__rte_experimental uint16_t rte_eth_iterator_next(struct rte_dev_iterator *iter); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * * Free some allocated fields of the iterator. * * This function is automatically called by rte_eth_iterator_next() @@ -223,7 +212,6 @@ uint16_t rte_eth_iterator_next(struct rte_dev_iterator *iter); * Device iterator handle initialized by rte_eth_iterator_init(). * The fields bus_str and cls_str are freed if needed. */ -__rte_experimental void rte_eth_iterator_cleanup(struct rte_dev_iterator *iter); /** diff --git a/lib/librte_ethdev/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map index 3560c288..92ac3de2 100644 --- a/lib/librte_ethdev/rte_ethdev_version.map +++ b/lib/librte_ethdev/rte_ethdev_version.map @@ -223,6 +223,9 @@ DPDK_18.11 { rte_eth_dev_rx_offload_name; rte_eth_dev_tx_offload_name; + rte_eth_iterator_cleanup; + rte_eth_iterator_init; + rte_eth_iterator_next; } DPDK_18.08; @@ -242,9 +245,6 @@ EXPERIMENTAL { rte_eth_dev_owner_set; rte_eth_dev_owner_unset; rte_eth_dev_rx_intr_ctl_q_get_fd; - rte_eth_iterator_cleanup; - rte_eth_iterator_init; - rte_eth_iterator_next; rte_eth_switch_domain_alloc; rte_eth_switch_domain_free; rte_flow_conv; diff --git a/lib/librte_ip_frag/ip_frag_common.h b/lib/librte_ip_frag/ip_frag_common.h index 0f62e2e1..a17a7407 100644 --- a/lib/librte_ip_frag/ip_frag_common.h +++ b/lib/librte_ip_frag/ip_frag_common.h @@ -58,20 +58,14 @@ struct rte_mbuf *ipv6_frag_reassemble(struct ip_frag_pkt *fp); static inline int ip_frag_key_is_empty(const struct ip_frag_key * key) { - uint32_t i; - for (i = 0; i < RTE_MIN(key->key_len, RTE_DIM(key->src_dst)); i++) - if (key->src_dst[i] != 0) - return 0; - return 1; + return (key->key_len == 0); } -/* empty the key */ +/* invalidate the key */ static inline void ip_frag_key_invalidate(struct ip_frag_key * key) { - uint32_t i; - for (i = 0; i < key->key_len; i++) - key->src_dst[i] = 0; + key->key_len = 0; } /* compare two keys */ @@ -80,7 +74,7 @@ ip_frag_key_cmp(const struct ip_frag_key * k1, const struct ip_frag_key * k2) { uint32_t i; uint64_t val; - val = k1->id ^ k2->id; + val = k1->id_key_len ^ k2->id_key_len; for (i = 0; i < k1->key_len; i++) val |= k1->src_dst[i] ^ k2->src_dst[i]; return val; diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h index 7f425f61..a4ccaf9d 100644 --- a/lib/librte_ip_frag/rte_ip_frag.h +++ b/lib/librte_ip_frag/rte_ip_frag.h @@ -44,9 +44,17 @@ struct ip_frag { /** @internal <src addr, dst_addr, id> to uniquely identify fragmented datagram. */ struct ip_frag_key { - uint64_t src_dst[4]; /**< src address, first 8 bytes used for IPv4 */ - uint32_t id; /**< dst address */ - uint32_t key_len; /**< src/dst key length */ + uint64_t src_dst[4]; + /**< src and dst address, only first 8 bytes used for IPv4 */ + RTE_STD_C11 + union { + uint64_t id_key_len; /**< combined for easy fetch */ + __extension__ + struct { + uint32_t id; /**< packet id */ + uint32_t key_len; /**< src/dst key length */ + }; + }; }; /** diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c index 4956b99e..1029b7ab 100644 --- a/lib/librte_ip_frag/rte_ipv4_reassembly.c +++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c @@ -36,8 +36,11 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp) /* previous fragment found. */ if(fp->frags[i].ofs + fp->frags[i].len == ofs) { + RTE_ASSERT(curr_idx != i); + /* adjust start of the last fragment data. */ - rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); + rte_pktmbuf_adj(m, + (uint16_t)(m->l2_len + m->l3_len)); rte_pktmbuf_chain(fp->frags[i].mb, m); /* this mbuf should not be accessed directly */ @@ -96,14 +99,14 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp) */ struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, - struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, - struct ipv4_hdr *ip_hdr) + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, + struct ipv4_hdr *ip_hdr) { struct ip_frag_pkt *fp; struct ip_frag_key key; const unaligned_uint64_t *psd; - uint16_t ip_len; uint16_t flag_offset, ip_ofs, ip_flag; + int32_t ip_len; flag_offset = rte_be_to_cpu_16(ip_hdr->fragment_offset); ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK); @@ -116,12 +119,11 @@ rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, key.key_len = IPV4_KEYLEN; ip_ofs *= IPV4_HDR_OFFSET_UNITS; - ip_len = (uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) - - mb->l3_len); + ip_len = rte_be_to_cpu_16(ip_hdr->total_length) - mb->l3_len; IP_FRAG_LOG(DEBUG, "%s:%d:\n" "mbuf: %p, tms: %" PRIu64 - ", key: <%" PRIx64 ", %#x>, ofs: %u, len: %u, flags: %#x\n" + ", key: <%" PRIx64 ", %#x>, ofs: %u, len: %d, flags: %#x\n" "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " "max_entries: %u, use_entries: %u\n\n", __func__, __LINE__, @@ -129,6 +131,12 @@ rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, tbl->use_entries); + /* check that fragment length is greater then zero. */ + if (ip_len <= 0) { + IP_FRAG_MBUF2DR(dr, mb); + return NULL; + } + /* try to find/add entry into the fragment's table. */ if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) { IP_FRAG_MBUF2DR(dr, mb); diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c index db249fe6..855e3f74 100644 --- a/lib/librte_ip_frag/rte_ipv6_reassembly.c +++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c @@ -59,8 +59,11 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp) /* previous fragment found. */ if (fp->frags[i].ofs + fp->frags[i].len == ofs) { + RTE_ASSERT(curr_idx != i); + /* adjust start of the last fragment data. */ - rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); + rte_pktmbuf_adj(m, + (uint16_t)(m->l2_len + m->l3_len)); rte_pktmbuf_chain(fp->frags[i].mb, m); /* this mbuf should not be accessed directly */ @@ -135,12 +138,13 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp) #define FRAG_OFFSET(x) (rte_cpu_to_be_16(x) >> 3) struct rte_mbuf * rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, - struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, - struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr) + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, + struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr) { struct ip_frag_pkt *fp; struct ip_frag_key key; - uint16_t ip_len, ip_ofs; + uint16_t ip_ofs; + int32_t ip_len; rte_memcpy(&key.src_dst[0], ip_hdr->src_addr, 16); rte_memcpy(&key.src_dst[2], ip_hdr->dst_addr, 16); @@ -151,15 +155,17 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, ip_ofs = FRAG_OFFSET(frag_hdr->frag_data) * 8; /* - * as per RFC2460, payload length contains all extension headers as well. - * since we don't support anything but frag headers, this is what we remove - * from the payload len. + * as per RFC2460, payload length contains all extension headers + * as well. + * since we don't support anything but frag headers, + * this is what we remove from the payload len. */ ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr); IP_FRAG_LOG(DEBUG, "%s:%d:\n" "mbuf: %p, tms: %" PRIu64 - ", key: <" IPv6_KEY_BYTES_FMT ", %#x>, ofs: %u, len: %u, flags: %#x\n" + ", key: <" IPv6_KEY_BYTES_FMT ", %#x>, " + "ofs: %u, len: %d, flags: %#x\n" "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " "max_entries: %u, use_entries: %u\n\n", __func__, __LINE__, @@ -168,6 +174,12 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, tbl->use_entries); + /* check that fragment length is greater then zero. */ + if (ip_len <= 0) { + IP_FRAG_MBUF2DR(dr, mb); + return NULL; + } + /* try to find/add entry into the fragment's table. */ fp = ip_frag_find(tbl, dr, &key, tms); if (fp == NULL) { diff --git a/lib/librte_net/rte_gre.h b/lib/librte_net/rte_gre.h index 69499bb8..05aa9d14 100644 --- a/lib/librte_net/rte_gre.h +++ b/lib/librte_net/rte_gre.h @@ -15,6 +15,7 @@ extern "C" { /** * GRE Header */ +__extension__ struct gre_hdr { #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN uint16_t res2:4; /**< Reserved */ diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index 73ac3a95..dca0830e 100644 --- a/lib/librte_net/rte_net_crc.c +++ b/lib/librte_net/rte_net_crc.c @@ -69,8 +69,8 @@ reflect_32bits(uint32_t val) uint32_t i, res = 0; for (i = 0; i < 32; i++) - if ((val & (1 << i)) != 0) - res |= (uint32_t)(1 << (31 - i)); + if ((val & (1U << i)) != 0) + res |= (uint32_t)(1U << (31 - i)); return res; } diff --git a/lib/librte_ring/rte_ring_c11_mem.h b/lib/librte_ring/rte_ring_c11_mem.h index 94df3c4a..7bc74a4c 100644 --- a/lib/librte_ring/rte_ring_c11_mem.h +++ b/lib/librte_ring/rte_ring_c11_mem.h @@ -57,23 +57,27 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp, uint32_t *free_entries) { const uint32_t capacity = r->capacity; + uint32_t cons_tail; unsigned int max = n; int success; + *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE); do { /* Reset n to the initial burst count */ n = max; - *old_head = __atomic_load_n(&r->prod.head, + /* load-acquire synchronize with store-release of ht->tail + * in update_tail. + */ + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); - /* - * The subtraction is done between two unsigned 32bits value + /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * *old_head > cons_tail). So 'free_entries' is always between 0 * and capacity (which is < size). */ - *free_entries = (capacity + r->cons.tail - *old_head); + *free_entries = (capacity + cons_tail - *old_head); /* check that we have enough room in ring */ if (unlikely(n > *free_entries)) @@ -87,6 +91,7 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp, if (is_sp) r->prod.head = *new_head, success = 1; else + /* on failure, *old_head is updated */ success = __atomic_compare_exchange_n(&r->prod.head, old_head, *new_head, 0, __ATOMIC_ACQUIRE, @@ -125,13 +130,19 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc, uint32_t *entries) { unsigned int max = n; + uint32_t prod_tail; int success; /* move cons.head atomically */ + *old_head = __atomic_load_n(&r->cons.head, __ATOMIC_ACQUIRE); do { /* Restore n as it may change every loop */ n = max; - *old_head = __atomic_load_n(&r->cons.head, + + /* this load-acquire synchronize with store-release of ht->tail + * in update_tail. + */ + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); /* The subtraction is done between two unsigned 32bits value @@ -139,7 +150,7 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc, * cons_head > prod_tail). So 'entries' is always between 0 * and size(ring)-1. */ - *entries = (r->prod.tail - *old_head); + *entries = (prod_tail - *old_head); /* Set the actual entries for dequeue */ if (n > *entries) @@ -152,6 +163,7 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc, if (is_sc) r->cons.head = *new_head, success = 1; else + /* on failure, *old_head will be updated */ success = __atomic_compare_exchange_n(&r->cons.head, old_head, *new_head, 0, __ATOMIC_ACQUIRE, diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c index c2c5dff1..e7d849ee 100644 --- a/lib/librte_vhost/vdpa.c +++ b/lib/librte_vhost/vdpa.c @@ -63,6 +63,9 @@ rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr, break; } + if (i == MAX_VHOST_DEVICE) + return -1; + sprintf(device_name, "vdpa-dev-%d", i); dev = rte_zmalloc(device_name, sizeof(struct rte_vdpa_device), RTE_CACHE_LINE_SIZE); diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index b4abad30..760f4219 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -275,7 +275,8 @@ struct vring_packed_desc_event { (1ULL << VIRTIO_RING_F_EVENT_IDX) | \ (1ULL << VIRTIO_NET_F_MTU) | \ (1ULL << VIRTIO_F_IN_ORDER) | \ - (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + (1ULL << VIRTIO_F_IOMMU_PLATFORM) | \ + (1ULL << VIRTIO_F_RING_PACKED)) struct guest_page { diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c index 9811a232..5472bead 100644 --- a/lib/librte_vhost/vhost_crypto.c +++ b/lib/librte_vhost/vhost_crypto.c @@ -238,7 +238,7 @@ transform_cipher_param(struct rte_crypto_sym_xform *xform, return ret; xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; - xform->cipher.algo = (uint32_t)ret; + xform->cipher.algo = (enum rte_crypto_cipher_algorithm)ret; xform->cipher.key.length = param->cipher_key_len; if (xform->cipher.key.length > 0) xform->cipher.key.data = param->cipher_key_buf; @@ -288,7 +288,7 @@ transform_chain_param(struct rte_crypto_sym_xform *xforms, if (unlikely(ret < 0)) return ret; xform_cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER; - xform_cipher->cipher.algo = (uint32_t)ret; + xform_cipher->cipher.algo = (enum rte_crypto_cipher_algorithm)ret; xform_cipher->cipher.key.length = param->cipher_key_len; xform_cipher->cipher.key.data = param->cipher_key_buf; ret = get_iv_len(xform_cipher->cipher.algo); @@ -302,7 +302,7 @@ transform_chain_param(struct rte_crypto_sym_xform *xforms, ret = auth_algo_transform(param->hash_algo); if (unlikely(ret < 0)) return ret; - xform_auth->auth.algo = (uint32_t)ret; + xform_auth->auth.algo = (enum rte_crypto_auth_algorithm)ret; xform_auth->auth.digest_length = param->digest_len; xform_auth->auth.key.length = param->auth_key_len; xform_auth->auth.key.data = param->auth_key_buf; diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index 508228a3..cc154f31 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -696,10 +696,27 @@ vhost_user_set_vring_base(struct virtio_net **pdev, int main_fd __rte_unused) { struct virtio_net *dev = *pdev; - dev->virtqueue[msg->payload.state.index]->last_used_idx = - msg->payload.state.num; - dev->virtqueue[msg->payload.state.index]->last_avail_idx = - msg->payload.state.num; + struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; + uint64_t val = msg->payload.state.num; + + if (vq_is_packed(dev)) { + /* + * Bit[0:14]: avail index + * Bit[15]: avail wrap counter + */ + vq->last_avail_idx = val & 0x7fff; + vq->avail_wrap_counter = !!(val & (0x1 << 15)); + /* + * Set used index to same value as available one, as + * their values should be the same since ring processing + * was stopped at get time. + */ + vq->last_used_idx = vq->last_avail_idx; + vq->used_wrap_counter = vq->avail_wrap_counter; + } else { + vq->last_used_idx = msg->payload.state.num; + vq->last_avail_idx = msg->payload.state.num; + } return VH_RESULT_OK; } @@ -1208,6 +1225,7 @@ vhost_user_get_vring_base(struct virtio_net **pdev, { struct virtio_net *dev = *pdev; struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; + uint64_t val; /* We have to stop the queue (virtio) if it is running. */ vhost_destroy_device_notify(dev); @@ -1215,8 +1233,18 @@ vhost_user_get_vring_base(struct virtio_net **pdev, dev->flags &= ~VIRTIO_DEV_READY; dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED; - /* Here we are safe to get the last avail index */ - msg->payload.state.num = vq->last_avail_idx; + /* Here we are safe to get the indexes */ + if (vq_is_packed(dev)) { + /* + * Bit[0:14]: avail index + * Bit[15]: avail wrap counter + */ + val = vq->last_avail_idx & 0x7fff; + val |= vq->avail_wrap_counter << 15; + msg->payload.state.num = val; + } else { + msg->payload.state.num = vq->last_avail_idx; + } RTE_LOG(INFO, VHOST_CONFIG, "vring base idx:%d file:%d\n", msg->payload.state.index, diff --git a/meson.build b/meson.build index 9f626972..6d25b90f 100644 --- a/meson.build +++ b/meson.build @@ -2,7 +2,7 @@ # Copyright(c) 2017 Intel Corporation project('DPDK', 'C', - version: '18.11.0-rc1', + version: '18.11.0-rc2', license: 'BSD', default_options: ['buildtype=release', 'default_library=static'], meson_version: '>= 0.41' diff --git a/mk/rte.cpuflags.mk b/mk/rte.cpuflags.mk index 43ed8415..c3291b17 100644 --- a/mk/rte.cpuflags.mk +++ b/mk/rte.cpuflags.mk @@ -68,6 +68,11 @@ endif ifneq ($(filter $(AUTO_CPUFLAGS),__AVX512F__),) ifeq ($(CONFIG_RTE_ENABLE_AVX512),y) CPUFLAGS += AVX512F +else +# disable AVX512F support of gcc as a workaround for Bug 97 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +MACHINE_CFLAGS += -mno-avx512f +endif endif endif diff --git a/test/test/test.c b/test/test/test.c index ffa9c366..24df6299 100644 --- a/test/test/test.c +++ b/test/test/test.c @@ -75,15 +75,47 @@ do_recursive_call(void) int last_test_result; +#define MAX_EXTRA_ARGS 32 + int main(int argc, char **argv) { #ifdef RTE_LIBRTE_CMDLINE struct cmdline *cl; #endif + char *extra_args; int ret; - ret = rte_eal_init(argc, argv); + extra_args = getenv("DPDK_TEST_PARAMS"); + if (extra_args != NULL && strlen(extra_args) > 0) { + char **all_argv; + char *eargv[MAX_EXTRA_ARGS]; + int all_argc; + int eargc; + int i; + + RTE_LOG(INFO, APP, "Using additional DPDK_TEST_PARAMS: '%s'\n", + extra_args); + eargc = rte_strsplit(extra_args, strlen(extra_args), + eargv, MAX_EXTRA_ARGS, ' '); + + /* merge argc/argv and the environment args */ + all_argc = argc + eargc; + all_argv = malloc(sizeof(*all_argv) * (all_argc + 1)); + if (all_argv == NULL) + return -1; + + for (i = 0; i < argc; i++) + all_argv[i] = argv[i]; + for (i = 0; i < eargc; i++) + all_argv[argc + i] = eargv[i]; + all_argv[all_argc] = NULL; + + /* call eal_init with combined args */ + ret = rte_eal_init(all_argc, all_argv); + free(all_argv); + } else + ret = rte_eal_init(argc, argv); if (ret < 0) { ret = -1; goto out; diff --git a/test/test/test_compressdev.c b/test/test/test_compressdev.c index 86453882..5d5e5190 100644 --- a/test/test/test_compressdev.c +++ b/test/test/test_compressdev.c @@ -10,6 +10,7 @@ #include <rte_mempool.h> #include <rte_mbuf.h> #include <rte_compressdev.h> +#include <rte_string_fns.h> #include "test_compressdev_test_buffer.h" #include "test.h" @@ -856,13 +857,20 @@ test_deflate_comp_decomp(const char * const test_bufs[], &compress_xforms[xform_idx]->compress; enum rte_comp_huffman huffman_type = compress_xform->deflate.huffman; - RTE_LOG(DEBUG, USER1, "Buffer %u compressed from %u to %u bytes " - "(level = %d, huffman = %s)\n", - buf_idx[priv_data->orig_idx], + char engine[22]; + if (zlib_dir == ZLIB_COMPRESS || zlib_dir == ZLIB_ALL) + strlcpy(engine, "zlib (direct, no pmd)", 22); + else + strlcpy(engine, "pmd", 22); + + RTE_LOG(DEBUG, USER1, "Buffer %u compressed by %s from %u to" + " %u bytes (level = %d, huffman = %s)\n", + buf_idx[priv_data->orig_idx], engine, ops_processed[i]->consumed, ops_processed[i]->produced, compress_xform->level, huffman_type_strings[huffman_type]); - RTE_LOG(DEBUG, USER1, "Compression ratio = %.2f", + RTE_LOG(DEBUG, USER1, "Compression ratio = %.2f\n", + ops_processed[i]->consumed == 0 ? 0 : (float)ops_processed[i]->produced / ops_processed[i]->consumed * 100); ops[i] = NULL; @@ -1058,8 +1066,14 @@ test_deflate_comp_decomp(const char * const test_bufs[], for (i = 0; i < num_bufs; i++) { priv_data = (struct priv_op_data *)(ops_processed[i] + 1); - RTE_LOG(DEBUG, USER1, "Buffer %u decompressed from %u to %u bytes\n", - buf_idx[priv_data->orig_idx], + char engine[22]; + if (zlib_dir == ZLIB_DECOMPRESS || zlib_dir == ZLIB_ALL) + strlcpy(engine, "zlib (direct, no pmd)", 22); + else + strlcpy(engine, "pmd", 22); + RTE_LOG(DEBUG, USER1, + "Buffer %u decompressed by %s from %u to %u bytes\n", + buf_idx[priv_data->orig_idx], engine, ops_processed[i]->consumed, ops_processed[i]->produced); ops[i] = NULL; } diff --git a/test/test/test_cryptodev_asym.c b/test/test/test_cryptodev_asym.c index 2fdfc1df..a899f997 100644 --- a/test/test/test_cryptodev_asym.c +++ b/test/test/test_cryptodev_asym.c @@ -153,10 +153,13 @@ test_rsa_sign_verify(void) goto error_exit; } status = TEST_SUCCESS; - int ret = 0; - ret = rsa_verify(&rsaplaintext, result_op); - if (ret) + if (result_op->status != RTE_CRYPTO_OP_STATUS_SUCCESS) { + RTE_LOG(ERR, USER1, + "line %u FAILED: %s", + __LINE__, "Failed to process asym crypto op"); status = TEST_FAILED; + goto error_exit; + } error_exit: diff --git a/test/test/test_func_reentrancy.c b/test/test/test_func_reentrancy.c index a0ed19d7..e27d1e02 100644 --- a/test/test/test_func_reentrancy.c +++ b/test/test/test_func_reentrancy.c @@ -45,13 +45,14 @@ typedef int (*case_func_t)(void* arg); typedef void (*case_clean_t)(unsigned lcore_id); #define MAX_STRING_SIZE (256) -#define MAX_ITER_TIMES (16) -#define MAX_LPM_ITER_TIMES (8) +#define MAX_ITER_MULTI (16) +#define MAX_ITER_ONCE (4) +#define MAX_LPM_ITER_TIMES (6) #define MEMPOOL_ELT_SIZE (sizeof(uint32_t)) #define MEMPOOL_SIZE (4) -#define MAX_LCORES RTE_MAX_MEMZONE / (MAX_ITER_TIMES * 4U) +#define MAX_LCORES (RTE_MAX_MEMZONE / (MAX_ITER_MULTI * 4U)) static rte_atomic32_t obj_count = RTE_ATOMIC32_INIT(0); static rte_atomic32_t synchro = RTE_ATOMIC32_INIT(0); @@ -88,7 +89,7 @@ ring_clean(unsigned int lcore_id) char ring_name[MAX_STRING_SIZE]; int i; - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_MULTI; i++) { snprintf(ring_name, sizeof(ring_name), "fr_test_%d_%d", lcore_id, i); rp = rte_ring_lookup(ring_name); @@ -108,25 +109,22 @@ ring_create_lookup(__attribute__((unused)) void *arg) WAIT_SYNCHRO_FOR_SLAVES(); /* create the same ring simultaneously on all threads */ - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_ONCE; i++) { rp = rte_ring_create("fr_test_once", 4096, SOCKET_ID_ANY, 0); if (rp != NULL) rte_atomic32_inc(&obj_count); } /* create/lookup new ring several times */ - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_MULTI; i++) { snprintf(ring_name, sizeof(ring_name), "fr_test_%d_%d", lcore_self, i); rp = rte_ring_create(ring_name, 4096, SOCKET_ID_ANY, 0); if (NULL == rp) return -1; if (rte_ring_lookup(ring_name) != rp) return -1; - } - /* verify all ring created successful */ - for (i = 0; i < MAX_ITER_TIMES; i++) { - snprintf(ring_name, sizeof(ring_name), "fr_test_%d_%d", lcore_self, i); + /* verify all ring created successful */ if (rte_ring_lookup(ring_name) == NULL) return -1; } @@ -151,7 +149,7 @@ mempool_clean(unsigned int lcore_id) int i; /* verify all ring created successful */ - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_MULTI; i++) { snprintf(mempool_name, sizeof(mempool_name), "fr_test_%d_%d", lcore_id, i); mp = rte_mempool_lookup(mempool_name); @@ -171,7 +169,7 @@ mempool_create_lookup(__attribute__((unused)) void *arg) WAIT_SYNCHRO_FOR_SLAVES(); /* create the same mempool simultaneously on all threads */ - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_ONCE; i++) { mp = rte_mempool_create("fr_test_once", MEMPOOL_SIZE, MEMPOOL_ELT_SIZE, 0, 0, NULL, NULL, @@ -182,7 +180,7 @@ mempool_create_lookup(__attribute__((unused)) void *arg) } /* create/lookup new ring several times */ - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_MULTI; i++) { snprintf(mempool_name, sizeof(mempool_name), "fr_test_%d_%d", lcore_self, i); mp = rte_mempool_create(mempool_name, MEMPOOL_SIZE, MEMPOOL_ELT_SIZE, 0, 0, @@ -193,11 +191,8 @@ mempool_create_lookup(__attribute__((unused)) void *arg) return -1; if (rte_mempool_lookup(mempool_name) != mp) return -1; - } - /* verify all ring created successful */ - for (i = 0; i < MAX_ITER_TIMES; i++) { - snprintf(mempool_name, sizeof(mempool_name), "fr_test_%d_%d", lcore_self, i); + /* verify all ring created successful */ if (rte_mempool_lookup(mempool_name) == NULL) return -1; } @@ -213,7 +208,7 @@ hash_clean(unsigned lcore_id) struct rte_hash *handle; int i; - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_MULTI; i++) { snprintf(hash_name, sizeof(hash_name), "fr_test_%d_%d", lcore_id, i); if ((handle = rte_hash_find_existing(hash_name)) != NULL) @@ -241,14 +236,14 @@ hash_create_free(__attribute__((unused)) void *arg) /* create the same hash simultaneously on all threads */ hash_params.name = "fr_test_once"; - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_ONCE; i++) { handle = rte_hash_create(&hash_params); if (handle != NULL) rte_atomic32_inc(&obj_count); } /* create mutiple times simultaneously */ - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_MULTI; i++) { snprintf(hash_name, sizeof(hash_name), "fr_test_%d_%d", lcore_self, i); hash_params.name = hash_name; @@ -261,12 +256,8 @@ hash_create_free(__attribute__((unused)) void *arg) return -1; rte_hash_free(handle); - } - - /* verify free correct */ - for (i = 0; i < MAX_ITER_TIMES; i++) { - snprintf(hash_name, sizeof(hash_name), "fr_test_%d_%d", lcore_self, i); + /* verify free correct */ if (NULL != rte_hash_find_existing(hash_name)) return -1; } @@ -281,7 +272,7 @@ fbk_clean(unsigned lcore_id) struct rte_fbk_hash_table *handle; int i; - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_MULTI; i++) { snprintf(fbk_name, sizeof(fbk_name), "fr_test_%d_%d", lcore_id, i); if ((handle = rte_fbk_hash_find_existing(fbk_name)) != NULL) @@ -309,14 +300,14 @@ fbk_create_free(__attribute__((unused)) void *arg) /* create the same fbk hash table simultaneously on all threads */ fbk_params.name = "fr_test_once"; - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_ONCE; i++) { handle = rte_fbk_hash_create(&fbk_params); if (handle != NULL) rte_atomic32_inc(&obj_count); } /* create mutiple fbk tables simultaneously */ - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_MULTI; i++) { snprintf(fbk_name, sizeof(fbk_name), "fr_test_%d_%d", lcore_self, i); fbk_params.name = fbk_name; @@ -329,12 +320,8 @@ fbk_create_free(__attribute__((unused)) void *arg) return -1; rte_fbk_hash_free(handle); - } - - /* verify free correct */ - for (i = 0; i < MAX_ITER_TIMES; i++) { - snprintf(fbk_name, sizeof(fbk_name), "fr_test_%d_%d", lcore_self, i); + /* verify free correct */ if (NULL != rte_fbk_hash_find_existing(fbk_name)) return -1; } @@ -375,7 +362,7 @@ lpm_create_free(__attribute__((unused)) void *arg) WAIT_SYNCHRO_FOR_SLAVES(); /* create the same lpm simultaneously on all threads */ - for (i = 0; i < MAX_ITER_TIMES; i++) { + for (i = 0; i < MAX_ITER_ONCE; i++) { lpm = rte_lpm_create("fr_test_once", SOCKET_ID_ANY, &config); if (lpm != NULL) rte_atomic32_inc(&obj_count); @@ -393,11 +380,8 @@ lpm_create_free(__attribute__((unused)) void *arg) return -1; rte_lpm_free(lpm); - } - /* verify free correct */ - for (i = 0; i < MAX_LPM_ITER_TIMES; i++) { - snprintf(lpm_name, sizeof(lpm_name), "fr_test_%d_%d", lcore_self, i); + /* verify free correct */ if (NULL != rte_lpm_find_existing(lpm_name)) return -1; } diff --git a/test/test/test_hash.c b/test/test/test_hash.c index 6d06eb24..fe607fad 100644 --- a/test/test/test_hash.c +++ b/test/test/test_hash.c @@ -80,29 +80,23 @@ static uint32_t pseudo_hash(__attribute__((unused)) const void *keys, return 3; } +#define UNIT_TEST_HASH_VERBOSE 0 /* * Print out result of unit test hash operation. */ -#if defined(UNIT_TEST_HASH_VERBOSE) static void print_key_info(const char *msg, const struct flow_key *key, int32_t pos) { - uint8_t *p = (uint8_t *)key; - unsigned i; - - printf("%s key:0x", msg); - for (i = 0; i < sizeof(struct flow_key); i++) { - printf("%02X", p[i]); + if (UNIT_TEST_HASH_VERBOSE) { + const uint8_t *p = (const uint8_t *)key; + unsigned int i; + + printf("%s key:0x", msg); + for (i = 0; i < sizeof(struct flow_key); i++) + printf("%02X", p[i]); + printf(" @ pos %d\n", pos); } - printf(" @ pos %d\n", pos); -} -#else -static void print_key_info(__attribute__((unused)) const char *msg, - __attribute__((unused)) const struct flow_key *key, - __attribute__((unused)) int32_t pos) -{ } -#endif /* Keys used by unit test functions */ static struct flow_key keys[5] = { { diff --git a/test/test/test_hash_multiwriter.c b/test/test/test_hash_multiwriter.c index d447f6dc..50018db5 100644 --- a/test/test/test_hash_multiwriter.c +++ b/test/test/test_hash_multiwriter.c @@ -39,8 +39,8 @@ struct { struct rte_hash *h; } tbl_multiwriter_test_params; -const uint32_t nb_entries = 16*1024*1024; -const uint32_t nb_total_tsx_insertion = 15*1024*1024; +const uint32_t nb_entries = 5*1024*1024; +const uint32_t nb_total_tsx_insertion = 4.5*1024*1024; uint32_t rounded_nb_total_tsx_insertion; static rte_atomic64_t gcycles; |