diff options
Diffstat (limited to 'build/external/patches/dpdk_20.02/0014-net-iavf-add-RSS-hash-parsing-in-AVX-path.patch')
-rw-r--r-- | build/external/patches/dpdk_20.02/0014-net-iavf-add-RSS-hash-parsing-in-AVX-path.patch | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/build/external/patches/dpdk_20.02/0014-net-iavf-add-RSS-hash-parsing-in-AVX-path.patch b/build/external/patches/dpdk_20.02/0014-net-iavf-add-RSS-hash-parsing-in-AVX-path.patch new file mode 100644 index 00000000000..34ce7868456 --- /dev/null +++ b/build/external/patches/dpdk_20.02/0014-net-iavf-add-RSS-hash-parsing-in-AVX-path.patch @@ -0,0 +1,133 @@ +From d338aa7cb45638b3a14177a8d83ef01c4ec20d1b Mon Sep 17 00:00:00 2001 +From: Leyi Rong <leyi.rong@intel.com> +Date: Wed, 8 Apr 2020 14:22:09 +0800 +Subject: [DPDK 14/17] net/iavf: add RSS hash parsing in AVX path + +Support RSS hash parsing from Flex Rx +descriptor in AVX data path. + +Signed-off-by: Leyi Rong <leyi.rong@intel.com> +--- + drivers/net/iavf/iavf_rxtx_vec_avx2.c | 92 ++++++++++++++++++++++++++- + 1 file changed, 90 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c +index 3bf5833fa..22f1b7887 100644 +--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c ++++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c +@@ -698,7 +698,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, + _mm256_set_epi8 + (/* first descriptor */ + 0xFF, 0xFF, +- 0xFF, 0xFF, /* rss not supported */ ++ 0xFF, 0xFF, /* rss hash parsed separately */ + 11, 10, /* octet 10~11, 16 bits vlan_macip */ + 5, 4, /* octet 4~5, 16 bits data_len */ + 0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */ +@@ -707,7 +707,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, + 0xFF, 0xFF, /*pkt_type set as unknown */ + /* second descriptor */ + 0xFF, 0xFF, +- 0xFF, 0xFF, /* rss not supported */ ++ 0xFF, 0xFF, /* rss hash parsed separately */ + 11, 10, /* octet 10~11, 16 bits vlan_macip */ + 5, 4, /* octet 4~5, 16 bits data_len */ + 0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */ +@@ -994,6 +994,94 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, + _mm256_extract_epi32(fdir_id0_7, 4); + } /* if() on fdir_enabled */ + ++ /** ++ * needs to load 2nd 16B of each desc for RSS hash parsing, ++ * will cause performance drop to get into this context. ++ */ ++ if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads & ++ DEV_RX_OFFLOAD_RSS_HASH) { ++ /* load bottom half of every 32B desc */ ++ const __m128i raw_desc_bh7 = ++ _mm_load_si128 ++ ((void *)(&rxdp[7].wb.status_error1)); ++ rte_compiler_barrier(); ++ const __m128i raw_desc_bh6 = ++ _mm_load_si128 ++ ((void *)(&rxdp[6].wb.status_error1)); ++ rte_compiler_barrier(); ++ const __m128i raw_desc_bh5 = ++ _mm_load_si128 ++ ((void *)(&rxdp[5].wb.status_error1)); ++ rte_compiler_barrier(); ++ const __m128i raw_desc_bh4 = ++ _mm_load_si128 ++ ((void *)(&rxdp[4].wb.status_error1)); ++ rte_compiler_barrier(); ++ const __m128i raw_desc_bh3 = ++ _mm_load_si128 ++ ((void *)(&rxdp[3].wb.status_error1)); ++ rte_compiler_barrier(); ++ const __m128i raw_desc_bh2 = ++ _mm_load_si128 ++ ((void *)(&rxdp[2].wb.status_error1)); ++ rte_compiler_barrier(); ++ const __m128i raw_desc_bh1 = ++ _mm_load_si128 ++ ((void *)(&rxdp[1].wb.status_error1)); ++ rte_compiler_barrier(); ++ const __m128i raw_desc_bh0 = ++ _mm_load_si128 ++ ((void *)(&rxdp[0].wb.status_error1)); ++ ++ __m256i raw_desc_bh6_7 = ++ _mm256_inserti128_si256 ++ (_mm256_castsi128_si256(raw_desc_bh6), ++ raw_desc_bh7, 1); ++ __m256i raw_desc_bh4_5 = ++ _mm256_inserti128_si256 ++ (_mm256_castsi128_si256(raw_desc_bh4), ++ raw_desc_bh5, 1); ++ __m256i raw_desc_bh2_3 = ++ _mm256_inserti128_si256 ++ (_mm256_castsi128_si256(raw_desc_bh2), ++ raw_desc_bh3, 1); ++ __m256i raw_desc_bh0_1 = ++ _mm256_inserti128_si256 ++ (_mm256_castsi128_si256(raw_desc_bh0), ++ raw_desc_bh1, 1); ++ ++ /** ++ * to shift the 32b RSS hash value to the ++ * highest 32b of each 128b before mask ++ */ ++ __m256i rss_hash6_7 = ++ _mm256_slli_epi64(raw_desc_bh6_7, 32); ++ __m256i rss_hash4_5 = ++ _mm256_slli_epi64(raw_desc_bh4_5, 32); ++ __m256i rss_hash2_3 = ++ _mm256_slli_epi64(raw_desc_bh2_3, 32); ++ __m256i rss_hash0_1 = ++ _mm256_slli_epi64(raw_desc_bh0_1, 32); ++ ++ __m256i rss_hash_msk = ++ _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0, ++ 0xFFFFFFFF, 0, 0, 0); ++ ++ rss_hash6_7 = _mm256_and_si256 ++ (rss_hash6_7, rss_hash_msk); ++ rss_hash4_5 = _mm256_and_si256 ++ (rss_hash4_5, rss_hash_msk); ++ rss_hash2_3 = _mm256_and_si256 ++ (rss_hash2_3, rss_hash_msk); ++ rss_hash0_1 = _mm256_and_si256 ++ (rss_hash0_1, rss_hash_msk); ++ ++ mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7); ++ mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5); ++ mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3); ++ mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1); ++ } /* if() on RSS hash parsing */ ++ + /** + * At this point, we have the 8 sets of flags in the low 16-bits + * of each 32-bit value in vlan0. +-- +2.17.1 + |