dpdk: DPDK 20.05 iavf flow director backporting to DPDK 20.02
[vpp.git] / build / external / patches / dpdk_20.02 / 0014-net-iavf-add-RSS-hash-parsing-in-AVX-path.patch
1 From d338aa7cb45638b3a14177a8d83ef01c4ec20d1b Mon Sep 17 00:00:00 2001
2 From: Leyi Rong <leyi.rong@intel.com>
3 Date: Wed, 8 Apr 2020 14:22:09 +0800
4 Subject: [DPDK 14/17] net/iavf: add RSS hash parsing in AVX path
5
6 Support RSS hash parsing from Flex Rx
7 descriptor in AVX data path.
8
9 Signed-off-by: Leyi Rong <leyi.rong@intel.com>
10 ---
11  drivers/net/iavf/iavf_rxtx_vec_avx2.c | 92 ++++++++++++++++++++++++++-
12  1 file changed, 90 insertions(+), 2 deletions(-)
13
14 diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
15 index 3bf5833fa..22f1b7887 100644
16 --- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
17 +++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
18 @@ -698,7 +698,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
19                 _mm256_set_epi8
20                         (/* first descriptor */
21                          0xFF, 0xFF,
22 -                        0xFF, 0xFF,    /* rss not supported */
23 +                        0xFF, 0xFF,    /* rss hash parsed separately */
24                          11, 10,        /* octet 10~11, 16 bits vlan_macip */
25                          5, 4,          /* octet 4~5, 16 bits data_len */
26                          0xFF, 0xFF,    /* skip hi 16 bits pkt_len, zero out */
27 @@ -707,7 +707,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
28                          0xFF, 0xFF,    /*pkt_type set as unknown */
29                          /* second descriptor */
30                          0xFF, 0xFF,
31 -                        0xFF, 0xFF,    /* rss not supported */
32 +                        0xFF, 0xFF,    /* rss hash parsed separately */
33                          11, 10,        /* octet 10~11, 16 bits vlan_macip */
34                          5, 4,          /* octet 4~5, 16 bits data_len */
35                          0xFF, 0xFF,    /* skip hi 16 bits pkt_len, zero out */
36 @@ -994,6 +994,94 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
37                                 _mm256_extract_epi32(fdir_id0_7, 4);
38                 } /* if() on fdir_enabled */
39  
40 +               /**
41 +                * needs to load 2nd 16B of each desc for RSS hash parsing,
42 +                * will cause performance drop to get into this context.
43 +                */
44 +               if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
45 +                               DEV_RX_OFFLOAD_RSS_HASH) {
46 +                       /* load bottom half of every 32B desc */
47 +                       const __m128i raw_desc_bh7 =
48 +                               _mm_load_si128
49 +                                       ((void *)(&rxdp[7].wb.status_error1));
50 +                       rte_compiler_barrier();
51 +                       const __m128i raw_desc_bh6 =
52 +                               _mm_load_si128
53 +                                       ((void *)(&rxdp[6].wb.status_error1));
54 +                       rte_compiler_barrier();
55 +                       const __m128i raw_desc_bh5 =
56 +                               _mm_load_si128
57 +                                       ((void *)(&rxdp[5].wb.status_error1));
58 +                       rte_compiler_barrier();
59 +                       const __m128i raw_desc_bh4 =
60 +                               _mm_load_si128
61 +                                       ((void *)(&rxdp[4].wb.status_error1));
62 +                       rte_compiler_barrier();
63 +                       const __m128i raw_desc_bh3 =
64 +                               _mm_load_si128
65 +                                       ((void *)(&rxdp[3].wb.status_error1));
66 +                       rte_compiler_barrier();
67 +                       const __m128i raw_desc_bh2 =
68 +                               _mm_load_si128
69 +                                       ((void *)(&rxdp[2].wb.status_error1));
70 +                       rte_compiler_barrier();
71 +                       const __m128i raw_desc_bh1 =
72 +                               _mm_load_si128
73 +                                       ((void *)(&rxdp[1].wb.status_error1));
74 +                       rte_compiler_barrier();
75 +                       const __m128i raw_desc_bh0 =
76 +                               _mm_load_si128
77 +                                       ((void *)(&rxdp[0].wb.status_error1));
78 +
79 +                       __m256i raw_desc_bh6_7 =
80 +                               _mm256_inserti128_si256
81 +                                       (_mm256_castsi128_si256(raw_desc_bh6),
82 +                                       raw_desc_bh7, 1);
83 +                       __m256i raw_desc_bh4_5 =
84 +                               _mm256_inserti128_si256
85 +                                       (_mm256_castsi128_si256(raw_desc_bh4),
86 +                                       raw_desc_bh5, 1);
87 +                       __m256i raw_desc_bh2_3 =
88 +                               _mm256_inserti128_si256
89 +                                       (_mm256_castsi128_si256(raw_desc_bh2),
90 +                                       raw_desc_bh3, 1);
91 +                       __m256i raw_desc_bh0_1 =
92 +                               _mm256_inserti128_si256
93 +                                       (_mm256_castsi128_si256(raw_desc_bh0),
94 +                                       raw_desc_bh1, 1);
95 +
96 +                       /**
97 +                        * to shift the 32b RSS hash value to the
98 +                        * highest 32b of each 128b before mask
99 +                        */
100 +                       __m256i rss_hash6_7 =
101 +                               _mm256_slli_epi64(raw_desc_bh6_7, 32);
102 +                       __m256i rss_hash4_5 =
103 +                               _mm256_slli_epi64(raw_desc_bh4_5, 32);
104 +                       __m256i rss_hash2_3 =
105 +                               _mm256_slli_epi64(raw_desc_bh2_3, 32);
106 +                       __m256i rss_hash0_1 =
107 +                               _mm256_slli_epi64(raw_desc_bh0_1, 32);
108 +
109 +                       __m256i rss_hash_msk =
110 +                               _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
111 +                                                0xFFFFFFFF, 0, 0, 0);
112 +
113 +                       rss_hash6_7 = _mm256_and_si256
114 +                                       (rss_hash6_7, rss_hash_msk);
115 +                       rss_hash4_5 = _mm256_and_si256
116 +                                       (rss_hash4_5, rss_hash_msk);
117 +                       rss_hash2_3 = _mm256_and_si256
118 +                                       (rss_hash2_3, rss_hash_msk);
119 +                       rss_hash0_1 = _mm256_and_si256
120 +                                       (rss_hash0_1, rss_hash_msk);
121 +
122 +                       mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
123 +                       mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
124 +                       mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
125 +                       mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
126 +               } /* if() on RSS hash parsing */
127 +
128                 /**
129                  * At this point, we have the 8 sets of flags in the low 16-bits
130                  * of each 32-bit value in vlan0.
131 -- 
132 2.17.1
133