New upstream version 17.11.4
[deb_dpdk.git] drivers/net/mlx5/mlx5_flow.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2016 6WIND S.A.
5  *   Copyright 2016 Mellanox.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35 #include <string.h>
36
37 /* Verbs header. */
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
39 #ifdef PEDANTIC
40 #pragma GCC diagnostic ignored "-Wpedantic"
41 #endif
42 #include <infiniband/verbs.h>
43 #ifdef PEDANTIC
44 #pragma GCC diagnostic error "-Wpedantic"
45 #endif
46
47 #include <rte_common.h>
48 #include <rte_ethdev.h>
49 #include <rte_flow.h>
50 #include <rte_flow_driver.h>
51 #include <rte_malloc.h>
52 #include <rte_ip.h>
53
54 #include "mlx5.h"
55 #include "mlx5_defs.h"
56 #include "mlx5_prm.h"
57
58 /* Define minimal priority for control plane flows. */
59 #define MLX5_CTRL_FLOW_PRIORITY 4
60
61 /* Internet Protocol versions. */
62 #define MLX5_IPV4 4
63 #define MLX5_IPV6 6
64
65 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
66 struct ibv_counter_set {
67         int dummy;
68 };
69
70 static inline int
71 ibv_destroy_counter_set(struct ibv_counter_set *cs)
72 {
73         (void)cs;
74         return -ENOTSUP;
75 }
76 #endif
77
78 /* Dev ops structure defined in mlx5.c */
79 extern const struct eth_dev_ops mlx5_dev_ops;
80 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
81
82 /** Structure given to the conversion functions. */
83 struct mlx5_flow_data {
84         struct mlx5_flow_parse *parser; /**< Parser context. */
85         struct rte_flow_error *error; /**< Error context. */
86 };
87
88 static int
89 mlx5_flow_create_eth(const struct rte_flow_item *item,
90                      const void *default_mask,
91                      struct mlx5_flow_data *data);
92
93 static int
94 mlx5_flow_create_vlan(const struct rte_flow_item *item,
95                       const void *default_mask,
96                       struct mlx5_flow_data *data);
97
98 static int
99 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
100                       const void *default_mask,
101                       struct mlx5_flow_data *data);
102
103 static int
104 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
105                       const void *default_mask,
106                       struct mlx5_flow_data *data);
107
108 static int
109 mlx5_flow_create_udp(const struct rte_flow_item *item,
110                      const void *default_mask,
111                      struct mlx5_flow_data *data);
112
113 static int
114 mlx5_flow_create_tcp(const struct rte_flow_item *item,
115                      const void *default_mask,
116                      struct mlx5_flow_data *data);
117
118 static int
119 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
120                        const void *default_mask,
121                        struct mlx5_flow_data *data);
122
123 struct mlx5_flow_parse;
124
125 static void
126 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
127                       unsigned int size);
128
129 static int
130 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
131
132 static int
133 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
134
135 /* Hash RX queue types. */
136 enum hash_rxq_type {
137         HASH_RXQ_TCPV4,
138         HASH_RXQ_UDPV4,
139         HASH_RXQ_IPV4,
140         HASH_RXQ_TCPV6,
141         HASH_RXQ_UDPV6,
142         HASH_RXQ_IPV6,
143         HASH_RXQ_ETH,
144 };
145
146 /* Initialization data for hash RX queue. */
147 struct hash_rxq_init {
148         uint64_t hash_fields; /* Fields that participate in the hash. */
149         uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
150         unsigned int flow_priority; /* Flow priority to use. */
151         unsigned int ip_version; /* Internet protocol. */
152 };
153
154 /* Initialization data for hash RX queues. */
155 const struct hash_rxq_init hash_rxq_init[] = {
156         [HASH_RXQ_TCPV4] = {
157                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
158                                 IBV_RX_HASH_DST_IPV4 |
159                                 IBV_RX_HASH_SRC_PORT_TCP |
160                                 IBV_RX_HASH_DST_PORT_TCP),
161                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
162                 .flow_priority = 1,
163                 .ip_version = MLX5_IPV4,
164         },
165         [HASH_RXQ_UDPV4] = {
166                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
167                                 IBV_RX_HASH_DST_IPV4 |
168                                 IBV_RX_HASH_SRC_PORT_UDP |
169                                 IBV_RX_HASH_DST_PORT_UDP),
170                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
171                 .flow_priority = 1,
172                 .ip_version = MLX5_IPV4,
173         },
174         [HASH_RXQ_IPV4] = {
175                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
176                                 IBV_RX_HASH_DST_IPV4),
177                 .dpdk_rss_hf = (ETH_RSS_IPV4 |
178                                 ETH_RSS_FRAG_IPV4),
179                 .flow_priority = 2,
180                 .ip_version = MLX5_IPV4,
181         },
182         [HASH_RXQ_TCPV6] = {
183                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
184                                 IBV_RX_HASH_DST_IPV6 |
185                                 IBV_RX_HASH_SRC_PORT_TCP |
186                                 IBV_RX_HASH_DST_PORT_TCP),
187                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
188                 .flow_priority = 1,
189                 .ip_version = MLX5_IPV6,
190         },
191         [HASH_RXQ_UDPV6] = {
192                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
193                                 IBV_RX_HASH_DST_IPV6 |
194                                 IBV_RX_HASH_SRC_PORT_UDP |
195                                 IBV_RX_HASH_DST_PORT_UDP),
196                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
197                 .flow_priority = 1,
198                 .ip_version = MLX5_IPV6,
199         },
200         [HASH_RXQ_IPV6] = {
201                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
202                                 IBV_RX_HASH_DST_IPV6),
203                 .dpdk_rss_hf = (ETH_RSS_IPV6 |
204                                 ETH_RSS_FRAG_IPV6),
205                 .flow_priority = 2,
206                 .ip_version = MLX5_IPV6,
207         },
208         [HASH_RXQ_ETH] = {
209                 .hash_fields = 0,
210                 .dpdk_rss_hf = 0,
211                 .flow_priority = 3,
212         },
213 };
214
215 /* Number of entries in hash_rxq_init[]. */
216 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
217
218 /** Structure for holding counter stats. */
219 struct mlx5_flow_counter_stats {
220         uint64_t hits; /**< Number of packets matched by the rule. */
221         uint64_t bytes; /**< Number of bytes matched by the rule. */
222 };
223
224 /** Structure for Drop queue. */
225 struct mlx5_hrxq_drop {
226         struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
227         struct ibv_qp *qp; /**< Verbs queue pair. */
228         struct ibv_wq *wq; /**< Verbs work queue. */
229         struct ibv_cq *cq; /**< Verbs completion queue. */
230 };
231
232 /* Flow structures. */
233 struct mlx5_flow {
234         uint64_t hash_fields; /**< Fields that participate in the hash. */
235         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
236         struct ibv_flow *ibv_flow; /**< Verbs flow. */
237         struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
238 };
239
240 /* Drop flow structures. */
241 struct mlx5_flow_drop {
242         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
243         struct ibv_flow *ibv_flow; /**< Verbs flow. */
244 };
245
246 struct rte_flow {
247         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
248         uint32_t mark:1; /**< Set if the flow is marked. */
249         uint32_t drop:1; /**< Drop queue. */
250         uint16_t queues_n; /**< Number of entries in queue[]. */
251         uint16_t (*queues)[]; /**< Queues indexes to use. */
252         struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
253         uint8_t rss_key[40]; /**< copy of the RSS key. */
254         struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
255         struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
256         struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
257         /**< Flow with Rx queue. */
258 };
259
260 /** Static initializer for items. */
261 #define ITEMS(...) \
262         (const enum rte_flow_item_type []){ \
263                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
264         }
265
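/*
 * For illustration (not used by the driver itself): an invocation such as
 * ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6) expands to the
 * compound literal
 *
 *     (const enum rte_flow_item_type []){
 *             RTE_FLOW_ITEM_TYPE_IPV4,
 *             RTE_FLOW_ITEM_TYPE_IPV6,
 *             RTE_FLOW_ITEM_TYPE_END,
 *     }
 *
 * i.e. an END-terminated array listing the item types allowed to follow.
 */
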
266 /** Structure to generate a simple graph of layers supported by the NIC. */
267 struct mlx5_flow_items {
268         /** List of possible actions for these items. */
269         const enum rte_flow_action_type *const actions;
270         /** Bit-masks corresponding to the possibilities for the item. */
271         const void *mask;
272         /**
273          * Default bit-masks to use when item->mask is not provided. When
274          * \default_mask is also NULL, the full supported bit-mask (\mask) is
275          * used instead.
276          */
277         const void *default_mask;
278         /** Bit-masks size in bytes. */
279         const unsigned int mask_sz;
280         /**
281          * Conversion function from rte_flow to NIC specific flow.
282          *
283          * @param item
284          *   rte_flow item to convert.
285          * @param default_mask
286          *   Default bit-masks to use when item->mask is not provided.
287          * @param data
288          *   Internal structure to store the conversion.
289          *
290          * @return
291          *   0 on success, a negative errno value otherwise and rte_errno is
292          *   set.
293          */
294         int (*convert)(const struct rte_flow_item *item,
295                        const void *default_mask,
296                        struct mlx5_flow_data *data);
297         /** Size in bytes of the destination structure. */
298         const unsigned int dst_sz;
299         /** List of possible following items.  */
300         const enum rte_flow_item_type *const items;
301 };
302
303 /** Valid actions for this PMD. */
304 static const enum rte_flow_action_type valid_actions[] = {
305         RTE_FLOW_ACTION_TYPE_DROP,
306         RTE_FLOW_ACTION_TYPE_QUEUE,
307         RTE_FLOW_ACTION_TYPE_MARK,
308         RTE_FLOW_ACTION_TYPE_FLAG,
309 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
310         RTE_FLOW_ACTION_TYPE_COUNT,
311 #endif
312         RTE_FLOW_ACTION_TYPE_END,
313 };
314
315 /** Graph of supported items and associated actions. */
316 static const struct mlx5_flow_items mlx5_flow_items[] = {
317         [RTE_FLOW_ITEM_TYPE_END] = {
318                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
319                                RTE_FLOW_ITEM_TYPE_VXLAN),
320         },
321         [RTE_FLOW_ITEM_TYPE_ETH] = {
322                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
323                                RTE_FLOW_ITEM_TYPE_IPV4,
324                                RTE_FLOW_ITEM_TYPE_IPV6),
325                 .actions = valid_actions,
326                 .mask = &(const struct rte_flow_item_eth){
327                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
328                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
329                         .type = -1,
330                 },
331                 .default_mask = &rte_flow_item_eth_mask,
332                 .mask_sz = sizeof(struct rte_flow_item_eth),
333                 .convert = mlx5_flow_create_eth,
334                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
335         },
336         [RTE_FLOW_ITEM_TYPE_VLAN] = {
337                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
338                                RTE_FLOW_ITEM_TYPE_IPV6),
339                 .actions = valid_actions,
340                 .mask = &(const struct rte_flow_item_vlan){
341                         .tci = -1,
342                 },
343                 .default_mask = &rte_flow_item_vlan_mask,
344                 .mask_sz = sizeof(struct rte_flow_item_vlan),
345                 .convert = mlx5_flow_create_vlan,
346                 .dst_sz = 0,
347         },
348         [RTE_FLOW_ITEM_TYPE_IPV4] = {
349                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
350                                RTE_FLOW_ITEM_TYPE_TCP),
351                 .actions = valid_actions,
352                 .mask = &(const struct rte_flow_item_ipv4){
353                         .hdr = {
354                                 .src_addr = -1,
355                                 .dst_addr = -1,
356                                 .type_of_service = -1,
357                                 .next_proto_id = -1,
358                         },
359                 },
360                 .default_mask = &rte_flow_item_ipv4_mask,
361                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
362                 .convert = mlx5_flow_create_ipv4,
363                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
364         },
365         [RTE_FLOW_ITEM_TYPE_IPV6] = {
366                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
367                                RTE_FLOW_ITEM_TYPE_TCP),
368                 .actions = valid_actions,
369                 .mask = &(const struct rte_flow_item_ipv6){
370                         .hdr = {
371                                 .src_addr = {
372                                         0xff, 0xff, 0xff, 0xff,
373                                         0xff, 0xff, 0xff, 0xff,
374                                         0xff, 0xff, 0xff, 0xff,
375                                         0xff, 0xff, 0xff, 0xff,
376                                 },
377                                 .dst_addr = {
378                                         0xff, 0xff, 0xff, 0xff,
379                                         0xff, 0xff, 0xff, 0xff,
380                                         0xff, 0xff, 0xff, 0xff,
381                                         0xff, 0xff, 0xff, 0xff,
382                                 },
383                                 .vtc_flow = -1,
384                                 .proto = -1,
385                                 .hop_limits = -1,
386                         },
387                 },
388                 .default_mask = &rte_flow_item_ipv6_mask,
389                 .mask_sz = sizeof(struct rte_flow_item_ipv6),
390                 .convert = mlx5_flow_create_ipv6,
391                 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
392         },
393         [RTE_FLOW_ITEM_TYPE_UDP] = {
394                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
395                 .actions = valid_actions,
396                 .mask = &(const struct rte_flow_item_udp){
397                         .hdr = {
398                                 .src_port = -1,
399                                 .dst_port = -1,
400                         },
401                 },
402                 .default_mask = &rte_flow_item_udp_mask,
403                 .mask_sz = sizeof(struct rte_flow_item_udp),
404                 .convert = mlx5_flow_create_udp,
405                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
406         },
407         [RTE_FLOW_ITEM_TYPE_TCP] = {
408                 .actions = valid_actions,
409                 .mask = &(const struct rte_flow_item_tcp){
410                         .hdr = {
411                                 .src_port = -1,
412                                 .dst_port = -1,
413                         },
414                 },
415                 .default_mask = &rte_flow_item_tcp_mask,
416                 .mask_sz = sizeof(struct rte_flow_item_tcp),
417                 .convert = mlx5_flow_create_tcp,
418                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
419         },
420         [RTE_FLOW_ITEM_TYPE_VXLAN] = {
421                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
422                 .actions = valid_actions,
423                 .mask = &(const struct rte_flow_item_vxlan){
424                         .vni = "\xff\xff\xff",
425                 },
426                 .default_mask = &rte_flow_item_vxlan_mask,
427                 .mask_sz = sizeof(struct rte_flow_item_vxlan),
428                 .convert = mlx5_flow_create_vxlan,
429                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
430         },
431 };
432
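/*
 * A minimal sketch of a pattern accepted by the graph above (illustrative
 * only, values assumed): outer Ethernet/IPv4/UDP, a VXLAN tunnel, then the
 * inner Ethernet header.
 *
 *     const struct rte_flow_item pattern[] = {
 *             { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *             { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *             { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *             { .type = RTE_FLOW_ITEM_TYPE_VXLAN },
 *             { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *             { .type = RTE_FLOW_ITEM_TYPE_END },
 *     };
 *
 * Each step is reachable from the previous one through the .items lists,
 * and only a single VXLAN encapsulation level is recognized.
 */
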
433 /** Structure to pass to the conversion function. */
434 struct mlx5_flow_parse {
435         uint32_t inner; /**< Set once VXLAN is encountered. */
436         uint32_t create:1;
437         /**< Whether resources should remain after a validate. */
438         uint32_t drop:1; /**< Target is a drop queue. */
439         uint32_t mark:1; /**< Mark is present in the flow. */
440         uint32_t count:1; /**< Count is present in the flow. */
441         uint32_t mark_id; /**< Mark identifier. */
442         uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
443         uint16_t queues_n; /**< Number of entries in queue[]. */
444         struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
445         uint8_t rss_key[40]; /**< copy of the RSS key. */
446         enum hash_rxq_type layer; /**< Last pattern layer detected. */
447         struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
448         struct {
449                 struct ibv_flow_attr *ibv_attr;
450                 /**< Pointer to Verbs attributes. */
451                 unsigned int offset;
452                 /**< Current position or total size of the attribute. */
453         } queue[RTE_DIM(hash_rxq_init)];
454 };
455
456 static const struct rte_flow_ops mlx5_flow_ops = {
457         .validate = mlx5_flow_validate,
458         .create = mlx5_flow_create,
459         .destroy = mlx5_flow_destroy,
460         .flush = mlx5_flow_flush,
461 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
462         .query = mlx5_flow_query,
463 #else
464         .query = NULL,
465 #endif
466         .isolate = mlx5_flow_isolate,
467 };
468
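/*
 * These callbacks are reached through the generic rte_flow API: an
 * application call such as rte_flow_create(port_id, &attr, pattern,
 * actions, &error) is dispatched to mlx5_flow_create() below once the ops
 * table has been retrieved through the RTE_ETH_FILTER_GENERIC filter_ctrl
 * path.
 */
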
469 /* Structure used to convert a FDIR request into a generic flow. */
470 struct mlx5_fdir {
471         struct rte_flow_attr attr;
472         struct rte_flow_action actions[2];
473         struct rte_flow_item items[4];
474         struct rte_flow_item_eth l2;
475         struct rte_flow_item_eth l2_mask;
476         union {
477                 struct rte_flow_item_ipv4 ipv4;
478                 struct rte_flow_item_ipv6 ipv6;
479         } l3;
480         union {
481                 struct rte_flow_item_ipv4 ipv4;
482                 struct rte_flow_item_ipv6 ipv6;
483         } l3_mask;
484         union {
485                 struct rte_flow_item_udp udp;
486                 struct rte_flow_item_tcp tcp;
487         } l4;
488         union {
489                 struct rte_flow_item_udp udp;
490                 struct rte_flow_item_tcp tcp;
491         } l4_mask;
492         struct rte_flow_action_queue queue;
493 };
494
495 /* Verbs specification header. */
496 struct ibv_spec_header {
497         enum ibv_flow_spec_type type;
498         uint16_t size;
499 };
500
501 /**
502  * Check whether the item is fully supported by the NIC matching capability.
503  *
504  * @param item[in]
505  *   Item specification.
506  * @param mask[in]
507  *   Bit-masks covering supported fields to compare with spec, last and mask in
508  *   \item.
509  * @param size
510  *   Bit-Mask size in bytes.
511  *
512  * @return
513  *   0 on success, a negative errno value otherwise and rte_errno is set.
514  */
515 static int
516 mlx5_flow_item_validate(const struct rte_flow_item *item,
517                         const uint8_t *mask, unsigned int size)
518 {
519         unsigned int i;
520         const uint8_t *spec = item->spec;
521         const uint8_t *last = item->last;
522         const uint8_t *m = item->mask ? item->mask : mask;
523
524         if (!spec && (item->mask || last))
525                 goto error;
526         if (!spec)
527                 return 0;
528         /*
529          * Single-pass check to make sure that:
530          * - item->mask is supported, no bits are set outside mask.
531          * - Both masked item->spec and item->last are equal (no range
532          *   supported).
533          */
534         for (i = 0; i < size; i++) {
535                 if (!m[i])
536                         continue;
537                 if ((m[i] | mask[i]) != mask[i])
538                         goto error;
539                 if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
540                         goto error;
541         }
542         return 0;
543 error:
544         rte_errno = ENOTSUP;
545         return -rte_errno;
546 }
547
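/*
 * Example of the checks above (values assumed for illustration): with the
 * TCP item, whose supported mask covers only hdr.src_port and hdr.dst_port,
 * a user mask that also sets bits of hdr.sent_seq fails the
 * (m[i] | mask[i]) != mask[i] test, and a spec/last pair describing a real
 * port range is rejected as well, both causing rte_errno to be set to
 * ENOTSUP.
 */
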
548 /**
549  * Copy the RSS configuration from the user one; if rss_conf is NULL, use
550  * the driver default.
551  *
552  * @param parser
553  *   Internal parser structure.
554  * @param rss_conf
555  *   User RSS configuration to save.
556  *
557  * @return
558  *   0 on success, a negative errno value otherwise and rte_errno is set.
559  */
560 static int
561 mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
562                            const struct rte_eth_rss_conf *rss_conf)
563 {
564         /*
565          * This function is also called at the beginning of
566          * mlx5_flow_convert_actions() to initialize the parser with the
567          * device default RSS configuration.
568          */
569         if (rss_conf) {
570                 if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
571                         rte_errno = EINVAL;
572                         return -rte_errno;
573                 }
574                 if (rss_conf->rss_key_len != 40) {
575                         rte_errno = EINVAL;
576                         return -rte_errno;
577                 }
578                 if (rss_conf->rss_key_len && rss_conf->rss_key) {
579                         parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
580                         memcpy(parser->rss_key, rss_conf->rss_key,
581                                rss_conf->rss_key_len);
582                         parser->rss_conf.rss_key = parser->rss_key;
583                 }
584                 parser->rss_conf.rss_hf = rss_conf->rss_hf;
585         }
586         return 0;
587 }
588
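/*
 * Sketch of a configuration accepted above (illustrative values): when a
 * key is supplied it must be exactly 40 bytes long, and rss_hf must not
 * carry any flag belonging to MLX5_RSS_HF_MASK; the hash types listed in
 * hash_rxq_init[] are the ones the device can actually use.
 *
 *     static uint8_t rss_key[40]; // application-chosen key, zeroed here
 *     struct rte_eth_rss_conf rss_conf = {
 *             .rss_key = rss_key,
 *             .rss_key_len = 40,
 *             .rss_hf = ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP,
 *     };
 */
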
589 /**
590  * Extract attribute to the parser.
591  *
592  * @param[in] attr
593  *   Flow rule attributes.
594  * @param[out] error
595  *   Perform verbose error reporting if not NULL.
596  *
597  * @return
598  *   0 on success, a negative errno value otherwise and rte_errno is set.
599  */
600 static int
601 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
602                              struct rte_flow_error *error)
603 {
604         if (attr->group) {
605                 rte_flow_error_set(error, ENOTSUP,
606                                    RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
607                                    NULL,
608                                    "groups are not supported");
609                 return -rte_errno;
610         }
611         if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
612                 rte_flow_error_set(error, ENOTSUP,
613                                    RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
614                                    NULL,
615                                    "priorities are not supported");
616                 return -rte_errno;
617         }
618         if (attr->egress) {
619                 rte_flow_error_set(error, ENOTSUP,
620                                    RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
621                                    NULL,
622                                    "egress is not supported");
623                 return -rte_errno;
624         }
625         if (!attr->ingress) {
626                 rte_flow_error_set(error, ENOTSUP,
627                                    RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
628                                    NULL,
629                                    "only ingress is supported");
630                 return -rte_errno;
631         }
632         return 0;
633 }
634
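/*
 * Only ingress rules in group 0 are accepted, with priority either 0 or
 * MLX5_CTRL_FLOW_PRIORITY; for instance (assumed example) the attributes
 * below pass validation:
 *
 *     const struct rte_flow_attr attr = { .ingress = 1 };
 */
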
635 /**
636  * Extract actions request to the parser.
637  *
638  * @param dev
639  *   Pointer to Ethernet device.
640  * @param[in] actions
641  *   Associated actions (list terminated by the END action).
642  * @param[out] error
643  *   Perform verbose error reporting if not NULL.
644  * @param[in, out] parser
645  *   Internal parser structure.
646  *
647  * @return
648  *   0 on success, a negative errno value otherwise and rte_errno is set.
649  */
650 static int
651 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
652                           const struct rte_flow_action actions[],
653                           struct rte_flow_error *error,
654                           struct mlx5_flow_parse *parser)
655 {
656         struct priv *priv = dev->data->dev_private;
657         int ret;
658
659         /*
660          * Add the default RSS configuration: Verbs needs it to create the QP
661          * even when no RSS is requested.
662          */
663         ret = mlx5_flow_convert_rss_conf(parser,
664                                          (const struct rte_eth_rss_conf *)
665                                          &priv->rss_conf);
666         if (ret)
667                 return ret;
668         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
669                 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
670                         continue;
671                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
672                         parser->drop = 1;
673                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
674                         const struct rte_flow_action_queue *queue =
675                                 (const struct rte_flow_action_queue *)
676                                 actions->conf;
677                         uint16_t n;
678                         uint16_t found = 0;
679
680                         if (!queue || (queue->index > (priv->rxqs_n - 1)))
681                                 goto exit_action_not_supported;
682                         for (n = 0; n < parser->queues_n; ++n) {
683                                 if (parser->queues[n] == queue->index) {
684                                         found = 1;
685                                         break;
686                                 }
687                         }
688                         if (parser->queues_n > 1 && !found) {
689                                 rte_flow_error_set(error, ENOTSUP,
690                                            RTE_FLOW_ERROR_TYPE_ACTION,
691                                            actions,
692                                            "queue action not in RSS queues");
693                                 return -rte_errno;
694                         }
695                         if (!found) {
696                                 parser->queues_n = 1;
697                                 parser->queues[0] = queue->index;
698                         }
699                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
700                         const struct rte_flow_action_rss *rss =
701                                 (const struct rte_flow_action_rss *)
702                                 actions->conf;
703                         uint16_t n;
704
705                         if (!rss || !rss->num) {
706                                 rte_flow_error_set(error, EINVAL,
707                                                    RTE_FLOW_ERROR_TYPE_ACTION,
708                                                    actions,
709                                                    "no valid queues");
710                                 return -rte_errno;
711                         }
712                         if (parser->queues_n == 1) {
713                                 uint16_t found = 0;
714
715                                 assert(parser->queues_n);
716                                 for (n = 0; n < rss->num; ++n) {
717                                         if (parser->queues[0] ==
718                                             rss->queue[n]) {
719                                                 found = 1;
720                                                 break;
721                                         }
722                                 }
723                                 if (!found) {
724                                         rte_flow_error_set(error, ENOTSUP,
725                                                    RTE_FLOW_ERROR_TYPE_ACTION,
726                                                    actions,
727                                                    "queue action not in RSS"
728                                                    " queues");
729                                         return -rte_errno;
730                                 }
731                         }
732                         if (rss->num > RTE_DIM(parser->queues)) {
733                                 rte_flow_error_set(error, EINVAL,
734                                                    RTE_FLOW_ERROR_TYPE_ACTION,
735                                                    actions,
736                                                    "too many queues for RSS"
737                                                    " context");
738                                 return -rte_errno;
739                         }
740                         for (n = 0; n < rss->num; ++n) {
741                                 if (rss->queue[n] >= priv->rxqs_n) {
742                                         rte_flow_error_set(error, EINVAL,
743                                                    RTE_FLOW_ERROR_TYPE_ACTION,
744                                                    actions,
745                                                    "queue id > number of"
746                                                    " queues");
747                                         return -rte_errno;
748                                 }
749                         }
750                         for (n = 0; n < rss->num; ++n)
751                                 parser->queues[n] = rss->queue[n];
752                         parser->queues_n = rss->num;
753                         if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
754                                 rte_flow_error_set(error, EINVAL,
755                                                    RTE_FLOW_ERROR_TYPE_ACTION,
756                                                    actions,
757                                                    "wrong RSS configuration");
758                                 return -rte_errno;
759                         }
760                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
761                         const struct rte_flow_action_mark *mark =
762                                 (const struct rte_flow_action_mark *)
763                                 actions->conf;
764
765                         if (!mark) {
766                                 rte_flow_error_set(error, EINVAL,
767                                                    RTE_FLOW_ERROR_TYPE_ACTION,
768                                                    actions,
769                                                    "mark must be defined");
770                                 return -rte_errno;
771                         } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
772                                 rte_flow_error_set(error, ENOTSUP,
773                                                    RTE_FLOW_ERROR_TYPE_ACTION,
774                                                    actions,
775                                                    "mark must be between 0"
776                                                    " and 16777199");
777                                 return -rte_errno;
778                         }
779                         parser->mark = 1;
780                         parser->mark_id = mark->id;
781                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
782                         parser->mark = 1;
783                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
784                            priv->counter_set_supported) {
785                         parser->count = 1;
786                 } else {
787                         goto exit_action_not_supported;
788                 }
789         }
790         if (parser->drop && parser->mark)
791                 parser->mark = 0;
792         if (!parser->queues_n && !parser->drop) {
793                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
794                                    NULL, "no valid action");
795                 return -rte_errno;
796         }
797         return 0;
798 exit_action_not_supported:
799         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
800                            actions, "action not supported");
801         return -rte_errno;
802 }
803
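/*
 * A minimal action list handled by the parser above (illustrative values):
 * mark matched packets with 42 and deliver them to Rx queue 0, assuming
 * queue 0 exists on the port.
 *
 *     const struct rte_flow_action actions[] = {
 *             { .type = RTE_FLOW_ACTION_TYPE_MARK,
 *               .conf = &(const struct rte_flow_action_mark){ .id = 42 } },
 *             { .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *               .conf = &(const struct rte_flow_action_queue){ .index = 0 } },
 *             { .type = RTE_FLOW_ACTION_TYPE_END },
 *     };
 */
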
804 /**
805  * Validate items.
806  *
807  * @param[in] items
808  *   Pattern specification (list terminated by the END pattern item).
809  * @param[out] error
810  *   Perform verbose error reporting if not NULL.
811  * @param[in, out] parser
812  *   Internal parser structure.
813  *
814  * @return
815  *   0 on success, a negative errno value otherwise and rte_errno is set.
816  */
817 static int
818 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
819                                  struct rte_flow_error *error,
820                                  struct mlx5_flow_parse *parser)
821 {
822         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
823         unsigned int i;
824         int ret = 0;
825
826         /* Initialise the offsets to start after verbs attribute. */
827         for (i = 0; i != hash_rxq_init_n; ++i)
828                 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
829         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
830                 const struct mlx5_flow_items *token = NULL;
831                 unsigned int n;
832
833                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
834                         continue;
835                 for (i = 0;
836                      cur_item->items &&
837                      cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
838                      ++i) {
839                         if (cur_item->items[i] == items->type) {
840                                 token = &mlx5_flow_items[items->type];
841                                 break;
842                         }
843                 }
844                 if (!token) {
845                         ret = -ENOTSUP;
846                         goto exit_item_not_supported;
847                 }
848                 cur_item = token;
849                 ret = mlx5_flow_item_validate(items,
850                                               (const uint8_t *)cur_item->mask,
851                                               cur_item->mask_sz);
852                 if (ret)
853                         goto exit_item_not_supported;
854                 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
855                         if (parser->inner) {
856                                 rte_flow_error_set(error, ENOTSUP,
857                                                    RTE_FLOW_ERROR_TYPE_ITEM,
858                                                    items,
859                                                    "cannot recognize multiple"
860                                                    " VXLAN encapsulations");
861                                 return -rte_errno;
862                         }
863                         parser->inner = IBV_FLOW_SPEC_INNER;
864                 }
865                 if (parser->drop) {
866                         parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
867                 } else {
868                         for (n = 0; n != hash_rxq_init_n; ++n)
869                                 parser->queue[n].offset += cur_item->dst_sz;
870                 }
871         }
872         if (parser->drop) {
873                 parser->queue[HASH_RXQ_ETH].offset +=
874                         sizeof(struct ibv_flow_spec_action_drop);
875         }
876         if (parser->mark) {
877                 for (i = 0; i != hash_rxq_init_n; ++i)
878                         parser->queue[i].offset +=
879                                 sizeof(struct ibv_flow_spec_action_tag);
880         }
881         if (parser->count) {
882 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
883                 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
884
885                 for (i = 0; i != hash_rxq_init_n; ++i)
886                         parser->queue[i].offset += size;
887 #else
888                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
889                                    items,
890                                    "Count action supported only on "
891                                    "MLNX_OFED_4.2 and above");
892 #endif
893         }
894         return 0;
895 exit_item_not_supported:
896         return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
897                                   items, "item not supported");
898 }
899
900 /**
901  * Allocate memory space to store verbs flow attributes.
902  *
903  * @param[in] size
904  *   Number of bytes to allocate.
905  * @param[out] error
906  *   Perform verbose error reporting if not NULL.
907  *
908  * @return
909  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
910  */
911 static struct ibv_flow_attr *
912 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
913 {
914         struct ibv_flow_attr *ibv_attr;
915
916         ibv_attr = rte_calloc(__func__, 1, size, 0);
917         if (!ibv_attr) {
918                 rte_flow_error_set(error, ENOMEM,
919                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
920                                    NULL,
921                                    "cannot allocate verbs spec attributes");
922                 return NULL;
923         }
924         return ibv_attr;
925 }
926
927 /**
928  * Give inner packet matching a higher priority than non-inner
929  * matching.
930  *
931  * @param[in, out] parser
932  *   Internal parser structure.
933  * @param attr
934  *   User flow attribute.
935  */
936 static void
937 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
938                           const struct rte_flow_attr *attr)
939 {
940         unsigned int i;
941
942         if (parser->drop) {
943                 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
944                         attr->priority +
945                         hash_rxq_init[HASH_RXQ_ETH].flow_priority;
946                 return;
947         }
948         for (i = 0; i != hash_rxq_init_n; ++i) {
949                 if (parser->queue[i].ibv_attr) {
950                         parser->queue[i].ibv_attr->priority =
951                                 attr->priority +
952                                 hash_rxq_init[i].flow_priority -
953                                 (parser->inner ? 1 : 0);
954                 }
955         }
956 }
957
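/*
 * With the flow_priority values from hash_rxq_init[], an outer rule created
 * with attr->priority == 0 therefore gets Verbs priority 1 for the
 * TCPv4/UDPv4/TCPv6/UDPv6 specifications, 2 for plain IPv4/IPv6 and 3 for
 * Ethernet, while matching inside a VXLAN tunnel gains one level of
 * precedence (priority - 1).
 */
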
958 /**
959  * Finalise verbs flow attributes.
960  *
961  * @param[in, out] parser
962  *   Internal parser structure.
963  */
964 static void
965 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
966 {
967         const unsigned int ipv4 =
968                 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
969         const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
970         const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
971         const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
972         const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
973         const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
974         unsigned int i;
975
976         /* Remove any other flow not matching the pattern. */
977         if (parser->queues_n == 1 && !parser->rss_conf.rss_hf) {
978                 for (i = 0; i != hash_rxq_init_n; ++i) {
979                         if (i == HASH_RXQ_ETH)
980                                 continue;
981                         rte_free(parser->queue[i].ibv_attr);
982                         parser->queue[i].ibv_attr = NULL;
983                 }
984                 return;
985         }
986         if (parser->layer == HASH_RXQ_ETH) {
987                 goto fill;
988         } else {
989                 /*
990                  * This layer becomes useless as the pattern defines lower
991                  * layers.
992                  */
993                 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
994                 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
995         }
996         /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
997         for (i = ohmin; i != (ohmax + 1); ++i) {
998                 if (!parser->queue[i].ibv_attr)
999                         continue;
1000                 rte_free(parser->queue[i].ibv_attr);
1001                 parser->queue[i].ibv_attr = NULL;
1002         }
1003         /* Remove impossible flow according to the RSS configuration. */
1004         if (hash_rxq_init[parser->layer].dpdk_rss_hf &
1005             parser->rss_conf.rss_hf) {
1006                 /* Remove any other flow. */
1007                 for (i = hmin; i != (hmax + 1); ++i) {
1008                         if ((i == parser->layer) ||
1009                              (!parser->queue[i].ibv_attr))
1010                                 continue;
1011                         rte_free(parser->queue[i].ibv_attr);
1012                         parser->queue[i].ibv_attr = NULL;
1013                 }
1014         } else if (!parser->queue[ip].ibv_attr) {
1015                 /* no RSS possible with the current configuration. */
1016                 parser->queues_n = 1;
1017                 return;
1018         }
1019 fill:
1020         /*
1021          * Fill missing layers in verbs specifications, or compute the correct
1022          * offset to allocate the memory space for the attributes and
1023          * specifications.
1024          */
1025         for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1026                 union {
1027                         struct ibv_flow_spec_ipv4_ext ipv4;
1028                         struct ibv_flow_spec_ipv6 ipv6;
1029                         struct ibv_flow_spec_tcp_udp udp_tcp;
1030                 } specs;
1031                 void *dst;
1032                 uint16_t size;
1033
1034                 if (i == parser->layer)
1035                         continue;
1036                 if (parser->layer == HASH_RXQ_ETH) {
1037                         if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1038                                 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1039                                 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1040                                         .type = IBV_FLOW_SPEC_IPV4_EXT,
1041                                         .size = size,
1042                                 };
1043                         } else {
1044                                 size = sizeof(struct ibv_flow_spec_ipv6);
1045                                 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1046                                         .type = IBV_FLOW_SPEC_IPV6,
1047                                         .size = size,
1048                                 };
1049                         }
1050                         if (parser->queue[i].ibv_attr) {
1051                                 dst = (void *)((uintptr_t)
1052                                                parser->queue[i].ibv_attr +
1053                                                parser->queue[i].offset);
1054                                 memcpy(dst, &specs, size);
1055                                 ++parser->queue[i].ibv_attr->num_of_specs;
1056                         }
1057                         parser->queue[i].offset += size;
1058                 }
1059                 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1060                     (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1061                         size = sizeof(struct ibv_flow_spec_tcp_udp);
1062                         specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1063                                 .type = ((i == HASH_RXQ_UDPV4 ||
1064                                           i == HASH_RXQ_UDPV6) ?
1065                                          IBV_FLOW_SPEC_UDP :
1066                                          IBV_FLOW_SPEC_TCP),
1067                                 .size = size,
1068                         };
1069                         if (parser->queue[i].ibv_attr) {
1070                                 dst = (void *)((uintptr_t)
1071                                                parser->queue[i].ibv_attr +
1072                                                parser->queue[i].offset);
1073                                 memcpy(dst, &specs, size);
1074                                 ++parser->queue[i].ibv_attr->num_of_specs;
1075                         }
1076                         parser->queue[i].offset += size;
1077                 }
1078         }
1079 }
1080
1081 /**
1082  * Validate and convert a flow supported by the NIC.
1083  *
1084  * @param dev
1085  *   Pointer to Ethernet device.
1086  * @param[in] attr
1087  *   Flow rule attributes.
1088  * @param[in] pattern
1089  *   Pattern specification (list terminated by the END pattern item).
1090  * @param[in] actions
1091  *   Associated actions (list terminated by the END action).
1092  * @param[out] error
1093  *   Perform verbose error reporting if not NULL.
1094  * @param[in, out] parser
1095  *   Internal parser structure.
1096  *
1097  * @return
1098  *   0 on success, a negative errno value otherwise and rte_errno is set.
1099  */
1100 static int
1101 mlx5_flow_convert(struct rte_eth_dev *dev,
1102                   const struct rte_flow_attr *attr,
1103                   const struct rte_flow_item items[],
1104                   const struct rte_flow_action actions[],
1105                   struct rte_flow_error *error,
1106                   struct mlx5_flow_parse *parser)
1107 {
1108         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1109         unsigned int i;
1110         int ret;
1111
1112         /* First step. Validate the attributes, items and actions. */
1113         *parser = (struct mlx5_flow_parse){
1114                 .create = parser->create,
1115                 .layer = HASH_RXQ_ETH,
1116                 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1117         };
1118         ret = mlx5_flow_convert_attributes(attr, error);
1119         if (ret)
1120                 return ret;
1121         ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1122         if (ret)
1123                 return ret;
1124         ret = mlx5_flow_convert_items_validate(items, error, parser);
1125         if (ret)
1126                 return ret;
1127         mlx5_flow_convert_finalise(parser);
1128         /*
1129          * Second step.
1130          * Allocate the memory space to store verbs specifications.
1131          */
1132         if (parser->drop) {
1133                 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1134
1135                 parser->queue[HASH_RXQ_ETH].ibv_attr =
1136                         mlx5_flow_convert_allocate(offset, error);
1137                 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1138                         goto exit_enomem;
1139                 parser->queue[HASH_RXQ_ETH].offset =
1140                         sizeof(struct ibv_flow_attr);
1141         } else {
1142                 for (i = 0; i != hash_rxq_init_n; ++i) {
1143                         unsigned int offset;
1144
1145                         if (!(parser->rss_conf.rss_hf &
1146                               hash_rxq_init[i].dpdk_rss_hf) &&
1147                             (i != HASH_RXQ_ETH))
1148                                 continue;
1149                         offset = parser->queue[i].offset;
1150                         parser->queue[i].ibv_attr =
1151                                 mlx5_flow_convert_allocate(offset, error);
1152                         if (!parser->queue[i].ibv_attr)
1153                                 goto exit_enomem;
1154                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1155                 }
1156         }
1157         /* Third step. Parse the items and fill the specifications. */
1158         parser->inner = 0;
1159         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1160                 struct mlx5_flow_data data = {
1161                         .parser = parser,
1162                         .error = error,
1163                 };
1164
1165                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1166                         continue;
1167                 cur_item = &mlx5_flow_items[items->type];
1168                 ret = cur_item->convert(items,
1169                                         (cur_item->default_mask ?
1170                                          cur_item->default_mask :
1171                                          cur_item->mask),
1172                                          &data);
1173                 if (ret)
1174                         goto exit_free;
1175         }
1176         if (parser->mark)
1177                 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1178         if (parser->count && parser->create) {
1179                 mlx5_flow_create_count(dev, parser);
1180                 if (!parser->cs)
1181                         goto exit_count_error;
1182         }
1183         /*
1184          * Last step. Complete the missing specifications according to the
1185          * RSS configuration.
1186          */
1187         if (!parser->drop)
1188                 mlx5_flow_convert_finalise(parser);
1189         mlx5_flow_update_priority(parser, attr);
1190 exit_free:
1191         /* If only validation was requested, release all resources. */
1192         if (!parser->create) {
1193                 for (i = 0; i != hash_rxq_init_n; ++i) {
1194                         if (parser->queue[i].ibv_attr) {
1195                                 rte_free(parser->queue[i].ibv_attr);
1196                                 parser->queue[i].ibv_attr = NULL;
1197                         }
1198                 }
1199         }
1200         return ret;
1201 exit_enomem:
1202         for (i = 0; i != hash_rxq_init_n; ++i) {
1203                 if (parser->queue[i].ibv_attr) {
1204                         rte_free(parser->queue[i].ibv_attr);
1205                         parser->queue[i].ibv_attr = NULL;
1206                 }
1207         }
1208         rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1209                            NULL, "cannot allocate verbs spec attributes");
1210         return -rte_errno;
1211 exit_count_error:
1212         rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1213                            NULL, "cannot create counter");
1214         return -rte_errno;
1215 }
1216
1217 /**
1218  * Copy the specification created into the flow.
1219  *
1220  * @param parser
1221  *   Internal parser structure.
1222  * @param src
1223  *   Create specification.
1224  * @param size
1225  *   Size in bytes of the specification to copy.
1226  */
1227 static void
1228 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1229                       unsigned int size)
1230 {
1231         unsigned int i;
1232         void *dst;
1233
1234         for (i = 0; i != hash_rxq_init_n; ++i) {
1235                 if (!parser->queue[i].ibv_attr)
1236                         continue;
1237                 /* Specification must be of the same L3 type or none. */
1238                 if (parser->layer == HASH_RXQ_ETH ||
1239                     (hash_rxq_init[parser->layer].ip_version ==
1240                      hash_rxq_init[i].ip_version) ||
1241                     (hash_rxq_init[i].ip_version == 0)) {
1242                         dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1243                                         parser->queue[i].offset);
1244                         memcpy(dst, src, size);
1245                         ++parser->queue[i].ibv_attr->num_of_specs;
1246                         parser->queue[i].offset += size;
1247                 }
1248         }
1249 }
1250
1251 /**
1252  * Convert Ethernet item to Verbs specification.
1253  *
1254  * @param item[in]
1255  *   Item specification.
1256  * @param default_mask[in]
1257  *   Default bit-masks to use when item->mask is not provided.
1258  * @param data[in, out]
1259  *   User structure.
1260  *
1261  * @return
1262  *   0 on success, a negative errno value otherwise and rte_errno is set.
1263  */
1264 static int
1265 mlx5_flow_create_eth(const struct rte_flow_item *item,
1266                      const void *default_mask,
1267                      struct mlx5_flow_data *data)
1268 {
1269         const struct rte_flow_item_eth *spec = item->spec;
1270         const struct rte_flow_item_eth *mask = item->mask;
1271         struct mlx5_flow_parse *parser = data->parser;
1272         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1273         struct ibv_flow_spec_eth eth = {
1274                 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1275                 .size = eth_size,
1276         };
1277
1278         /* Don't update layer for the inner pattern. */
1279         if (!parser->inner)
1280                 parser->layer = HASH_RXQ_ETH;
1281         if (spec) {
1282                 unsigned int i;
1283
1284                 if (!mask)
1285                         mask = default_mask;
1286                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1287                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1288                 eth.val.ether_type = spec->type;
1289                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1290                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1291                 eth.mask.ether_type = mask->type;
1292                 /* Remove unwanted bits from values. */
1293                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1294                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1295                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
1296                 }
1297                 eth.val.ether_type &= eth.mask.ether_type;
1298         }
1299         mlx5_flow_create_copy(parser, &eth, eth_size);
1300         return 0;
1301 }
1302
1303 /**
1304  * Convert VLAN item to Verbs specification.
1305  *
1306  * @param item[in]
1307  *   Item specification.
1308  * @param default_mask[in]
1309  *   Default bit-masks to use when item->mask is not provided.
1310  * @param data[in, out]
1311  *   User structure.
1312  *
1313  * @return
1314  *   0 on success, a negative errno value otherwise and rte_errno is set.
1315  */
1316 static int
1317 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1318                       const void *default_mask,
1319                       struct mlx5_flow_data *data)
1320 {
1321         const struct rte_flow_item_vlan *spec = item->spec;
1322         const struct rte_flow_item_vlan *mask = item->mask;
1323         struct mlx5_flow_parse *parser = data->parser;
1324         struct ibv_flow_spec_eth *eth;
1325         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1326
1327         if (spec) {
1328                 unsigned int i;
1329                 if (!mask)
1330                         mask = default_mask;
1331
1332                 for (i = 0; i != hash_rxq_init_n; ++i) {
1333                         if (!parser->queue[i].ibv_attr)
1334                                 continue;
1335
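                        /*
                         * There is no standalone Verbs spec for VLAN: the TCI
                         * is written into the Ethernet spec copied just before
                         * this item in each queue's attribute buffer.
                         */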
1336                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1337                                        parser->queue[i].offset - eth_size);
1338                         eth->val.vlan_tag = spec->tci;
1339                         eth->mask.vlan_tag = mask->tci;
1340                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1341                         /*
1342                          * From the Verbs perspective, an empty VLAN is
1343                          * equivalent to a packet without a VLAN layer.
1344                          */
1345                         if (!eth->mask.vlan_tag)
1346                                 goto error;
1347                 }
1348                 return 0;
1349         }
1350 error:
1351         return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1352                                   item, "VLAN cannot be empty");
1353 }
1354
1355 /**
1356  * Convert IPv4 item to Verbs specification.
1357  *
1358  * @param item[in]
1359  *   Item specification.
1360  * @param default_mask[in]
1361  *   Default bit-masks to use when item->mask is not provided.
1362  * @param data[in, out]
1363  *   User structure.
1364  *
1365  * @return
1366  *   0 on success, a negative errno value otherwise and rte_errno is set.
1367  */
1368 static int
1369 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1370                       const void *default_mask,
1371                       struct mlx5_flow_data *data)
1372 {
1373         const struct rte_flow_item_ipv4 *spec = item->spec;
1374         const struct rte_flow_item_ipv4 *mask = item->mask;
1375         struct mlx5_flow_parse *parser = data->parser;
1376         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1377         struct ibv_flow_spec_ipv4_ext ipv4 = {
1378                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1379                 .size = ipv4_size,
1380         };
1381
1382         /* Don't update layer for the inner pattern. */
1383         if (!parser->inner)
1384                 parser->layer = HASH_RXQ_IPV4;
1385         if (spec) {
1386                 if (!mask)
1387                         mask = default_mask;
1388                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1389                         .src_ip = spec->hdr.src_addr,
1390                         .dst_ip = spec->hdr.dst_addr,
1391                         .proto = spec->hdr.next_proto_id,
1392                         .tos = spec->hdr.type_of_service,
1393                 };
1394                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1395                         .src_ip = mask->hdr.src_addr,
1396                         .dst_ip = mask->hdr.dst_addr,
1397                         .proto = mask->hdr.next_proto_id,
1398                         .tos = mask->hdr.type_of_service,
1399                 };
1400                 /* Remove unwanted bits from values. */
1401                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1402                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1403                 ipv4.val.proto &= ipv4.mask.proto;
1404                 ipv4.val.tos &= ipv4.mask.tos;
1405         }
1406         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1407         return 0;
1408 }
1409
1410 /**
1411  * Convert IPv6 item to Verbs specification.
1412  *
1413  * @param item[in]
1414  *   Item specification.
1415  * @param default_mask[in]
1416  *   Default bit-masks to use when item->mask is not provided.
1417  * @param data[in, out]
1418  *   User structure.
1419  *
1420  * @return
1421  *   0 on success, a negative errno value otherwise and rte_errno is set.
1422  */
1423 static int
1424 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1425                       const void *default_mask,
1426                       struct mlx5_flow_data *data)
1427 {
1428         const struct rte_flow_item_ipv6 *spec = item->spec;
1429         const struct rte_flow_item_ipv6 *mask = item->mask;
1430         struct mlx5_flow_parse *parser = data->parser;
1431         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1432         struct ibv_flow_spec_ipv6 ipv6 = {
1433                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1434                 .size = ipv6_size,
1435         };
1436
1437         /* Don't update layer for the inner pattern. */
1438         if (!parser->inner)
1439                 parser->layer = HASH_RXQ_IPV6;
1440         if (spec) {
1441                 unsigned int i;
1442                 uint32_t vtc_flow_val;
1443                 uint32_t vtc_flow_mask;
1444
1445                 if (!mask)
1446                         mask = default_mask;
1447                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1448                        RTE_DIM(ipv6.val.src_ip));
1449                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1450                        RTE_DIM(ipv6.val.dst_ip));
1451                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1452                        RTE_DIM(ipv6.mask.src_ip));
1453                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1454                        RTE_DIM(ipv6.mask.dst_ip));
1455                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1456                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1457                 ipv6.val.flow_label =
1458                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1459                                          IPV6_HDR_FL_SHIFT);
1460                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1461                                          IPV6_HDR_TC_SHIFT;
1462                 ipv6.val.next_hdr = spec->hdr.proto;
1463                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1464                 ipv6.mask.flow_label =
1465                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1466                                          IPV6_HDR_FL_SHIFT);
1467                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1468                                           IPV6_HDR_TC_SHIFT;
1469                 ipv6.mask.next_hdr = mask->hdr.proto;
1470                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1471                 /* Remove unwanted bits from values. */
1472                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1473                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1474                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1475                 }
1476                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1477                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1478                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1479                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1480         }
1481         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1482         return 0;
1483 }
1484
1485 /**
1486  * Convert UDP item to Verbs specification.
1487  *
1488  * @param item[in]
1489  *   Item specification.
1490  * @param default_mask[in]
1491  *   Default bit-masks to use when item->mask is not provided.
1492  * @param data[in, out]
1493  *   User structure.
1494  *
1495  * @return
1496  *   0 on success, a negative errno value otherwise and rte_errno is set.
1497  */
1498 static int
1499 mlx5_flow_create_udp(const struct rte_flow_item *item,
1500                      const void *default_mask,
1501                      struct mlx5_flow_data *data)
1502 {
1503         const struct rte_flow_item_udp *spec = item->spec;
1504         const struct rte_flow_item_udp *mask = item->mask;
1505         struct mlx5_flow_parse *parser = data->parser;
1506         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1507         struct ibv_flow_spec_tcp_udp udp = {
1508                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1509                 .size = udp_size,
1510         };
1511
1512         /* Don't update layer for the inner pattern. */
1513         if (!parser->inner) {
1514                 if (parser->layer == HASH_RXQ_IPV4)
1515                         parser->layer = HASH_RXQ_UDPV4;
1516                 else
1517                         parser->layer = HASH_RXQ_UDPV6;
1518         }
1519         if (spec) {
1520                 if (!mask)
1521                         mask = default_mask;
1522                 udp.val.dst_port = spec->hdr.dst_port;
1523                 udp.val.src_port = spec->hdr.src_port;
1524                 udp.mask.dst_port = mask->hdr.dst_port;
1525                 udp.mask.src_port = mask->hdr.src_port;
1526                 /* Remove unwanted bits from values. */
1527                 udp.val.src_port &= udp.mask.src_port;
1528                 udp.val.dst_port &= udp.mask.dst_port;
1529         }
1530         mlx5_flow_create_copy(parser, &udp, udp_size);
1531         return 0;
1532 }
1533
1534 /**
1535  * Convert TCP item to Verbs specification.
1536  *
1537  * @param item[in]
1538  *   Item specification.
1539  * @param default_mask[in]
1540  *   Default bit-masks to use when item->mask is not provided.
1541  * @param data[in, out]
1542  *   User structure.
1543  *
1544  * @return
1545  *   0 on success, a negative errno value otherwise and rte_errno is set.
1546  */
1547 static int
1548 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1549                      const void *default_mask,
1550                      struct mlx5_flow_data *data)
1551 {
1552         const struct rte_flow_item_tcp *spec = item->spec;
1553         const struct rte_flow_item_tcp *mask = item->mask;
1554         struct mlx5_flow_parse *parser = data->parser;
1555         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1556         struct ibv_flow_spec_tcp_udp tcp = {
1557                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1558                 .size = tcp_size,
1559         };
1560
1561         /* Don't update layer for the inner pattern. */
1562         if (!parser->inner) {
1563                 if (parser->layer == HASH_RXQ_IPV4)
1564                         parser->layer = HASH_RXQ_TCPV4;
1565                 else
1566                         parser->layer = HASH_RXQ_TCPV6;
1567         }
1568         if (spec) {
1569                 if (!mask)
1570                         mask = default_mask;
1571                 tcp.val.dst_port = spec->hdr.dst_port;
1572                 tcp.val.src_port = spec->hdr.src_port;
1573                 tcp.mask.dst_port = mask->hdr.dst_port;
1574                 tcp.mask.src_port = mask->hdr.src_port;
1575                 /* Remove unwanted bits from values. */
1576                 tcp.val.src_port &= tcp.mask.src_port;
1577                 tcp.val.dst_port &= tcp.mask.dst_port;
1578         }
1579         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1580         return 0;
1581 }
1582
1583 /**
1584  * Convert VXLAN item to Verbs specification.
1585  *
1586  * @param item[in]
1587  *   Item specification.
1588  * @param default_mask[in]
1589  *   Default bit-masks to use when item->mask is not provided.
1590  * @param data[in, out]
1591  *   User structure.
1592  *
1593  * @return
1594  *   0 on success, a negative errno value otherwise and rte_errno is set.
1595  */
1596 static int
1597 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1598                        const void *default_mask,
1599                        struct mlx5_flow_data *data)
1600 {
1601         const struct rte_flow_item_vxlan *spec = item->spec;
1602         const struct rte_flow_item_vxlan *mask = item->mask;
1603         struct mlx5_flow_parse *parser = data->parser;
1604         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1605         struct ibv_flow_spec_tunnel vxlan = {
1606                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1607                 .size = size,
1608         };
1609         union vni {
1610                 uint32_t vlan_id;
1611                 uint8_t vni[4];
1612         } id;
1613
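        /*
         * Build the 32-bit tunnel id expected by Verbs from the 24-bit VNI:
         * the extra byte of the union is zeroed and the three VNI bytes of
         * the spec and mask are copied after it.
         */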
1614         id.vni[0] = 0;
1615         parser->inner = IBV_FLOW_SPEC_INNER;
1616         if (spec) {
1617                 if (!mask)
1618                         mask = default_mask;
1619                 memcpy(&id.vni[1], spec->vni, 3);
1620                 vxlan.val.tunnel_id = id.vlan_id;
1621                 memcpy(&id.vni[1], mask->vni, 3);
1622                 vxlan.mask.tunnel_id = id.vlan_id;
1623                 /* Remove unwanted bits from values. */
1624                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1625         }
1626         /*
1627          * A tunnel id of 0 is equivalent to not adding a VXLAN layer: if only
1628          * this layer is defined in the Verbs specification, it is interpreted
1629          * as a wildcard and all packets will match the rule; if it follows a
1630          * full stack layer (e.g. eth / ipv4 / udp), all packets matching the
1631          * preceding layers will also match the rule.
1632          * To avoid such a situation, VNI 0 is currently refused.
1633          */
1634         if (!vxlan.val.tunnel_id)
1635                 return rte_flow_error_set(data->error, EINVAL,
1636                                           RTE_FLOW_ERROR_TYPE_ITEM,
1637                                           item,
1638                                           "VxLAN vni cannot be 0");
1639         mlx5_flow_create_copy(parser, &vxlan, size);
1640         return 0;
1641 }
1642
1643 /**
1644  * Convert mark/flag action to Verbs specification.
1645  *
1646  * @param parser
1647  *   Internal parser structure.
1648  * @param mark_id
1649  *   Mark identifier.
1650  *
1651  * @return
1652  *   0 on success, a negative errno value otherwise and rte_errno is set.
1653  */
1654 static int
1655 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1656 {
1657         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1658         struct ibv_flow_spec_action_tag tag = {
1659                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1660                 .size = size,
1661                 .tag_id = mlx5_flow_mark_set(mark_id),
1662         };
1663
1664         assert(parser->mark);
1665         mlx5_flow_create_copy(parser, &tag, size);
1666         return 0;
1667 }
1668
1669 /**
1670  * Convert count action to Verbs specification.
1671  *
1672  * @param dev
1673  *   Pointer to Ethernet device.
1674  * @param parser
1675  *   Pointer to MLX5 flow parser structure.
1676  *
1677  * @return
1678  *   0 on success, a negative errno value otherwise and rte_errno is set.
1679  */
1680 static int
1681 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1682                        struct mlx5_flow_parse *parser __rte_unused)
1683 {
1684 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1685         struct priv *priv = dev->data->dev_private;
1686         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1687         struct ibv_counter_set_init_attr init_attr = {0};
1688         struct ibv_flow_spec_counter_action counter = {
1689                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1690                 .size = size,
1691                 .counter_set_handle = 0,
1692         };
1693
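        /*
         * A dedicated Verbs counter set is created for this flow; its handle
         * is embedded in the counter specification and read back later by
         * mlx5_flow_query().
         */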
1694         init_attr.counter_set_id = 0;
1695         parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
1696         if (!parser->cs) {
1697                 rte_errno = EINVAL;
1698                 return -rte_errno;
1699         }
1700         counter.counter_set_handle = parser->cs->handle;
1701         mlx5_flow_create_copy(parser, &counter, size);
1702 #endif
1703         return 0;
1704 }
1705
1706 /**
1707  * Complete flow rule creation with a drop queue.
1708  *
1709  * @param dev
1710  *   Pointer to Ethernet device.
1711  * @param parser
1712  *   Internal parser structure.
1713  * @param flow
1714  *   Pointer to the rte_flow.
1715  * @param[out] error
1716  *   Perform verbose error reporting if not NULL.
1717  *
1718  * @return
1719  *   0 on success, a negative errno value otherwise and rte_errno is set.
1720  */
1721 static int
1722 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1723                                    struct mlx5_flow_parse *parser,
1724                                    struct rte_flow *flow,
1725                                    struct rte_flow_error *error)
1726 {
1727         struct priv *priv = dev->data->dev_private;
1728         struct ibv_flow_spec_action_drop *drop;
1729         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1730
1731         assert(priv->pd);
1732         assert(priv->ctx);
1733         flow->drop = 1;
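        /*
         * Drop flows need a single rule only: the drop specification is
         * appended to the HASH_RXQ_ETH attribute, which carries the whole
         * rule for such flows.
         */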
1734         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1735                         parser->queue[HASH_RXQ_ETH].offset);
1736         *drop = (struct ibv_flow_spec_action_drop){
1737                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1738                         .size = size,
1739         };
1740         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1741         parser->queue[HASH_RXQ_ETH].offset += size;
1742         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1743                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1744         if (parser->count)
1745                 flow->cs = parser->cs;
1746         if (!dev->data->dev_started)
1747                 return 0;
1748         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1749         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1750                 ibv_create_flow(priv->flow_drop_queue->qp,
1751                                 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1752         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1753                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1754                                    NULL, "flow rule creation failure");
1755                 goto error;
1756         }
1757         return 0;
1758 error:
1759         assert(flow);
1760         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1761                 claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
1762                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1763         }
1764         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1765                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1766                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1767         }
1768         if (flow->cs) {
1769                 claim_zero(ibv_destroy_counter_set(flow->cs));
1770                 flow->cs = NULL;
1771                 parser->cs = NULL;
1772         }
1773         return -rte_errno;
1774 }
1775
1776 /**
1777  * Create hash Rx queues when RSS is enabled.
1778  *
1779  * @param dev
1780  *   Pointer to Ethernet device.
1781  * @param parser
1782  *   Internal parser structure.
1783  * @param flow
1784  *   Pointer to the rte_flow.
1785  * @param[out] error
1786  *   Perform verbose error reporting if not NULL.
1787  *
1788  * @return
1789  *   0 on success, a negative errno value otherwise and rte_errno is set.
1790  */
1791 static int
1792 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1793                                   struct mlx5_flow_parse *parser,
1794                                   struct rte_flow *flow,
1795                                   struct rte_flow_error *error)
1796 {
1797         unsigned int i;
1798
1799         for (i = 0; i != hash_rxq_init_n; ++i) {
1800                 uint64_t hash_fields;
1801
1802                 if (!parser->queue[i].ibv_attr)
1803                         continue;
1804                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1805                 parser->queue[i].ibv_attr = NULL;
1806                 hash_fields = hash_rxq_init[i].hash_fields;
1807                 if (!dev->data->dev_started)
1808                         continue;
1809                 flow->frxq[i].hrxq =
1810                         mlx5_hrxq_get(dev,
1811                                       parser->rss_conf.rss_key,
1812                                       parser->rss_conf.rss_key_len,
1813                                       hash_fields,
1814                                       parser->queues,
1815                                       parser->queues_n);
1816                 if (flow->frxq[i].hrxq)
1817                         continue;
1818                 flow->frxq[i].hrxq =
1819                         mlx5_hrxq_new(dev,
1820                                       parser->rss_conf.rss_key,
1821                                       parser->rss_conf.rss_key_len,
1822                                       hash_fields,
1823                                       parser->queues,
1824                                       parser->queues_n);
1825                 if (!flow->frxq[i].hrxq) {
1826                         return rte_flow_error_set(error, ENOMEM,
1827                                                   RTE_FLOW_ERROR_TYPE_HANDLE,
1828                                                   NULL,
1829                                                   "cannot create hash rxq");
1830                 }
1831         }
1832         return 0;
1833 }
1834
1835 /**
1836  * Complete flow rule creation.
1837  *
1838  * @param dev
1839  *   Pointer to Ethernet device.
1840  * @param parser
1841  *   Internal parser structure.
1842  * @param flow
1843  *   Pointer to the rte_flow.
1844  * @param[out] error
1845  *   Perform verbose error reporting if not NULL.
1846  *
1847  * @return
1848  *   0 on success, a negative errno value otherwise and rte_errno is set.
1849  */
1850 static int
1851 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1852                               struct mlx5_flow_parse *parser,
1853                               struct rte_flow *flow,
1854                               struct rte_flow_error *error)
1855 {
1856         struct priv *priv __rte_unused = dev->data->dev_private;
1857         int ret;
1858         unsigned int i;
1859         unsigned int flows_n = 0;
1860
1861         assert(priv->pd);
1862         assert(priv->ctx);
1863         assert(!parser->drop);
1864         ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1865         if (ret)
1866                 goto error;
1867         if (parser->count)
1868                 flow->cs = parser->cs;
1869         if (!dev->data->dev_started)
1870                 return 0;
1871         for (i = 0; i != hash_rxq_init_n; ++i) {
1872                 if (!flow->frxq[i].hrxq)
1873                         continue;
1874                 flow->frxq[i].ibv_flow =
1875                         ibv_create_flow(flow->frxq[i].hrxq->qp,
1876                                         flow->frxq[i].ibv_attr);
1877                 if (!flow->frxq[i].ibv_flow) {
1878                         rte_flow_error_set(error, ENOMEM,
1879                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1880                                            NULL, "flow rule creation failure");
1881                         goto error;
1882                 }
1883                 ++flows_n;
1884                 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1885                         dev->data->port_id,
1886                         (void *)flow, i,
1887                         (void *)flow->frxq[i].hrxq,
1888                         (void *)flow->frxq[i].ibv_flow);
1889         }
1890         if (!flows_n) {
1891                 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1892                                    NULL, "internal error in flow creation");
1893                 goto error;
1894         }
1895         for (i = 0; i != parser->queues_n; ++i) {
1896                 struct mlx5_rxq_data *q =
1897                         (*priv->rxqs)[parser->queues[i]];
1898
1899                 q->mark |= parser->mark;
1900         }
1901         return 0;
1902 error:
1903         ret = rte_errno; /* Save rte_errno before cleanup. */
1904         assert(flow);
1905         for (i = 0; i != hash_rxq_init_n; ++i) {
1906                 if (flow->frxq[i].ibv_flow) {
1907                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1908
1909                         claim_zero(ibv_destroy_flow(ibv_flow));
1910                 }
1911                 if (flow->frxq[i].hrxq)
1912                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1913                 if (flow->frxq[i].ibv_attr)
1914                         rte_free(flow->frxq[i].ibv_attr);
1915         }
1916         if (flow->cs) {
1917                 claim_zero(ibv_destroy_counter_set(flow->cs));
1918                 flow->cs = NULL;
1919                 parser->cs = NULL;
1920         }
1921         rte_errno = ret; /* Restore rte_errno. */
1922         return -rte_errno;
1923 }
1924
1925 /**
1926  * Convert a flow.
1927  *
1928  * @param dev
1929  *   Pointer to Ethernet device.
1930  * @param list
1931  *   Pointer to a TAILQ flow list.
1932  * @param[in] attr
1933  *   Flow rule attributes.
1934  * @param[in] pattern
1935  *   Pattern specification (list terminated by the END pattern item).
1936  * @param[in] actions
1937  *   Associated actions (list terminated by the END action).
1938  * @param[out] error
1939  *   Perform verbose error reporting if not NULL.
1940  *
1941  * @return
1942  *   A flow on success, NULL otherwise and rte_errno is set.
1943  */
1944 static struct rte_flow *
1945 mlx5_flow_list_create(struct rte_eth_dev *dev,
1946                       struct mlx5_flows *list,
1947                       const struct rte_flow_attr *attr,
1948                       const struct rte_flow_item items[],
1949                       const struct rte_flow_action actions[],
1950                       struct rte_flow_error *error)
1951 {
1952         struct mlx5_flow_parse parser = { .create = 1, };
1953         struct rte_flow *flow = NULL;
1954         unsigned int i;
1955         int ret;
1956
1957         ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1958         if (ret)
1959                 goto exit;
1960         flow = rte_calloc(__func__, 1,
1961                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1962                           0);
1963         if (!flow) {
1964                 rte_flow_error_set(error, ENOMEM,
1965                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1966                                    NULL,
1967                                    "cannot allocate flow memory");
1968                 return NULL;
1969         }
1970         /* Copy queues configuration. */
1971         flow->queues = (uint16_t (*)[])(flow + 1);
1972         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1973         flow->queues_n = parser.queues_n;
1974         flow->mark = parser.mark;
1975         /* Copy RSS configuration. */
1976         flow->rss_conf = parser.rss_conf;
1977         flow->rss_conf.rss_key = flow->rss_key;
1978         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1979         /* Finalise the flow. */
1980         if (parser.drop)
1981                 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1982                                                          error);
1983         else
1984                 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1985         if (ret)
1986                 goto exit;
1987         TAILQ_INSERT_TAIL(list, flow, next);
1988         DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1989                 (void *)flow);
1990         return flow;
1991 exit:
1992         for (i = 0; i != hash_rxq_init_n; ++i) {
1993                 if (parser.queue[i].ibv_attr)
1994                         rte_free(parser.queue[i].ibv_attr);
1995         }
1996         rte_free(flow);
1997         return NULL;
1998 }
1999
2000 /**
2001  * Validate a flow supported by the NIC.
2002  *
2003  * @see rte_flow_validate()
2004  * @see rte_flow_ops
2005  */
2006 int
2007 mlx5_flow_validate(struct rte_eth_dev *dev,
2008                    const struct rte_flow_attr *attr,
2009                    const struct rte_flow_item items[],
2010                    const struct rte_flow_action actions[],
2011                    struct rte_flow_error *error)
2012 {
2013         struct mlx5_flow_parse parser = { .create = 0, };
2014
2015         return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2016 }
2017
2018 /**
2019  * Create a flow.
2020  *
2021  * @see rte_flow_create()
2022  * @see rte_flow_ops
2023  */
2024 struct rte_flow *
2025 mlx5_flow_create(struct rte_eth_dev *dev,
2026                  const struct rte_flow_attr *attr,
2027                  const struct rte_flow_item items[],
2028                  const struct rte_flow_action actions[],
2029                  struct rte_flow_error *error)
2030 {
2031         struct priv *priv = dev->data->dev_private;
2032
2033         return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2034                                      error);
2035 }
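
/*
 * Illustrative application-side usage of the rte_flow API implemented above.
 * This is only a sketch, not part of the driver: port_id and the Rx queue
 * index are assumptions.
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item_ipv4 ip_spec = {
 *           .hdr = { .dst_addr = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)) },
 *   };
 *   struct rte_flow_item_ipv4 ip_mask = {
 *           .hdr = { .dst_addr = rte_cpu_to_be_32(UINT32_MAX) },
 *   };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4,
 *             .spec = &ip_spec, .mask = &ip_mask },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 1 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error error;
 *   struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *                                           actions, &error);
 *
 *   if (!flow)
 *           printf("flow creation failed: %s\n", error.message);
 */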
2036
2037 /**
2038  * Destroy a flow in a list.
2039  *
2040  * @param dev
2041  *   Pointer to Ethernet device.
2042  * @param list
2043  *   Pointer to a TAILQ flow list.
2044  * @param[in] flow
2045  *   Flow to destroy.
2046  */
2047 static void
2048 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2049                        struct rte_flow *flow)
2050 {
2051         struct priv *priv = dev->data->dev_private;
2052         unsigned int i;
2053
2054         if (flow->drop || !flow->mark)
2055                 goto free;
2056         for (i = 0; i != flow->queues_n; ++i) {
2057                 struct rte_flow *tmp;
2058                 int mark = 0;
2059
2060                 /*
2061                  * To remove the mark from the queue, the queue must not be
2062                  * present in any other marked flow (RSS or not).
2063                  */
2064                 TAILQ_FOREACH(tmp, list, next) {
2065                         unsigned int j;
2066                         uint16_t *tqs = NULL;
2067                         uint16_t tq_n = 0;
2068
2069                         if (!tmp->mark)
2070                                 continue;
2071                         for (j = 0; j != hash_rxq_init_n; ++j) {
2072                                 if (!tmp->frxq[j].hrxq)
2073                                         continue;
2074                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2075                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2076                         }
2077                         if (!tq_n)
2078                                 continue;
2079                         for (j = 0; (j != tq_n) && !mark; j++)
2080                                 if (tqs[j] == (*flow->queues)[i])
2081                                         mark = 1;
2082                 }
2083                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2084         }
2085 free:
2086         if (flow->drop) {
2087                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2088                         claim_zero(ibv_destroy_flow
2089                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2090                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2091         } else {
2092                 for (i = 0; i != hash_rxq_init_n; ++i) {
2093                         struct mlx5_flow *frxq = &flow->frxq[i];
2094
2095                         if (frxq->ibv_flow)
2096                                 claim_zero(ibv_destroy_flow(frxq->ibv_flow));
2097                         if (frxq->hrxq)
2098                                 mlx5_hrxq_release(dev, frxq->hrxq);
2099                         if (frxq->ibv_attr)
2100                                 rte_free(frxq->ibv_attr);
2101                 }
2102         }
2103         if (flow->cs) {
2104                 claim_zero(ibv_destroy_counter_set(flow->cs));
2105                 flow->cs = NULL;
2106         }
2107         TAILQ_REMOVE(list, flow, next);
2108         DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2109                 (void *)flow);
2110         rte_free(flow);
2111 }
2112
2113 /**
2114  * Destroy all flows.
2115  *
2116  * @param dev
2117  *   Pointer to Ethernet device.
2118  * @param list
2119  *   Pointer to a TAILQ flow list.
2120  */
2121 void
2122 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2123 {
2124         while (!TAILQ_EMPTY(list)) {
2125                 struct rte_flow *flow;
2126
2127                 flow = TAILQ_FIRST(list);
2128                 mlx5_flow_list_destroy(dev, list, flow);
2129         }
2130 }
2131
2132 /**
2133  * Create drop queue.
2134  *
2135  * @param dev
2136  *   Pointer to Ethernet device.
2137  *
2138  * @return
2139  *   0 on success, a negative errno value otherwise and rte_errno is set.
2140  */
2141 int
2142 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2143 {
2144         struct priv *priv = dev->data->dev_private;
2145         struct mlx5_hrxq_drop *fdq = NULL;
2146
2147         assert(priv->pd);
2148         assert(priv->ctx);
2149         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2150         if (!fdq) {
2151                 DRV_LOG(WARNING,
2152                         "port %u cannot allocate memory for drop queue",
2153                         dev->data->port_id);
2154                 rte_errno = ENOMEM;
2155                 return -rte_errno;
2156         }
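        /*
         * The drop queue is a regular Verbs receive path (CQ, WQ,
         * indirection table and RSS QP) on which no buffer is ever posted:
         * packets steered to its QP by a flow rule are simply discarded.
         */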
2157         fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
2158         if (!fdq->cq) {
2159                 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2160                         dev->data->port_id);
2161                 rte_errno = errno;
2162                 goto error;
2163         }
2164         fdq->wq = ibv_create_wq(priv->ctx,
2165                         &(struct ibv_wq_init_attr){
2166                         .wq_type = IBV_WQT_RQ,
2167                         .max_wr = 1,
2168                         .max_sge = 1,
2169                         .pd = priv->pd,
2170                         .cq = fdq->cq,
2171                         });
2172         if (!fdq->wq) {
2173                 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2174                         dev->data->port_id);
2175                 rte_errno = errno;
2176                 goto error;
2177         }
2178         fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
2179                         &(struct ibv_rwq_ind_table_init_attr){
2180                         .log_ind_tbl_size = 0,
2181                         .ind_tbl = &fdq->wq,
2182                         .comp_mask = 0,
2183                         });
2184         if (!fdq->ind_table) {
2185                 DRV_LOG(WARNING,
2186                         "port %u cannot allocate indirection table for drop"
2187                         " queue",
2188                         dev->data->port_id);
2189                 rte_errno = errno;
2190                 goto error;
2191         }
2192         fdq->qp = ibv_create_qp_ex(priv->ctx,
2193                 &(struct ibv_qp_init_attr_ex){
2194                         .qp_type = IBV_QPT_RAW_PACKET,
2195                         .comp_mask =
2196                                 IBV_QP_INIT_ATTR_PD |
2197                                 IBV_QP_INIT_ATTR_IND_TABLE |
2198                                 IBV_QP_INIT_ATTR_RX_HASH,
2199                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2200                                 .rx_hash_function =
2201                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2202                                 .rx_hash_key_len = rss_hash_default_key_len,
2203                                 .rx_hash_key = rss_hash_default_key,
2204                                 .rx_hash_fields_mask = 0,
2205                                 },
2206                         .rwq_ind_tbl = fdq->ind_table,
2207                         .pd = priv->pd
2208                 });
2209         if (!fdq->qp) {
2210                 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2211                         dev->data->port_id);
2212                 rte_errno = errno;
2213                 goto error;
2214         }
2215         priv->flow_drop_queue = fdq;
2216         return 0;
2217 error:
2218         if (fdq->qp)
2219                 claim_zero(ibv_destroy_qp(fdq->qp));
2220         if (fdq->ind_table)
2221                 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2222         if (fdq->wq)
2223                 claim_zero(ibv_destroy_wq(fdq->wq));
2224         if (fdq->cq)
2225                 claim_zero(ibv_destroy_cq(fdq->cq));
2226         if (fdq)
2227                 rte_free(fdq);
2228         priv->flow_drop_queue = NULL;
2229         return -rte_errno;
2230 }
2231
2232 /**
2233  * Delete drop queue.
2234  *
2235  * @param dev
2236  *   Pointer to Ethernet device.
2237  */
2238 void
2239 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2240 {
2241         struct priv *priv = dev->data->dev_private;
2242         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2243
2244         if (!fdq)
2245                 return;
2246         if (fdq->qp)
2247                 claim_zero(ibv_destroy_qp(fdq->qp));
2248         if (fdq->ind_table)
2249                 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2250         if (fdq->wq)
2251                 claim_zero(ibv_destroy_wq(fdq->wq));
2252         if (fdq->cq)
2253                 claim_zero(ibv_destroy_cq(fdq->cq));
2254         rte_free(fdq);
2255         priv->flow_drop_queue = NULL;
2256 }
2257
2258 /**
2259  * Remove all flows.
2260  *
2261  * @param dev
2262  *   Pointer to Ethernet device.
2263  * @param list
2264  *   Pointer to a TAILQ flow list.
2265  */
2266 void
2267 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2268 {
2269         struct priv *priv = dev->data->dev_private;
2270         struct rte_flow *flow;
2271
2272         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2273                 unsigned int i;
2274                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2275
2276                 if (flow->drop) {
2277                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2278                                 continue;
2279                         claim_zero(ibv_destroy_flow
2280                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2281                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2282                         DRV_LOG(DEBUG, "port %u flow %p removed",
2283                                 dev->data->port_id, (void *)flow);
2284                         /* Next flow. */
2285                         continue;
2286                 }
2287                 /* Verify the flow has not already been cleaned. */
2288                 for (i = 0; i != hash_rxq_init_n; ++i) {
2289                         if (!flow->frxq[i].ibv_flow)
2290                                 continue;
2291                         /*
2292                          * The indirection table may be needed to clear the
2293                          * mark flag in the Rx queues.
2294                          * Grabbing it here avoids another loop over the hash
2295                          * Rx queue types later.
2296                          */
2297                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2298                         break;
2299                 }
2300                 if (i == hash_rxq_init_n)
2301                         return;
2302                 if (flow->mark) {
2303                         assert(ind_tbl);
2304                         for (i = 0; i != ind_tbl->queues_n; ++i)
2305                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2306                 }
2307                 for (i = 0; i != hash_rxq_init_n; ++i) {
2308                         if (!flow->frxq[i].ibv_flow)
2309                                 continue;
2310                         claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
2311                         flow->frxq[i].ibv_flow = NULL;
2312                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2313                         flow->frxq[i].hrxq = NULL;
2314                 }
2315                 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2316                         (void *)flow);
2317         }
2318 }
2319
2320 /**
2321  * Add all flows.
2322  *
2323  * @param dev
2324  *   Pointer to Ethernet device.
2325  * @param list
2326  *   Pointer to a TAILQ flow list.
2327  *
2328  * @return
2329  *   0 on success, a negative errno value otherwise and rte_errno is set.
2330  */
2331 int
2332 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2333 {
2334         struct priv *priv = dev->data->dev_private;
2335         struct rte_flow *flow;
2336
2337         TAILQ_FOREACH(flow, list, next) {
2338                 unsigned int i;
2339
2340                 if (flow->drop) {
2341                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2342                                 ibv_create_flow
2343                                 (priv->flow_drop_queue->qp,
2344                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2345                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2346                                 DRV_LOG(DEBUG,
2347                                         "port %u flow %p cannot be applied",
2348                                         dev->data->port_id, (void *)flow);
2349                                 rte_errno = EINVAL;
2350                                 return -rte_errno;
2351                         }
2352                         DRV_LOG(DEBUG, "port %u flow %p applied",
2353                                 dev->data->port_id, (void *)flow);
2354                         /* Next flow. */
2355                         continue;
2356                 }
2357                 for (i = 0; i != hash_rxq_init_n; ++i) {
2358                         if (!flow->frxq[i].ibv_attr)
2359                                 continue;
2360                         flow->frxq[i].hrxq =
2361                                 mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
2362                                               flow->rss_conf.rss_key_len,
2363                                               hash_rxq_init[i].hash_fields,
2364                                               (*flow->queues),
2365                                               flow->queues_n);
2366                         if (flow->frxq[i].hrxq)
2367                                 goto flow_create;
2368                         flow->frxq[i].hrxq =
2369                                 mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
2370                                               flow->rss_conf.rss_key_len,
2371                                               hash_rxq_init[i].hash_fields,
2372                                               (*flow->queues),
2373                                               flow->queues_n);
2374                         if (!flow->frxq[i].hrxq) {
2375                                 DRV_LOG(DEBUG,
2376                                         "port %u flow %p cannot be applied",
2377                                         dev->data->port_id, (void *)flow);
2378                                 rte_errno = EINVAL;
2379                                 return -rte_errno;
2380                         }
2381 flow_create:
2382                         flow->frxq[i].ibv_flow =
2383                                 ibv_create_flow(flow->frxq[i].hrxq->qp,
2384                                                 flow->frxq[i].ibv_attr);
2385                         if (!flow->frxq[i].ibv_flow) {
2386                                 DRV_LOG(DEBUG,
2387                                         "port %u flow %p cannot be applied",
2388                                         dev->data->port_id, (void *)flow);
2389                                 rte_errno = EINVAL;
2390                                 return -rte_errno;
2391                         }
2392                         DRV_LOG(DEBUG, "port %u flow %p applied",
2393                                 dev->data->port_id, (void *)flow);
2394                 }
2395                 if (!flow->mark)
2396                         continue;
2397                 for (i = 0; i != flow->queues_n; ++i)
2398                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2399         }
2400         return 0;
2401 }
2402
2403 /**
2404  * Verify the flow list is empty.
2405  *
2406  * @param dev
2407  *   Pointer to Ethernet device.
2408  *
2409  * @return The number of flows not released.
2410  */
2411 int
2412 mlx5_flow_verify(struct rte_eth_dev *dev)
2413 {
2414         struct priv *priv = dev->data->dev_private;
2415         struct rte_flow *flow;
2416         int ret = 0;
2417
2418         TAILQ_FOREACH(flow, &priv->flows, next) {
2419                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2420                         dev->data->port_id, (void *)flow);
2421                 ++ret;
2422         }
2423         return ret;
2424 }
2425
2426 /**
2427  * Enable a control flow configured from the control plane.
2428  *
2429  * @param dev
2430  *   Pointer to Ethernet device.
2431  * @param eth_spec
2432  *   An Ethernet flow spec to apply.
2433  * @param eth_mask
2434  *   An Ethernet flow mask to apply.
2435  * @param vlan_spec
2436  *   A VLAN flow spec to apply.
2437  * @param vlan_mask
2438  *   A VLAN flow mask to apply.
2439  *
2440  * @return
2441  *   0 on success, a negative errno value otherwise and rte_errno is set.
2442  */
2443 int
2444 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2445                     struct rte_flow_item_eth *eth_spec,
2446                     struct rte_flow_item_eth *eth_mask,
2447                     struct rte_flow_item_vlan *vlan_spec,
2448                     struct rte_flow_item_vlan *vlan_mask)
2449 {
2450         struct priv *priv = dev->data->dev_private;
2451         const struct rte_flow_attr attr = {
2452                 .ingress = 1,
2453                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2454         };
2455         struct rte_flow_item items[] = {
2456                 {
2457                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2458                         .spec = eth_spec,
2459                         .last = NULL,
2460                         .mask = eth_mask,
2461                 },
2462                 {
2463                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2464                                 RTE_FLOW_ITEM_TYPE_END,
2465                         .spec = vlan_spec,
2466                         .last = NULL,
2467                         .mask = vlan_mask,
2468                 },
2469                 {
2470                         .type = RTE_FLOW_ITEM_TYPE_END,
2471                 },
2472         };
2473         struct rte_flow_action actions[] = {
2474                 {
2475                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2476                 },
2477                 {
2478                         .type = RTE_FLOW_ACTION_TYPE_END,
2479                 },
2480         };
2481         struct rte_flow *flow;
2482         struct rte_flow_error error;
2483         unsigned int i;
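        /*
         * struct rte_flow_action_rss ends in a flexible queue[] array; the
         * "local" view of this union mirrors its layout while reserving room
         * for RTE_MAX_QUEUES_PER_PORT entries on the stack, so the queues can
         * be filled here and handed to the RSS action through the "rss" view.
         */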
2484         union {
2485                 struct rte_flow_action_rss rss;
2486                 struct {
2487                         const struct rte_eth_rss_conf *rss_conf;
2488                         uint16_t num;
2489                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2490                 } local;
2491         } action_rss;
2492
2493         if (!priv->reta_idx_n) {
2494                 rte_errno = EINVAL;
2495                 return -rte_errno;
2496         }
2497         for (i = 0; i != priv->reta_idx_n; ++i)
2498                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2499         action_rss.local.rss_conf = &priv->rss_conf;
2500         action_rss.local.num = priv->reta_idx_n;
2501         actions[0].conf = (const void *)&action_rss.rss;
2502         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2503                                      actions, &error);
2504         if (!flow)
2505                 return -rte_errno;
2506         return 0;
2507 }
2508
2509 /**
2510  * Enable a control flow configured from the control plane.
2511  *
2512  * @param dev
2513  *   Pointer to Ethernet device.
2514  * @param eth_spec
2515  *   An Ethernet flow spec to apply.
2516  * @param eth_mask
2517  *   An Ethernet flow mask to apply.
2518  *
2519  * @return
2520  *   0 on success, a negative errno value otherwise and rte_errno is set.
2521  */
2522 int
2523 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2524                struct rte_flow_item_eth *eth_spec,
2525                struct rte_flow_item_eth *eth_mask)
2526 {
2527         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2528 }
2529
2530 /**
2531  * Destroy a flow.
2532  *
2533  * @see rte_flow_destroy()
2534  * @see rte_flow_ops
2535  */
2536 int
2537 mlx5_flow_destroy(struct rte_eth_dev *dev,
2538                   struct rte_flow *flow,
2539                   struct rte_flow_error *error __rte_unused)
2540 {
2541         struct priv *priv = dev->data->dev_private;
2542
2543         mlx5_flow_list_destroy(dev, &priv->flows, flow);
2544         return 0;
2545 }
2546
2547 /**
2548  * Destroy all flows.
2549  *
2550  * @see rte_flow_flush()
2551  * @see rte_flow_ops
2552  */
2553 int
2554 mlx5_flow_flush(struct rte_eth_dev *dev,
2555                 struct rte_flow_error *error __rte_unused)
2556 {
2557         struct priv *priv = dev->data->dev_private;
2558
2559         mlx5_flow_list_flush(dev, &priv->flows);
2560         return 0;
2561 }
2562
2563 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2564 /**
2565  * Query flow counter.
2566  *
2567  * @param cs
2568  *   The counter set to query.
2569  * @param counter_stats
2570  *   Last read counter values, used to compute the returned deltas and
2570  *   updated when a reset is requested.
2571  *
2572  * @return
2573  *   0 on success, a negative errno value otherwise and rte_errno is set.
2574  */
2575 static int
2576 mlx5_flow_query_count(struct ibv_counter_set *cs,
2577                       struct mlx5_flow_counter_stats *counter_stats,
2578                       struct rte_flow_query_count *query_count,
2579                       struct rte_flow_error *error)
2580 {
2581         uint64_t counters[2];
2582         struct ibv_query_counter_set_attr query_cs_attr = {
2583                 .cs = cs,
2584                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2585         };
2586         struct ibv_counter_set_data query_out = {
2587                 .out = counters,
2588                 .outlen = 2 * sizeof(uint64_t),
2589         };
2590         int err = ibv_query_counter_set(&query_cs_attr, &query_out);
2591
2592         if (err)
2593                 return rte_flow_error_set(error, err,
2594                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2595                                           NULL,
2596                                           "cannot read counter");
2597         query_count->hits_set = 1;
2598         query_count->bytes_set = 1;
2599         query_count->hits = counters[0] - counter_stats->hits;
2600         query_count->bytes = counters[1] - counter_stats->bytes;
2601         if (query_count->reset) {
2602                 counter_stats->hits = counters[0];
2603                 counter_stats->bytes = counters[1];
2604         }
2605         return 0;
2606 }
2607
2608 /**
2609  * Query a flow.
2610  *
2611  * @see rte_flow_query()
2612  * @see rte_flow_ops
2613  */
2614 int
2615 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2616                 struct rte_flow *flow,
2617                 enum rte_flow_action_type action __rte_unused,
2618                 void *data,
2619                 struct rte_flow_error *error)
2620 {
2621         if (flow->cs) {
2622                 int ret;
2623
2624                 ret = mlx5_flow_query_count(flow->cs,
2625                                             &flow->counter_stats,
2626                                             (struct rte_flow_query_count *)data,
2627                                             error);
2628                 if (ret)
2629                         return ret;
2630         } else {
2631                 return rte_flow_error_set(error, EINVAL,
2632                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2633                                           NULL,
2634                                           "no counter found for flow");
2635         }
2636         return 0;
2637 }
2638 #endif
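
/*
 * Illustrative counter query from an application (a sketch: "flow" is assumed
 * to have been created with a COUNT action and port_id is not defined here):
 *
 *   struct rte_flow_query_count count = { .reset = 1 };
 *   struct rte_flow_error error;
 *
 *   if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *                       &count, &error))
 *           printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *                  count.hits, count.bytes);
 */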
2639
2640 /**
2641  * Isolated mode.
2642  *
2643  * @see rte_flow_isolate()
2644  * @see rte_flow_ops
2645  */
2646 int
2647 mlx5_flow_isolate(struct rte_eth_dev *dev,
2648                   int enable,
2649                   struct rte_flow_error *error)
2650 {
2651         struct priv *priv = dev->data->dev_private;
2652
2653         if (dev->data->dev_started) {
2654                 rte_flow_error_set(error, EBUSY,
2655                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2656                                    NULL,
2657                                    "port must be stopped first");
2658                 return -rte_errno;
2659         }
2660         priv->isolated = !!enable;
2661         if (enable)
2662                 dev->dev_ops = &mlx5_dev_ops_isolate;
2663         else
2664                 dev->dev_ops = &mlx5_dev_ops;
2665         return 0;
2666 }
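
/*
 * Illustrative application-side call (a sketch; port_id is an assumption):
 *
 *   struct rte_flow_error error;
 *
 *   if (rte_flow_isolate(port_id, 1, &error))
 *           printf("cannot enter isolated mode: %s\n", error.message);
 *   rte_eth_dev_start(port_id);
 */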
2667
2668 /**
2669  * Convert a flow director filter to a generic flow.
2670  *
2671  * @param dev
2672  *   Pointer to Ethernet device.
2673  * @param fdir_filter
2674  *   Flow director filter to add.
2675  * @param attributes
2676  *   Generic flow parameters structure.
2677  *
2678  * @return
2679  *   0 on success, a negative errno value otherwise and rte_errno is set.
2680  */
2681 static int
2682 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2683                          const struct rte_eth_fdir_filter *fdir_filter,
2684                          struct mlx5_fdir *attributes)
2685 {
2686         struct priv *priv = dev->data->dev_private;
2687         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2688         const struct rte_eth_fdir_masks *mask =
2689                 &dev->data->dev_conf.fdir_conf.mask;
2690
2691         /* Validate queue number. */
2692         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2693                 DRV_LOG(ERR, "port %u invalid queue number %d",
2694                         dev->data->port_id, fdir_filter->action.rx_queue);
2695                 rte_errno = EINVAL;
2696                 return -rte_errno;
2697         }
2698         attributes->attr.ingress = 1;
2699         attributes->items[0] = (struct rte_flow_item) {
2700                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2701                 .spec = &attributes->l2,
2702                 .mask = &attributes->l2_mask,
2703         };
2704         switch (fdir_filter->action.behavior) {
2705         case RTE_ETH_FDIR_ACCEPT:
2706                 attributes->actions[0] = (struct rte_flow_action){
2707                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2708                         .conf = &attributes->queue,
2709                 };
2710                 break;
2711         case RTE_ETH_FDIR_REJECT:
2712                 attributes->actions[0] = (struct rte_flow_action){
2713                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2714                 };
2715                 break;
2716         default:
2717                 DRV_LOG(ERR, "port %u invalid behavior %d",
2718                         dev->data->port_id,
2719                         fdir_filter->action.behavior);
2720                 rte_errno = ENOTSUP;
2721                 return -rte_errno;
2722         }
2723         attributes->queue.index = fdir_filter->action.rx_queue;
2724         /* Handle L3. */
2725         switch (fdir_filter->input.flow_type) {
2726         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2727         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2728         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2729                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2730                         .src_addr = input->flow.ip4_flow.src_ip,
2731                         .dst_addr = input->flow.ip4_flow.dst_ip,
2732                         .time_to_live = input->flow.ip4_flow.ttl,
2733                         .type_of_service = input->flow.ip4_flow.tos,
2734                         .next_proto_id = input->flow.ip4_flow.proto,
2735                 };
2736                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2737                         .src_addr = mask->ipv4_mask.src_ip,
2738                         .dst_addr = mask->ipv4_mask.dst_ip,
2739                         .time_to_live = mask->ipv4_mask.ttl,
2740                         .type_of_service = mask->ipv4_mask.tos,
2741                         .next_proto_id = mask->ipv4_mask.proto,
2742                 };
2743                 attributes->items[1] = (struct rte_flow_item){
2744                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2745                         .spec = &attributes->l3,
2746                         .mask = &attributes->l3_mask,
2747                 };
2748                 break;
2749         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2750         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2751         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2752                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2753                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2754                         .proto = input->flow.ipv6_flow.proto,
2755                 };
2756
2757                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2758                        input->flow.ipv6_flow.src_ip,
2759                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2760                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2761                        input->flow.ipv6_flow.dst_ip,
2762                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2763                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
2764                        mask->ipv6_mask.src_ip,
2765                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2766                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
2767                        mask->ipv6_mask.dst_ip,
2768                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
2769                 attributes->items[1] = (struct rte_flow_item){
2770                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2771                         .spec = &attributes->l3,
2772                         .mask = &attributes->l3_mask,
2773                 };
2774                 break;
2775         default:
2776                 DRV_LOG(ERR, "port %u invalid flow type %d",
2777                         dev->data->port_id, fdir_filter->input.flow_type);
2778                 rte_errno = ENOTSUP;
2779                 return -rte_errno;
2780         }
2781         /* Handle L4. */
2782         switch (fdir_filter->input.flow_type) {
2783         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2784                 attributes->l4.udp.hdr = (struct udp_hdr){
2785                         .src_port = input->flow.udp4_flow.src_port,
2786                         .dst_port = input->flow.udp4_flow.dst_port,
2787                 };
2788                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2789                         .src_port = mask->src_port_mask,
2790                         .dst_port = mask->dst_port_mask,
2791                 };
2792                 attributes->items[2] = (struct rte_flow_item){
2793                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2794                         .spec = &attributes->l4,
2795                         .mask = &attributes->l4_mask,
2796                 };
2797                 break;
2798         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2799                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2800                         .src_port = input->flow.tcp4_flow.src_port,
2801                         .dst_port = input->flow.tcp4_flow.dst_port,
2802                 };
2803                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2804                         .src_port = mask->src_port_mask,
2805                         .dst_port = mask->dst_port_mask,
2806                 };
2807                 attributes->items[2] = (struct rte_flow_item){
2808                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2809                         .spec = &attributes->l4,
2810                         .mask = &attributes->l4_mask,
2811                 };
2812                 break;
2813         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2814                 attributes->l4.udp.hdr = (struct udp_hdr){
2815                         .src_port = input->flow.udp6_flow.src_port,
2816                         .dst_port = input->flow.udp6_flow.dst_port,
2817                 };
2818                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2819                         .src_port = mask->src_port_mask,
2820                         .dst_port = mask->dst_port_mask,
2821                 };
2822                 attributes->items[2] = (struct rte_flow_item){
2823                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2824                         .spec = &attributes->l4,
2825                         .mask = &attributes->l4_mask,
2826                 };
2827                 break;
2828         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2829                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2830                         .src_port = input->flow.tcp6_flow.src_port,
2831                         .dst_port = input->flow.tcp6_flow.dst_port,
2832                 };
2833                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2834                         .src_port = mask->src_port_mask,
2835                         .dst_port = mask->dst_port_mask,
2836                 };
2837                 attributes->items[2] = (struct rte_flow_item){
2838                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2839                         .spec = &attributes->l4,
2840                         .mask = &attributes->l4_mask,
2841                 };
2842                 break;
2843         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2844         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2845                 break;
2846         default:
2847                 DRV_LOG(ERR, "port %u invalid flow type %d",
2848                         dev->data->port_id, fdir_filter->input.flow_type);
2849                 rte_errno = ENOTSUP;
2850                 return -rte_errno;
2851         }
2852         return 0;
2853 }
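
/*
 * Illustrative sketch, not part of the driver: a flow director filter that
 * the conversion above turns into the generic pattern ETH / IPV4 / UDP with
 * a QUEUE action.  All field values are hypothetical; addresses and ports
 * are expected in network byte order.
 *
 *	struct rte_eth_fdir_filter filter = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.src_ip = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)),
 *				.ip.dst_ip = rte_cpu_to_be_32(IPv4(192, 168, 0, 2)),
 *				.src_port = rte_cpu_to_be_16(1234),
 *				.dst_port = rte_cpu_to_be_16(5678),
 *			},
 *		},
 *		.action = {
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *			.rx_queue = 3,
 *		},
 *	};
 */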
2854
2855 /**
2856  * Add new flow director filter and store it in list.
2857  *
2858  * @param dev
2859  *   Pointer to Ethernet device.
2860  * @param fdir_filter
2861  *   Flow director filter to add.
2862  *
2863  * @return
2864  *   0 on success, a negative errno value otherwise and rte_errno is set.
2865  */
2866 static int
2867 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2868                      const struct rte_eth_fdir_filter *fdir_filter)
2869 {
2870         struct priv *priv = dev->data->dev_private;
2871         struct mlx5_fdir attributes = {
2872                 .attr.group = 0,
2873                 .l2_mask = {
2874                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2875                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2876                         .type = 0,
2877                 },
2878         };
2879         struct mlx5_flow_parse parser = {
2880                 .layer = HASH_RXQ_ETH,
2881         };
2882         struct rte_flow_error error;
2883         struct rte_flow *flow;
2884         int ret;
2885
2886         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2887         if (ret)
2888                 return ret;
2889         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2890                                 attributes.actions, &error, &parser);
2891         if (ret)
2892                 return ret;
2893         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2894                                      attributes.items, attributes.actions,
2895                                      &error);
2896         if (flow) {
2897                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2898                         (void *)flow);
2899                 return 0;
2900         }
2901         return -rte_errno;
2902 }
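
/*
 * Illustrative sketch, not part of the driver: adding such a filter through
 * the legacy filter API, which lands in mlx5_fdir_filter_add() via
 * mlx5_dev_filter_ctrl().  "port_id" and "filter" are hypothetical.
 *
 *	int ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *					  RTE_ETH_FILTER_ADD, &filter);
 *
 *	if (ret)
 *		printf("FDIR add failed: %s\n", strerror(-ret));
 */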
2903
2904 /**
2905  * Delete specific filter.
2906  *
2907  * @param dev
2908  *   Pointer to Ethernet device.
2909  * @param fdir_filter
2910  *   Filter to be deleted.
2911  *
2912  * @return
2913  *   0 on success, a negative errno value otherwise and rte_errno is set.
2914  */
2915 static int
2916 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
2917                         const struct rte_eth_fdir_filter *fdir_filter)
2918 {
2919         struct priv *priv = dev->data->dev_private;
2920         struct mlx5_fdir attributes = {
2921                 .attr.group = 0,
2922         };
2923         struct mlx5_flow_parse parser = {
2924                 .create = 1,
2925                 .layer = HASH_RXQ_ETH,
2926         };
2927         struct rte_flow_error error;
2928         struct rte_flow *flow;
2929         unsigned int i;
2930         int ret;
2931
2932         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2933         if (ret)
2934                 return ret;
2935         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2936                                 attributes.actions, &error, &parser);
2937         if (ret)
2938                 goto exit;
2939         /*
2940          * Special case for the drop action: its specification is only
2941          * appended when a flow is actually created, so it is missing
2942          * here and must be added manually before comparing.
2943          */
2944         if (parser.drop) {
2945                 struct ibv_flow_spec_action_drop *drop;
2946
2947                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2948                                 parser.queue[HASH_RXQ_ETH].offset);
2949                 *drop = (struct ibv_flow_spec_action_drop){
2950                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2951                         .size = sizeof(struct ibv_flow_spec_action_drop),
2952                 };
2953                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2954         }
2955         TAILQ_FOREACH(flow, &priv->flows, next) {
2956                 struct ibv_flow_attr *attr;
2957                 struct ibv_spec_header *attr_h;
2958                 void *spec;
2959                 struct ibv_flow_attr *flow_attr;
2960                 struct ibv_spec_header *flow_h;
2961                 void *flow_spec;
2962                 unsigned int specs_n;
2963                 unsigned int queue_id;
2964
2965                 /*
2966                  * Search for a non-empty ibv_attr. There should be only one
2967                  * because no RSS action is allowed for FDIR. This should have
2968                  * been referenced directly by parser.layer but due to a bug in
2969                  * mlx5_flow_convert() as of v17.11.4, parser.layer isn't
2970                  * correct. This bug will have to be addressed later.
2971                  */
2972                 for (queue_id = 0; queue_id != hash_rxq_init_n; ++queue_id) {
2973                         attr = parser.queue[queue_id].ibv_attr;
2974                         if (attr)
2975                                 break;
2976                 }
2977                 assert(!parser.drop || queue_id == HASH_RXQ_ETH);
2978                 flow_attr = flow->frxq[queue_id].ibv_attr;
2979                 /* Compare first the attributes. */
2980                 if (!flow_attr ||
2981                     memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2982                         continue;
2983                 if (attr->num_of_specs == 0)
2984                         continue;
2985                 spec = (void *)((uintptr_t)attr +
2986                                 sizeof(struct ibv_flow_attr));
2987                 flow_spec = (void *)((uintptr_t)flow_attr +
2988                                      sizeof(struct ibv_flow_attr));
2989                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2990                 for (i = 0; i != specs_n; ++i) {
2991                         attr_h = spec;
2992                         flow_h = flow_spec;
2993                         if (memcmp(spec, flow_spec,
2994                                    RTE_MIN(attr_h->size, flow_h->size)))
2995                                 goto wrong_flow;
2996                         spec = (void *)((uintptr_t)spec + attr_h->size);
2997                         flow_spec = (void *)((uintptr_t)flow_spec +
2998                                              flow_h->size);
2999                 }
3000                 /* At this point, the flow matches. */
3001                 break;
3002 wrong_flow:
3003                 /* The flow does not match. */
3004                 continue;
3005         }
3006         if (flow)
3007                 mlx5_flow_list_destroy(dev, &priv->flows, flow);
3008 exit:
3009         if (ret)
3010                 ret = rte_errno; /* Save rte_errno before cleanup. */
3011         for (i = 0; i != hash_rxq_init_n; ++i) {
3012                 if (parser.queue[i].ibv_attr)
3013                         rte_free(parser.queue[i].ibv_attr);
3014         }
3015         if (ret) {
3016                 rte_errno = ret; /* Restore rte_errno. */
3017                 return -rte_errno;
3018         }
3019         return 0;
3020 }
3021
3022 /**
3023  * Update a specific filter (delete it, then add it back).
3024  *
3025  * @param dev
3026  *   Pointer to Ethernet device.
3027  * @param fdir_filter
3028  *   Filter to be updated.
3029  *
3030  * @return
3031  *   0 on success, a negative errno value otherwise and rte_errno is set.
3032  */
3033 static int
3034 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3035                         const struct rte_eth_fdir_filter *fdir_filter)
3036 {
3037         int ret;
3038
3039         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3040         if (ret)
3041                 return ret;
3042         return mlx5_fdir_filter_add(dev, fdir_filter);
3043 }
3044
3045 /**
3046  * Flush all filters.
3047  *
3048  * @param dev
3049  *   Pointer to Ethernet device.
3050  */
3051 static void
3052 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3053 {
3054         struct priv *priv = dev->data->dev_private;
3055
3056         mlx5_flow_list_flush(dev, &priv->flows);
3057 }
3058
3059 /**
3060  * Get flow director information.
3061  *
3062  * @param dev
3063  *   Pointer to Ethernet device.
3064  * @param[out] fdir_info
3065  *   Resulting flow director information.
3066  */
3067 static void
3068 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3069 {
3070         struct rte_eth_fdir_masks *mask =
3071                 &dev->data->dev_conf.fdir_conf.mask;
3072
3073         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3074         fdir_info->guarant_spc = 0;
3075         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3076         fdir_info->max_flexpayload = 0;
3077         fdir_info->flow_types_mask[0] = 0;
3078         fdir_info->flex_payload_unit = 0;
3079         fdir_info->max_flex_payload_segment_num = 0;
3080         fdir_info->flex_payload_limit = 0;
3081         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3082 }
3083
3084 /**
3085  * Deal with flow director operations.
3086  *
3087  * @param dev
3088  *   Pointer to Ethernet device.
3089  * @param filter_op
3090  *   Operation to perform.
3091  * @param arg
3092  *   Pointer to operation-specific structure.
3093  *
3094  * @return
3095  *   0 on success, a negative errno value otherwise and rte_errno is set.
3096  */
3097 static int
3098 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3099                     void *arg)
3100 {
3101         enum rte_fdir_mode fdir_mode =
3102                 dev->data->dev_conf.fdir_conf.mode;
3103
3104         if (filter_op == RTE_ETH_FILTER_NOP)
3105                 return 0;
3106         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3107             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3108                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3109                         dev->data->port_id, fdir_mode);
3110                 rte_errno = EINVAL;
3111                 return -rte_errno;
3112         }
3113         switch (filter_op) {
3114         case RTE_ETH_FILTER_ADD:
3115                 return mlx5_fdir_filter_add(dev, arg);
3116         case RTE_ETH_FILTER_UPDATE:
3117                 return mlx5_fdir_filter_update(dev, arg);
3118         case RTE_ETH_FILTER_DELETE:
3119                 return mlx5_fdir_filter_delete(dev, arg);
3120         case RTE_ETH_FILTER_FLUSH:
3121                 mlx5_fdir_filter_flush(dev);
3122                 break;
3123         case RTE_ETH_FILTER_INFO:
3124                 mlx5_fdir_info_get(dev, arg);
3125                 break;
3126         default:
3127                 DRV_LOG(DEBUG, "port %u unknown operation %u",
3128                         dev->data->port_id, filter_op);
3129                 rte_errno = EINVAL;
3130                 return -rte_errno;
3131         }
3132         return 0;
3133 }
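
/*
 * Illustrative sketch, not part of the driver: flushing all flow director
 * filters and reading back the configured masks through the dispatch above.
 * "port_id" is hypothetical.
 *
 *	struct rte_eth_fdir_info info;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_FLUSH, NULL);
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_INFO, &info);
 */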
3134
3135 /**
3136  * Manage filter operations.
3137  *
3138  * @param dev
3139  *   Pointer to Ethernet device structure.
3140  * @param filter_type
3141  *   Filter type.
3142  * @param filter_op
3143  *   Operation to perform.
3144  * @param arg
3145  *   Pointer to operation-specific structure.
3146  *
3147  * @return
3148  *   0 on success, a negative errno value otherwise and rte_errno is set.
3149  */
3150 int
3151 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3152                      enum rte_filter_type filter_type,
3153                      enum rte_filter_op filter_op,
3154                      void *arg)
3155 {
3156         switch (filter_type) {
3157         case RTE_ETH_FILTER_GENERIC:
3158                 if (filter_op != RTE_ETH_FILTER_GET) {
3159                         rte_errno = EINVAL;
3160                         return -rte_errno;
3161                 }
3162                 *(const void **)arg = &mlx5_flow_ops;
3163                 return 0;
3164         case RTE_ETH_FILTER_FDIR:
3165                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3166         default:
3167                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3168                         dev->data->port_id, filter_type);
3169                 rte_errno = ENOTSUP;
3170                 return -rte_errno;
3171         }
3172         return 0;
3173 }
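
/*
 * Illustrative sketch, not part of the driver: this is roughly how the
 * rte_flow layer retrieves the generic flow ops exported above before
 * dispatching rte_flow_*() calls to this PMD ("dev" is hypothetical).
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	if (!mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
 *				  RTE_ETH_FILTER_GET, &ops))
 *		assert(ops == &mlx5_flow_ops);
 */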