/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_counter_set_init_attr {
        int dummy;
};
struct ibv_flow_spec_counter_action {
        int dummy;
};
struct ibv_counter_set {
        int dummy;
};

static inline int
ibv_destroy_counter_set(struct ibv_counter_set *cs)
{
        (void)cs;
        return -ENOTSUP;
}
#endif

/* Dev ops structures defined in mlx5.c. */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);

/* Hash RX queue types. */
enum hash_rxq_type {
        HASH_RXQ_TCPV4,
        HASH_RXQ_UDPV4,
        HASH_RXQ_IPV4,
        HASH_RXQ_TCPV6,
        HASH_RXQ_UDPV6,
        HASH_RXQ_IPV6,
        HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
        uint64_t hash_fields; /* Fields that participate in the hash. */
        uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
        unsigned int flow_priority; /* Flow priority to use. */
        unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
        [HASH_RXQ_TCPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_UDPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_IPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4),
                .dpdk_rss_hf = (ETH_RSS_IPV4 |
                                ETH_RSS_FRAG_IPV4),
                .flow_priority = 1,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_TCPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_UDPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_IPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6),
                .dpdk_rss_hf = (ETH_RSS_IPV6 |
                                ETH_RSS_FRAG_IPV6),
                .flow_priority = 1,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_ETH] = {
                .hash_fields = 0,
                .dpdk_rss_hf = 0,
                .flow_priority = 2,
        },
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);

/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
        uint64_t hits; /**< Number of packets matched by the rule. */
        uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for the drop queue. */
struct mlx5_hrxq_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flow structures. */
struct mlx5_flow {
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flow structures. */
struct mlx5_flow_drop {
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        uint16_t (*queues)[]; /**< Queue indexes to use. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
        struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
        struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
        /**< Flow with Rx queue. */
};

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }
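
/*
 * For illustration, ITEMS(RTE_FLOW_ITEM_TYPE_ETH, RTE_FLOW_ITEM_TYPE_VXLAN)
 * expands to the compound literal:
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_ETH,
 *           RTE_FLOW_ITEM_TYPE_VXLAN,
 *           RTE_FLOW_ITEM_TYPE_END,
 *   }
 *
 * i.e. an anonymous array always terminated by RTE_FLOW_ITEM_TYPE_END, so
 * walkers can stop without a separate length field.
 */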

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-mask size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        RTE_FLOW_ACTION_TYPE_COUNT,
#endif
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
};
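
/*
 * For illustration, a pattern such as ETH -> IPV4 -> UDP -> VXLAN -> ETH is
 * accepted by this graph: starting from RTE_FLOW_ITEM_TYPE_END (the implicit
 * root), each item must appear in the .items list of its predecessor, and
 * its .convert callback emits a Verbs specification of .dst_sz bytes.
 */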

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint32_t create:1;
        /**< Whether resources should remain after a validate. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t count:1; /**< Count is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        enum hash_rxq_type layer; /**< Last pattern layer detected. */
        struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
        struct {
                struct ibv_flow_attr *ibv_attr;
                /**< Pointer to Verbs attributes. */
                unsigned int offset;
                /**< Current position or total size of the attribute. */
        } queue[RTE_DIM(hash_rxq_init)];
};

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        .query = mlx5_flow_query,
#else
        .query = NULL,
#endif
        .isolate = mlx5_flow_isolate,
};
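
/*
 * A minimal usage sketch from the application side (hypothetical values,
 * assuming port 0 is bound to this PMD and owns Rx queue 0); the generic
 * rte_flow API dispatches to the callbacks above:
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *   struct rte_flow *flow = rte_flow_create(0, &attr, pattern, actions,
 *                                           &err);
 */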

/* Convert FDIR request to a generic flow. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_action actions[2];
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
        enum ibv_flow_spec_type type;
        uint16_t size;
};

/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, nonzero otherwise.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                /* The user mask must be a subset of the supported one. */
                const uint8_t *spec = item->mask;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}
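
/*
 * Worked example: the supported IPv4 bit-mask in mlx5_flow_items[] only
 * covers src_addr, dst_addr, type_of_service and next_proto_id. A spec that
 * also sets hdr.time_to_live, without providing an explicit item->mask,
 * fails the (spec[i] | mask[i]) != mask[i] test on the TTL byte above and
 * the item is rejected.
 */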

/**
 * Copy the RSS configuration from the user. If rss_conf is NULL, the driver
 * default configuration is used instead.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
                           struct mlx5_flow_parse *parser,
                           const struct rte_eth_rss_conf *rss_conf)
{
        /*
         * This function is also called at the beginning of
         * priv_flow_convert_actions() to initialize the parser with the
         * device default RSS configuration.
         */
        (void)priv;
        if (rss_conf) {
                if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
                        return EINVAL;
                if (rss_conf->rss_key_len != 40)
                        return EINVAL;
                if (rss_conf->rss_key_len && rss_conf->rss_key) {
                        parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
                        memcpy(parser->rss_key, rss_conf->rss_key,
                               rss_conf->rss_key_len);
                        parser->rss_conf.rss_key = parser->rss_key;
                }
                parser->rss_conf.rss_hf = rss_conf->rss_hf;
        }
        return 0;
}
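
/*
 * For illustration, a configuration accepted by the checks above needs a
 * 40-byte key and only hash fields supported by the device (hypothetical
 * application-side values):
 *
 *   uint8_t rss_key[40] = { 0 };
 *   struct rte_eth_rss_conf rss_conf = {
 *           .rss_key = rss_key,
 *           .rss_key_len = 40,
 *           .rss_hf = ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP,
 *   };
 */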

/**
 * Extract attributes to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
                             const struct rte_flow_attr *attr,
                             struct rte_flow_error *error,
                             struct mlx5_flow_parse *parser)
{
        (void)priv;
        (void)parser;
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        return 0;
}

/**
 * Extract the actions to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
                          const struct rte_flow_action actions[],
                          struct rte_flow_error *error,
                          struct mlx5_flow_parse *parser)
{
        /*
         * Add default RSS configuration necessary for Verbs to create QP even
         * if no RSS is necessary.
         */
        priv_flow_convert_rss_conf(priv, parser,
                                   (const struct rte_eth_rss_conf *)
                                   &priv->rss_conf);
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        parser->drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index > (priv->rxqs_n - 1)))
                                goto exit_action_not_supported;
                        for (n = 0; n < parser->queues_n; ++n) {
                                if (parser->queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (parser->queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        if (!found) {
                                parser->queues_n = 1;
                                parser->queues[0] = queue->index;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (parser->queues_n == 1) {
                                uint16_t found = 0;

                                assert(parser->queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (parser->queues[0] ==
                                            rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n)
                                parser->queues[n] = rss->queue[n];
                        parser->queues_n = rss->num;
                        if (priv_flow_convert_rss_conf(priv, parser,
                                                       rss->rss_conf)) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "wrong RSS configuration");
                                return -rte_errno;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        parser->mark = 1;
                        parser->mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        parser->mark = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
                           priv->counter_set_supported) {
                        parser->count = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        /* A mark is useless on a drop flow, discard it silently. */
        if (parser->drop && parser->mark)
                parser->mark = 0;
        if (!parser->queues_n && !parser->drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}

/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
                                 const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;

        (void)priv;
        /* Initialise the offsets to start after the Verbs attribute. */
        for (i = 0; i != hash_rxq_init_n; ++i)
                parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int n;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (parser->inner) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
                        parser->inner = IBV_FLOW_SPEC_INNER;
                }
                if (parser->drop) {
                        parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
                } else {
                        for (n = 0; n != hash_rxq_init_n; ++n)
                                parser->queue[n].offset += cur_item->dst_sz;
                }
        }
        if (parser->drop) {
                parser->queue[HASH_RXQ_ETH].offset +=
                        sizeof(struct ibv_flow_spec_action_drop);
        }
        if (parser->mark) {
                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset +=
                                sizeof(struct ibv_flow_spec_action_tag);
        }
        if (parser->count) {
                unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset += size;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
}

/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Number of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr *
priv_flow_convert_allocate(struct priv *priv,
                           unsigned int priority,
                           unsigned int size,
                           struct rte_flow_error *error)
{
        struct ibv_flow_attr *ibv_attr;

        (void)priv;
        ibv_attr = rte_calloc(__func__, 1, size, 0);
        if (!ibv_attr) {
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
                                   "cannot allocate verbs spec attributes.");
                return NULL;
        }
        ibv_attr->priority = priority;
        return ibv_attr;
}

/**
 * Finalise verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
        const unsigned int ipv4 =
                hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
        const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
        const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
        const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
        const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        unsigned int i;

        (void)priv;
        if (parser->layer == HASH_RXQ_ETH) {
                goto fill;
        } else {
                /*
                 * The Ethernet layer becomes useless once the pattern
                 * defines deeper layers; release its attributes.
                 */
                rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
                parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
        }
        /* Remove opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
        for (i = ohmin; i != (ohmax + 1); ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
        }
        /* Remove impossible flows according to the RSS configuration. */
        if (hash_rxq_init[parser->layer].dpdk_rss_hf &
            parser->rss_conf.rss_hf) {
                /* Remove any other flow. */
                for (i = hmin; i != (hmax + 1); ++i) {
                        if ((i == parser->layer) ||
                             (!parser->queue[i].ibv_attr))
                                continue;
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        } else if (!parser->queue[ip].ibv_attr) {
                /* No RSS possible with the current configuration. */
                parser->queues_n = 1;
                return;
        }
fill:
        /*
         * Fill missing layers in verbs specifications, or compute the correct
         * offset to allocate the memory space for the attributes and
         * specifications.
         */
        for (i = 0; i != hash_rxq_init_n - 1; ++i) {
                union {
                        struct ibv_flow_spec_ipv4_ext ipv4;
                        struct ibv_flow_spec_ipv6 ipv6;
                        struct ibv_flow_spec_tcp_udp udp_tcp;
                } specs;
                void *dst;
                uint16_t size;

                if (i == parser->layer)
                        continue;
                if (parser->layer == HASH_RXQ_ETH) {
                        if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
                                        .type = IBV_FLOW_SPEC_IPV4_EXT,
                                        .size = size,
                                };
                        } else {
                                size = sizeof(struct ibv_flow_spec_ipv6);
                                specs.ipv6 = (struct ibv_flow_spec_ipv6){
                                        .type = IBV_FLOW_SPEC_IPV6,
                                        .size = size,
                                };
                        }
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
                if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
                    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
                        size = sizeof(struct ibv_flow_spec_tcp_udp);
                        specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
                                .type = ((i == HASH_RXQ_UDPV4 ||
                                          i == HASH_RXQ_UDPV6) ?
                                         IBV_FLOW_SPEC_UDP :
                                         IBV_FLOW_SPEC_TCP),
                                .size = size,
                        };
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
        }
}
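
/*
 * For illustration: with a pattern that stops at the Ethernet layer and RSS
 * enabled on all supported fields, the fill loop above appends an empty
 * IBV_FLOW_SPEC_IPV4_EXT followed by an empty IBV_FLOW_SPEC_TCP to the
 * HASH_RXQ_TCPV4 attributes (and the equivalent stubs for the other hash Rx
 * queue types), so every Verbs attribute chain is complete down to its hash
 * layer.
 */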

/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item items[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error,
                  struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;
        int ret;

        /* First step. Validate the attributes, items and actions. */
        *parser = (struct mlx5_flow_parse){
                .create = parser->create,
                .layer = HASH_RXQ_ETH,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
        };
        ret = priv_flow_convert_attributes(priv, attr, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_actions(priv, actions, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_items_validate(priv, items, error, parser);
        if (ret)
                return ret;
        priv_flow_convert_finalise(priv, parser);
        /*
         * Second step.
         * Allocate the memory space to store verbs specifications.
         */
        if (parser->drop) {
                parser->queue[HASH_RXQ_ETH].ibv_attr =
                        priv_flow_convert_allocate
                        (priv, attr->priority,
                         parser->queue[HASH_RXQ_ETH].offset,
                         error);
                if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
                        return ENOMEM;
                parser->queue[HASH_RXQ_ETH].offset =
                        sizeof(struct ibv_flow_attr);
        } else {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        unsigned int priority =
                                attr->priority +
                                hash_rxq_init[i].flow_priority;
                        unsigned int offset;

                        if (!(parser->rss_conf.rss_hf &
                              hash_rxq_init[i].dpdk_rss_hf) &&
                            (i != HASH_RXQ_ETH))
                                continue;
                        offset = parser->queue[i].offset;
                        parser->queue[i].ibv_attr =
                                priv_flow_convert_allocate(priv, priority,
                                                           offset, error);
                        if (!parser->queue[i].ibv_attr)
                                goto exit_enomem;
                        parser->queue[i].offset = sizeof(struct ibv_flow_attr);
                }
        }
        /* Third step. Parse the items and fill the specifications. */
        parser->inner = 0;
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                cur_item = &mlx5_flow_items[items->type];
                ret = cur_item->convert(items,
                                        (cur_item->default_mask ?
                                         cur_item->default_mask :
                                         cur_item->mask),
                                        parser);
                if (ret) {
                        rte_flow_error_set(error, ret,
                                           RTE_FLOW_ERROR_TYPE_ITEM,
                                           items, "item not supported");
                        goto exit_free;
                }
        }
        if (parser->mark)
                mlx5_flow_create_flag_mark(parser, parser->mark_id);
        if (parser->count && parser->create) {
                mlx5_flow_create_count(priv, parser);
                if (!parser->cs)
                        goto exit_count_error;
        }
        /*
         * Last step. Complete missing specifications to reach the RSS
         * configuration.
         */
        if (!parser->drop) {
                priv_flow_convert_finalise(priv, parser);
        } else {
                parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
                        attr->priority +
                        hash_rxq_init[parser->layer].flow_priority;
        }
exit_free:
        /* Only verification is expected, all resources should be released. */
        if (!parser->create) {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (parser->queue[i].ibv_attr) {
                                rte_free(parser->queue[i].ibv_attr);
                                parser->queue[i].ibv_attr = NULL;
                        }
                }
        }
        return ret;
exit_enomem:
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (parser->queue[i].ibv_attr) {
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        }
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot allocate verbs spec attributes.");
        return ret;
exit_count_error:
        rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot create counter.");
        return rte_errno;
}

/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Created specification to copy.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size)
{
        unsigned int i;
        void *dst;

        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                /* Specification must be the same L3 type or none. */
                if (parser->layer == HASH_RXQ_ETH ||
                    (hash_rxq_init[parser->layer].ip_version ==
                     hash_rxq_init[i].ip_version) ||
                    (hash_rxq_init[i].ip_version == 0)) {
                        dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                                        parser->queue[i].offset);
                        memcpy(dst, src, size);
                        ++parser->queue[i].ibv_attr->num_of_specs;
                        parser->queue[i].offset += size;
                }
        }
}
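
/*
 * For illustration: an outer Ethernet specification (parser->layer ==
 * HASH_RXQ_ETH) is copied into every allocated attribute, whereas an IPv4
 * specification only lands in the IPv4-based hash Rx queue types and in
 * HASH_RXQ_ETH (whose ip_version is 0), keeping IPv6 chains free of IPv4
 * specifications.
 */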

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
        struct ibv_flow_spec_eth eth = {
                .type = parser->inner | IBV_FLOW_SPEC_ETH,
                .size = eth_size,
        };

        /* Don't update layer for the inner pattern. */
        if (!parser->inner)
                parser->layer = HASH_RXQ_ETH;
        if (spec) {
                unsigned int i;

                if (!mask)
                        mask = default_mask;
                memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
                eth.val.ether_type = spec->type;
                memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
                eth.mask.ether_type = mask->type;
                /* Remove unwanted bits from values. */
                for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                        eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
                        eth.val.src_mac[i] &= eth.mask.src_mac[i];
                }
                eth.val.ether_type &= eth.mask.ether_type;
        }
        mlx5_flow_create_copy(parser, &eth, eth_size);
        return 0;
}
1275
1276 /**
1277  * Convert VLAN item to Verbs specification.
1278  *
1279  * @param item[in]
1280  *   Item specification.
1281  * @param default_mask[in]
1282  *   Default bit-masks to use when item->mask is not provided.
1283  * @param data[in, out]
1284  *   User structure.
1285  */
1286 static int
1287 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1288                       const void *default_mask,
1289                       void *data)
1290 {
1291         const struct rte_flow_item_vlan *spec = item->spec;
1292         const struct rte_flow_item_vlan *mask = item->mask;
1293         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1294         struct ibv_flow_spec_eth *eth;
1295         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1296
1297         if (spec) {
1298                 unsigned int i;
1299                 if (!mask)
1300                         mask = default_mask;
1301
1302                 for (i = 0; i != hash_rxq_init_n; ++i) {
1303                         if (!parser->queue[i].ibv_attr)
1304                                 continue;
1305
1306                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1307                                        parser->queue[i].offset - eth_size);
1308                         eth->val.vlan_tag = spec->tci;
1309                         eth->mask.vlan_tag = mask->tci;
1310                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1311                 }
1312         }
1313         return 0;
1314 }
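
/*
 * Illustrative sketch (not part of the driver): a VLAN item produces no
 * Verbs specification of its own; its TCI is merged into the ETH spec
 * written just before it, as done above. An application pattern matching
 * VLAN 42 (example value) would look like:
 *
 *	struct rte_flow_item_vlan vlan_spec = {
 *		.tci = rte_cpu_to_be_16(42),
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VLAN, .spec = &vlan_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */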
1315
1316 /**
1317  * Convert IPv4 item to Verbs specification.
1318  *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
1325  */
1326 static int
1327 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1328                       const void *default_mask,
1329                       void *data)
1330 {
1331         const struct rte_flow_item_ipv4 *spec = item->spec;
1332         const struct rte_flow_item_ipv4 *mask = item->mask;
1333         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1334         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1335         struct ibv_flow_spec_ipv4_ext ipv4 = {
1336                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1337                 .size = ipv4_size,
1338         };
1339
1340         /* Don't update layer for the inner pattern. */
1341         if (!parser->inner)
1342                 parser->layer = HASH_RXQ_IPV4;
1343         if (spec) {
1344                 if (!mask)
1345                         mask = default_mask;
1346                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1347                         .src_ip = spec->hdr.src_addr,
1348                         .dst_ip = spec->hdr.dst_addr,
1349                         .proto = spec->hdr.next_proto_id,
1350                         .tos = spec->hdr.type_of_service,
1351                 };
1352                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1353                         .src_ip = mask->hdr.src_addr,
1354                         .dst_ip = mask->hdr.dst_addr,
1355                         .proto = mask->hdr.next_proto_id,
1356                         .tos = mask->hdr.type_of_service,
1357                 };
1358                 /* Remove unwanted bits from values. */
1359                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1360                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1361                 ipv4.val.proto &= ipv4.mask.proto;
1362                 ipv4.val.tos &= ipv4.mask.tos;
1363         }
1364         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1365         return 0;
1366 }
1367
1368 /**
1369  * Convert IPv6 item to Verbs specification.
1370  *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
1377  */
1378 static int
1379 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1380                       const void *default_mask,
1381                       void *data)
1382 {
1383         const struct rte_flow_item_ipv6 *spec = item->spec;
1384         const struct rte_flow_item_ipv6 *mask = item->mask;
1385         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1386         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1387         struct ibv_flow_spec_ipv6 ipv6 = {
1388                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1389                 .size = ipv6_size,
1390         };
1391
1392         /* Don't update layer for the inner pattern. */
1393         if (!parser->inner)
1394                 parser->layer = HASH_RXQ_IPV6;
1395         if (spec) {
1396                 unsigned int i;
1397
1398                 if (!mask)
1399                         mask = default_mask;
1400                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1401                        RTE_DIM(ipv6.val.src_ip));
1402                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1403                        RTE_DIM(ipv6.val.dst_ip));
1404                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1405                        RTE_DIM(ipv6.mask.src_ip));
1406                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1407                        RTE_DIM(ipv6.mask.dst_ip));
                /* Set value fields so the masking below takes effect. */
                ipv6.val.flow_label = spec->hdr.vtc_flow;
                ipv6.val.next_hdr = spec->hdr.proto;
                ipv6.val.hop_limit = spec->hdr.hop_limits;
                ipv6.mask.flow_label = mask->hdr.vtc_flow;
                ipv6.mask.next_hdr = mask->hdr.proto;
                ipv6.mask.hop_limit = mask->hdr.hop_limits;
1411                 /* Remove unwanted bits from values. */
1412                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1413                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1414                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1415                 }
1416                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1417                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1418                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1419         }
1420         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1421         return 0;
1422 }
1423
1424 /**
1425  * Convert UDP item to Verbs specification.
1426  *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
1433  */
1434 static int
1435 mlx5_flow_create_udp(const struct rte_flow_item *item,
1436                      const void *default_mask,
1437                      void *data)
1438 {
1439         const struct rte_flow_item_udp *spec = item->spec;
1440         const struct rte_flow_item_udp *mask = item->mask;
1441         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1442         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1443         struct ibv_flow_spec_tcp_udp udp = {
1444                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1445                 .size = udp_size,
1446         };
1447
1448         /* Don't update layer for the inner pattern. */
1449         if (!parser->inner) {
1450                 if (parser->layer == HASH_RXQ_IPV4)
1451                         parser->layer = HASH_RXQ_UDPV4;
1452                 else
1453                         parser->layer = HASH_RXQ_UDPV6;
1454         }
1455         if (spec) {
1456                 if (!mask)
1457                         mask = default_mask;
1458                 udp.val.dst_port = spec->hdr.dst_port;
1459                 udp.val.src_port = spec->hdr.src_port;
1460                 udp.mask.dst_port = mask->hdr.dst_port;
1461                 udp.mask.src_port = mask->hdr.src_port;
1462                 /* Remove unwanted bits from values. */
1463                 udp.val.src_port &= udp.mask.src_port;
1464                 udp.val.dst_port &= udp.mask.dst_port;
1465         }
1466         mlx5_flow_create_copy(parser, &udp, udp_size);
1467         return 0;
1468 }
1469
1470 /**
1471  * Convert TCP item to Verbs specification.
1472  *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
1479  */
1480 static int
1481 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1482                      const void *default_mask,
1483                      void *data)
1484 {
1485         const struct rte_flow_item_tcp *spec = item->spec;
1486         const struct rte_flow_item_tcp *mask = item->mask;
1487         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1488         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1489         struct ibv_flow_spec_tcp_udp tcp = {
1490                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1491                 .size = tcp_size,
1492         };
1493
1494         /* Don't update layer for the inner pattern. */
1495         if (!parser->inner) {
1496                 if (parser->layer == HASH_RXQ_IPV4)
1497                         parser->layer = HASH_RXQ_TCPV4;
1498                 else
1499                         parser->layer = HASH_RXQ_TCPV6;
1500         }
1501         if (spec) {
1502                 if (!mask)
1503                         mask = default_mask;
1504                 tcp.val.dst_port = spec->hdr.dst_port;
1505                 tcp.val.src_port = spec->hdr.src_port;
1506                 tcp.mask.dst_port = mask->hdr.dst_port;
1507                 tcp.mask.src_port = mask->hdr.src_port;
1508                 /* Remove unwanted bits from values. */
1509                 tcp.val.src_port &= tcp.mask.src_port;
1510                 tcp.val.dst_port &= tcp.mask.dst_port;
1511         }
1512         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1513         return 0;
1514 }
1515
1516 /**
1517  * Convert VXLAN item to Verbs specification.
1518  *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
1525  */
1526 static int
1527 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1528                        const void *default_mask,
1529                        void *data)
1530 {
1531         const struct rte_flow_item_vxlan *spec = item->spec;
1532         const struct rte_flow_item_vxlan *mask = item->mask;
1533         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1534         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1535         struct ibv_flow_spec_tunnel vxlan = {
1536                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1537                 .size = size,
1538         };
1539         union vni {
1540                 uint32_t vlan_id;
1541                 uint8_t vni[4];
1542         } id;
1543
1544         id.vni[0] = 0;
1545         parser->inner = IBV_FLOW_SPEC_INNER;
1546         if (spec) {
1547                 if (!mask)
1548                         mask = default_mask;
1549                 memcpy(&id.vni[1], spec->vni, 3);
1550                 vxlan.val.tunnel_id = id.vlan_id;
1551                 memcpy(&id.vni[1], mask->vni, 3);
1552                 vxlan.mask.tunnel_id = id.vlan_id;
1553                 /* Remove unwanted bits from values. */
1554                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1555         }
        /*
         * Tunnel ID 0 is equivalent to not specifying a VXLAN layer at all.
         * If this is the only layer in the Verbs specification, it acts as
         * a wildcard and every packet matches the rule; if it follows a
         * full stack (e.g. eth / ipv4 / udp), every packet matching the
         * preceding layers also matches the rule.
         * To avoid this ambiguity, VNI 0 is currently refused.
         */
1564         if (!vxlan.val.tunnel_id)
1565                 return EINVAL;
1566         mlx5_flow_create_copy(parser, &vxlan, size);
1567         return 0;
1568 }
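
/*
 * Illustrative sketch (not part of the driver): a VXLAN item with a
 * non-zero VNI, the only form accepted above; the VNI value is an
 * example. Items following it are tagged IBV_FLOW_SPEC_INNER and match
 * the encapsulated headers.
 *
 *	struct rte_flow_item_vxlan vxlan_spec = {
 *		.vni = "\x00\x00\x2a",
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */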
1569
1570 /**
1571  * Convert mark/flag action to Verbs specification.
1572  *
1573  * @param parser
1574  *   Internal parser structure.
1575  * @param mark_id
1576  *   Mark identifier.
1577  */
1578 static int
1579 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1580 {
1581         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1582         struct ibv_flow_spec_action_tag tag = {
1583                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1584                 .size = size,
1585                 .tag_id = mlx5_flow_mark_set(mark_id),
1586         };
1587
1588         assert(parser->mark);
1589         mlx5_flow_create_copy(parser, &tag, size);
1590         return 0;
1591 }
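
/*
 * Note (informational): on the Rx side the tag written here comes back in
 * the completion queue entry and is exposed to applications through
 * mbuf->hash.fdir.hi with the PKT_RX_FDIR_ID bit set in ol_flags, while a
 * FLAG action sets PKT_RX_FDIR only. Application-side sketch, where
 * handle_mark() is a hypothetical callback:
 *
 *	if (mbuf->ol_flags & PKT_RX_FDIR_ID)
 *		handle_mark(mbuf->hash.fdir.hi);
 */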
1592
1593 /**
1594  * Convert count action to Verbs specification.
1595  *
1596  * @param priv
1597  *   Pointer to private structure.
1598  * @param parser
1599  *   Pointer to MLX5 flow parser structure.
1600  *
1601  * @return
1602  *   0 on success, errno value on failure.
1603  */
1604 static int
1605 mlx5_flow_create_count(struct priv *priv __rte_unused,
1606                        struct mlx5_flow_parse *parser __rte_unused)
1607 {
1608 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1609         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1610         struct ibv_counter_set_init_attr init_attr = {0};
1611         struct ibv_flow_spec_counter_action counter = {
1612                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1613                 .size = size,
1614                 .counter_set_handle = 0,
1615         };
1616
1617         init_attr.counter_set_id = 0;
1618         parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
1619         if (!parser->cs)
1620                 return EINVAL;
1621         counter.counter_set_handle = parser->cs->handle;
1622         mlx5_flow_create_copy(parser, &counter, size);
1623 #endif
1624         return 0;
1625 }
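
/*
 * Illustrative sketch (not part of the driver): applications pair the
 * COUNT action with rte_flow_query() to read the counter later; port_id,
 * flow and error are assumed to come from the surrounding application
 * code:
 *
 *	struct rte_flow_query_count query = { .reset = 1 };
 *
 *	if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *			    &query, &error))
 *		printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *		       query.hits, query.bytes);
 */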
1626
1627 /**
1628  * Complete flow rule creation with a drop queue.
1629  *
1630  * @param priv
1631  *   Pointer to private structure.
1632  * @param parser
1633  *   Internal parser structure.
1634  * @param flow
1635  *   Pointer to the rte_flow.
1636  * @param[out] error
1637  *   Perform verbose error reporting if not NULL.
1638  *
1639  * @return
1640  *   0 on success, errno value on failure.
1641  */
1642 static int
1643 priv_flow_create_action_queue_drop(struct priv *priv,
1644                                    struct mlx5_flow_parse *parser,
1645                                    struct rte_flow *flow,
1646                                    struct rte_flow_error *error)
1647 {
1648         struct ibv_flow_spec_action_drop *drop;
1649         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1650         int err = 0;
1651
1652         assert(priv->pd);
1653         assert(priv->ctx);
1654         flow->drop = 1;
1655         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1656                         parser->queue[HASH_RXQ_ETH].offset);
1657         *drop = (struct ibv_flow_spec_action_drop){
1658                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1659                         .size = size,
1660         };
1661         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1662         parser->queue[HASH_RXQ_ETH].offset += size;
1663         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1664                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1665         if (parser->count)
1666                 flow->cs = parser->cs;
1667         if (!priv->dev->data->dev_started)
1668                 return 0;
1669         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1670         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1671                 ibv_create_flow(priv->flow_drop_queue->qp,
1672                                 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1673         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1674                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1675                                    NULL, "flow rule creation failure");
1676                 err = ENOMEM;
1677                 goto error;
1678         }
1679         return 0;
1680 error:
1681         assert(flow);
1682         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1683                 claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
1684                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1685         }
1686         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1687                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1688                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1689         }
1690         if (flow->cs) {
1691                 claim_zero(ibv_destroy_counter_set(flow->cs));
1692                 flow->cs = NULL;
1693                 parser->cs = NULL;
1694         }
1695         return err;
1696 }
1697
1698 /**
1699  * Create hash Rx queues when RSS is enabled.
1700  *
1701  * @param priv
1702  *   Pointer to private structure.
1703  * @param parser
1704  *   Internal parser structure.
1705  * @param flow
1706  *   Pointer to the rte_flow.
1707  * @param[out] error
1708  *   Perform verbose error reporting if not NULL.
1709  *
1710  * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
1712  */
1713 static int
1714 priv_flow_create_action_queue_rss(struct priv *priv,
1715                                   struct mlx5_flow_parse *parser,
1716                                   struct rte_flow *flow,
1717                                   struct rte_flow_error *error)
1718 {
1719         unsigned int i;
1720
1721         for (i = 0; i != hash_rxq_init_n; ++i) {
1722                 uint64_t hash_fields;
1723
1724                 if (!parser->queue[i].ibv_attr)
1725                         continue;
1726                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1727                 parser->queue[i].ibv_attr = NULL;
1728                 hash_fields = hash_rxq_init[i].hash_fields;
1729                 if (!priv->dev->data->dev_started)
1730                         continue;
1731                 flow->frxq[i].hrxq =
1732                         mlx5_priv_hrxq_get(priv,
1733                                            parser->rss_conf.rss_key,
1734                                            parser->rss_conf.rss_key_len,
1735                                            hash_fields,
1736                                            parser->queues,
1737                                            parser->queues_n);
1738                 if (flow->frxq[i].hrxq)
1739                         continue;
1740                 flow->frxq[i].hrxq =
1741                         mlx5_priv_hrxq_new(priv,
1742                                            parser->rss_conf.rss_key,
1743                                            parser->rss_conf.rss_key_len,
1744                                            hash_fields,
1745                                            parser->queues,
1746                                            parser->queues_n);
1747                 if (!flow->frxq[i].hrxq) {
1748                         rte_flow_error_set(error, ENOMEM,
1749                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1750                                            NULL, "cannot create hash rxq");
1751                         return ENOMEM;
1752                 }
1753         }
1754         return 0;
1755 }
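
/*
 * Note (informational): mlx5_priv_hrxq_get() above looks up an existing
 * hash Rx queue matching the RSS key, hash fields and queue list and
 * takes a reference on it; only when no match exists is a new one
 * allocated with mlx5_priv_hrxq_new(). This get-or-create scheme lets
 * rules targeting the same queues share hardware objects.
 */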
1756
1757 /**
1758  * Complete flow rule creation.
1759  *
1760  * @param priv
1761  *   Pointer to private structure.
1762  * @param parser
1763  *   Internal parser structure.
1764  * @param flow
1765  *   Pointer to the rte_flow.
1766  * @param[out] error
1767  *   Perform verbose error reporting if not NULL.
1768  *
1769  * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
1771  */
1772 static int
1773 priv_flow_create_action_queue(struct priv *priv,
1774                               struct mlx5_flow_parse *parser,
1775                               struct rte_flow *flow,
1776                               struct rte_flow_error *error)
1777 {
1778         int err = 0;
1779         unsigned int i;
1780
1781         assert(priv->pd);
1782         assert(priv->ctx);
1783         assert(!parser->drop);
1784         err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1785         if (err)
1786                 goto error;
1787         if (parser->count)
1788                 flow->cs = parser->cs;
1789         if (!priv->dev->data->dev_started)
1790                 return 0;
1791         for (i = 0; i != hash_rxq_init_n; ++i) {
1792                 if (!flow->frxq[i].hrxq)
1793                         continue;
1794                 flow->frxq[i].ibv_flow =
1795                         ibv_create_flow(flow->frxq[i].hrxq->qp,
1796                                         flow->frxq[i].ibv_attr);
1797                 if (!flow->frxq[i].ibv_flow) {
1798                         rte_flow_error_set(error, ENOMEM,
1799                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1800                                            NULL, "flow rule creation failure");
1801                         err = ENOMEM;
1802                         goto error;
1803                 }
1804                 DEBUG("%p type %d QP %p ibv_flow %p",
1805                       (void *)flow, i,
1806                       (void *)flow->frxq[i].hrxq,
1807                       (void *)flow->frxq[i].ibv_flow);
1808         }
1809         for (i = 0; i != parser->queues_n; ++i) {
1810                 struct mlx5_rxq_data *q =
1811                         (*priv->rxqs)[parser->queues[i]];
1812
1813                 q->mark |= parser->mark;
1814         }
1815         return 0;
1816 error:
1817         assert(flow);
1818         for (i = 0; i != hash_rxq_init_n; ++i) {
1819                 if (flow->frxq[i].ibv_flow) {
1820                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1821
1822                         claim_zero(ibv_destroy_flow(ibv_flow));
1823                 }
1824                 if (flow->frxq[i].hrxq)
1825                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1826                 if (flow->frxq[i].ibv_attr)
1827                         rte_free(flow->frxq[i].ibv_attr);
1828         }
1829         if (flow->cs) {
1830                 claim_zero(ibv_destroy_counter_set(flow->cs));
1831                 flow->cs = NULL;
1832                 parser->cs = NULL;
1833         }
1834         return err;
1835 }
1836
1837 /**
1838  * Convert a flow.
1839  *
1840  * @param priv
1841  *   Pointer to private structure.
1842  * @param list
1843  *   Pointer to a TAILQ flow list.
1844  * @param[in] attr
1845  *   Flow rule attributes.
1846  * @param[in] pattern
1847  *   Pattern specification (list terminated by the END pattern item).
1848  * @param[in] actions
1849  *   Associated actions (list terminated by the END action).
1850  * @param[out] error
1851  *   Perform verbose error reporting if not NULL.
1852  *
1853  * @return
1854  *   A flow on success, NULL otherwise.
1855  */
1856 static struct rte_flow *
1857 priv_flow_create(struct priv *priv,
1858                  struct mlx5_flows *list,
1859                  const struct rte_flow_attr *attr,
1860                  const struct rte_flow_item items[],
1861                  const struct rte_flow_action actions[],
1862                  struct rte_flow_error *error)
1863 {
1864         struct mlx5_flow_parse parser = { .create = 1, };
1865         struct rte_flow *flow = NULL;
1866         unsigned int i;
1867         int err;
1868
1869         err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1870         if (err)
1871                 goto exit;
1872         flow = rte_calloc(__func__, 1,
1873                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1874                           0);
1875         if (!flow) {
1876                 rte_flow_error_set(error, ENOMEM,
1877                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1878                                    NULL,
1879                                    "cannot allocate flow memory");
1880                 return NULL;
1881         }
1882         /* Copy queues configuration. */
1883         flow->queues = (uint16_t (*)[])(flow + 1);
1884         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1885         flow->queues_n = parser.queues_n;
1886         flow->mark = parser.mark;
1887         /* Copy RSS configuration. */
1888         flow->rss_conf = parser.rss_conf;
1889         flow->rss_conf.rss_key = flow->rss_key;
1890         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
        /* Finalize the flow. */
1892         if (parser.drop)
1893                 err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1894                                                          error);
1895         else
1896                 err = priv_flow_create_action_queue(priv, &parser, flow, error);
1897         if (err)
1898                 goto exit;
1899         TAILQ_INSERT_TAIL(list, flow, next);
1900         DEBUG("Flow created %p", (void *)flow);
1901         return flow;
1902 exit:
1903         for (i = 0; i != hash_rxq_init_n; ++i) {
1904                 if (parser.queue[i].ibv_attr)
1905                         rte_free(parser.queue[i].ibv_attr);
1906         }
1907         rte_free(flow);
1908         return NULL;
1909 }
1910
1911 /**
1912  * Validate a flow supported by the NIC.
1913  *
1914  * @see rte_flow_validate()
1915  * @see rte_flow_ops
1916  */
1917 int
1918 mlx5_flow_validate(struct rte_eth_dev *dev,
1919                    const struct rte_flow_attr *attr,
1920                    const struct rte_flow_item items[],
1921                    const struct rte_flow_action actions[],
1922                    struct rte_flow_error *error)
1923 {
1924         struct priv *priv = dev->data->dev_private;
1925         int ret;
1926         struct mlx5_flow_parse parser = { .create = 0, };
1927
1928         priv_lock(priv);
1929         ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1930         priv_unlock(priv);
1931         return ret;
1932 }
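
/*
 * Illustrative sketch (not part of the driver): validation runs the same
 * parser as creation without allocating anything, so applications can
 * probe support cheaply before committing:
 *
 *	if (!rte_flow_validate(port_id, &attr, pattern, actions, &error))
 *		flow = rte_flow_create(port_id, &attr, pattern, actions,
 *				       &error);
 *
 * attr, pattern, actions and error are as in the creation example below.
 */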
1933
1934 /**
1935  * Create a flow.
1936  *
1937  * @see rte_flow_create()
1938  * @see rte_flow_ops
1939  */
1940 struct rte_flow *
1941 mlx5_flow_create(struct rte_eth_dev *dev,
1942                  const struct rte_flow_attr *attr,
1943                  const struct rte_flow_item items[],
1944                  const struct rte_flow_action actions[],
1945                  struct rte_flow_error *error)
1946 {
1947         struct priv *priv = dev->data->dev_private;
1948         struct rte_flow *flow;
1949
1950         priv_lock(priv);
1951         flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1952                                 error);
1953         priv_unlock(priv);
1954         return flow;
1955 }
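
/*
 * Illustrative sketch (not part of the driver): creating a rule through
 * the public API, which lands in mlx5_flow_create() above. All values are
 * examples; port_id is assumed to identify an mlx5 port.
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *	struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *						actions, &error);
 */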
1956
1957 /**
1958  * Destroy a flow.
1959  *
1960  * @param priv
1961  *   Pointer to private structure.
1962  * @param list
1963  *   Pointer to a TAILQ flow list.
1964  * @param[in] flow
1965  *   Flow to destroy.
1966  */
1967 static void
1968 priv_flow_destroy(struct priv *priv,
1969                   struct mlx5_flows *list,
1970                   struct rte_flow *flow)
1971 {
1972         unsigned int i;
1973
1974         if (flow->drop || !flow->mark)
1975                 goto free;
1976         for (i = 0; i != flow->queues_n; ++i) {
1977                 struct rte_flow *tmp;
1978                 int mark = 0;
1979
1980                 /*
1981                  * To remove the mark from the queue, the queue must not be
1982                  * present in any other marked flow (RSS or not).
1983                  */
1984                 TAILQ_FOREACH(tmp, list, next) {
1985                         unsigned int j;
1986                         uint16_t *tqs = NULL;
1987                         uint16_t tq_n = 0;
1988
1989                         if (!tmp->mark)
1990                                 continue;
1991                         for (j = 0; j != hash_rxq_init_n; ++j) {
1992                                 if (!tmp->frxq[j].hrxq)
1993                                         continue;
1994                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
1995                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1996                         }
1997                         if (!tq_n)
1998                                 continue;
1999                         for (j = 0; (j != tq_n) && !mark; j++)
2000                                 if (tqs[j] == (*flow->queues)[i])
2001                                         mark = 1;
2002                 }
2003                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2004         }
2005 free:
2006         if (flow->drop) {
2007                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2008                         claim_zero(ibv_destroy_flow
2009                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2010                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2011         } else {
2012                 for (i = 0; i != hash_rxq_init_n; ++i) {
2013                         struct mlx5_flow *frxq = &flow->frxq[i];
2014
2015                         if (frxq->ibv_flow)
2016                                 claim_zero(ibv_destroy_flow(frxq->ibv_flow));
2017                         if (frxq->hrxq)
2018                                 mlx5_priv_hrxq_release(priv, frxq->hrxq);
2019                         if (frxq->ibv_attr)
2020                                 rte_free(frxq->ibv_attr);
2021                 }
2022         }
2023         if (flow->cs) {
2024                 claim_zero(ibv_destroy_counter_set(flow->cs));
2025                 flow->cs = NULL;
2026         }
2027         TAILQ_REMOVE(list, flow, next);
2028         DEBUG("Flow destroyed %p", (void *)flow);
2029         rte_free(flow);
2030 }
2031
2032 /**
2033  * Destroy all flows.
2034  *
2035  * @param priv
2036  *   Pointer to private structure.
2037  * @param list
2038  *   Pointer to a TAILQ flow list.
2039  */
2040 void
2041 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
2042 {
2043         while (!TAILQ_EMPTY(list)) {
2044                 struct rte_flow *flow;
2045
2046                 flow = TAILQ_FIRST(list);
2047                 priv_flow_destroy(priv, list, flow);
2048         }
2049 }
2050
2051 /**
2052  * Create drop queue.
2053  *
2054  * @param priv
2055  *   Pointer to private structure.
2056  *
2057  * @return
 *   0 on success, -1 on failure.
2059  */
2060 int
2061 priv_flow_create_drop_queue(struct priv *priv)
2062 {
2063         struct mlx5_hrxq_drop *fdq = NULL;
2064
2065         assert(priv->pd);
2066         assert(priv->ctx);
2067         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2068         if (!fdq) {
2069                 WARN("cannot allocate memory for drop queue");
2070                 goto error;
2071         }
2072         fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
2073         if (!fdq->cq) {
2074                 WARN("cannot allocate CQ for drop queue");
2075                 goto error;
2076         }
2077         fdq->wq = ibv_create_wq(priv->ctx,
2078                         &(struct ibv_wq_init_attr){
2079                         .wq_type = IBV_WQT_RQ,
2080                         .max_wr = 1,
2081                         .max_sge = 1,
2082                         .pd = priv->pd,
2083                         .cq = fdq->cq,
2084                         });
2085         if (!fdq->wq) {
2086                 WARN("cannot allocate WQ for drop queue");
2087                 goto error;
2088         }
2089         fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
2090                         &(struct ibv_rwq_ind_table_init_attr){
2091                         .log_ind_tbl_size = 0,
2092                         .ind_tbl = &fdq->wq,
2093                         .comp_mask = 0,
2094                         });
2095         if (!fdq->ind_table) {
2096                 WARN("cannot allocate indirection table for drop queue");
2097                 goto error;
2098         }
2099         fdq->qp = ibv_create_qp_ex(priv->ctx,
2100                 &(struct ibv_qp_init_attr_ex){
2101                         .qp_type = IBV_QPT_RAW_PACKET,
2102                         .comp_mask =
2103                                 IBV_QP_INIT_ATTR_PD |
2104                                 IBV_QP_INIT_ATTR_IND_TABLE |
2105                                 IBV_QP_INIT_ATTR_RX_HASH,
2106                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2107                                 .rx_hash_function =
2108                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2109                                 .rx_hash_key_len = rss_hash_default_key_len,
2110                                 .rx_hash_key = rss_hash_default_key,
2111                                 .rx_hash_fields_mask = 0,
2112                                 },
2113                         .rwq_ind_tbl = fdq->ind_table,
2114                         .pd = priv->pd
2115                 });
2116         if (!fdq->qp) {
2117                 WARN("cannot allocate QP for drop queue");
2118                 goto error;
2119         }
2120         priv->flow_drop_queue = fdq;
2121         return 0;
error:
        /* fdq may be NULL when the initial allocation failed. */
        if (fdq) {
                if (fdq->qp)
                        claim_zero(ibv_destroy_qp(fdq->qp));
                if (fdq->ind_table)
                        claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
                if (fdq->wq)
                        claim_zero(ibv_destroy_wq(fdq->wq));
                if (fdq->cq)
                        claim_zero(ibv_destroy_cq(fdq->cq));
                rte_free(fdq);
        }
2133         priv->flow_drop_queue = NULL;
2134         return -1;
2135 }
2136
2137 /**
2138  * Delete drop queue.
2139  *
2140  * @param priv
2141  *   Pointer to private structure.
2142  */
2143 void
2144 priv_flow_delete_drop_queue(struct priv *priv)
2145 {
2146         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2147
2148         if (!fdq)
2149                 return;
2150         if (fdq->qp)
2151                 claim_zero(ibv_destroy_qp(fdq->qp));
2152         if (fdq->ind_table)
2153                 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2154         if (fdq->wq)
2155                 claim_zero(ibv_destroy_wq(fdq->wq));
2156         if (fdq->cq)
2157                 claim_zero(ibv_destroy_cq(fdq->cq));
2158         rte_free(fdq);
2159         priv->flow_drop_queue = NULL;
2160 }
2161
2162 /**
2163  * Remove all flows.
2164  *
2165  * @param priv
2166  *   Pointer to private structure.
2167  * @param list
2168  *   Pointer to a TAILQ flow list.
2169  */
2170 void
2171 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2172 {
2173         struct rte_flow *flow;
2174
2175         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2176                 unsigned int i;
2177                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2178
2179                 if (flow->drop) {
2180                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2181                                 continue;
2182                         claim_zero(ibv_destroy_flow
2183                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2184                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2185                         DEBUG("Flow %p removed", (void *)flow);
2186                         /* Next flow. */
2187                         continue;
2188                 }
2189                 /* Verify the flow has not already been cleaned. */
2190                 for (i = 0; i != hash_rxq_init_n; ++i) {
2191                         if (!flow->frxq[i].ibv_flow)
2192                                 continue;
2193                         /*
2194                          * Indirection table may be necessary to remove the
2195                          * flags in the Rx queues.
2196                          * This helps to speed-up the process by avoiding
2197                          * another loop.
2198                          */
2199                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2200                         break;
2201                 }
2202                 if (i == hash_rxq_init_n)
2203                         return;
2204                 if (flow->mark) {
2205                         assert(ind_tbl);
2206                         for (i = 0; i != ind_tbl->queues_n; ++i)
2207                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2208                 }
2209                 for (i = 0; i != hash_rxq_init_n; ++i) {
2210                         if (!flow->frxq[i].ibv_flow)
2211                                 continue;
2212                         claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
2213                         flow->frxq[i].ibv_flow = NULL;
2214                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2215                         flow->frxq[i].hrxq = NULL;
2216                 }
2217                 DEBUG("Flow %p removed", (void *)flow);
2218         }
2219 }
2220
2221 /**
2222  * Add all flows.
2223  *
2224  * @param priv
2225  *   Pointer to private structure.
2226  * @param list
2227  *   Pointer to a TAILQ flow list.
2228  *
2229  * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
2231  */
2232 int
2233 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2234 {
2235         struct rte_flow *flow;
2236
2237         TAILQ_FOREACH(flow, list, next) {
2238                 unsigned int i;
2239
2240                 if (flow->drop) {
2241                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2242                                 ibv_create_flow
2243                                 (priv->flow_drop_queue->qp,
2244                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2245                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2246                                 DEBUG("Flow %p cannot be applied",
2247                                       (void *)flow);
2248                                 rte_errno = EINVAL;
2249                                 return rte_errno;
2250                         }
2251                         DEBUG("Flow %p applied", (void *)flow);
2252                         /* Next flow. */
2253                         continue;
2254                 }
2255                 for (i = 0; i != hash_rxq_init_n; ++i) {
2256                         if (!flow->frxq[i].ibv_attr)
2257                                 continue;
2258                         flow->frxq[i].hrxq =
2259                                 mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2260                                                    flow->rss_conf.rss_key_len,
2261                                                    hash_rxq_init[i].hash_fields,
2262                                                    (*flow->queues),
2263                                                    flow->queues_n);
2264                         if (flow->frxq[i].hrxq)
2265                                 goto flow_create;
2266                         flow->frxq[i].hrxq =
2267                                 mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2268                                                    flow->rss_conf.rss_key_len,
2269                                                    hash_rxq_init[i].hash_fields,
2270                                                    (*flow->queues),
2271                                                    flow->queues_n);
2272                         if (!flow->frxq[i].hrxq) {
2273                                 DEBUG("Flow %p cannot be applied",
2274                                       (void *)flow);
2275                                 rte_errno = EINVAL;
2276                                 return rte_errno;
2277                         }
2278 flow_create:
2279                         flow->frxq[i].ibv_flow =
2280                                 ibv_create_flow(flow->frxq[i].hrxq->qp,
2281                                                 flow->frxq[i].ibv_attr);
2282                         if (!flow->frxq[i].ibv_flow) {
2283                                 DEBUG("Flow %p cannot be applied",
2284                                       (void *)flow);
2285                                 rte_errno = EINVAL;
2286                                 return rte_errno;
2287                         }
2288                         DEBUG("Flow %p applied", (void *)flow);
2289                 }
2290                 if (!flow->mark)
2291                         continue;
2292                 for (i = 0; i != flow->queues_n; ++i)
2293                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2294         }
2295         return 0;
2296 }
2297
2298 /**
 * Verify the flow list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of flows not released.
2305  */
2306 int
2307 priv_flow_verify(struct priv *priv)
2308 {
2309         struct rte_flow *flow;
2310         int ret = 0;
2311
2312         TAILQ_FOREACH(flow, &priv->flows, next) {
2313                 DEBUG("%p: flow %p still referenced", (void *)priv,
2314                       (void *)flow);
2315                 ++ret;
2316         }
2317         return ret;
2318 }
2319
2320 /**
2321  * Enable a control flow configured from the control plane.
2322  *
2323  * @param dev
2324  *   Pointer to Ethernet device.
2325  * @param eth_spec
2326  *   An Ethernet flow spec to apply.
2327  * @param eth_mask
2328  *   An Ethernet flow mask to apply.
2329  * @param vlan_spec
2330  *   A VLAN flow spec to apply.
2331  * @param vlan_mask
2332  *   A VLAN flow mask to apply.
2333  *
2334  * @return
 *   0 on success, an errno value on failure.
2336  */
2337 int
2338 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2339                     struct rte_flow_item_eth *eth_spec,
2340                     struct rte_flow_item_eth *eth_mask,
2341                     struct rte_flow_item_vlan *vlan_spec,
2342                     struct rte_flow_item_vlan *vlan_mask)
2343 {
2344         struct priv *priv = dev->data->dev_private;
2345         const struct rte_flow_attr attr = {
2346                 .ingress = 1,
2347                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2348         };
2349         struct rte_flow_item items[] = {
2350                 {
2351                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2352                         .spec = eth_spec,
2353                         .last = NULL,
2354                         .mask = eth_mask,
2355                 },
2356                 {
2357                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2358                                 RTE_FLOW_ITEM_TYPE_END,
2359                         .spec = vlan_spec,
2360                         .last = NULL,
2361                         .mask = vlan_mask,
2362                 },
2363                 {
2364                         .type = RTE_FLOW_ITEM_TYPE_END,
2365                 },
2366         };
2367         struct rte_flow_action actions[] = {
2368                 {
2369                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2370                 },
2371                 {
2372                         .type = RTE_FLOW_ACTION_TYPE_END,
2373                 },
2374         };
2375         struct rte_flow *flow;
2376         struct rte_flow_error error;
2377         unsigned int i;
2378         union {
2379                 struct rte_flow_action_rss rss;
2380                 struct {
2381                         const struct rte_eth_rss_conf *rss_conf;
2382                         uint16_t num;
2383                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2384                 } local;
2385         } action_rss;
2386
2387         if (!priv->reta_idx_n)
2388                 return EINVAL;
2389         for (i = 0; i != priv->reta_idx_n; ++i)
2390                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2391         action_rss.local.rss_conf = &priv->rss_conf;
2392         action_rss.local.num = priv->reta_idx_n;
2393         actions[0].conf = (const void *)&action_rss.rss;
2394         flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2395                                 &error);
2396         if (!flow)
2397                 return rte_errno;
2398         return 0;
2399 }
2400
2401 /**
 * Enable a control flow configured from the control plane.
2403  *
2404  * @param dev
2405  *   Pointer to Ethernet device.
2406  * @param eth_spec
2407  *   An Ethernet flow spec to apply.
2408  * @param eth_mask
2409  *   An Ethernet flow mask to apply.
2410  *
2411  * @return
 *   0 on success, an errno value on failure.
2413  */
2414 int
2415 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2416                struct rte_flow_item_eth *eth_spec,
2417                struct rte_flow_item_eth *eth_mask)
2418 {
2419         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2420 }
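
/*
 * Illustrative sketch (not part of this file): device start code
 * typically uses this helper to let broadcast traffic through, passing
 * the same structure as both spec and mask to request an exact match:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	mlx5_ctrl_flow(dev, &bcast, &bcast);
 */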
2421
2422 /**
2423  * Destroy a flow.
2424  *
2425  * @see rte_flow_destroy()
2426  * @see rte_flow_ops
2427  */
2428 int
2429 mlx5_flow_destroy(struct rte_eth_dev *dev,
2430                   struct rte_flow *flow,
2431                   struct rte_flow_error *error)
2432 {
2433         struct priv *priv = dev->data->dev_private;
2434
2435         (void)error;
2436         priv_lock(priv);
2437         priv_flow_destroy(priv, &priv->flows, flow);
2438         priv_unlock(priv);
2439         return 0;
2440 }
2441
2442 /**
2443  * Destroy all flows.
2444  *
2445  * @see rte_flow_flush()
2446  * @see rte_flow_ops
2447  */
2448 int
2449 mlx5_flow_flush(struct rte_eth_dev *dev,
2450                 struct rte_flow_error *error)
2451 {
2452         struct priv *priv = dev->data->dev_private;
2453
2454         (void)error;
2455         priv_lock(priv);
2456         priv_flow_flush(priv, &priv->flows);
2457         priv_unlock(priv);
2458         return 0;
2459 }
2460
2461 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2462 /**
2463  * Query flow counter.
2464  *
 * @param cs
 *   The counter set.
 * @param counter_stats
 *   Last counter values, used to compute the delta returned to the
 *   application and updated when a reset is requested.
 * @param query_count
 *   Query result structure to fill.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
2472  */
2473 static int
2474 priv_flow_query_count(struct ibv_counter_set *cs,
2475                       struct mlx5_flow_counter_stats *counter_stats,
2476                       struct rte_flow_query_count *query_count,
2477                       struct rte_flow_error *error)
2478 {
2479         uint64_t counters[2];
2480         struct ibv_query_counter_set_attr query_cs_attr = {
2481                 .cs = cs,
2482                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2483         };
2484         struct ibv_counter_set_data query_out = {
2485                 .out = counters,
2486                 .outlen = 2 * sizeof(uint64_t),
2487         };
2488         int res = ibv_query_counter_set(&query_cs_attr, &query_out);
2489
2490         if (res) {
2491                 rte_flow_error_set(error, -res,
2492                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2493                                    NULL,
2494                                    "cannot read counter");
2495                 return -res;
2496         }
2497         query_count->hits_set = 1;
2498         query_count->bytes_set = 1;
2499         query_count->hits = counters[0] - counter_stats->hits;
2500         query_count->bytes = counters[1] - counter_stats->bytes;
2501         if (query_count->reset) {
2502                 counter_stats->hits = counters[0];
2503                 counter_stats->bytes = counters[1];
2504         }
2505         return 0;
2506 }
2507
2508 /**
 * Query a flow.
2510  *
2511  * @see rte_flow_query()
2512  * @see rte_flow_ops
2513  */
2514 int
2515 mlx5_flow_query(struct rte_eth_dev *dev,
2516                 struct rte_flow *flow,
2517                 enum rte_flow_action_type action __rte_unused,
2518                 void *data,
2519                 struct rte_flow_error *error)
2520 {
2521         struct priv *priv = dev->data->dev_private;
2522         int res = EINVAL;
2523
2524         priv_lock(priv);
2525         if (flow->cs) {
2526                 res = priv_flow_query_count(flow->cs,
2527                                         &flow->counter_stats,
2528                                         (struct rte_flow_query_count *)data,
2529                                         error);
2530         } else {
2531                 rte_flow_error_set(error, res,
2532                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2533                                    NULL,
2534                                    "no counter found for flow");
2535         }
2536         priv_unlock(priv);
2537         return -res;
2538 }
2539 #endif
2540
2541 /**
2542  * Isolated mode.
2543  *
2544  * @see rte_flow_isolate()
2545  * @see rte_flow_ops
2546  */
2547 int
2548 mlx5_flow_isolate(struct rte_eth_dev *dev,
2549                   int enable,
2550                   struct rte_flow_error *error)
2551 {
2552         struct priv *priv = dev->data->dev_private;
2553
2554         priv_lock(priv);
2555         if (dev->data->dev_started) {
2556                 rte_flow_error_set(error, EBUSY,
2557                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2558                                    NULL,
2559                                    "port must be stopped first");
2560                 priv_unlock(priv);
2561                 return -rte_errno;
2562         }
2563         priv->isolated = !!enable;
2564         if (enable)
2565                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2566         else
2567                 priv->dev->dev_ops = &mlx5_dev_ops;
2568         priv_unlock(priv);
2569         return 0;
2570 }
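
/*
 * Illustrative sketch (not part of the driver): isolated mode must be
 * toggled while the port is stopped:
 *
 *	struct rte_flow_error error;
 *
 *	rte_eth_dev_stop(port_id);
 *	if (rte_flow_isolate(port_id, 1, &error))
 *		printf("cannot enter isolated mode: %s\n", error.message);
 *	rte_eth_dev_start(port_id);
 *
 * While isolated, only traffic matching explicit flow rules reaches the
 * Rx queues.
 */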
2571
2572 /**
2573  * Convert a flow director filter to a generic flow.
2574  *
2575  * @param priv
2576  *   Private structure.
2577  * @param fdir_filter
2578  *   Flow director filter to add.
2579  * @param attributes
2580  *   Generic flow parameters structure.
2581  *
2582  * @return
 *   0 on success, errno value on failure.
2584  */
2585 static int
2586 priv_fdir_filter_convert(struct priv *priv,
2587                          const struct rte_eth_fdir_filter *fdir_filter,
2588                          struct mlx5_fdir *attributes)
2589 {
2590         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2591
2592         /* Validate queue number. */
2593         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2594                 ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2595                 return EINVAL;
2596         }
2597         attributes->attr.ingress = 1;
2598         attributes->items[0] = (struct rte_flow_item) {
2599                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2600                 .spec = &attributes->l2,
2601                 .mask = &attributes->l2_mask,
2602         };
2603         switch (fdir_filter->action.behavior) {
2604         case RTE_ETH_FDIR_ACCEPT:
2605                 attributes->actions[0] = (struct rte_flow_action){
2606                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2607                         .conf = &attributes->queue,
2608                 };
2609                 break;
2610         case RTE_ETH_FDIR_REJECT:
2611                 attributes->actions[0] = (struct rte_flow_action){
2612                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2613                 };
2614                 break;
2615         default:
2616                 ERROR("invalid behavior %d", fdir_filter->action.behavior);
2617                 return ENOTSUP;
2618         }
2619         attributes->queue.index = fdir_filter->action.rx_queue;
2620         switch (fdir_filter->input.flow_type) {
2621         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2622                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2623                         .src_addr = input->flow.udp4_flow.ip.src_ip,
2624                         .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2625                         .time_to_live = input->flow.udp4_flow.ip.ttl,
2626                         .type_of_service = input->flow.udp4_flow.ip.tos,
2627                         .next_proto_id = input->flow.udp4_flow.ip.proto,
2628                 };
2629                 attributes->l4.udp.hdr = (struct udp_hdr){
2630                         .src_port = input->flow.udp4_flow.src_port,
2631                         .dst_port = input->flow.udp4_flow.dst_port,
2632                 };
2633                 attributes->items[1] = (struct rte_flow_item){
2634                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2635                         .spec = &attributes->l3,
2636                 };
2637                 attributes->items[2] = (struct rte_flow_item){
2638                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2639                         .spec = &attributes->l4,
2640                 };
2641                 break;
2642         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2643                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2644                         .src_addr = input->flow.tcp4_flow.ip.src_ip,
2645                         .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2646                         .time_to_live = input->flow.tcp4_flow.ip.ttl,
2647                         .type_of_service = input->flow.tcp4_flow.ip.tos,
2648                         .next_proto_id = input->flow.tcp4_flow.ip.proto,
2649                 };
2650                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2651                         .src_port = input->flow.tcp4_flow.src_port,
2652                         .dst_port = input->flow.tcp4_flow.dst_port,
2653                 };
2654                 attributes->items[1] = (struct rte_flow_item){
2655                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2656                         .spec = &attributes->l3,
2657                 };
2658                 attributes->items[2] = (struct rte_flow_item){
2659                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2660                         .spec = &attributes->l4,
2661                 };
2662                 break;
2663         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2664                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2665                         .src_addr = input->flow.ip4_flow.src_ip,
2666                         .dst_addr = input->flow.ip4_flow.dst_ip,
2667                         .time_to_live = input->flow.ip4_flow.ttl,
2668                         .type_of_service = input->flow.ip4_flow.tos,
2669                         .next_proto_id = input->flow.ip4_flow.proto,
2670                 };
2671                 attributes->items[1] = (struct rte_flow_item){
2672                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2673                         .spec = &attributes->l3,
2674                 };
2675                 break;
2676         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2677                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2678                         .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2679                         .proto = input->flow.udp6_flow.ip.proto,
2680                 };
2681                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2682                        input->flow.udp6_flow.ip.src_ip,
2683                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2684                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2685                        input->flow.udp6_flow.ip.dst_ip,
                       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2687                 attributes->l4.udp.hdr = (struct udp_hdr){
2688                         .src_port = input->flow.udp6_flow.src_port,
2689                         .dst_port = input->flow.udp6_flow.dst_port,
2690                 };
2691                 attributes->items[1] = (struct rte_flow_item){
2692                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2693                         .spec = &attributes->l3,
2694                 };
2695                 attributes->items[2] = (struct rte_flow_item){
2696                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2697                         .spec = &attributes->l4,
2698                 };
2699                 break;
2700         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2701                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2702                         .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2703                         .proto = input->flow.tcp6_flow.ip.proto,
2704                 };
2705                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2706                        input->flow.tcp6_flow.ip.src_ip,
2707                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2708                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2709                        input->flow.tcp6_flow.ip.dst_ip,
                       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		break;
	default:
		ERROR("invalid flow type %d",
		      fdir_filter->input.flow_type);
		return ENOTSUP;
	}
	return 0;
}
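
/*
 * Illustration only: a hedged sketch of the application-side input the
 * converter above consumes.  The MLX5_FDIR_EXAMPLES guard is hypothetical
 * and never defined, so this code is not compiled into the driver.
 */
#ifdef MLX5_FDIR_EXAMPLES
static void
fdir_convert_example(void)
{
	/* An IPv4/UDP flow director filter as an application builds it. */
	struct rte_eth_fdir_filter filter = {
		.input = {
			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
			.flow.udp4_flow = {
				.ip = {
					/* Addresses are big-endian, e.g. 192.168.0.1. */
					.src_ip = rte_cpu_to_be_32(0xc0a80001),
					.dst_ip = rte_cpu_to_be_32(0xc0a80002),
				},
				.src_port = rte_cpu_to_be_16(1234),
				.dst_port = rte_cpu_to_be_16(5678),
			},
		},
		.action = {
			.rx_queue = 1,
			.behavior = RTE_ETH_FDIR_ACCEPT,
		},
	};

	/*
	 * As in the switch cases above, priv_fdir_filter_convert() would
	 * translate this into rte_flow items: items[1] carrying the IPv4
	 * spec (&attributes->l3) and items[2] the UDP spec
	 * (&attributes->l4), with unused entries left as END.
	 */
	(void)filter;
}
#endif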

/**
 * Add a new flow director filter and store it in the list.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_add(struct priv *priv,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return -ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return -ret;
	flow = priv_flow_create(priv,
				&priv->flows,
				&attributes.attr,
				attributes.items,
				attributes.actions,
				&error);
	if (flow) {
		DEBUG("FDIR created %p", (void *)flow);
		return 0;
	}
	return ENOTSUP;
}
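
/*
 * Illustration only: a hedged sketch of how an application typically
 * reaches priv_fdir_filter_add(), through the generic filter control
 * API.  The MLX5_FDIR_EXAMPLES guard is hypothetical and never defined.
 */
#ifdef MLX5_FDIR_EXAMPLES
static int
fdir_add_example(uint16_t port_id, struct rte_eth_fdir_filter *filter)
{
	/*
	 * rte_eth_dev_filter_ctrl() dispatches to mlx5_dev_filter_ctrl()
	 * below, which takes the private lock and calls
	 * priv_fdir_filter_add() for RTE_ETH_FILTER_ADD.
	 */
	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				       RTE_ETH_FILTER_ADD, filter);
}
#endif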

/**
 * Delete specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_delete(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return -ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for the drop action, which is only appended to the
	 * flow specifications when the flow itself is created.  No flow is
	 * created here, so the drop specification has to be added manually
	 * for the comparison below to see it.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
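	/*
	 * Verbs lays flow specifications out back to back right after the
	 * ibv_flow_attr, each one starting with an ibv_spec_header giving
	 * its type and size.  The lookup below relies on this layout to
	 * compare the converted filter against every stored flow,
	 * specification by specification.
	 */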
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare the attributes first. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	if (flow)
		priv_flow_destroy(priv, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	return -ret;
}

/**
 * Update a specific filter by deleting it and re-adding it.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_update(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = priv_fdir_filter_delete(priv, fdir_filter);
	if (ret)
		return ret;
	ret = priv_fdir_filter_add(priv, fdir_filter);
	return ret;
}

/**
 * Flush all filters.
 *
 * @param priv
 *   Private structure.
 */
static void
priv_fdir_filter_flush(struct priv *priv)
{
	priv_flow_flush(priv, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param priv
 *   Private structure.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

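	/*
	 * Only the mode and the configured masks are meaningful here; the
	 * capacity and flexible payload fields are zeroed, i.e. no such
	 * support is advertised.
	 */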
	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param priv
 *   Pointer to private structure.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
{
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;
	int ret = 0;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		ERROR("%p: flow director mode %d not supported",
		      (void *)priv, fdir_mode);
		return EINVAL;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		ret = priv_fdir_filter_add(priv, arg);
		break;
	case RTE_ETH_FILTER_UPDATE:
		ret = priv_fdir_filter_update(priv, arg);
		break;
	case RTE_ETH_FILTER_DELETE:
		ret = priv_fdir_filter_delete(priv, arg);
		break;
	case RTE_ETH_FILTER_FLUSH:
		priv_fdir_filter_flush(priv);
		break;
	case RTE_ETH_FILTER_INFO:
		priv_fdir_info_get(priv, arg);
		break;
	default:
		DEBUG("%p: unknown operation %u", (void *)priv,
		      filter_op);
		ret = EINVAL;
		break;
	}
	return ret;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	int ret = EINVAL;
	struct priv *priv = dev->data->dev_private;

	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		priv_lock(priv);
		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
		priv_unlock(priv);
		break;
	default:
		ERROR("%p: filter type (%d) not supported",
		      (void *)dev, filter_type);
		break;
	}
	return -ret;
}
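
/*
 * Illustration only: a hedged sketch of how the RTE_ETH_FILTER_GENERIC
 * branch above lets the rte_flow layer discover this PMD's flow
 * operations.  The MLX5_FDIR_EXAMPLES guard is hypothetical and never
 * defined.
 */
#ifdef MLX5_FDIR_EXAMPLES
static const struct rte_flow_ops *
flow_ops_example(struct rte_eth_dev *dev)
{
	const struct rte_flow_ops *ops = NULL;

	/* Equivalent to what the rte_flow layer does internally. */
	if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
				 RTE_ETH_FILTER_GET, &ops))
		return NULL;
	return ops;
}
#endif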