/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {
        int dummy;
};
#endif
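
/*
 * The dummy definition above only keeps sizeof(struct
 * ibv_flow_spec_counter_action) usable when the Verbs counter extension is
 * missing; the COUNT action itself is not advertised in that case.
 */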

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);

/* Hash RX queue types. */
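/*
 * Keep the TCP, UDP and IP entries of each IP version contiguous and in this
 * order: priv_flow_convert_finalise() iterates over them as ranges.
 */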
enum hash_rxq_type {
        HASH_RXQ_TCPV4,
        HASH_RXQ_UDPV4,
        HASH_RXQ_IPV4,
        HASH_RXQ_TCPV6,
        HASH_RXQ_UDPV6,
        HASH_RXQ_IPV6,
        HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
        uint64_t hash_fields; /* Fields that participate in the hash. */
        uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
        unsigned int flow_priority; /* Flow priority to use. */
        unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
        [HASH_RXQ_TCPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_UDPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_IPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4),
                .dpdk_rss_hf = (ETH_RSS_IPV4 |
                                ETH_RSS_FRAG_IPV4),
                .flow_priority = 1,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_TCPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_UDPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_IPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6),
                .dpdk_rss_hf = (ETH_RSS_IPV6 |
                                ETH_RSS_FRAG_IPV6),
                .flow_priority = 1,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_ETH] = {
                .hash_fields = 0,
                .dpdk_rss_hf = 0,
                .flow_priority = 2,
        },
};
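
/*
 * Lower Verbs priority values take precedence, so fully specified L4 flows
 * (flow_priority 0) match before the L3 (1) and L2 (2) catch-all entries.
 */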

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);

/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
        uint64_t hits; /**< Number of packets matched by the rule. */
        uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flows structures. */
struct mlx5_flow {
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flows structures. */
struct mlx5_flow_drop {
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        uint16_t (*queues)[]; /**< Queues indexes to use. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
        struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
        struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
        /**< Flow with Rx queue. */
};

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }
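
/*
 * For instance, ITEMS(RTE_FLOW_ITEM_TYPE_ETH) expands to the compound
 * literal (const enum rte_flow_item_type []){ RTE_FLOW_ITEM_TYPE_ETH,
 * RTE_FLOW_ITEM_TYPE_END }, i.e. an END-terminated array of item types.
 */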

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-masks size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        RTE_FLOW_ACTION_TYPE_COUNT,
#endif
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
};

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint32_t create:1;
        /**< Whether resources should remain after a validate. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t count:1; /**< Count is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        enum hash_rxq_type layer; /**< Last pattern layer detected. */
        struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
        struct {
                struct ibv_flow_attr *ibv_attr;
                /**< Pointer to Verbs attributes. */
                unsigned int offset;
                /**< Current position or total size of the attribute. */
        } queue[RTE_DIM(hash_rxq_init)];
};
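
/*
 * One Verbs attribute is kept per hash Rx queue type: a single rte_flow may
 * therefore expand into several Verbs flows, one for each RSS hash type it
 * has to cover.
 */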

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        .query = mlx5_flow_query,
#else
        .query = NULL,
#endif
        .isolate = mlx5_flow_isolate,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_action actions[2];
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
        enum ibv_flow_spec_type type;
        uint16_t size;
};
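
/*
 * This mirrors the type/size header shared by the ibv_flow_spec_* layouts,
 * so specifications stored inside an attribute can be walked generically.
 */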

/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                const uint8_t *spec = item->mask;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
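        /*
         * A spec/last range is accepted only when both bounds are identical
         * under the applied mask, i.e. actual ranges are not supported.
         */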
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}

/**
 * Copy the RSS configuration from the user; when rss_conf is null, the
 * driver's default configuration is kept.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
                           struct mlx5_flow_parse *parser,
                           const struct rte_eth_rss_conf *rss_conf)
{
        /*
         * This function is also called at the beginning of
         * priv_flow_convert_actions() to initialize the parser with the
         * device default RSS configuration.
         */
        (void)priv;
        if (rss_conf) {
                if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
                        return EINVAL;
                if (rss_conf->rss_key_len != 40)
                        return EINVAL;
                if (rss_conf->rss_key_len && rss_conf->rss_key) {
                        parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
                        memcpy(parser->rss_key, rss_conf->rss_key,
                               rss_conf->rss_key_len);
                        parser->rss_conf.rss_key = parser->rss_key;
                }
                parser->rss_conf.rss_hf = rss_conf->rss_hf;
        }
        return 0;
}

/**
 * Extract attribute to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
                             const struct rte_flow_attr *attr,
                             struct rte_flow_error *error,
                             struct mlx5_flow_parse *parser)
{
        (void)priv;
        (void)parser;
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        return 0;
}

/**
 * Extract actions request to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
                          const struct rte_flow_action actions[],
                          struct rte_flow_error *error,
                          struct mlx5_flow_parse *parser)
{
        /*
         * Add default RSS configuration necessary for Verbs to create QP even
         * if no RSS is necessary.
         */
        priv_flow_convert_rss_conf(priv, parser,
                                   (const struct rte_eth_rss_conf *)
                                   &priv->rss_conf);
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        parser->drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index > (priv->rxqs_n - 1)))
                                goto exit_action_not_supported;
                        for (n = 0; n < parser->queues_n; ++n) {
                                if (parser->queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (parser->queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        if (!found) {
                                parser->queues_n = 1;
                                parser->queues[0] = queue->index;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (parser->queues_n == 1) {
                                uint16_t found = 0;

                                assert(parser->queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (parser->queues[0] ==
                                            rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n)
                                parser->queues[n] = rss->queue[n];
                        parser->queues_n = rss->num;
                        if (priv_flow_convert_rss_conf(priv, parser,
                                                       rss->rss_conf)) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "wrong RSS configuration");
                                return -rte_errno;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        parser->mark = 1;
                        parser->mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        parser->mark = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
                           priv->config.flow_counter_en) {
                        parser->count = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (parser->drop && parser->mark)
                parser->mark = 0;
        if (!parser->queues_n && !parser->drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}

/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
                                 const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;

        (void)priv;
        /* Initialise the offsets to start after verbs attribute. */
        for (i = 0; i != hash_rxq_init_n; ++i)
                parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int n;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (parser->inner) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
                        parser->inner = IBV_FLOW_SPEC_INNER;
                }
                if (parser->drop) {
                        parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
                } else {
                        for (n = 0; n != hash_rxq_init_n; ++n)
                                parser->queue[n].offset += cur_item->dst_sz;
                }
        }
        if (parser->drop) {
                parser->queue[HASH_RXQ_ETH].offset +=
                        sizeof(struct ibv_flow_spec_action_drop);
        }
        if (parser->mark) {
                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset +=
                                sizeof(struct ibv_flow_spec_action_tag);
        }
        if (parser->count) {
                unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset += size;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
}

/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr*
priv_flow_convert_allocate(struct priv *priv,
                           unsigned int priority,
                           unsigned int size,
                           struct rte_flow_error *error)
{
        struct ibv_flow_attr *ibv_attr;

        (void)priv;
        ibv_attr = rte_calloc(__func__, 1, size, 0);
        if (!ibv_attr) {
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
                                   "cannot allocate verbs spec attributes.");
                return NULL;
        }
        ibv_attr->priority = priority;
        return ibv_attr;
}

/**
 * Finalise verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
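        /*
         * hmin..hmax covers the hash Rx queue types matching the pattern's
         * IP version, ohmin..ohmax those of the opposite version; both
         * ranges rely on the ordering of enum hash_rxq_type.
         */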
        const unsigned int ipv4 =
                hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
        const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
        const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
        const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
        const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        unsigned int i;

        (void)priv;
        if (parser->layer == HASH_RXQ_ETH) {
                goto fill;
        } else {
                /*
                 * This layer becomes useless as the pattern defines deeper
                 * layers.
                 */
                rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
                parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
        }
        /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
        for (i = ohmin; i != (ohmax + 1); ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
        }
        /* Remove impossible flow according to the RSS configuration. */
        if (hash_rxq_init[parser->layer].dpdk_rss_hf &
            parser->rss_conf.rss_hf) {
                /* Remove any other flow. */
                for (i = hmin; i != (hmax + 1); ++i) {
                        if ((i == parser->layer) ||
                             (!parser->queue[i].ibv_attr))
                                continue;
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        } else if (!parser->queue[ip].ibv_attr) {
                /* No RSS possible with the current configuration. */
                parser->queues_n = 1;
                return;
        }
fill:
        /*
         * Fill missing layers in verbs specifications, or compute the correct
         * offset to allocate the memory space for the attributes and
         * specifications.
         */
        for (i = 0; i != hash_rxq_init_n - 1; ++i) {
                union {
                        struct ibv_flow_spec_ipv4_ext ipv4;
                        struct ibv_flow_spec_ipv6 ipv6;
                        struct ibv_flow_spec_tcp_udp udp_tcp;
                } specs;
                void *dst;
                uint16_t size;

                if (i == parser->layer)
                        continue;
                if (parser->layer == HASH_RXQ_ETH) {
                        if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
                                        .type = IBV_FLOW_SPEC_IPV4_EXT,
                                        .size = size,
                                };
                        } else {
                                size = sizeof(struct ibv_flow_spec_ipv6);
                                specs.ipv6 = (struct ibv_flow_spec_ipv6){
                                        .type = IBV_FLOW_SPEC_IPV6,
                                        .size = size,
                                };
                        }
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
                if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
                    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
                        size = sizeof(struct ibv_flow_spec_tcp_udp);
                        specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
                                .type = ((i == HASH_RXQ_UDPV4 ||
                                          i == HASH_RXQ_UDPV6) ?
                                         IBV_FLOW_SPEC_UDP :
                                         IBV_FLOW_SPEC_TCP),
                                .size = size,
                        };
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
        }
}
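
/*
 * Note: priv_flow_convert() calls the function above twice: first to compute
 * the space needed by each attribute, then again after conversion to append
 * the specifications still missing for the RSS configuration.
 */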

/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item items[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error,
                  struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;
        int ret;

        /* First step. Validate the attributes, items and actions. */
        *parser = (struct mlx5_flow_parse){
                .create = parser->create,
                .layer = HASH_RXQ_ETH,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
        };
        ret = priv_flow_convert_attributes(priv, attr, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_actions(priv, actions, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_items_validate(priv, items, error, parser);
        if (ret)
                return ret;
        priv_flow_convert_finalise(priv, parser);
        /*
         * Second step.
         * Allocate the memory space to store verbs specifications.
         */
        if (parser->drop) {
                unsigned int priority =
                        attr->priority +
                        hash_rxq_init[HASH_RXQ_ETH].flow_priority;
                unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

                parser->queue[HASH_RXQ_ETH].ibv_attr =
                        priv_flow_convert_allocate(priv, priority,
                                                   offset, error);
                if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
                        return ENOMEM;
                parser->queue[HASH_RXQ_ETH].offset =
                        sizeof(struct ibv_flow_attr);
        } else {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        unsigned int priority =
                                attr->priority +
                                hash_rxq_init[i].flow_priority;
                        unsigned int offset;

                        if (!(parser->rss_conf.rss_hf &
                              hash_rxq_init[i].dpdk_rss_hf) &&
                            (i != HASH_RXQ_ETH))
                                continue;
                        offset = parser->queue[i].offset;
                        parser->queue[i].ibv_attr =
                                priv_flow_convert_allocate(priv, priority,
                                                           offset, error);
                        if (!parser->queue[i].ibv_attr)
                                goto exit_enomem;
                        parser->queue[i].offset = sizeof(struct ibv_flow_attr);
                }
        }
        /* Third step. Conversion parse, fill the specifications. */
        parser->inner = 0;
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                cur_item = &mlx5_flow_items[items->type];
                ret = cur_item->convert(items,
                                        (cur_item->default_mask ?
                                         cur_item->default_mask :
                                         cur_item->mask),
                                        parser);
                if (ret) {
                        rte_flow_error_set(error, ret,
                                           RTE_FLOW_ERROR_TYPE_ITEM,
                                           items, "item not supported");
                        goto exit_free;
                }
        }
        if (parser->mark)
                mlx5_flow_create_flag_mark(parser, parser->mark_id);
        if (parser->count && parser->create) {
                mlx5_flow_create_count(priv, parser);
                if (!parser->cs)
                        goto exit_count_error;
        }
        /*
         * Last step. Complete missing specification to reach the RSS
         * configuration.
         */
        if (!parser->drop) {
                priv_flow_convert_finalise(priv, parser);
        } else {
                parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
                        attr->priority +
                        hash_rxq_init[parser->layer].flow_priority;
        }
exit_free:
        /* When only verification was requested, release all resources. */
1132         if (!parser->create) {
1133                 for (i = 0; i != hash_rxq_init_n; ++i) {
1134                         if (parser->queue[i].ibv_attr) {
1135                                 rte_free(parser->queue[i].ibv_attr);
1136                                 parser->queue[i].ibv_attr = NULL;
1137                         }
1138                 }
1139         }
1140         return ret;
1141 exit_enomem:
1142         for (i = 0; i != hash_rxq_init_n; ++i) {
1143                 if (parser->queue[i].ibv_attr) {
1144                         rte_free(parser->queue[i].ibv_attr);
1145                         parser->queue[i].ibv_attr = NULL;
1146                 }
1147         }
1148         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1149                            NULL, "cannot allocate verbs spec attributes.");
1150         return ret;
1151 exit_count_error:
1152         rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1153                            NULL, "cannot create counter.");
1154         return rte_errno;
1155 }
1156
1157 /**
1158  * Copy the specification created into the flow.
1159  *
1160  * @param parser
1161  *   Internal parser structure.
1162  * @param src
1163  *   Create specification.
1164  * @param size
1165  *   Size in bytes of the specification to copy.
1166  */
1167 static void
1168 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1169                       unsigned int size)
1170 {
1171         unsigned int i;
1172         void *dst;
1173
1174         for (i = 0; i != hash_rxq_init_n; ++i) {
1175                 if (!parser->queue[i].ibv_attr)
1176                         continue;
1177                 /* Specification must be the same l3 type or none. */
1178                 if (parser->layer == HASH_RXQ_ETH ||
1179                     (hash_rxq_init[parser->layer].ip_version ==
1180                      hash_rxq_init[i].ip_version) ||
1181                     (hash_rxq_init[i].ip_version == 0)) {
1182                         dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1183                                         parser->queue[i].offset);
1184                         memcpy(dst, src, size);
1185                         ++parser->queue[i].ibv_attr->num_of_specs;
1186                         parser->queue[i].offset += size;
1187                 }
1188         }
1189 }
1190
1191 /**
1192  * Convert Ethernet item to Verbs specification.
1193  *
1194  * @param item[in]
1195  *   Item specification.
1196  * @param default_mask[in]
1197  *   Default bit-masks to use when item->mask is not provided.
1198  * @param data[in, out]
1199  *   User structure.
1200  */
1201 static int
1202 mlx5_flow_create_eth(const struct rte_flow_item *item,
1203                      const void *default_mask,
1204                      void *data)
1205 {
1206         const struct rte_flow_item_eth *spec = item->spec;
1207         const struct rte_flow_item_eth *mask = item->mask;
1208         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1209         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1210         struct ibv_flow_spec_eth eth = {
1211                 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1212                 .size = eth_size,
1213         };
1214
1215         /* Don't update layer for the inner pattern. */
1216         if (!parser->inner)
1217                 parser->layer = HASH_RXQ_ETH;
1218         if (spec) {
1219                 unsigned int i;
1220
1221                 if (!mask)
1222                         mask = default_mask;
1223                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1224                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1225                 eth.val.ether_type = spec->type;
1226                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1227                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1228                 eth.mask.ether_type = mask->type;
1229                 /* Remove unwanted bits from values. */
1230                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1231                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1232                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
1233                 }
1234                 eth.val.ether_type &= eth.mask.ether_type;
1235         }
1236         mlx5_flow_create_copy(parser, &eth, eth_size);
1237         return 0;
1238 }
1239
1240 /**
1241  * Convert VLAN item to Verbs specification.
1242  *
1243  * @param item[in]
1244  *   Item specification.
1245  * @param default_mask[in]
1246  *   Default bit-masks to use when item->mask is not provided.
1247  * @param data[in, out]
1248  *   User structure.
1249  */
1250 static int
1251 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1252                       const void *default_mask,
1253                       void *data)
1254 {
1255         const struct rte_flow_item_vlan *spec = item->spec;
1256         const struct rte_flow_item_vlan *mask = item->mask;
1257         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1258         struct ibv_flow_spec_eth *eth;
1259         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1260
1261         if (spec) {
1262                 unsigned int i;
1263                 if (!mask)
1264                         mask = default_mask;
1265
1266                 for (i = 0; i != hash_rxq_init_n; ++i) {
1267                         if (!parser->queue[i].ibv_attr)
1268                                 continue;
1269
1270                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1271                                        parser->queue[i].offset - eth_size);
1272                         eth->val.vlan_tag = spec->tci;
1273                         eth->mask.vlan_tag = mask->tci;
1274                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1275                 }
1276         }
1277         return 0;
1278 }
1279
1280 /**
1281  * Convert IPv4 item to Verbs specification.
1282  *
1283  * @param item[in]
1284  *   Item specification.
1285  * @param default_mask[in]
1286  *   Default bit-masks to use when item->mask is not provided.
1287  * @param data[in, out]
1288  *   User structure.
1289  */
1290 static int
1291 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1292                       const void *default_mask,
1293                       void *data)
1294 {
1295         const struct rte_flow_item_ipv4 *spec = item->spec;
1296         const struct rte_flow_item_ipv4 *mask = item->mask;
1297         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1298         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1299         struct ibv_flow_spec_ipv4_ext ipv4 = {
1300                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1301                 .size = ipv4_size,
1302         };
1303
1304         /* Don't update layer for the inner pattern. */
1305         if (!parser->inner)
1306                 parser->layer = HASH_RXQ_IPV4;
1307         if (spec) {
1308                 if (!mask)
1309                         mask = default_mask;
1310                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1311                         .src_ip = spec->hdr.src_addr,
1312                         .dst_ip = spec->hdr.dst_addr,
1313                         .proto = spec->hdr.next_proto_id,
1314                         .tos = spec->hdr.type_of_service,
1315                 };
1316                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1317                         .src_ip = mask->hdr.src_addr,
1318                         .dst_ip = mask->hdr.dst_addr,
1319                         .proto = mask->hdr.next_proto_id,
1320                         .tos = mask->hdr.type_of_service,
1321                 };
1322                 /* Remove unwanted bits from values. */
1323                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1324                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1325                 ipv4.val.proto &= ipv4.mask.proto;
1326                 ipv4.val.tos &= ipv4.mask.tos;
1327         }
1328         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1329         return 0;
1330 }
1331
1332 /**
1333  * Convert IPv6 item to Verbs specification.
1334  *
1335  * @param[in] item
1336  *   Item specification.
1337  * @param[in] default_mask
1338  *   Default bit-masks to use when item->mask is not provided.
1339  * @param[in, out] data
1340  *   User structure.
1341  */
1342 static int
1343 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1344                       const void *default_mask,
1345                       void *data)
1346 {
1347         const struct rte_flow_item_ipv6 *spec = item->spec;
1348         const struct rte_flow_item_ipv6 *mask = item->mask;
1349         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1350         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1351         struct ibv_flow_spec_ipv6 ipv6 = {
1352                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1353                 .size = ipv6_size,
1354         };
1355
1356         /* Don't update layer for the inner pattern. */
1357         if (!parser->inner)
1358                 parser->layer = HASH_RXQ_IPV6;
1359         if (spec) {
1360                 unsigned int i;
1361                 uint32_t vtc_flow_val;
1362                 uint32_t vtc_flow_mask;
1363
1364                 if (!mask)
1365                         mask = default_mask;
1366                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1367                        RTE_DIM(ipv6.val.src_ip));
1368                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1369                        RTE_DIM(ipv6.val.dst_ip));
1370                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1371                        RTE_DIM(ipv6.mask.src_ip));
1372                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1373                        RTE_DIM(ipv6.mask.dst_ip));
1374                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1375                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1376                 ipv6.val.flow_label =
1377                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1378                                          IPV6_HDR_FL_SHIFT);
1379                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1380                                          IPV6_HDR_TC_SHIFT;
1381                 ipv6.val.next_hdr = spec->hdr.proto;
1382                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1383                 ipv6.mask.flow_label =
1384                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1385                                          IPV6_HDR_FL_SHIFT);
1386                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1387                                           IPV6_HDR_TC_SHIFT;
1388                 ipv6.mask.next_hdr = mask->hdr.proto;
1389                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1390                 /* Remove unwanted bits from values. */
1391                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1392                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1393                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1394                 }
1395                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1396                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1397                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1398                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1399         }
1400         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1401         return 0;
1402 }
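
/*
 * Illustration only: the IPv6 "vtc_flow" word packs version (4 bits),
 * traffic class (8 bits) and flow label (20 bits). Assuming the
 * IPV6_HDR_* helpers from rte_ip.h, a host-order value decomposes as:
 *
 *   uint32_t vtc_flow = 0x60a12345;
 *   uint32_t tc = (vtc_flow & IPV6_HDR_TC_MASK) >> IPV6_HDR_TC_SHIFT;
 *   uint32_t fl = (vtc_flow & IPV6_HDR_FL_MASK) >> IPV6_HDR_FL_SHIFT;
 *   // tc == 0x0a, fl == 0x12345, version == 0x6 (top nibble).
 */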
1403
1404 /**
1405  * Convert UDP item to Verbs specification.
1406  *
1407  * @param[in] item
1408  *   Item specification.
1409  * @param[in] default_mask
1410  *   Default bit-masks to use when item->mask is not provided.
1411  * @param[in, out] data
1412  *   User structure.
1413  */
1414 static int
1415 mlx5_flow_create_udp(const struct rte_flow_item *item,
1416                      const void *default_mask,
1417                      void *data)
1418 {
1419         const struct rte_flow_item_udp *spec = item->spec;
1420         const struct rte_flow_item_udp *mask = item->mask;
1421         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1422         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1423         struct ibv_flow_spec_tcp_udp udp = {
1424                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1425                 .size = udp_size,
1426         };
1427
1428         /* Don't update layer for the inner pattern. */
1429         if (!parser->inner) {
1430                 if (parser->layer == HASH_RXQ_IPV4)
1431                         parser->layer = HASH_RXQ_UDPV4;
1432                 else
1433                         parser->layer = HASH_RXQ_UDPV6;
1434         }
1435         if (spec) {
1436                 if (!mask)
1437                         mask = default_mask;
1438                 udp.val.dst_port = spec->hdr.dst_port;
1439                 udp.val.src_port = spec->hdr.src_port;
1440                 udp.mask.dst_port = mask->hdr.dst_port;
1441                 udp.mask.src_port = mask->hdr.src_port;
1442                 /* Remove unwanted bits from values. */
1443                 udp.val.src_port &= udp.mask.src_port;
1444                 udp.val.dst_port &= udp.mask.dst_port;
1445         }
1446         mlx5_flow_create_copy(parser, &udp, udp_size);
1447         return 0;
1448 }
1449
1450 /**
1451  * Convert TCP item to Verbs specification.
1452  *
1453  * @param[in] item
1454  *   Item specification.
1455  * @param[in] default_mask
1456  *   Default bit-masks to use when item->mask is not provided.
1457  * @param[in, out] data
1458  *   User structure.
1459  */
1460 static int
1461 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1462                      const void *default_mask,
1463                      void *data)
1464 {
1465         const struct rte_flow_item_tcp *spec = item->spec;
1466         const struct rte_flow_item_tcp *mask = item->mask;
1467         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1468         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1469         struct ibv_flow_spec_tcp_udp tcp = {
1470                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1471                 .size = tcp_size,
1472         };
1473
1474         /* Don't update layer for the inner pattern. */
1475         if (!parser->inner) {
1476                 if (parser->layer == HASH_RXQ_IPV4)
1477                         parser->layer = HASH_RXQ_TCPV4;
1478                 else
1479                         parser->layer = HASH_RXQ_TCPV6;
1480         }
1481         if (spec) {
1482                 if (!mask)
1483                         mask = default_mask;
1484                 tcp.val.dst_port = spec->hdr.dst_port;
1485                 tcp.val.src_port = spec->hdr.src_port;
1486                 tcp.mask.dst_port = mask->hdr.dst_port;
1487                 tcp.mask.src_port = mask->hdr.src_port;
1488                 /* Remove unwanted bits from values. */
1489                 tcp.val.src_port &= tcp.mask.src_port;
1490                 tcp.val.dst_port &= tcp.mask.dst_port;
1491         }
1492         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1493         return 0;
1494 }
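
/*
 * For reference, the outermost pattern decides which hash Rx queue type
 * a rule ends up on (illustrative mapping derived from the converters
 * above):
 *
 *   eth              -> HASH_RXQ_ETH
 *   eth / ipv4       -> HASH_RXQ_IPV4
 *   eth / ipv4 / udp -> HASH_RXQ_UDPV4
 *   eth / ipv4 / tcp -> HASH_RXQ_TCPV4
 *   eth / ipv6 / udp -> HASH_RXQ_UDPV6
 *   eth / ipv6 / tcp -> HASH_RXQ_TCPV6
 */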
1495
1496 /**
1497  * Convert VXLAN item to Verbs specification.
1498  *
1499  * @param[in] item
1500  *   Item specification.
1501  * @param[in] default_mask
1502  *   Default bit-masks to use when item->mask is not provided.
1503  * @param[in, out] data
1504  *   User structure.
1505  */
1506 static int
1507 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1508                        const void *default_mask,
1509                        void *data)
1510 {
1511         const struct rte_flow_item_vxlan *spec = item->spec;
1512         const struct rte_flow_item_vxlan *mask = item->mask;
1513         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1514         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1515         struct ibv_flow_spec_tunnel vxlan = {
1516                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1517                 .size = size,
1518         };
1519         union vni {
1520                 uint32_t vlan_id;
1521                 uint8_t vni[4];
1522         } id;
1523
1524         id.vni[0] = 0;
1525         parser->inner = IBV_FLOW_SPEC_INNER;
1526         if (spec) {
1527                 if (!mask)
1528                         mask = default_mask;
1529                 memcpy(&id.vni[1], spec->vni, 3);
1530                 vxlan.val.tunnel_id = id.vlan_id;
1531                 memcpy(&id.vni[1], mask->vni, 3);
1532                 vxlan.mask.tunnel_id = id.vlan_id;
1533                 /* Remove unwanted bits from values. */
1534                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1535         }
1536         /*
1537          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only
1538          * this layer is present in the Verbs specification, it is
1539          * interpreted as a wildcard and all packets will match the rule;
1540          * if it follows a full stack layer (e.g. eth / ipv4 / udp), all
1541          * packets matching the preceding layers will also match it.
1542          * To avoid such a situation, VNI 0 is currently refused.
1543          */
1544         if (!vxlan.val.tunnel_id)
1545                 return EINVAL;
1546         mlx5_flow_create_copy(parser, &vxlan, size);
1547         return 0;
1548 }
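
/*
 * Illustration only: the 24-bit VNI lands in bytes 1..3 of the 32-bit
 * tunnel_id so the value keeps its network byte order, e.g. for
 * VNI 0x123456 (spec->vni = {0x12, 0x34, 0x56}):
 *
 *   union { uint32_t id; uint8_t b[4]; } u = {
 *           .b = { 0x00, 0x12, 0x34, 0x56 },
 *   };
 *   // u.id is written verbatim as vxlan.val.tunnel_id.
 */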
1549
1550 /**
1551  * Convert mark/flag action to Verbs specification.
1552  *
1553  * @param parser
1554  *   Internal parser structure.
1555  * @param mark_id
1556  *   Mark identifier.
1557  */
1558 static int
1559 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1560 {
1561         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1562         struct ibv_flow_spec_action_tag tag = {
1563                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1564                 .size = size,
1565                 .tag_id = mlx5_flow_mark_set(mark_id),
1566         };
1567
1568         assert(parser->mark);
1569         mlx5_flow_create_copy(parser, &tag, size);
1570         return 0;
1571 }
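
/*
 * Illustration only, assuming the usual rte_flow MARK semantics: the
 * identifier set here comes back to the application in the mbuf, e.g.:
 *
 *   if (mbuf->ol_flags & PKT_RX_FDIR_ID)
 *           printf("flow mark: %u\n", mbuf->hash.fdir.hi);
 */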
1572
1573 /**
1574  * Convert count action to Verbs specification.
1575  *
1576  * @param priv
1577  *   Pointer to private structure.
1578  * @param parser
1579  *   Pointer to MLX5 flow parser structure.
1580  *
1581  * @return
1582  *   0 on success, errno value on failure.
1583  */
1584 static int
1585 mlx5_flow_create_count(struct priv *priv __rte_unused,
1586                        struct mlx5_flow_parse *parser __rte_unused)
1587 {
1588 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1589         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1590         struct ibv_counter_set_init_attr init_attr = {0};
1591         struct ibv_flow_spec_counter_action counter = {
1592                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1593                 .size = size,
1594                 .counter_set_handle = 0,
1595         };
1596
1597         init_attr.counter_set_id = 0;
1598         parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1599         if (!parser->cs)
1600                 return EINVAL;
1601         counter.counter_set_handle = parser->cs->handle;
1602         mlx5_flow_create_copy(parser, &counter, size);
1603 #endif
1604         return 0;
1605 }
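
/*
 * Illustration only: a rule carrying a COUNT action can later be read
 * back through the generic API (sketch, port_id/flow assumed):
 *
 *   struct rte_flow_query_count qc = { .reset = 1 };
 *   struct rte_flow_error qerr;
 *
 *   rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT, &qc, &qerr);
 *   // qc.hits/qc.bytes hold the deltas since the previous reset.
 */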
1606
1607 /**
1608  * Complete flow rule creation with a drop queue.
1609  *
1610  * @param priv
1611  *   Pointer to private structure.
1612  * @param parser
1613  *   Internal parser structure.
1614  * @param flow
1615  *   Pointer to the rte_flow.
1616  * @param[out] error
1617  *   Perform verbose error reporting if not NULL.
1618  *
1619  * @return
1620  *   0 on success, errno value on failure.
1621  */
1622 static int
1623 priv_flow_create_action_queue_drop(struct priv *priv,
1624                                    struct mlx5_flow_parse *parser,
1625                                    struct rte_flow *flow,
1626                                    struct rte_flow_error *error)
1627 {
1628         struct ibv_flow_spec_action_drop *drop;
1629         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1630         int err = 0;
1631
1632         assert(priv->pd);
1633         assert(priv->ctx);
1634         flow->drop = 1;
1635         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1636                         parser->queue[HASH_RXQ_ETH].offset);
1637         *drop = (struct ibv_flow_spec_action_drop){
1638                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1639                         .size = size,
1640         };
1641         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1642         parser->queue[HASH_RXQ_ETH].offset += size;
1643         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1644                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1645         if (parser->count)
1646                 flow->cs = parser->cs;
1647         if (!priv->dev->data->dev_started)
1648                 return 0;
1649         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1650         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1651                 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1652                                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
1653         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1654                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1655                                    NULL, "flow rule creation failure");
1656                 err = ENOMEM;
1657                 goto error;
1658         }
1659         return 0;
1660 error:
1661         assert(flow);
1662         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1663                 claim_zero(mlx5_glue->destroy_flow
1664                            (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1665                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1666         }
1667         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1668                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1669                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1670         }
1671         if (flow->cs) {
1672                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1673                 flow->cs = NULL;
1674                 parser->cs = NULL;
1675         }
1676         return err;
1677 }
1678
1679 /**
1680  * Create hash Rx queues when RSS is enabled.
1681  *
1682  * @param priv
1683  *   Pointer to private structure.
1684  * @param parser
1685  *   Internal parser structure.
1686  * @param flow
1687  *   Pointer to the rte_flow.
1688  * @param[out] error
1689  *   Perform verbose error reporting if not NULL.
1690  *
1691  * @return
1692  *   0 on success, an errno value otherwise and rte_errno is set.
1693  */
1694 static int
1695 priv_flow_create_action_queue_rss(struct priv *priv,
1696                                   struct mlx5_flow_parse *parser,
1697                                   struct rte_flow *flow,
1698                                   struct rte_flow_error *error)
1699 {
1700         unsigned int i;
1701
1702         for (i = 0; i != hash_rxq_init_n; ++i) {
1703                 uint64_t hash_fields;
1704
1705                 if (!parser->queue[i].ibv_attr)
1706                         continue;
1707                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1708                 parser->queue[i].ibv_attr = NULL;
1709                 hash_fields = hash_rxq_init[i].hash_fields;
1710                 if (!priv->dev->data->dev_started)
1711                         continue;
1712                 flow->frxq[i].hrxq =
1713                         mlx5_priv_hrxq_get(priv,
1714                                            parser->rss_conf.rss_key,
1715                                            parser->rss_conf.rss_key_len,
1716                                            hash_fields,
1717                                            parser->queues,
1718                                            parser->queues_n);
1719                 if (flow->frxq[i].hrxq)
1720                         continue;
1721                 flow->frxq[i].hrxq =
1722                         mlx5_priv_hrxq_new(priv,
1723                                            parser->rss_conf.rss_key,
1724                                            parser->rss_conf.rss_key_len,
1725                                            hash_fields,
1726                                            parser->queues,
1727                                            parser->queues_n);
1728                 if (!flow->frxq[i].hrxq) {
1729                         rte_flow_error_set(error, ENOMEM,
1730                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1731                                            NULL, "cannot create hash rxq");
1732                         return ENOMEM;
1733                 }
1734         }
1735         return 0;
1736 }
1737
1738 /**
1739  * Complete flow rule creation.
1740  *
1741  * @param priv
1742  *   Pointer to private structure.
1743  * @param parser
1744  *   Internal parser structure.
1745  * @param flow
1746  *   Pointer to the rte_flow.
1747  * @param[out] error
1748  *   Perform verbose error reporting if not NULL.
1749  *
1750  * @return
1751  *   0 on success, an errno value otherwise and rte_errno is set.
1752  */
1753 static int
1754 priv_flow_create_action_queue(struct priv *priv,
1755                               struct mlx5_flow_parse *parser,
1756                               struct rte_flow *flow,
1757                               struct rte_flow_error *error)
1758 {
1759         int err = 0;
1760         unsigned int i;
1761
1762         assert(priv->pd);
1763         assert(priv->ctx);
1764         assert(!parser->drop);
1765         err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1766         if (err)
1767                 goto error;
1768         if (parser->count)
1769                 flow->cs = parser->cs;
1770         if (!priv->dev->data->dev_started)
1771                 return 0;
1772         for (i = 0; i != hash_rxq_init_n; ++i) {
1773                 if (!flow->frxq[i].hrxq)
1774                         continue;
1775                 flow->frxq[i].ibv_flow =
1776                         mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1777                                                flow->frxq[i].ibv_attr);
1778                 if (!flow->frxq[i].ibv_flow) {
1779                         rte_flow_error_set(error, ENOMEM,
1780                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1781                                            NULL, "flow rule creation failure");
1782                         err = ENOMEM;
1783                         goto error;
1784                 }
1785                 DEBUG("%p type %d QP %p ibv_flow %p",
1786                       (void *)flow, i,
1787                       (void *)flow->frxq[i].hrxq,
1788                       (void *)flow->frxq[i].ibv_flow);
1789         }
1790         for (i = 0; i != parser->queues_n; ++i) {
1791                 struct mlx5_rxq_data *q =
1792                         (*priv->rxqs)[parser->queues[i]];
1793
1794                 q->mark |= parser->mark;
1795         }
1796         return 0;
1797 error:
1798         assert(flow);
1799         for (i = 0; i != hash_rxq_init_n; ++i) {
1800                 if (flow->frxq[i].ibv_flow) {
1801                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1802
1803                         claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1804                 }
1805                 if (flow->frxq[i].hrxq)
1806                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1807                 if (flow->frxq[i].ibv_attr)
1808                         rte_free(flow->frxq[i].ibv_attr);
1809         }
1810         if (flow->cs) {
1811                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1812                 flow->cs = NULL;
1813                 parser->cs = NULL;
1814         }
1815         return err;
1816 }
1817
1818 /**
1819  * Convert a flow.
1820  *
1821  * @param priv
1822  *   Pointer to private structure.
1823  * @param list
1824  *   Pointer to a TAILQ flow list.
1825  * @param[in] attr
1826  *   Flow rule attributes.
1827  * @param[in] pattern
1828  *   Pattern specification (list terminated by the END pattern item).
1829  * @param[in] actions
1830  *   Associated actions (list terminated by the END action).
1831  * @param[out] error
1832  *   Perform verbose error reporting if not NULL.
1833  *
1834  * @return
1835  *   A flow on success, NULL otherwise.
1836  */
1837 static struct rte_flow *
1838 priv_flow_create(struct priv *priv,
1839                  struct mlx5_flows *list,
1840                  const struct rte_flow_attr *attr,
1841                  const struct rte_flow_item items[],
1842                  const struct rte_flow_action actions[],
1843                  struct rte_flow_error *error)
1844 {
1845         struct mlx5_flow_parse parser = { .create = 1, };
1846         struct rte_flow *flow = NULL;
1847         unsigned int i;
1848         int err;
1849
1850         err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1851         if (err)
1852                 goto exit;
1853         flow = rte_calloc(__func__, 1,
1854                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1855                           0);
1856         if (!flow) {
1857                 rte_flow_error_set(error, ENOMEM,
1858                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1859                                    NULL,
1860                                    "cannot allocate flow memory");
1861                 goto exit;
1862         }
1863         /* Copy queues configuration. */
1864         flow->queues = (uint16_t (*)[])(flow + 1);
1865         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1866         flow->queues_n = parser.queues_n;
1867         flow->mark = parser.mark;
1868         /* Copy RSS configuration. */
1869         flow->rss_conf = parser.rss_conf;
1870         flow->rss_conf.rss_key = flow->rss_key;
1871         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1872         /* Finalize the flow. */
1873         if (parser.drop)
1874                 err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1875                                                          error);
1876         else
1877                 err = priv_flow_create_action_queue(priv, &parser, flow, error);
1878         if (err)
1879                 goto exit;
1880         TAILQ_INSERT_TAIL(list, flow, next);
1881         DEBUG("Flow created %p", (void *)flow);
1882         return flow;
1883 exit:
1884         ERROR("flow creation error: %s", error->message);
1885         for (i = 0; i != hash_rxq_init_n; ++i) {
1886                 if (parser.queue[i].ibv_attr)
1887                         rte_free(parser.queue[i].ibv_attr);
1888         }
1889         rte_free(flow);
1890         return NULL;
1891 }
1892
1893 /**
1894  * Validate a flow supported by the NIC.
1895  *
1896  * @see rte_flow_validate()
1897  * @see rte_flow_ops
1898  */
1899 int
1900 mlx5_flow_validate(struct rte_eth_dev *dev,
1901                    const struct rte_flow_attr *attr,
1902                    const struct rte_flow_item items[],
1903                    const struct rte_flow_action actions[],
1904                    struct rte_flow_error *error)
1905 {
1906         struct priv *priv = dev->data->dev_private;
1907         int ret;
1908         struct mlx5_flow_parse parser = { .create = 0, };
1909
1910         priv_lock(priv);
1911         ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1912         priv_unlock(priv);
1913         return ret;
1914 }
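
/*
 * Illustration only: typical application-side sequence (hypothetical
 * "attr", "pattern" and "actions" arrays, each terminated by an END
 * entry):
 *
 *   struct rte_flow_error err;
 *   struct rte_flow *f = NULL;
 *
 *   if (!rte_flow_validate(port_id, &attr, pattern, actions, &err))
 *           f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 */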
1915
1916 /**
1917  * Create a flow.
1918  *
1919  * @see rte_flow_create()
1920  * @see rte_flow_ops
1921  */
1922 struct rte_flow *
1923 mlx5_flow_create(struct rte_eth_dev *dev,
1924                  const struct rte_flow_attr *attr,
1925                  const struct rte_flow_item items[],
1926                  const struct rte_flow_action actions[],
1927                  struct rte_flow_error *error)
1928 {
1929         struct priv *priv = dev->data->dev_private;
1930         struct rte_flow *flow;
1931
1932         priv_lock(priv);
1933         flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1934                                 error);
1935         priv_unlock(priv);
1936         return flow;
1937 }
1938
1939 /**
1940  * Destroy a flow.
1941  *
1942  * @param priv
1943  *   Pointer to private structure.
1944  * @param list
1945  *   Pointer to a TAILQ flow list.
1946  * @param[in] flow
1947  *   Flow to destroy.
1948  */
1949 static void
1950 priv_flow_destroy(struct priv *priv,
1951                   struct mlx5_flows *list,
1952                   struct rte_flow *flow)
1953 {
1954         unsigned int i;
1955
1956         if (flow->drop || !flow->mark)
1957                 goto free;
1958         for (i = 0; i != flow->queues_n; ++i) {
1959                 struct rte_flow *tmp;
1960                 int mark = 0;
1961
1962                 /*
1963                  * To remove the mark from the queue, the queue must not be
1964                  * present in any other marked flow (RSS or not).
1965                  */
1966                 TAILQ_FOREACH(tmp, list, next) {
1967                         unsigned int j;
1968                         uint16_t *tqs = NULL;
1969                         uint16_t tq_n = 0;
1970
1971                         if (!tmp->mark)
1972                                 continue;
1973                         for (j = 0; j != hash_rxq_init_n; ++j) {
1974                                 if (!tmp->frxq[j].hrxq)
1975                                         continue;
1976                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
1977                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1978                         }
1979                         if (!tq_n)
1980                                 continue;
1981                         for (j = 0; (j != tq_n) && !mark; j++)
1982                                 if (tqs[j] == (*flow->queues)[i])
1983                                         mark = 1;
1984                 }
1985                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1986         }
1987 free:
1988         if (flow->drop) {
1989                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
1990                         claim_zero(mlx5_glue->destroy_flow
1991                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1992                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1993         } else {
1994                 for (i = 0; i != hash_rxq_init_n; ++i) {
1995                         struct mlx5_flow *frxq = &flow->frxq[i];
1996
1997                         if (frxq->ibv_flow)
1998                                 claim_zero(mlx5_glue->destroy_flow
1999                                            (frxq->ibv_flow));
2000                         if (frxq->hrxq)
2001                                 mlx5_priv_hrxq_release(priv, frxq->hrxq);
2002                         if (frxq->ibv_attr)
2003                                 rte_free(frxq->ibv_attr);
2004                 }
2005         }
2006         if (flow->cs) {
2007                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2008                 flow->cs = NULL;
2009         }
2010         TAILQ_REMOVE(list, flow, next);
2011         DEBUG("Flow destroyed %p", (void *)flow);
2012         rte_free(flow);
2013 }
2014
2015 /**
2016  * Destroy all flows.
2017  *
2018  * @param priv
2019  *   Pointer to private structure.
2020  * @param list
2021  *   Pointer to a TAILQ flow list.
2022  */
2023 void
2024 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
2025 {
2026         while (!TAILQ_EMPTY(list)) {
2027                 struct rte_flow *flow;
2028
2029                 flow = TAILQ_FIRST(list);
2030                 priv_flow_destroy(priv, list, flow);
2031         }
2032 }
2033
2034 /**
2035  * Create drop queue.
2036  *
2037  * @param priv
2038  *   Pointer to private structure.
2039  *
2040  * @return
2041  *   0 on success, -1 on failure.
2042  */
2043 int
2044 priv_flow_create_drop_queue(struct priv *priv)
2045 {
2046         struct mlx5_hrxq_drop *fdq = NULL;
2047
2048         assert(priv->pd);
2049         assert(priv->ctx);
2050         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2051         if (!fdq) {
2052                 WARN("cannot allocate memory for drop queue");
2053                 goto error;
2054         }
2055         fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2056         if (!fdq->cq) {
2057                 WARN("cannot allocate CQ for drop queue");
2058                 goto error;
2059         }
2060         fdq->wq = mlx5_glue->create_wq
2061                 (priv->ctx,
2062                  &(struct ibv_wq_init_attr){
2063                         .wq_type = IBV_WQT_RQ,
2064                         .max_wr = 1,
2065                         .max_sge = 1,
2066                         .pd = priv->pd,
2067                         .cq = fdq->cq,
2068                  });
2069         if (!fdq->wq) {
2070                 WARN("cannot allocate WQ for drop queue");
2071                 goto error;
2072         }
2073         fdq->ind_table = mlx5_glue->create_rwq_ind_table
2074                 (priv->ctx,
2075                  &(struct ibv_rwq_ind_table_init_attr){
2076                         .log_ind_tbl_size = 0,
2077                         .ind_tbl = &fdq->wq,
2078                         .comp_mask = 0,
2079                  });
2080         if (!fdq->ind_table) {
2081                 WARN("cannot allocate indirection table for drop queue");
2082                 goto error;
2083         }
2084         fdq->qp = mlx5_glue->create_qp_ex
2085                 (priv->ctx,
2086                  &(struct ibv_qp_init_attr_ex){
2087                         .qp_type = IBV_QPT_RAW_PACKET,
2088                         .comp_mask =
2089                                 IBV_QP_INIT_ATTR_PD |
2090                                 IBV_QP_INIT_ATTR_IND_TABLE |
2091                                 IBV_QP_INIT_ATTR_RX_HASH,
2092                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2093                                 .rx_hash_function =
2094                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2095                                 .rx_hash_key_len = rss_hash_default_key_len,
2096                                 .rx_hash_key = rss_hash_default_key,
2097                                 .rx_hash_fields_mask = 0,
2098                                 },
2099                         .rwq_ind_tbl = fdq->ind_table,
2100                         .pd = priv->pd
2101                  });
2102         if (!fdq->qp) {
2103                 WARN("cannot allocate QP for drop queue");
2104                 goto error;
2105         }
2106         priv->flow_drop_queue = fdq;
2107         return 0;
2108 error:
2109         if (fdq && fdq->qp)
2110                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2111         if (fdq && fdq->ind_table)
2112                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2113         if (fdq && fdq->wq)
2114                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2115         if (fdq && fdq->cq)
2116                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2117         if (fdq)
2118                 rte_free(fdq);
2119         priv->flow_drop_queue = NULL;
2120         return -1;
2121 }
2122
2123 /**
2124  * Delete drop queue.
2125  *
2126  * @param priv
2127  *   Pointer to private structure.
2128  */
2129 void
2130 priv_flow_delete_drop_queue(struct priv *priv)
2131 {
2132         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2133
2134         if (!fdq)
2135                 return;
2136         if (fdq->qp)
2137                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2138         if (fdq->ind_table)
2139                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2140         if (fdq->wq)
2141                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2142         if (fdq->cq)
2143                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2144         rte_free(fdq);
2145         priv->flow_drop_queue = NULL;
2146 }
2147
2148 /**
2149  * Remove all flows.
2150  *
2151  * @param priv
2152  *   Pointer to private structure.
2153  * @param list
2154  *   Pointer to a TAILQ flow list.
2155  */
2156 void
2157 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2158 {
2159         struct rte_flow *flow;
2160
2161         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2162                 unsigned int i;
2163                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2164
2165                 if (flow->drop) {
2166                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2167                                 continue;
2168                         claim_zero(mlx5_glue->destroy_flow
2169                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2170                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2171                         DEBUG("Flow %p removed", (void *)flow);
2172                         /* Next flow. */
2173                         continue;
2174                 }
2175                 /* Verify the flow has not already been cleaned. */
2176                 for (i = 0; i != hash_rxq_init_n; ++i) {
2177                         if (!flow->frxq[i].ibv_flow)
2178                                 continue;
2179                         /*
2180                          * The indirection table may be needed to clear
2181                          * the mark flag in the Rx queues.
2182                          * Grabbing it here avoids another loop over the
2183                          * hash Rx queues further down.
2184                          */
2185                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2186                         break;
2187                 }
2188                 if (i == hash_rxq_init_n)
2189                         return;
2190                 if (flow->mark) {
2191                         assert(ind_tbl);
2192                         for (i = 0; i != ind_tbl->queues_n; ++i)
2193                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2194                 }
2195                 for (i = 0; i != hash_rxq_init_n; ++i) {
2196                         if (!flow->frxq[i].ibv_flow)
2197                                 continue;
2198                         claim_zero(mlx5_glue->destroy_flow
2199                                    (flow->frxq[i].ibv_flow));
2200                         flow->frxq[i].ibv_flow = NULL;
2201                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2202                         flow->frxq[i].hrxq = NULL;
2203                 }
2204                 DEBUG("Flow %p removed", (void *)flow);
2205         }
2206 }
2207
2208 /**
2209  * Add all flows.
2210  *
2211  * @param priv
2212  *   Pointer to private structure.
2213  * @param list
2214  *   Pointer to a TAILQ flow list.
2215  *
2216  * @return
2217  *   0 on success, an errno value otherwise and rte_errno is set.
2218  */
2219 int
2220 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2221 {
2222         struct rte_flow *flow;
2223
2224         TAILQ_FOREACH(flow, list, next) {
2225                 unsigned int i;
2226
2227                 if (flow->drop) {
2228                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2229                                 mlx5_glue->create_flow
2230                                 (priv->flow_drop_queue->qp,
2231                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2232                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2233                                 DEBUG("Flow %p cannot be applied",
2234                                       (void *)flow);
2235                                 rte_errno = EINVAL;
2236                                 return rte_errno;
2237                         }
2238                         DEBUG("Flow %p applied", (void *)flow);
2239                         /* Next flow. */
2240                         continue;
2241                 }
2242                 for (i = 0; i != hash_rxq_init_n; ++i) {
2243                         if (!flow->frxq[i].ibv_attr)
2244                                 continue;
2245                         flow->frxq[i].hrxq =
2246                                 mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2247                                                    flow->rss_conf.rss_key_len,
2248                                                    hash_rxq_init[i].hash_fields,
2249                                                    (*flow->queues),
2250                                                    flow->queues_n);
2251                         if (flow->frxq[i].hrxq)
2252                                 goto flow_create;
2253                         flow->frxq[i].hrxq =
2254                                 mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2255                                                    flow->rss_conf.rss_key_len,
2256                                                    hash_rxq_init[i].hash_fields,
2257                                                    (*flow->queues),
2258                                                    flow->queues_n);
2259                         if (!flow->frxq[i].hrxq) {
2260                                 DEBUG("Flow %p cannot be applied",
2261                                       (void *)flow);
2262                                 rte_errno = EINVAL;
2263                                 return rte_errno;
2264                         }
2265 flow_create:
2266                         flow->frxq[i].ibv_flow =
2267                                 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2268                                                        flow->frxq[i].ibv_attr);
2269                         if (!flow->frxq[i].ibv_flow) {
2270                                 DEBUG("Flow %p cannot be applied",
2271                                       (void *)flow);
2272                                 rte_errno = EINVAL;
2273                                 return rte_errno;
2274                         }
2275                         DEBUG("Flow %p applied", (void *)flow);
2276                 }
2277                 if (!flow->mark)
2278                         continue;
2279                 for (i = 0; i != flow->queues_n; ++i)
2280                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2281         }
2282         return 0;
2283 }
2284
2285 /**
2286  * Verify the flow list is empty.
2287  *
2288  * @param priv
2289  *   Pointer to private structure.
2290  *
2291  * @return The number of flows not released.
2292  */
2293 int
2294 priv_flow_verify(struct priv *priv)
2295 {
2296         struct rte_flow *flow;
2297         int ret = 0;
2298
2299         TAILQ_FOREACH(flow, &priv->flows, next) {
2300                 DEBUG("%p: flow %p still referenced", (void *)priv,
2301                       (void *)flow);
2302                 ++ret;
2303         }
2304         return ret;
2305 }
2306
2307 /**
2308  * Enable a control flow configured from the control plane.
2309  *
2310  * @param dev
2311  *   Pointer to Ethernet device.
2312  * @param eth_spec
2313  *   An Ethernet flow spec to apply.
2314  * @param eth_mask
2315  *   An Ethernet flow mask to apply.
2316  * @param vlan_spec
2317  *   A VLAN flow spec to apply.
2318  * @param vlan_mask
2319  *   A VLAN flow mask to apply.
2320  *
2321  * @return
2322  *   0 on success.
2323  */
2324 int
2325 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2326                     struct rte_flow_item_eth *eth_spec,
2327                     struct rte_flow_item_eth *eth_mask,
2328                     struct rte_flow_item_vlan *vlan_spec,
2329                     struct rte_flow_item_vlan *vlan_mask)
2330 {
2331         struct priv *priv = dev->data->dev_private;
2332         const struct rte_flow_attr attr = {
2333                 .ingress = 1,
2334                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2335         };
2336         struct rte_flow_item items[] = {
2337                 {
2338                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2339                         .spec = eth_spec,
2340                         .last = NULL,
2341                         .mask = eth_mask,
2342                 },
2343                 {
2344                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2345                                 RTE_FLOW_ITEM_TYPE_END,
2346                         .spec = vlan_spec,
2347                         .last = NULL,
2348                         .mask = vlan_mask,
2349                 },
2350                 {
2351                         .type = RTE_FLOW_ITEM_TYPE_END,
2352                 },
2353         };
2354         struct rte_flow_action actions[] = {
2355                 {
2356                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2357                 },
2358                 {
2359                         .type = RTE_FLOW_ACTION_TYPE_END,
2360                 },
2361         };
2362         struct rte_flow *flow;
2363         struct rte_flow_error error;
2364         unsigned int i;
2365         union {
2366                 struct rte_flow_action_rss rss;
2367                 struct {
2368                         const struct rte_eth_rss_conf *rss_conf;
2369                         uint16_t num;
2370                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2371                 } local;
2372         } action_rss;
2373
2374         if (!priv->reta_idx_n)
2375                 return EINVAL;
2376         for (i = 0; i != priv->reta_idx_n; ++i)
2377                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2378         action_rss.local.rss_conf = &priv->rss_conf;
2379         action_rss.local.num = priv->reta_idx_n;
2380         actions[0].conf = (const void *)&action_rss.rss;
2381         flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2382                                 &error);
2383         if (!flow)
2384                 return rte_errno;
2385         return 0;
2386 }
2387
2388 /**
2389  * Enable a control flow configured from the control plane.
2390  *
2391  * @param dev
2392  *   Pointer to Ethernet device.
2393  * @param eth_spec
2394  *   An Ethernet flow spec to apply.
2395  * @param eth_mask
2396  *   An Ethernet flow mask to apply.
2397  *
2398  * @return
2399  *   0 on success.
2400  */
2401 int
2402 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2403                struct rte_flow_item_eth *eth_spec,
2404                struct rte_flow_item_eth *eth_mask)
2405 {
2406         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2407 }
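
/*
 * Illustration only: this helper is how the driver installs its own
 * rules at start time, e.g. a broadcast control flow (sketch, values
 * assumed):
 *
 *   struct rte_flow_item_eth bcast = {
 *           .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *
 *   mlx5_ctrl_flow(dev, &bcast, &bcast);
 */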
2408
2409 /**
2410  * Destroy a flow.
2411  *
2412  * @see rte_flow_destroy()
2413  * @see rte_flow_ops
2414  */
2415 int
2416 mlx5_flow_destroy(struct rte_eth_dev *dev,
2417                   struct rte_flow *flow,
2418                   struct rte_flow_error *error)
2419 {
2420         struct priv *priv = dev->data->dev_private;
2421
2422         (void)error;
2423         priv_lock(priv);
2424         priv_flow_destroy(priv, &priv->flows, flow);
2425         priv_unlock(priv);
2426         return 0;
2427 }
2428
2429 /**
2430  * Destroy all flows.
2431  *
2432  * @see rte_flow_flush()
2433  * @see rte_flow_ops
2434  */
2435 int
2436 mlx5_flow_flush(struct rte_eth_dev *dev,
2437                 struct rte_flow_error *error)
2438 {
2439         struct priv *priv = dev->data->dev_private;
2440
2441         (void)error;
2442         priv_lock(priv);
2443         priv_flow_flush(priv, &priv->flows);
2444         priv_unlock(priv);
2445         return 0;
2446 }
2447
2448 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2449 /**
2450  * Query flow counter.
2451  *
2452  * @param cs
2453  *   The counter set to query.
2454  * @param counter_stats
2455  *   Cached counter values, used to compute deltas and updated on reset.
2456  *
2457  * @return
2458  *   0 on success, an errno value otherwise and rte_errno is set.
2459  */
2460 static int
2461 priv_flow_query_count(struct ibv_counter_set *cs,
2462                       struct mlx5_flow_counter_stats *counter_stats,
2463                       struct rte_flow_query_count *query_count,
2464                       struct rte_flow_error *error)
2465 {
2466         uint64_t counters[2];
2467         struct ibv_query_counter_set_attr query_cs_attr = {
2468                 .cs = cs,
2469                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2470         };
2471         struct ibv_counter_set_data query_out = {
2472                 .out = counters,
2473                 .outlen = 2 * sizeof(uint64_t),
2474         };
2475         int res = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2476
2477         if (res) {
2478                 rte_flow_error_set(error, -res,
2479                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2480                                    NULL,
2481                                    "cannot read counter");
2482                 return -res;
2483         }
2484         query_count->hits_set = 1;
2485         query_count->bytes_set = 1;
2486         query_count->hits = counters[0] - counter_stats->hits;
2487         query_count->bytes = counters[1] - counter_stats->bytes;
2488         if (query_count->reset) {
2489                 counter_stats->hits = counters[0];
2490                 counter_stats->bytes = counters[1];
2491         }
2492         return 0;
2493 }
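
/*
 * Worked example: with counter_stats->hits == 100 cached and the
 * hardware reporting 150, the query returns hits == 50. When
 * query_count->reset is set, the cache is bumped to 150 so the next
 * query counts from zero again.
 */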
2494
2495 /**
2496  * Query a flow.
2497  *
2498  * @see rte_flow_query()
2499  * @see rte_flow_ops
2500  */
2501 int
2502 mlx5_flow_query(struct rte_eth_dev *dev,
2503                 struct rte_flow *flow,
2504                 enum rte_flow_action_type action __rte_unused,
2505                 void *data,
2506                 struct rte_flow_error *error)
2507 {
2508         struct priv *priv = dev->data->dev_private;
2509         int res = EINVAL;
2510
2511         priv_lock(priv);
2512         if (flow->cs) {
2513                 res = priv_flow_query_count(flow->cs,
2514                                         &flow->counter_stats,
2515                                         (struct rte_flow_query_count *)data,
2516                                         error);
2517         } else {
2518                 rte_flow_error_set(error, res,
2519                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2520                                    NULL,
2521                                    "no counter found for flow");
2522         }
2523         priv_unlock(priv);
2524         return -res;
2525 }
2526 #endif
2527
2528 /**
2529  * Isolated mode.
2530  *
2531  * @see rte_flow_isolate()
2532  * @see rte_flow_ops
2533  */
2534 int
2535 mlx5_flow_isolate(struct rte_eth_dev *dev,
2536                   int enable,
2537                   struct rte_flow_error *error)
2538 {
2539         struct priv *priv = dev->data->dev_private;
2540
2541         priv_lock(priv);
2542         if (dev->data->dev_started) {
2543                 rte_flow_error_set(error, EBUSY,
2544                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2545                                    NULL,
2546                                    "port must be stopped first");
2547                 priv_unlock(priv);
2548                 return -rte_errno;
2549         }
2550         priv->isolated = !!enable;
2551         if (enable)
2552                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2553         else
2554                 priv->dev->dev_ops = &mlx5_dev_ops;
2555         priv_unlock(priv);
2556         return 0;
2557 }
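
/*
 * Illustration only: isolated mode must be selected while the port is
 * stopped, e.g.:
 *
 *   struct rte_flow_error error;
 *
 *   rte_eth_dev_stop(port_id);
 *   rte_flow_isolate(port_id, 1, &error);
 *   rte_eth_dev_start(port_id);
 */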
2558
2559 /**
2560  * Convert a flow director filter to a generic flow.
2561  *
2562  * @param priv
2563  *   Private structure.
2564  * @param fdir_filter
2565  *   Flow director filter to add.
2566  * @param attributes
2567  *   Generic flow parameters structure.
2568  *
2569  * @return
2570  *  0 on success, errno value on error.
2571  */
2572 static int
2573 priv_fdir_filter_convert(struct priv *priv,
2574                          const struct rte_eth_fdir_filter *fdir_filter,
2575                          struct mlx5_fdir *attributes)
2576 {
2577         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2578
2579         /* Validate queue number. */
2580         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2581                 ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2582                 return EINVAL;
2583         }
2584         attributes->attr.ingress = 1;
2585         attributes->items[0] = (struct rte_flow_item) {
2586                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2587                 .spec = &attributes->l2,
2588                 .mask = &attributes->l2_mask,
2589         };
2590         switch (fdir_filter->action.behavior) {
2591         case RTE_ETH_FDIR_ACCEPT:
2592                 attributes->actions[0] = (struct rte_flow_action){
2593                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2594                         .conf = &attributes->queue,
2595                 };
2596                 break;
2597         case RTE_ETH_FDIR_REJECT:
2598                 attributes->actions[0] = (struct rte_flow_action){
2599                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2600                 };
2601                 break;
2602         default:
2603                 ERROR("invalid behavior %d", fdir_filter->action.behavior);
2604                 return ENOTSUP;
2605         }
2606         attributes->queue.index = fdir_filter->action.rx_queue;
2607         switch (fdir_filter->input.flow_type) {
2608         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2609                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2610                         .src_addr = input->flow.udp4_flow.ip.src_ip,
2611                         .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2612                         .time_to_live = input->flow.udp4_flow.ip.ttl,
2613                         .type_of_service = input->flow.udp4_flow.ip.tos,
2614                         .next_proto_id = input->flow.udp4_flow.ip.proto,
2615                 };
2616                 attributes->l4.udp.hdr = (struct udp_hdr){
2617                         .src_port = input->flow.udp4_flow.src_port,
2618                         .dst_port = input->flow.udp4_flow.dst_port,
2619                 };
2620                 attributes->items[1] = (struct rte_flow_item){
2621                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2622                         .spec = &attributes->l3,
2623                         .mask = &attributes->l3,
2624                 };
2625                 attributes->items[2] = (struct rte_flow_item){
2626                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2627                         .spec = &attributes->l4,
2628                         .mask = &attributes->l4,
2629                 };
2630                 break;
2631         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2632                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2633                         .src_addr = input->flow.tcp4_flow.ip.src_ip,
2634                         .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2635                         .time_to_live = input->flow.tcp4_flow.ip.ttl,
2636                         .type_of_service = input->flow.tcp4_flow.ip.tos,
2637                         .next_proto_id = input->flow.tcp4_flow.ip.proto,
2638                 };
2639                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2640                         .src_port = input->flow.tcp4_flow.src_port,
2641                         .dst_port = input->flow.tcp4_flow.dst_port,
2642                 };
2643                 attributes->items[1] = (struct rte_flow_item){
2644                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2645                         .spec = &attributes->l3,
2646                         .mask = &attributes->l3,
2647                 };
2648                 attributes->items[2] = (struct rte_flow_item){
2649                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2650                         .spec = &attributes->l4,
2651                         .mask = &attributes->l4,
2652                 };
2653                 break;
2654         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2655                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2656                         .src_addr = input->flow.ip4_flow.src_ip,
2657                         .dst_addr = input->flow.ip4_flow.dst_ip,
2658                         .time_to_live = input->flow.ip4_flow.ttl,
2659                         .type_of_service = input->flow.ip4_flow.tos,
2660                         .next_proto_id = input->flow.ip4_flow.proto,
2661                 };
2662                 attributes->items[1] = (struct rte_flow_item){
2663                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2664                         .spec = &attributes->l3,
2665                         .mask = &attributes->l3,
2666                 };
2667                 break;
2668         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2669                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2670                         .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2671                         .proto = input->flow.udp6_flow.ip.proto,
2672                 };
2673                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2674                        input->flow.udp6_flow.ip.src_ip,
2675                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2676                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2677                        input->flow.udp6_flow.ip.dst_ip,
2678                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2679                 attributes->l4.udp.hdr = (struct udp_hdr){
2680                         .src_port = input->flow.udp6_flow.src_port,
2681                         .dst_port = input->flow.udp6_flow.dst_port,
2682                 };
2683                 attributes->items[1] = (struct rte_flow_item){
2684                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2685                         .spec = &attributes->l3,
2686                         .mask = &attributes->l3,
2687                 };
2688                 attributes->items[2] = (struct rte_flow_item){
2689                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2690                         .spec = &attributes->l4,
2691                         .mask = &attributes->l4,
2692                 };
2693                 break;
2694         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2695                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2696                         .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2697                         .proto = input->flow.tcp6_flow.ip.proto,
2698                 };
2699                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2700                        input->flow.tcp6_flow.ip.src_ip,
2701                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2702                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2703                        input->flow.tcp6_flow.ip.dst_ip,
2704                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2705                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2706                         .src_port = input->flow.tcp6_flow.src_port,
2707                         .dst_port = input->flow.tcp6_flow.dst_port,
2708                 };
2709                 attributes->items[1] = (struct rte_flow_item){
2710                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2711                         .spec = &attributes->l3,
2712                         .mask = &attributes->l3,
2713                 };
2714                 attributes->items[2] = (struct rte_flow_item){
2715                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2716                         .spec = &attributes->l4,
2717                         .mask = &attributes->l4,
2718                 };
2719                 break;
2720         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2721                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2722                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2723                         .proto = input->flow.ipv6_flow.proto,
2724                 };
2725                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2726                        input->flow.ipv6_flow.src_ip,
2727                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2728                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2729                        input->flow.ipv6_flow.dst_ip,
2730                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2731                 attributes->items[1] = (struct rte_flow_item){
2732                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2733                         .spec = &attributes->l3,
2734                         .mask = &attributes->l3,
2735                 };
2736                 break;
2737         default:
2738                 ERROR("invalid flow type %d",
2739                       fdir_filter->input.flow_type);
2740                 return ENOTSUP;
2741         }
2742         return 0;
2743 }
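
/*
 * Illustration only: for RTE_ETH_FLOW_NONFRAG_IPV4_UDP the conversion
 * above produces the pattern below, with items[0] (ETH) filled in by the
 * earlier part of this function and the trailing item left zeroed, which
 * is RTE_FLOW_ITEM_TYPE_END:
 *
 *      items[0] = { .type = RTE_FLOW_ITEM_TYPE_ETH,
 *                   .spec = &l2, .mask = &l2_mask }
 *      items[1] = { .type = RTE_FLOW_ITEM_TYPE_IPV4,
 *                   .spec = &l3, .mask = &l3 }
 *      items[2] = { .type = RTE_FLOW_ITEM_TYPE_UDP,
 *                   .spec = &l4, .mask = &l4 }
 *      items[3] = { .type = RTE_FLOW_ITEM_TYPE_END }
 */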
2744
2745 /**
2746  * Add a new flow director filter and store it in the list.
2747  *
2748  * @param priv
2749  *   Private structure.
2750  * @param fdir_filter
2751  *   Flow director filter to add.
2752  *
2753  * @return
2754  *   0 on success, errno value on failure.
2755  */
2756 static int
2757 priv_fdir_filter_add(struct priv *priv,
2758                      const struct rte_eth_fdir_filter *fdir_filter)
2759 {
2760         struct mlx5_fdir attributes = {
2761                 .attr.group = 0,
2762                 .l2_mask = {
2763                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2764                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2765                         .type = 0,
2766                 },
2767         };
2768         struct mlx5_flow_parse parser = {
2769                 .layer = HASH_RXQ_ETH,
2770         };
2771         struct rte_flow_error error;
2772         struct rte_flow *flow;
2773         int ret;
2774
2775         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2776         if (ret)
2777                 return ret;
2778         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2779                                 attributes.actions, &error, &parser);
2780         if (ret)
2781                 return -ret;
2782         flow = priv_flow_create(priv,
2783                                 &priv->flows,
2784                                 &attributes.attr,
2785                                 attributes.items,
2786                                 attributes.actions,
2787                                 &error);
2788         if (flow) {
2789                 DEBUG("FDIR created %p", (void *)flow);
2790                 return 0;
2791         }
2792         return ENOTSUP;
2793 }
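
/*
 * Usage sketch (application side), assuming a port configured with
 * fdir_conf.mode = RTE_FDIR_MODE_PERFECT; all field values below are
 * made up for illustration:
 *
 *      struct rte_eth_fdir_filter f = {
 *              .input = {
 *                      .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *                      .flow.udp4_flow = {
 *                              .ip.src_ip =
 *                                      rte_cpu_to_be_32(IPv4(10, 0, 0, 1)),
 *                              .src_port = rte_cpu_to_be_16(1234),
 *                      },
 *              },
 *              .action = {
 *                      .rx_queue = 3,
 *                      .behavior = RTE_ETH_FDIR_ACCEPT,
 *              },
 *      };
 *
 *      rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                              RTE_ETH_FILTER_ADD, &f);
 */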
2794
2795 /**
2796  * Delete a specific filter.
2797  *
2798  * @param priv
2799  *   Private structure.
2800  * @param fdir_filter
2801  *   Filter to be deleted.
2802  *
2803  * @return
2804  *   0 on success, errno value on failure.
2805  */
2806 static int
2807 priv_fdir_filter_delete(struct priv *priv,
2808                         const struct rte_eth_fdir_filter *fdir_filter)
2809 {
2810         struct mlx5_fdir attributes = {
2811                 .attr.group = 0,
2812         };
2813         struct mlx5_flow_parse parser = {
2814                 .create = 1,
2815                 .layer = HASH_RXQ_ETH,
2816         };
2817         struct rte_flow_error error;
2818         struct rte_flow *flow;
2819         unsigned int i;
2820         int ret;
2821
2822         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2823         if (ret)
2824                 return ret;
2825         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2826                                 attributes.actions, &error, &parser);
2827         if (ret)
2828                 goto exit;
2829         /*
2830          * Special case for the drop action, which is only appended to the
2831          * specifications when a flow is actually created.  It is therefore
2832          * missing here and must be added before comparing installed flows.
2833          */
2834         if (parser.drop) {
2835                 struct ibv_flow_spec_action_drop *drop;
2836
2837                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2838                                 parser.queue[HASH_RXQ_ETH].offset);
2839                 *drop = (struct ibv_flow_spec_action_drop){
2840                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2841                         .size = sizeof(struct ibv_flow_spec_action_drop),
2842                 };
2843                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2844         }
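        /*
         * Look for an already installed flow whose Verbs description
         * matches the one just regenerated from the filter: compare the
         * ibv_flow_attr headers first, then walk both specification
         * lists one specification at a time.
         */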
2845         TAILQ_FOREACH(flow, &priv->flows, next) {
2846                 struct ibv_flow_attr *attr;
2847                 struct ibv_spec_header *attr_h;
2848                 void *spec;
2849                 struct ibv_flow_attr *flow_attr;
2850                 struct ibv_spec_header *flow_h;
2851                 void *flow_spec;
2852                 unsigned int specs_n;
2853
2854                 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2855                 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2856                 /* Compare the attributes first. */
2857                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2858                         continue;
2859                 if (attr->num_of_specs == 0)
2860                         continue;
2861                 spec = (void *)((uintptr_t)attr +
2862                                 sizeof(struct ibv_flow_attr));
2863                 flow_spec = (void *)((uintptr_t)flow_attr +
2864                                      sizeof(struct ibv_flow_attr));
2865                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2866                 for (i = 0; i != specs_n; ++i) {
2867                         attr_h = spec;
2868                         flow_h = flow_spec;
2869                         if (memcmp(spec, flow_spec,
2870                                    RTE_MIN(attr_h->size, flow_h->size)))
2871                                 goto wrong_flow;
2872                         spec = (void *)((uintptr_t)spec + attr_h->size);
2873                         flow_spec = (void *)((uintptr_t)flow_spec +
2874                                              flow_h->size);
2875                 }
2876                 /* At this point, the flows match. */
2877                 break;
2878 wrong_flow:
2879                 /* The flow does not match. */
2880                 continue;
2881         }
2882         if (flow)
2883                 priv_flow_destroy(priv, &priv->flows, flow);
2884 exit:
2885         for (i = 0; i != hash_rxq_init_n; ++i) {
2886                 if (parser.queue[i].ibv_attr)
2887                         rte_free(parser.queue[i].ibv_attr);
2888         }
2889         return -ret;
2890 }
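
/*
 * Usage sketch: deletion takes the same filter description that was used
 * to add the rule, since the flow to remove is located by regenerating
 * its Verbs specifications and comparing them against installed flows:
 *
 *      rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                              RTE_ETH_FILTER_DELETE, &f);
 */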
2891
2892 /**
2893  * Update a specific filter by deleting it and adding it back.
2894  *
2895  * @param priv
2896  *   Private structure.
2897  * @param fdir_filter
2898  *   Filter to be updated.
2899  *
2900  * @return
2901  *   0 on success, errno value on failure.
2902  */
2903 static int
2904 priv_fdir_filter_update(struct priv *priv,
2905                         const struct rte_eth_fdir_filter *fdir_filter)
2906 {
2907         int ret;
2908
2909         ret = priv_fdir_filter_delete(priv, fdir_filter);
2910         if (ret)
2911                 return ret;
2912         ret = priv_fdir_filter_add(priv, fdir_filter);
2913         return ret;
2914 }
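
/*
 * Usage sketch: as update is implemented as delete followed by add, the
 * same match fields can be resubmitted with a different action, e.g. to
 * re-target an existing filter to another queue:
 *
 *      f.action.rx_queue = 5;
 *      rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                              RTE_ETH_FILTER_UPDATE, &f);
 */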
2915
2916 /**
2917  * Flush all filters.
2918  *
2919  * @param priv
2920  *   Private structure.
2921  */
2922 static void
2923 priv_fdir_filter_flush(struct priv *priv)
2924 {
2925         priv_flow_flush(priv, &priv->flows);
2926 }
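
/*
 * Note that this empties the whole &priv->flows list, which is shared
 * with rules created through the rte_flow API.  Usage sketch:
 *
 *      rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                              RTE_ETH_FILTER_FLUSH, NULL);
 */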
2927
2928 /**
2929  * Get flow director information.
2930  *
2931  * @param priv
2932  *   Private structure.
2933  * @param[out] fdir_info
2934  *   Resulting flow director information.
2935  */
2936 static void
2937 priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
2938 {
2939         struct rte_eth_fdir_masks *mask =
2940                 &priv->dev->data->dev_conf.fdir_conf.mask;
2941
2942         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
2943         fdir_info->guarant_spc = 0;
2944         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2945         fdir_info->max_flexpayload = 0;
2946         fdir_info->flow_types_mask[0] = 0;
2947         fdir_info->flex_payload_unit = 0;
2948         fdir_info->max_flex_payload_segment_num = 0;
2949         fdir_info->flex_payload_limit = 0;
2950         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2951 }
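
/*
 * Usage sketch; only .mode and .mask carry meaningful data on this PMD,
 * the flexible payload capabilities are all reported as zero:
 *
 *      struct rte_eth_fdir_info info;
 *
 *      rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                              RTE_ETH_FILTER_INFO, &info);
 */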
2952
2953 /**
2954  * Deal with flow director operations.
2955  *
2956  * @param priv
2957  *   Pointer to private structure.
2958  * @param filter_op
2959  *   Operation to perform.
2960  * @param arg
2961  *   Pointer to operation-specific structure.
2962  *
2963  * @return
2964  *   0 on success, errno value on failure.
2965  */
2966 static int
2967 priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
2968 {
2969         enum rte_fdir_mode fdir_mode =
2970                 priv->dev->data->dev_conf.fdir_conf.mode;
2971         int ret = 0;
2972
2973         if (filter_op == RTE_ETH_FILTER_NOP)
2974                 return 0;
2975         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2976             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2977                 ERROR("%p: flow director mode %d not supported",
2978                       (void *)priv, fdir_mode);
2979                 return EINVAL;
2980         }
2981         switch (filter_op) {
2982         case RTE_ETH_FILTER_ADD:
2983                 ret = priv_fdir_filter_add(priv, arg);
2984                 break;
2985         case RTE_ETH_FILTER_UPDATE:
2986                 ret = priv_fdir_filter_update(priv, arg);
2987                 break;
2988         case RTE_ETH_FILTER_DELETE:
2989                 ret = priv_fdir_filter_delete(priv, arg);
2990                 break;
2991         case RTE_ETH_FILTER_FLUSH:
2992                 priv_fdir_filter_flush(priv);
2993                 break;
2994         case RTE_ETH_FILTER_INFO:
2995                 priv_fdir_info_get(priv, arg);
2996                 break;
2997         default:
2998                 DEBUG("%p: unknown operation %u", (void *)priv,
2999                       filter_op);
3000                 ret = EINVAL;
3001                 break;
3002         }
3003         return ret;
3004 }
3005
3006 /**
3007  * Manage filter operations.
3008  *
3009  * @param dev
3010  *   Pointer to Ethernet device structure.
3011  * @param filter_type
3012  *   Filter type.
3013  * @param filter_op
3014  *   Operation to perform.
3015  * @param arg
3016  *   Pointer to operation-specific structure.
3017  *
3018  * @return
3019  *   0 on success, negative errno value on failure.
3020  */
3021 int
3022 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3023                      enum rte_filter_type filter_type,
3024                      enum rte_filter_op filter_op,
3025                      void *arg)
3026 {
3027         int ret = EINVAL;
3028         struct priv *priv = dev->data->dev_private;
3029
3030         switch (filter_type) {
3031         case RTE_ETH_FILTER_GENERIC:
3032                 if (filter_op != RTE_ETH_FILTER_GET)
3033                         return -EINVAL;
3034                 *(const void **)arg = &mlx5_flow_ops;
3035                 return 0;
3036         case RTE_ETH_FILTER_FDIR:
3037                 priv_lock(priv);
3038                 ret = priv_fdir_ctrl_func(priv, filter_op, arg);
3039                 priv_unlock(priv);
3040                 break;
3041         default:
3042                 ERROR("%p: filter type (%d) not supported",
3043                       (void *)dev, filter_type);
3044                 break;
3045         }
3046         return -ret;
3047 }
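
/*
 * Usage sketch: the generic filter type is how the rte_flow layer
 * retrieves this PMD's callbacks, i.e. rte_flow_create() and friends
 * reach mlx5_flow_ops through:
 *
 *      const struct rte_flow_ops *ops;
 *
 *      rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *                              RTE_ETH_FILTER_GET, &ops);
 */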