/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_prm.h"

/* Number of Work Queues needed for the DROP queue. */
#ifndef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
#define MLX5_DROP_WQ_N 4
#else
#define MLX5_DROP_WQ_N 1
#endif
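
/*
 * When the Verbs drop action is unavailable, dropping is instead emulated
 * by steering matched packets to a QP whose work queues are never polled
 * (see priv_flow_create_drop_queue()), which appears to be why several WQs
 * are reserved in that case.
 */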

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
        struct ibv_exp_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
        uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct rxq *rxqs[]; /**< Pointer to the queues array. */
};

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-masks size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
        },
};
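
/*
 * Pattern validation in priv_flow_validate() walks this graph: starting
 * from the RTE_FLOW_ITEM_TYPE_END entry, each item of a pattern must be
 * listed in the .items array of the previous one (e.g. ETH -> IPV4 ->
 * UDP -> VXLAN).
 */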

/** Structure to pass to the conversion function. */
struct mlx5_flow {
        struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
        unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint64_t hash_fields; /**< Fields that participate in the hash. */
};

/** Structure for Drop queue. */
struct rte_flow_drop {
        struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_exp_wq *wqs[MLX5_DROP_WQ_N]; /**< Verbs work queues. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

struct mlx5_flow_action {
        uint32_t queue:1; /**< Target is a receive queue. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
        uint16_t queues_n; /**< Number of entries in queues[]. */
};

/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, nonzero otherwise.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                const uint8_t *spec = item->mask;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
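        /*
         * Ranges are not supported: once the mask is applied, spec and
         * last must describe the same value.
         */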
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}

/**
 * Validate a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 * @param[in, out] action
 *   Action structure to update.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_validate(struct priv *priv,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error,
                   struct mlx5_flow *flow,
                   struct mlx5_flow_action *action)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;

        (void)priv;
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int i;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (flow->ibv_attr && cur_item->convert) {
                        err = cur_item->convert(items,
                                                (cur_item->default_mask ?
                                                 cur_item->default_mask :
                                                 cur_item->mask),
                                                flow);
                        if (err)
                                goto exit_item_not_supported;
                } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (flow->inner) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
                        flow->inner = 1;
                }
                flow->offset += cur_item->dst_sz;
        }
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        action->drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index > (priv->rxqs_n - 1)))
                                goto exit_action_not_supported;
                        for (n = 0; n < action->queues_n; ++n) {
                                if (action->queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (action->queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        if (!found) {
                                action->queue = 1;
                                action->queues_n = 1;
                                action->queues[0] = queue->index;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (action->queues_n == 1) {
                                uint16_t found = 0;

                                assert(action->queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (action->queues[0] ==
                                            rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        action->queue = 1;
                        for (n = 0; n < rss->num; ++n)
                                action->queues[n] = rss->queue[n];
                        action->queues_n = rss->num;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        action->mark = 1;
                        action->mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        action->mark = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (action->mark && !flow->ibv_attr && !action->drop)
                flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
        if (!flow->ibv_attr && action->drop)
                flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
#endif
        if (!action->queue && !action->drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        int ret;
        struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
        struct mlx5_flow_action action = {
                .queue = 0,
                .drop = 0,
                .mark = 0,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
                .queues_n = 0,
        };

        priv_lock(priv);
        ret = priv_flow_validate(priv, attr, items, actions, error, &flow,
                                 &action);
        priv_unlock(priv);
        return ret;
}

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
        unsigned int i;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 2;
        flow->hash_fields = 0;
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *eth = (struct ibv_exp_flow_spec_eth) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
                .size = eth_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
        eth->val.ether_type = spec->type;
        memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
        eth->mask.ether_type = mask->type;
        /* Remove unwanted bits from values. */
        for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
                eth->val.src_mac[i] &= eth->mask.src_mac[i];
        }
        eth->val.ether_type &= eth->mask.ether_type;
        return 0;
}

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
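
        /*
         * VLAN has no Verbs specification of its own (dst_sz is 0): the
         * TCI is folded into the Ethernet specification written just
         * before this item, hence the backward offset below.
         */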
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        eth->val.vlan_tag = spec->tci;
        eth->mask.vlan_tag = mask->tci;
        eth->val.vlan_tag &= eth->mask.vlan_tag;
        return 0;
}

/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_ipv4_ext *ipv4;
        unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
                             IBV_EXP_RX_HASH_DST_IPV4);
        ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
                .size = ipv4_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
                .src_ip = spec->hdr.src_addr,
                .dst_ip = spec->hdr.dst_addr,
                .proto = spec->hdr.next_proto_id,
                .tos = spec->hdr.type_of_service,
        };
        ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
                .src_ip = mask->hdr.src_addr,
                .dst_ip = mask->hdr.dst_addr,
                .proto = mask->hdr.next_proto_id,
                .tos = mask->hdr.type_of_service,
        };
        /* Remove unwanted bits from values. */
        ipv4->val.src_ip &= ipv4->mask.src_ip;
        ipv4->val.dst_ip &= ipv4->mask.dst_ip;
        ipv4->val.proto &= ipv4->mask.proto;
        ipv4->val.tos &= ipv4->mask.tos;
        return 0;
}

/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_ipv6_ext *ipv6;
        unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
        unsigned int i;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
                             IBV_EXP_RX_HASH_DST_IPV6);
        ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
                .size = ipv6_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
               RTE_DIM(ipv6->val.src_ip));
        memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
               RTE_DIM(ipv6->val.dst_ip));
        /*
         * Bug fix: the original code only filled the masks below, leaving
         * the (zero-initialized) values for these header fields unset, so
         * they could never match.
         */
        ipv6->val.flow_label = spec->hdr.vtc_flow;
        ipv6->val.next_hdr = spec->hdr.proto;
        ipv6->val.hop_limit = spec->hdr.hop_limits;
        memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
               RTE_DIM(ipv6->mask.src_ip));
        memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
               RTE_DIM(ipv6->mask.dst_ip));
        ipv6->mask.flow_label = mask->hdr.vtc_flow;
        ipv6->mask.next_hdr = mask->hdr.proto;
        ipv6->mask.hop_limit = mask->hdr.hop_limits;
        /* Remove unwanted bits from values. */
        for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
                ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
                ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
        }
        ipv6->val.flow_label &= ipv6->mask.flow_label;
        ipv6->val.next_hdr &= ipv6->mask.next_hdr;
        ipv6->val.hop_limit &= ipv6->mask.hop_limit;
        return 0;
}

/**
 * Convert UDP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_udp *spec = item->spec;
        const struct rte_flow_item_udp *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_tcp_udp *udp;
        unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
                              IBV_EXP_RX_HASH_DST_PORT_UDP);
        udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *udp = (struct ibv_exp_flow_spec_tcp_udp) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
                .size = udp_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        udp->val.dst_port = spec->hdr.dst_port;
        udp->val.src_port = spec->hdr.src_port;
        udp->mask.dst_port = mask->hdr.dst_port;
        udp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        udp->val.src_port &= udp->mask.src_port;
        udp->val.dst_port &= udp->mask.dst_port;
        return 0;
}

/**
 * Convert TCP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_tcp *spec = item->spec;
        const struct rte_flow_item_tcp *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_tcp_udp *tcp;
        unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
                              IBV_EXP_RX_HASH_DST_PORT_TCP);
        tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
                .size = tcp_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        tcp->val.dst_port = spec->hdr.dst_port;
        tcp->val.src_port = spec->hdr.src_port;
        tcp->mask.dst_port = mask->hdr.dst_port;
        tcp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        tcp->val.src_port &= tcp->mask.src_port;
        tcp->val.dst_port &= tcp->mask.dst_port;
        return 0;
}

/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data)
{
        const struct rte_flow_item_vxlan *spec = item->spec;
        const struct rte_flow_item_vxlan *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_tunnel *vxlan;
        unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
        union vni {
                uint32_t vlan_id;
                uint8_t vni[4];
        } id;
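        /*
         * The 24-bit VXLAN VNI is carried in vni[1..3] of the union while
         * vni[0] stays cleared, so reading the union through vlan_id
         * yields the 32-bit tunnel id passed to Verbs.
         */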

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        id.vni[0] = 0;
        vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *vxlan = (struct ibv_exp_flow_spec_tunnel) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
                .size = size,
        };
        flow->inner = IBV_EXP_FLOW_SPEC_INNER;
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(&id.vni[1], spec->vni, 3);
        vxlan->val.tunnel_id = id.vlan_id;
        memcpy(&id.vni[1], mask->vni, 3);
        vxlan->mask.tunnel_id = id.vlan_id;
        /* Remove unwanted bits from values. */
        vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
        return 0;
}

/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param flow
 *   Pointer to MLX5 flow structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
{
        struct ibv_exp_flow_spec_action_tag *tag;
        unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);

        tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tag = (struct ibv_exp_flow_spec_action_tag){
                .type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
                .size = size,
                .tag_id = mlx5_flow_mark_set(mark_id),
        };
        ++flow->ibv_attr->num_of_specs;
        return 0;
}

/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create_action_queue_drop(struct priv *priv,
                                   struct mlx5_flow *flow,
                                   struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
        struct ibv_exp_flow_spec_action_drop *drop;
        unsigned int size = sizeof(struct ibv_exp_flow_spec_action_drop);
#endif

        assert(priv->pd);
        assert(priv->ctx);
        rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        rte_flow->drop = 1;
#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
        drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *drop = (struct ibv_exp_flow_spec_action_drop){
                        .type = IBV_EXP_FLOW_SPEC_ACTION_DROP,
                        .size = size,
        };
        ++flow->ibv_attr->num_of_specs;
        flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
#endif
        rte_flow->ibv_attr = flow->ibv_attr;
        if (!priv->started)
                return rte_flow;
        rte_flow->qp = priv->flow_drop_queue->qp;
        rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
                                                 rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        assert(rte_flow);
        rte_free(rte_flow);
        return NULL;
}

/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param action
 *   Target action structure.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
                              struct mlx5_flow *flow,
                              struct mlx5_flow_action *action,
                              struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        unsigned int i;
        unsigned int j;
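        /*
         * The RX hash indirection table requires a power-of-two number
         * of work queues.
         */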
        const unsigned int wqs_n = 1 << log2above(action->queues_n);
        struct ibv_exp_wq *wqs[wqs_n];

        assert(priv->pd);
        assert(priv->ctx);
        assert(!action->drop);
        rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
                              sizeof(*rte_flow->rxqs) * action->queues_n, 0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        for (i = 0; i < action->queues_n; ++i) {
                struct rxq_ctrl *rxq;

                rxq = container_of((*priv->rxqs)[action->queues[i]],
                                   struct rxq_ctrl, rxq);
                wqs[i] = rxq->wq;
                rte_flow->rxqs[i] = &rxq->rxq;
                ++rte_flow->rxqs_n;
                rxq->rxq.mark |= action->mark;
        }
        /*
         * Finalise the indirection table: pad it to wqs_n entries by
         * cycling over the configured queues.
         */
        for (j = 0; i < wqs_n; ++i, ++j) {
                wqs[i] = wqs[j];
                if (j == action->queues_n)
                        j = 0;
        }
        rte_flow->mark = action->mark;
        rte_flow->ibv_attr = flow->ibv_attr;
        rte_flow->hash_fields = flow->hash_fields;
        rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
                priv->ctx,
                &(struct ibv_exp_rwq_ind_table_init_attr){
                        .pd = priv->pd,
                        .log_ind_tbl_size = log2above(action->queues_n),
                        .ind_tbl = wqs,
                        .comp_mask = 0,
                });
        if (!rte_flow->ind_table) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate indirection table");
                goto error;
        }
        rte_flow->qp = ibv_exp_create_qp(
                priv->ctx,
                &(struct ibv_exp_qp_init_attr){
                        .qp_type = IBV_QPT_RAW_PACKET,
                        .comp_mask =
                                IBV_EXP_QP_INIT_ATTR_PD |
                                IBV_EXP_QP_INIT_ATTR_PORT |
                                IBV_EXP_QP_INIT_ATTR_RX_HASH,
                        .pd = priv->pd,
                        .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
                                .rx_hash_function =
                                        IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
                                .rx_hash_key_len = rss_hash_default_key_len,
                                .rx_hash_key = rss_hash_default_key,
                                .rx_hash_fields_mask = rte_flow->hash_fields,
                                .rwq_ind_tbl = rte_flow->ind_table,
                        },
                        .port_num = priv->port,
                });
        if (!rte_flow->qp) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate QP");
                goto error;
        }
        if (!priv->started)
                return rte_flow;
        rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
                                                 rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        assert(rte_flow);
        if (rte_flow->qp)
                ibv_destroy_qp(rte_flow->qp);
        if (rte_flow->ind_table)
                ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
        rte_free(rte_flow);
        return NULL;
}

/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
        struct mlx5_flow_action action = {
                .queue = 0,
                .drop = 0,
                .mark = 0,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
                .queues_n = 0,
        };
        int err;

        err = priv_flow_validate(priv, attr, items, actions, error, &flow,
                                 &action);
        if (err)
                goto exit;
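        /*
         * The first validation pass above only computed the required
         * buffer size in flow.offset; allocate it, then run the same
         * pass again to fill in the Verbs specifications.
         */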
        flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
        flow.offset = sizeof(struct ibv_exp_flow_attr);
        if (!flow.ibv_attr) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate ibv_attr memory");
                goto exit;
        }
        *flow.ibv_attr = (struct ibv_exp_flow_attr){
                .type = IBV_EXP_FLOW_ATTR_NORMAL,
                .size = sizeof(struct ibv_exp_flow_attr),
                .priority = attr->priority,
                .num_of_specs = 0,
                .port = 0,
                .flags = 0,
                .reserved = 0,
        };
        flow.inner = 0;
        flow.hash_fields = 0;
        claim_zero(priv_flow_validate(priv, attr, items, actions,
                                      error, &flow, &action));
        if (action.mark && !action.drop) {
                mlx5_flow_create_flag_mark(&flow, action.mark_id);
                flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
        }
        if (action.drop)
                rte_flow =
                        priv_flow_create_action_queue_drop(priv, &flow, error);
        else
                rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
                                                         error);
        if (!rte_flow)
                goto exit;
        return rte_flow;
exit:
        rte_free(flow.ibv_attr);
        return NULL;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;

        priv_lock(priv);
        flow = priv_flow_create(priv, attr, items, actions, error);
        if (flow) {
                TAILQ_INSERT_TAIL(&priv->flows, flow, next);
                DEBUG("Flow created %p", (void *)flow);
        }
        priv_unlock(priv);
        return flow;
}
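
/*
 * Illustration only, not part of the driver: an application reaches
 * mlx5_flow_create() through the generic rte_flow API, e.g.:
 *
 *     struct rte_flow_error err;
 *     struct rte_flow_attr attr = { .ingress = 1 };
 *     struct rte_flow_item pattern[] = {
 *             { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *             { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *             { .type = RTE_FLOW_ITEM_TYPE_END },
 *     };
 *     struct rte_flow_action actions[] = {
 *             { .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *               .conf = &(struct rte_flow_action_queue){ .index = 0 } },
 *             { .type = RTE_FLOW_ACTION_TYPE_END },
 *     };
 *     struct rte_flow *f = rte_flow_create(port_id, &attr, pattern,
 *                                          actions, &err);
 *
 * The pattern above follows the ETH -> IPV4 chain allowed by
 * mlx5_flow_items[] and steers matched packets to RX queue 0.
 */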

/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
                  struct rte_flow *flow)
{
        TAILQ_REMOVE(&priv->flows, flow, next);
        if (flow->ibv_flow)
                claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
        if (flow->drop)
                goto free;
        if (flow->qp)
                claim_zero(ibv_destroy_qp(flow->qp));
        if (flow->ind_table)
                claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
        if (flow->drop && flow->wq)
                claim_zero(ibv_exp_destroy_wq(flow->wq));
        if (flow->drop && flow->cq)
                claim_zero(ibv_destroy_cq(flow->cq));
        if (flow->mark) {
                struct rte_flow *tmp;
                struct rxq *rxq;
                uint32_t mark_n = 0;
                uint32_t queue_n;

                /*
                 * To remove the mark from the queue, the queue must not be
                 * present in any other marked flow (RSS or not).
                 */
                for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
                        rxq = flow->rxqs[queue_n];
                        for (tmp = TAILQ_FIRST(&priv->flows);
                             tmp;
                             tmp = TAILQ_NEXT(tmp, next)) {
                                uint32_t tqueue_n;

                                if (tmp->drop)
                                        continue;
                                for (tqueue_n = 0;
                                     tqueue_n < tmp->rxqs_n;
                                     ++tqueue_n) {
                                        struct rxq *trxq;

                                        trxq = tmp->rxqs[tqueue_n];
                                        if (rxq == trxq)
                                                ++mark_n;
                                }
                        }
                        rxq->mark = !!mark_n;
                }
        }
free:
        rte_free(flow->ibv_attr);
        DEBUG("Flow destroyed %p", (void *)flow);
        rte_free(flow);
}

1324 /**
1325  * Destroy a flow.
1326  *
1327  * @see rte_flow_destroy()
1328  * @see rte_flow_ops
1329  */
1330 int
1331 mlx5_flow_destroy(struct rte_eth_dev *dev,
1332                   struct rte_flow *flow,
1333                   struct rte_flow_error *error)
1334 {
1335         struct priv *priv = dev->data->dev_private;
1336
1337         (void)error;
1338         priv_lock(priv);
1339         priv_flow_destroy(priv, flow);
1340         priv_unlock(priv);
1341         return 0;
1342 }
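
/*
 * Usage sketch (hypothetical application code): destroying the rule
 * created in the earlier sketch.
 *
 *      struct rte_flow_error err;
 *
 *      if (rte_flow_destroy(port_id, flow, &err))
 *              printf("flow destruction failed: %s\n", err.message);
 */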

/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_flush(struct priv *priv)
{
        while (!TAILQ_EMPTY(&priv->flows)) {
                struct rte_flow *flow;

                flow = TAILQ_FIRST(&priv->flows);
                priv_flow_destroy(priv, flow);
        }
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
                struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        (void)error;
        priv_lock(priv);
        priv_flow_flush(priv);
        priv_unlock(priv);
        return 0;
}
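
/*
 * Usage sketch (hypothetical application code): tearing down every rule on
 * a port in one call instead of destroying them individually.
 *
 *      struct rte_flow_error err;
 *
 *      if (rte_flow_flush(port_id, &err))
 *              printf("flush failed: %s\n", err.message);
 */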

/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
priv_flow_create_drop_queue(struct priv *priv)
{
        struct rte_flow_drop *fdq = NULL;
        unsigned int i;

        assert(priv->pd);
        assert(priv->ctx);
        fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
        if (!fdq) {
                WARN("cannot allocate memory for drop queue");
                goto error;
        }
        fdq->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
                        &(struct ibv_exp_cq_init_attr){
                        .comp_mask = 0,
                        });
        if (!fdq->cq) {
                WARN("cannot allocate CQ for drop queue");
                goto error;
        }
        for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
                fdq->wqs[i] = ibv_exp_create_wq(priv->ctx,
                                &(struct ibv_exp_wq_init_attr){
                                .wq_type = IBV_EXP_WQT_RQ,
                                .max_recv_wr = 1,
                                .max_recv_sge = 1,
                                .pd = priv->pd,
                                .cq = fdq->cq,
                                });
                if (!fdq->wqs[i]) {
                        WARN("cannot allocate WQ for drop queue");
                        goto error;
                }
        }
        fdq->ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
                        &(struct ibv_exp_rwq_ind_table_init_attr){
                        .pd = priv->pd,
                        .log_ind_tbl_size = 0,
                        .ind_tbl = fdq->wqs,
                        .comp_mask = 0,
                        });
        if (!fdq->ind_table) {
                WARN("cannot allocate indirection table for drop queue");
                goto error;
        }
        fdq->qp = ibv_exp_create_qp(priv->ctx,
                &(struct ibv_exp_qp_init_attr){
                        .qp_type = IBV_QPT_RAW_PACKET,
                        .comp_mask =
                                IBV_EXP_QP_INIT_ATTR_PD |
                                IBV_EXP_QP_INIT_ATTR_PORT |
                                IBV_EXP_QP_INIT_ATTR_RX_HASH,
                        .pd = priv->pd,
                        .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
                                .rx_hash_function =
                                        IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
                                .rx_hash_key_len = rss_hash_default_key_len,
                                .rx_hash_key = rss_hash_default_key,
                                .rx_hash_fields_mask = 0,
                                .rwq_ind_tbl = fdq->ind_table,
                                },
                        .port_num = priv->port,
                        });
        if (!fdq->qp) {
                WARN("cannot allocate QP for drop queue");
                goto error;
        }
        priv->flow_drop_queue = fdq;
        return 0;
error:
        /* fdq is NULL when the initial allocation failed; skip cleanup. */
        if (!fdq)
                goto out;
        if (fdq->qp)
                claim_zero(ibv_destroy_qp(fdq->qp));
        if (fdq->ind_table)
                claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
        for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
                if (fdq->wqs[i])
                        claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
        }
        if (fdq->cq)
                claim_zero(ibv_destroy_cq(fdq->cq));
        rte_free(fdq);
out:
        priv->flow_drop_queue = NULL;
        return -1;
}
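
/*
 * Design note (inferred from this file, not an authoritative statement):
 * the drop queue is a regular hash QP whose work queues never have receive
 * buffers posted, so any packet steered to it is discarded. It is created
 * once per port and shared by every flow carrying a drop action (see
 * priv_flow_start()).
 */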

/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_delete_drop_queue(struct priv *priv)
{
        struct rte_flow_drop *fdq = priv->flow_drop_queue;
        unsigned int i;

        if (!fdq)
                return;
        if (fdq->qp)
                claim_zero(ibv_destroy_qp(fdq->qp));
        if (fdq->ind_table)
                claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
        for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
                if (fdq->wqs[i])
                        claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
        }
        if (fdq->cq)
                claim_zero(ibv_destroy_cq(fdq->cq));
        rte_free(fdq);
        priv->flow_drop_queue = NULL;
}

/**
 * Remove all flows.
 *
 * Called by dev_stop() to remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_stop(struct priv *priv)
{
        struct rte_flow *flow;

        TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
                claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
                flow->ibv_flow = NULL;
                if (flow->mark) {
                        unsigned int n;

                        for (n = 0; n < flow->rxqs_n; ++n)
                                flow->rxqs[n]->mark = 0;
                }
                DEBUG("Flow %p removed", (void *)flow);
        }
        priv_flow_delete_drop_queue(priv);
}

/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv)
{
        int ret;
        struct rte_flow *flow;

        ret = priv_flow_create_drop_queue(priv);
        if (ret)
                return -1;
        TAILQ_FOREACH(flow, &priv->flows, next) {
                struct ibv_qp *qp;

                if (flow->drop)
                        qp = priv->flow_drop_queue->qp;
                else
                        qp = flow->qp;
                flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
                if (!flow->ibv_flow) {
                        DEBUG("Flow %p cannot be applied", (void *)flow);
                        rte_errno = EINVAL;
                        return rte_errno;
                }
                DEBUG("Flow %p applied", (void *)flow);
                if (flow->mark) {
                        unsigned int n;

                        for (n = 0; n < flow->rxqs_n; ++n)
                                flow->rxqs[n]->mark = 1;
                }
        }
        return 0;
}
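
/*
 * Sketch of the stop/start cycle implemented by priv_flow_stop() and
 * priv_flow_start() (simplified): flows stay queued in priv->flows across
 * a port restart; only their Verbs flow objects are destroyed on dev_stop()
 * and re-created on dev_start().
 *
 *      priv_flow_stop(priv);           <- dev_stop(): ibv_flow destroyed
 *      ...
 *      if (priv_flow_start(priv))      <- dev_start(): ibv_flow re-created
 *              handle the error;
 */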

/**
 * Verify if the Rx queue is used in a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param rxq
 *   Pointer to the queue to search.
 *
 * @return
 *   Nonzero if the queue is used by a flow.
 */
int
priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
{
        struct rte_flow *flow;

        TAILQ_FOREACH(flow, &priv->flows, next) {
                unsigned int n;

                if (flow->drop)
                        continue;
                for (n = 0; n < flow->rxqs_n; ++n) {
                        if (flow->rxqs[n] == rxq)
                                return 1;
                }
        }
        return 0;
}
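
/*
 * Intended use (simplified, hypothetical caller): checked before an Rx
 * queue is released so that a queue still referenced by a flow is not
 * destroyed underneath it.
 *
 *      if (priv_flow_rxq_in_use(priv, rxq))
 *              return EBUSY;           <- still referenced by a flow
 *      release_rxq(rxq);               <- hypothetical release helper
 */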

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
                  int enable,
                  struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        priv_lock(priv);
        if (priv->started) {
                rte_flow_error_set(error, EBUSY,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
                                   "port must be stopped first");
                priv_unlock(priv);
                return -rte_errno;
        }
        priv->isolated = !!enable;
        priv_unlock(priv);
        return 0;
}
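
/*
 * Usage sketch (hypothetical application code): as enforced above, isolated
 * mode must be selected while the port is stopped, before
 * rte_eth_dev_start().
 *
 *      struct rte_flow_error err;
 *
 *      if (rte_flow_isolate(port_id, 1, &err))
 *              printf("cannot enable isolated mode: %s\n", err.message);
 *      rte_eth_dev_start(port_id);
 */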