New upstream version 18.11-rc4
[deb_dpdk.git] / drivers / net / mlx5 / mlx5_flow_verbs.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_prm.h"
33 #include "mlx5_glue.h"
34 #include "mlx5_flow.h"
35
/*
 * Evaluate to IBV_FLOW_SPEC_INNER when item_flags has any bit of
 * MLX5_FLOW_LAYER_TUNNEL set and to 0 otherwise, so the result can be
 * OR-ed into a Verbs specification type.
 */
#define VERBS_SPEC_INNER(item_flags) \
	((((item_flags) & MLX5_FLOW_LAYER_TUNNEL) != 0) ? IBV_FLOW_SPEC_INNER : 0)
38
39 /**
40  * Create Verbs flow counter with Verbs library.
41  *
42  * @param[in] dev
43  *   Pointer to the Ethernet device structure.
44  * @param[in, out] counter
45  *   mlx5 flow counter object, contains the counter id,
46  *   handle of created Verbs flow counter is returned
47  *   in cs field (if counters are supported).
48  *
49  * @return
50  *   0 On success else a negative errno value is returned
51  *   and rte_errno is set.
52  */
53 static int
54 flow_verbs_counter_create(struct rte_eth_dev *dev,
55                           struct mlx5_flow_counter *counter)
56 {
57 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
58         struct priv *priv = dev->data->dev_private;
59         struct ibv_counter_set_init_attr init = {
60                          .counter_set_id = counter->id};
61
62         counter->cs = mlx5_glue->create_counter_set(priv->ctx, &init);
63         if (!counter->cs) {
64                 rte_errno = ENOTSUP;
65                 return -ENOTSUP;
66         }
67         return 0;
68 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
69         struct priv *priv = dev->data->dev_private;
70         struct ibv_counters_init_attr init = {0};
71         struct ibv_counter_attach_attr attach;
72         int ret;
73
74         memset(&attach, 0, sizeof(attach));
75         counter->cs = mlx5_glue->create_counters(priv->ctx, &init);
76         if (!counter->cs) {
77                 rte_errno = ENOTSUP;
78                 return -ENOTSUP;
79         }
80         attach.counter_desc = IBV_COUNTER_PACKETS;
81         attach.index = 0;
82         ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
83         if (!ret) {
84                 attach.counter_desc = IBV_COUNTER_BYTES;
85                 attach.index = 1;
86                 ret = mlx5_glue->attach_counters
87                                         (counter->cs, &attach, NULL);
88         }
89         if (ret) {
90                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
91                 counter->cs = NULL;
92                 rte_errno = ret;
93                 return -ret;
94         }
95         return 0;
96 #else
97         (void)dev;
98         (void)counter;
99         rte_errno = ENOTSUP;
100         return -ENOTSUP;
101 #endif
102 }
103
104 /**
105  * Get a flow counter.
106  *
107  * @param[in] dev
108  *   Pointer to the Ethernet device structure.
109  * @param[in] shared
110  *   Indicate if this counter is shared with other flows.
111  * @param[in] id
112  *   Counter identifier.
113  *
114  * @return
115  *   A pointer to the counter, NULL otherwise and rte_errno is set.
116  */
117 static struct mlx5_flow_counter *
118 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
119 {
120         struct priv *priv = dev->data->dev_private;
121         struct mlx5_flow_counter *cnt;
122         int ret;
123
124         LIST_FOREACH(cnt, &priv->flow_counters, next) {
125                 if (!cnt->shared || cnt->shared != shared)
126                         continue;
127                 if (cnt->id != id)
128                         continue;
129                 cnt->ref_cnt++;
130                 return cnt;
131         }
132         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
133         if (!cnt) {
134                 rte_errno = ENOMEM;
135                 return NULL;
136         }
137         cnt->id = id;
138         cnt->shared = shared;
139         cnt->ref_cnt = 1;
140         cnt->hits = 0;
141         cnt->bytes = 0;
142         /* Create counter with Verbs. */
143         ret = flow_verbs_counter_create(dev, cnt);
144         if (!ret) {
145                 LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
146                 return cnt;
147         }
148         /* Some error occurred in Verbs library. */
149         rte_free(cnt);
150         rte_errno = -ret;
151         return NULL;
152 }
153
154 /**
155  * Release a flow counter.
156  *
157  * @param[in] counter
158  *   Pointer to the counter handler.
159  */
160 static void
161 flow_verbs_counter_release(struct mlx5_flow_counter *counter)
162 {
163         if (--counter->ref_cnt == 0) {
164 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
165                 claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
166 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
167                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
168 #endif
169                 LIST_REMOVE(counter, next);
170                 rte_free(counter);
171         }
172 }
173
174 /**
175  * Query a flow counter via Verbs library call.
176  *
177  * @see rte_flow_query()
178  * @see rte_flow_ops
179  */
180 static int
181 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
182                          struct rte_flow *flow, void *data,
183                          struct rte_flow_error *error)
184 {
185 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
186         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
187         if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
188                 struct rte_flow_query_count *qc = data;
189                 uint64_t counters[2] = {0, 0};
190 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
191                 struct ibv_query_counter_set_attr query_cs_attr = {
192                         .cs = flow->counter->cs,
193                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
194                 };
195                 struct ibv_counter_set_data query_out = {
196                         .out = counters,
197                         .outlen = 2 * sizeof(uint64_t),
198                 };
199                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
200                                                        &query_out);
201 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
202                 int err = mlx5_glue->query_counters
203                                (flow->counter->cs, counters,
204                                 RTE_DIM(counters),
205                                 IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
206 #endif
207                 if (err)
208                         return rte_flow_error_set
209                                 (error, err,
210                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
211                                  NULL,
212                                  "cannot read counter");
213                 qc->hits_set = 1;
214                 qc->bytes_set = 1;
215                 qc->hits = counters[0] - flow->counter->hits;
216                 qc->bytes = counters[1] - flow->counter->bytes;
217                 if (qc->reset) {
218                         flow->counter->hits = counters[0];
219                         flow->counter->bytes = counters[1];
220                 }
221                 return 0;
222         }
223         return rte_flow_error_set(error, EINVAL,
224                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
225                                   NULL,
226                                   "flow does not have counter");
227 #else
228         (void)flow;
229         (void)data;
230         return rte_flow_error_set(error, ENOTSUP,
231                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
232                                   NULL,
233                                   "counters are not available");
234 #endif
235 }
236
237 /**
238  * Add a verbs item specification into @p verbs.
239  *
240  * @param[out] verbs
241  *   Pointer to verbs structure.
242  * @param[in] src
243  *   Create specification.
244  * @param[in] size
245  *   Size in bytes of the specification to copy.
246  */
247 static void
248 flow_verbs_spec_add(struct mlx5_flow_verbs *verbs, void *src, unsigned int size)
249 {
250         void *dst;
251
252         if (!verbs)
253                 return;
254         assert(verbs->specs);
255         dst = (void *)(verbs->specs + verbs->size);
256         memcpy(dst, src, size);
257         ++verbs->attr->num_of_specs;
258         verbs->size += size;
259 }
260
261 /**
262  * Convert the @p item into a Verbs specification. This function assumes that
263  * the input is valid and that there is space to insert the requested item
264  * into the flow.
265  *
266  * @param[in, out] dev_flow
267  *   Pointer to dev_flow structure.
268  * @param[in] item
269  *   Item specification.
270  * @param[in] item_flags
271  *   Parsed item flags.
272  */
273 static void
274 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
275                               const struct rte_flow_item *item,
276                               uint64_t item_flags)
277 {
278         const struct rte_flow_item_eth *spec = item->spec;
279         const struct rte_flow_item_eth *mask = item->mask;
280         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
281         struct ibv_flow_spec_eth eth = {
282                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
283                 .size = size,
284         };
285
286         if (!mask)
287                 mask = &rte_flow_item_eth_mask;
288         if (spec) {
289                 unsigned int i;
290
291                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
292                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
293                 eth.val.ether_type = spec->type;
294                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
295                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
296                 eth.mask.ether_type = mask->type;
297                 /* Remove unwanted bits from values. */
298                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
299                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
300                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
301                 }
302                 eth.val.ether_type &= eth.mask.ether_type;
303         }
304         flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
305 }
306
307 /**
308  * Update the VLAN tag in the Verbs Ethernet specification.
309  * This function assumes that the input is valid and there is space to add
310  * the requested item.
311  *
312  * @param[in, out] attr
313  *   Pointer to Verbs attributes structure.
314  * @param[in] eth
315  *   Verbs structure containing the VLAN information to copy.
316  */
317 static void
318 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
319                             struct ibv_flow_spec_eth *eth)
320 {
321         unsigned int i;
322         const enum ibv_flow_spec_type search = eth->type;
323         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
324                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
325
326         for (i = 0; i != attr->num_of_specs; ++i) {
327                 if (hdr->type == search) {
328                         struct ibv_flow_spec_eth *e =
329                                 (struct ibv_flow_spec_eth *)hdr;
330
331                         e->val.vlan_tag = eth->val.vlan_tag;
332                         e->mask.vlan_tag = eth->mask.vlan_tag;
333                         e->val.ether_type = eth->val.ether_type;
334                         e->mask.ether_type = eth->mask.ether_type;
335                         break;
336                 }
337                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
338         }
339 }
340
341 /**
342  * Convert the @p item into a Verbs specification. This function assumes that
343  * the input is valid and that there is space to insert the requested item
344  * into the flow.
345  *
346  * @param[in, out] dev_flow
347  *   Pointer to dev_flow structure.
348  * @param[in] item
349  *   Item specification.
350  * @param[in] item_flags
351  *   Parsed item flags.
352  */
353 static void
354 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
355                                const struct rte_flow_item *item,
356                                uint64_t item_flags)
357 {
358         const struct rte_flow_item_vlan *spec = item->spec;
359         const struct rte_flow_item_vlan *mask = item->mask;
360         unsigned int size = sizeof(struct ibv_flow_spec_eth);
361         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
362         struct ibv_flow_spec_eth eth = {
363                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
364                 .size = size,
365         };
366         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
367                                       MLX5_FLOW_LAYER_OUTER_L2;
368
369         if (!mask)
370                 mask = &rte_flow_item_vlan_mask;
371         if (spec) {
372                 eth.val.vlan_tag = spec->tci;
373                 eth.mask.vlan_tag = mask->tci;
374                 eth.val.vlan_tag &= eth.mask.vlan_tag;
375                 eth.val.ether_type = spec->inner_type;
376                 eth.mask.ether_type = mask->inner_type;
377                 eth.val.ether_type &= eth.mask.ether_type;
378         }
379         if (!(item_flags & l2m))
380                 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
381         else
382                 flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
383 }
384
385 /**
386  * Convert the @p item into a Verbs specification. This function assumes that
387  * the input is valid and that there is space to insert the requested item
388  * into the flow.
389  *
390  * @param[in, out] dev_flow
391  *   Pointer to dev_flow structure.
392  * @param[in] item
393  *   Item specification.
394  * @param[in] item_flags
395  *   Parsed item flags.
396  */
397 static void
398 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
399                                const struct rte_flow_item *item,
400                                uint64_t item_flags)
401 {
402         const struct rte_flow_item_ipv4 *spec = item->spec;
403         const struct rte_flow_item_ipv4 *mask = item->mask;
404         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
405         struct ibv_flow_spec_ipv4_ext ipv4 = {
406                 .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
407                 .size = size,
408         };
409
410         if (!mask)
411                 mask = &rte_flow_item_ipv4_mask;
412         if (spec) {
413                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
414                         .src_ip = spec->hdr.src_addr,
415                         .dst_ip = spec->hdr.dst_addr,
416                         .proto = spec->hdr.next_proto_id,
417                         .tos = spec->hdr.type_of_service,
418                 };
419                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
420                         .src_ip = mask->hdr.src_addr,
421                         .dst_ip = mask->hdr.dst_addr,
422                         .proto = mask->hdr.next_proto_id,
423                         .tos = mask->hdr.type_of_service,
424                 };
425                 /* Remove unwanted bits from values. */
426                 ipv4.val.src_ip &= ipv4.mask.src_ip;
427                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
428                 ipv4.val.proto &= ipv4.mask.proto;
429                 ipv4.val.tos &= ipv4.mask.tos;
430         }
431         flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
432 }
433
434 /**
435  * Convert the @p item into a Verbs specification. This function assumes that
436  * the input is valid and that there is space to insert the requested item
437  * into the flow.
438  *
439  * @param[in, out] dev_flow
440  *   Pointer to dev_flow structure.
441  * @param[in] item
442  *   Item specification.
443  * @param[in] item_flags
444  *   Parsed item flags.
445  */
446 static void
447 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
448                                const struct rte_flow_item *item,
449                                uint64_t item_flags)
450 {
451         const struct rte_flow_item_ipv6 *spec = item->spec;
452         const struct rte_flow_item_ipv6 *mask = item->mask;
453         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
454         struct ibv_flow_spec_ipv6 ipv6 = {
455                 .type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
456                 .size = size,
457         };
458
459         if (!mask)
460                 mask = &rte_flow_item_ipv6_mask;
461         if (spec) {
462                 unsigned int i;
463                 uint32_t vtc_flow_val;
464                 uint32_t vtc_flow_mask;
465
466                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
467                        RTE_DIM(ipv6.val.src_ip));
468                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
469                        RTE_DIM(ipv6.val.dst_ip));
470                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
471                        RTE_DIM(ipv6.mask.src_ip));
472                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
473                        RTE_DIM(ipv6.mask.dst_ip));
474                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
475                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
476                 ipv6.val.flow_label =
477                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
478                                          IPV6_HDR_FL_SHIFT);
479                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
480                                          IPV6_HDR_TC_SHIFT;
481                 ipv6.val.next_hdr = spec->hdr.proto;
482                 ipv6.val.hop_limit = spec->hdr.hop_limits;
483                 ipv6.mask.flow_label =
484                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
485                                          IPV6_HDR_FL_SHIFT);
486                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
487                                           IPV6_HDR_TC_SHIFT;
488                 ipv6.mask.next_hdr = mask->hdr.proto;
489                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
490                 /* Remove unwanted bits from values. */
491                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
492                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
493                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
494                 }
495                 ipv6.val.flow_label &= ipv6.mask.flow_label;
496                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
497                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
498                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
499         }
500         flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
501 }
502
503 /**
504  * Convert the @p item into a Verbs specification. This function assumes that
505  * the input is valid and that there is space to insert the requested item
506  * into the flow.
507  *
508  * @param[in, out] dev_flow
509  *   Pointer to dev_flow structure.
510  * @param[in] item
511  *   Item specification.
512  * @param[in] item_flags
513  *   Parsed item flags.
514  */
515 static void
516 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
517                               const struct rte_flow_item *item,
518                               uint64_t item_flags __rte_unused)
519 {
520         const struct rte_flow_item_tcp *spec = item->spec;
521         const struct rte_flow_item_tcp *mask = item->mask;
522         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
523         struct ibv_flow_spec_tcp_udp tcp = {
524                 .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
525                 .size = size,
526         };
527
528         if (!mask)
529                 mask = &rte_flow_item_tcp_mask;
530         if (spec) {
531                 tcp.val.dst_port = spec->hdr.dst_port;
532                 tcp.val.src_port = spec->hdr.src_port;
533                 tcp.mask.dst_port = mask->hdr.dst_port;
534                 tcp.mask.src_port = mask->hdr.src_port;
535                 /* Remove unwanted bits from values. */
536                 tcp.val.src_port &= tcp.mask.src_port;
537                 tcp.val.dst_port &= tcp.mask.dst_port;
538         }
539         flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
540 }
541
542 /**
543  * Convert the @p item into a Verbs specification. This function assumes that
544  * the input is valid and that there is space to insert the requested item
545  * into the flow.
546  *
547  * @param[in, out] dev_flow
548  *   Pointer to dev_flow structure.
549  * @param[in] item
550  *   Item specification.
551  * @param[in] item_flags
552  *   Parsed item flags.
553  */
554 static void
555 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
556                               const struct rte_flow_item *item,
557                               uint64_t item_flags __rte_unused)
558 {
559         const struct rte_flow_item_udp *spec = item->spec;
560         const struct rte_flow_item_udp *mask = item->mask;
561         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
562         struct ibv_flow_spec_tcp_udp udp = {
563                 .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
564                 .size = size,
565         };
566
567         if (!mask)
568                 mask = &rte_flow_item_udp_mask;
569         if (spec) {
570                 udp.val.dst_port = spec->hdr.dst_port;
571                 udp.val.src_port = spec->hdr.src_port;
572                 udp.mask.dst_port = mask->hdr.dst_port;
573                 udp.mask.src_port = mask->hdr.src_port;
574                 /* Remove unwanted bits from values. */
575                 udp.val.src_port &= udp.mask.src_port;
576                 udp.val.dst_port &= udp.mask.dst_port;
577         }
578         flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
579 }
580
581 /**
582  * Convert the @p item into a Verbs specification. This function assumes that
583  * the input is valid and that there is space to insert the requested item
584  * into the flow.
585  *
586  * @param[in, out] dev_flow
587  *   Pointer to dev_flow structure.
588  * @param[in] item
589  *   Item specification.
590  * @param[in] item_flags
591  *   Parsed item flags.
592  */
593 static void
594 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
595                                 const struct rte_flow_item *item,
596                                 uint64_t item_flags __rte_unused)
597 {
598         const struct rte_flow_item_vxlan *spec = item->spec;
599         const struct rte_flow_item_vxlan *mask = item->mask;
600         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
601         struct ibv_flow_spec_tunnel vxlan = {
602                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
603                 .size = size,
604         };
605         union vni {
606                 uint32_t vlan_id;
607                 uint8_t vni[4];
608         } id = { .vlan_id = 0, };
609
610         if (!mask)
611                 mask = &rte_flow_item_vxlan_mask;
612         if (spec) {
613                 memcpy(&id.vni[1], spec->vni, 3);
614                 vxlan.val.tunnel_id = id.vlan_id;
615                 memcpy(&id.vni[1], mask->vni, 3);
616                 vxlan.mask.tunnel_id = id.vlan_id;
617                 /* Remove unwanted bits from values. */
618                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
619         }
620         flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
621 }
622
623 /**
624  * Convert the @p item into a Verbs specification. This function assumes that
625  * the input is valid and that there is space to insert the requested item
626  * into the flow.
627  *
628  * @param[in, out] dev_flow
629  *   Pointer to dev_flow structure.
630  * @param[in] item
631  *   Item specification.
632  * @param[in] item_flags
633  *   Parsed item flags.
634  */
635 static void
636 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
637                                     const struct rte_flow_item *item,
638                                     uint64_t item_flags __rte_unused)
639 {
640         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
641         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
642         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
643         struct ibv_flow_spec_tunnel vxlan_gpe = {
644                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
645                 .size = size,
646         };
647         union vni {
648                 uint32_t vlan_id;
649                 uint8_t vni[4];
650         } id = { .vlan_id = 0, };
651
652         if (!mask)
653                 mask = &rte_flow_item_vxlan_gpe_mask;
654         if (spec) {
655                 memcpy(&id.vni[1], spec->vni, 3);
656                 vxlan_gpe.val.tunnel_id = id.vlan_id;
657                 memcpy(&id.vni[1], mask->vni, 3);
658                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
659                 /* Remove unwanted bits from values. */
660                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
661         }
662         flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
663 }
664
665 /**
666  * Update the protocol in Verbs IPv4/IPv6 spec.
667  *
668  * @param[in, out] attr
669  *   Pointer to Verbs attributes structure.
670  * @param[in] search
671  *   Specification type to search in order to update the IP protocol.
672  * @param[in] protocol
673  *   Protocol value to set if none is present in the specification.
674  */
675 static void
676 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
677                                        enum ibv_flow_spec_type search,
678                                        uint8_t protocol)
679 {
680         unsigned int i;
681         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
682                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
683
684         if (!attr)
685                 return;
686         for (i = 0; i != attr->num_of_specs; ++i) {
687                 if (hdr->type == search) {
688                         union {
689                                 struct ibv_flow_spec_ipv4_ext *ipv4;
690                                 struct ibv_flow_spec_ipv6 *ipv6;
691                         } ip;
692
693                         switch (search) {
694                         case IBV_FLOW_SPEC_IPV4_EXT:
695                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
696                                 if (!ip.ipv4->val.proto) {
697                                         ip.ipv4->val.proto = protocol;
698                                         ip.ipv4->mask.proto = 0xff;
699                                 }
700                                 break;
701                         case IBV_FLOW_SPEC_IPV6:
702                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
703                                 if (!ip.ipv6->val.next_hdr) {
704                                         ip.ipv6->val.next_hdr = protocol;
705                                         ip.ipv6->mask.next_hdr = 0xff;
706                                 }
707                                 break;
708                         default:
709                                 break;
710                         }
711                         break;
712                 }
713                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
714         }
715 }
716
717 /**
718  * Convert the @p item into a Verbs specification. This function assumes that
719  * the input is valid and that there is space to insert the requested item
720  * into the flow.
721  *
722  * @param[in, out] dev_flow
723  *   Pointer to dev_flow structure.
724  * @param[in] item
725  *   Item specification.
726  * @param[in] item_flags
727  *   Parsed item flags.
728  */
729 static void
730 flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
731                               const struct rte_flow_item *item __rte_unused,
732                               uint64_t item_flags)
733 {
734         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
735 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
736         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
737         struct ibv_flow_spec_tunnel tunnel = {
738                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
739                 .size = size,
740         };
741 #else
742         const struct rte_flow_item_gre *spec = item->spec;
743         const struct rte_flow_item_gre *mask = item->mask;
744         unsigned int size = sizeof(struct ibv_flow_spec_gre);
745         struct ibv_flow_spec_gre tunnel = {
746                 .type = IBV_FLOW_SPEC_GRE,
747                 .size = size,
748         };
749
750         if (!mask)
751                 mask = &rte_flow_item_gre_mask;
752         if (spec) {
753                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
754                 tunnel.val.protocol = spec->protocol;
755                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
756                 tunnel.mask.protocol = mask->protocol;
757                 /* Remove unwanted bits from values. */
758                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
759                 tunnel.val.protocol &= tunnel.mask.protocol;
760                 tunnel.val.key &= tunnel.mask.key;
761         }
762 #endif
763         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
764                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
765                                                        IBV_FLOW_SPEC_IPV4_EXT,
766                                                        IPPROTO_GRE);
767         else
768                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
769                                                        IBV_FLOW_SPEC_IPV6,
770                                                        IPPROTO_GRE);
771         flow_verbs_spec_add(verbs, &tunnel, size);
772 }
773
774 /**
775  * Convert the @p action into a Verbs specification. This function assumes that
776  * the input is valid and that there is space to insert the requested action
777  * into the flow. This function also return the action that was added.
778  *
779  * @param[in, out] dev_flow
780  *   Pointer to dev_flow structure.
781  * @param[in] item
782  *   Item specification.
783  * @param[in] item_flags
784  *   Parsed item flags.
785  */
786 static void
787 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
788                                const struct rte_flow_item *item __rte_unused,
789                                uint64_t item_flags __rte_unused)
790 {
791 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
792         const struct rte_flow_item_mpls *spec = item->spec;
793         const struct rte_flow_item_mpls *mask = item->mask;
794         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
795         struct ibv_flow_spec_mpls mpls = {
796                 .type = IBV_FLOW_SPEC_MPLS,
797                 .size = size,
798         };
799
800         if (!mask)
801                 mask = &rte_flow_item_mpls_mask;
802         if (spec) {
803                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
804                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
805                 /* Remove unwanted bits from values.  */
806                 mpls.val.label &= mpls.mask.label;
807         }
808         flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
809 #endif
810 }
811
812 /**
813  * Convert the @p action into a Verbs specification. This function assumes that
814  * the input is valid and that there is space to insert the requested action
815  * into the flow.
816  *
817  * @param[in] dev_flow
818  *   Pointer to mlx5_flow.
819  * @param[in] action
820  *   Action configuration.
821  */
822 static void
823 flow_verbs_translate_action_drop
824         (struct mlx5_flow *dev_flow,
825          const struct rte_flow_action *action __rte_unused)
826 {
827         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
828         struct ibv_flow_spec_action_drop drop = {
829                         .type = IBV_FLOW_SPEC_ACTION_DROP,
830                         .size = size,
831         };
832
833         flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
834 }
835
836 /**
837  * Convert the @p action into a Verbs specification. This function assumes that
838  * the input is valid and that there is space to insert the requested action
839  * into the flow.
840  *
841  * @param[in] dev_flow
842  *   Pointer to mlx5_flow.
843  * @param[in] action
844  *   Action configuration.
845  */
846 static void
847 flow_verbs_translate_action_queue(struct mlx5_flow *dev_flow,
848                                   const struct rte_flow_action *action)
849 {
850         const struct rte_flow_action_queue *queue = action->conf;
851         struct rte_flow *flow = dev_flow->flow;
852
853         if (flow->queue)
854                 (*flow->queue)[0] = queue->index;
855         flow->rss.queue_num = 1;
856 }
857
858 /**
859  * Convert the @p action into a Verbs specification. This function assumes that
860  * the input is valid and that there is space to insert the requested action
861  * into the flow.
862  *
863  * @param[in] action
864  *   Action configuration.
865  * @param[in, out] action_flags
866  *   Pointer to the detected actions.
867  * @param[in] dev_flow
868  *   Pointer to mlx5_flow.
869  */
870 static void
871 flow_verbs_translate_action_rss(struct mlx5_flow *dev_flow,
872                                 const struct rte_flow_action *action)
873 {
874         const struct rte_flow_action_rss *rss = action->conf;
875         const uint8_t *rss_key;
876         struct rte_flow *flow = dev_flow->flow;
877
878         if (flow->queue)
879                 memcpy((*flow->queue), rss->queue,
880                        rss->queue_num * sizeof(uint16_t));
881         flow->rss.queue_num = rss->queue_num;
882         /* NULL RSS key indicates default RSS key. */
883         rss_key = !rss->key ? rss_hash_default_key : rss->key;
884         memcpy(flow->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
885         /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
886         flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types;
887         flow->rss.level = rss->level;
888 }
889
890 /**
891  * Convert the @p action into a Verbs specification. This function assumes that
892  * the input is valid and that there is space to insert the requested action
893  * into the flow.
894  *
895  * @param[in] dev_flow
896  *   Pointer to mlx5_flow.
897  * @param[in] action
898  *   Action configuration.
899  */
900 static void
901 flow_verbs_translate_action_flag
902         (struct mlx5_flow *dev_flow,
903          const struct rte_flow_action *action __rte_unused)
904 {
905         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
906         struct ibv_flow_spec_action_tag tag = {
907                 .type = IBV_FLOW_SPEC_ACTION_TAG,
908                 .size = size,
909                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
910         };
911
912         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
913 }
914
915 /**
916  * Convert the @p action into a Verbs specification. This function assumes that
917  * the input is valid and that there is space to insert the requested action
918  * into the flow.
919  *
920  * @param[in] dev_flow
921  *   Pointer to mlx5_flow.
922  * @param[in] action
923  *   Action configuration.
924  */
925 static void
926 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
927                                  const struct rte_flow_action *action)
928 {
929         const struct rte_flow_action_mark *mark = action->conf;
930         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
931         struct ibv_flow_spec_action_tag tag = {
932                 .type = IBV_FLOW_SPEC_ACTION_TAG,
933                 .size = size,
934                 .tag_id = mlx5_flow_mark_set(mark->id),
935         };
936
937         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
938 }
939
940 /**
941  * Convert the @p action into a Verbs specification. This function assumes that
942  * the input is valid and that there is space to insert the requested action
943  * into the flow.
944  *
945  * @param[in] dev
946  *   Pointer to the Ethernet device structure.
947  * @param[in] action
948  *   Action configuration.
949  * @param[in] dev_flow
950  *   Pointer to mlx5_flow.
951  * @param[out] error
952  *   Pointer to error structure.
953  *
954  * @return
955  *   0 On success else a negative errno value is returned and rte_errno is set.
956  */
957 static int
958 flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
959                                   const struct rte_flow_action *action,
960                                   struct rte_eth_dev *dev,
961                                   struct rte_flow_error *error)
962 {
963         const struct rte_flow_action_count *count = action->conf;
964         struct rte_flow *flow = dev_flow->flow;
965 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
966         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
967         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
968         struct ibv_flow_spec_counter_action counter = {
969                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
970                 .size = size,
971         };
972 #endif
973
974         if (!flow->counter) {
975                 flow->counter = flow_verbs_counter_new(dev, count->shared,
976                                                        count->id);
977                 if (!flow->counter)
978                         return rte_flow_error_set(error, rte_errno,
979                                                   RTE_FLOW_ERROR_TYPE_ACTION,
980                                                   action,
981                                                   "cannot get counter"
982                                                   " context.");
983         }
984 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
985         counter.counter_set_handle = flow->counter->cs->handle;
986         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
987 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
988         counter.counters = flow->counter->cs;
989         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
990 #endif
991         return 0;
992 }
993
994 /**
995  * Internal validation function. For validating both actions and items.
996  *
997  * @param[in] dev
998  *   Pointer to the Ethernet device structure.
999  * @param[in] attr
1000  *   Pointer to the flow attributes.
1001  * @param[in] items
1002  *   Pointer to the list of items.
1003  * @param[in] actions
1004  *   Pointer to the list of actions.
1005  * @param[out] error
1006  *   Pointer to the error structure.
1007  *
1008  * @return
1009  *   0 on success, a negative errno value otherwise and rte_errno is set.
1010  */
1011 static int
1012 flow_verbs_validate(struct rte_eth_dev *dev,
1013                     const struct rte_flow_attr *attr,
1014                     const struct rte_flow_item items[],
1015                     const struct rte_flow_action actions[],
1016                     struct rte_flow_error *error)
1017 {
1018         int ret;
1019         uint64_t action_flags = 0;
1020         uint64_t item_flags = 0;
1021         uint64_t last_item = 0;
1022         uint8_t next_protocol = 0xff;
1023
1024         if (items == NULL)
1025                 return -1;
1026         ret = mlx5_flow_validate_attributes(dev, attr, error);
1027         if (ret < 0)
1028                 return ret;
1029         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1030                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1031                 int ret = 0;
1032
1033                 switch (items->type) {
1034                 case RTE_FLOW_ITEM_TYPE_VOID:
1035                         break;
1036                 case RTE_FLOW_ITEM_TYPE_ETH:
1037                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1038                                                           error);
1039                         if (ret < 0)
1040                                 return ret;
1041                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1042                                              MLX5_FLOW_LAYER_OUTER_L2;
1043                         break;
1044                 case RTE_FLOW_ITEM_TYPE_VLAN:
1045                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1046                                                            error);
1047                         if (ret < 0)
1048                                 return ret;
1049                         last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1050                                               MLX5_FLOW_LAYER_INNER_VLAN) :
1051                                              (MLX5_FLOW_LAYER_OUTER_L2 |
1052                                               MLX5_FLOW_LAYER_OUTER_VLAN);
1053                         break;
1054                 case RTE_FLOW_ITEM_TYPE_IPV4:
1055                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1056                                                            error);
1057                         if (ret < 0)
1058                                 return ret;
1059                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1060                                              MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1061                         if (items->mask != NULL &&
1062                             ((const struct rte_flow_item_ipv4 *)
1063                              items->mask)->hdr.next_proto_id) {
1064                                 next_protocol =
1065                                         ((const struct rte_flow_item_ipv4 *)
1066                                          (items->spec))->hdr.next_proto_id;
1067                                 next_protocol &=
1068                                         ((const struct rte_flow_item_ipv4 *)
1069                                          (items->mask))->hdr.next_proto_id;
1070                         } else {
1071                                 /* Reset for inner layer. */
1072                                 next_protocol = 0xff;
1073                         }
1074                         break;
1075                 case RTE_FLOW_ITEM_TYPE_IPV6:
1076                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1077                                                            error);
1078                         if (ret < 0)
1079                                 return ret;
1080                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1081                                              MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1082                         if (items->mask != NULL &&
1083                             ((const struct rte_flow_item_ipv6 *)
1084                              items->mask)->hdr.proto) {
1085                                 next_protocol =
1086                                         ((const struct rte_flow_item_ipv6 *)
1087                                          items->spec)->hdr.proto;
1088                                 next_protocol &=
1089                                         ((const struct rte_flow_item_ipv6 *)
1090                                          items->mask)->hdr.proto;
1091                         } else {
1092                                 /* Reset for inner layer. */
1093                                 next_protocol = 0xff;
1094                         }
1095                         break;
1096                 case RTE_FLOW_ITEM_TYPE_UDP:
1097                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1098                                                           next_protocol,
1099                                                           error);
1100                         if (ret < 0)
1101                                 return ret;
1102                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1103                                              MLX5_FLOW_LAYER_OUTER_L4_UDP;
1104                         break;
1105                 case RTE_FLOW_ITEM_TYPE_TCP:
1106                         ret = mlx5_flow_validate_item_tcp
1107                                                 (items, item_flags,
1108                                                  next_protocol,
1109                                                  &rte_flow_item_tcp_mask,
1110                                                  error);
1111                         if (ret < 0)
1112                                 return ret;
1113                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1114                                              MLX5_FLOW_LAYER_OUTER_L4_TCP;
1115                         break;
1116                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1117                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1118                                                             error);
1119                         if (ret < 0)
1120                                 return ret;
1121                         last_item = MLX5_FLOW_LAYER_VXLAN;
1122                         break;
1123                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1124                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1125                                                                 item_flags,
1126                                                                 dev, error);
1127                         if (ret < 0)
1128                                 return ret;
1129                         last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1130                         break;
1131                 case RTE_FLOW_ITEM_TYPE_GRE:
1132                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1133                                                           next_protocol, error);
1134                         if (ret < 0)
1135                                 return ret;
1136                         last_item = MLX5_FLOW_LAYER_GRE;
1137                         break;
1138                 case RTE_FLOW_ITEM_TYPE_MPLS:
1139                         ret = mlx5_flow_validate_item_mpls(dev, items,
1140                                                            item_flags,
1141                                                            last_item, error);
1142                         if (ret < 0)
1143                                 return ret;
1144                         last_item = MLX5_FLOW_LAYER_MPLS;
1145                         break;
1146                 default:
1147                         return rte_flow_error_set(error, ENOTSUP,
1148                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1149                                                   NULL, "item not supported");
1150                 }
1151                 item_flags |= last_item;
1152         }
1153         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1154                 switch (actions->type) {
1155                 case RTE_FLOW_ACTION_TYPE_VOID:
1156                         break;
1157                 case RTE_FLOW_ACTION_TYPE_FLAG:
1158                         ret = mlx5_flow_validate_action_flag(action_flags,
1159                                                              attr,
1160                                                              error);
1161                         if (ret < 0)
1162                                 return ret;
1163                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1164                         break;
1165                 case RTE_FLOW_ACTION_TYPE_MARK:
1166                         ret = mlx5_flow_validate_action_mark(actions,
1167                                                              action_flags,
1168                                                              attr,
1169                                                              error);
1170                         if (ret < 0)
1171                                 return ret;
1172                         action_flags |= MLX5_FLOW_ACTION_MARK;
1173                         break;
1174                 case RTE_FLOW_ACTION_TYPE_DROP:
1175                         ret = mlx5_flow_validate_action_drop(action_flags,
1176                                                              attr,
1177                                                              error);
1178                         if (ret < 0)
1179                                 return ret;
1180                         action_flags |= MLX5_FLOW_ACTION_DROP;
1181                         break;
1182                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1183                         ret = mlx5_flow_validate_action_queue(actions,
1184                                                               action_flags, dev,
1185                                                               attr,
1186                                                               error);
1187                         if (ret < 0)
1188                                 return ret;
1189                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1190                         break;
1191                 case RTE_FLOW_ACTION_TYPE_RSS:
1192                         ret = mlx5_flow_validate_action_rss(actions,
1193                                                             action_flags, dev,
1194                                                             attr,
1195                                                             error);
1196                         if (ret < 0)
1197                                 return ret;
1198                         action_flags |= MLX5_FLOW_ACTION_RSS;
1199                         break;
1200                 case RTE_FLOW_ACTION_TYPE_COUNT:
1201                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1202                         if (ret < 0)
1203                                 return ret;
1204                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1205                         break;
1206                 default:
1207                         return rte_flow_error_set(error, ENOTSUP,
1208                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1209                                                   actions,
1210                                                   "action not supported");
1211                 }
1212         }
1213         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1214                 return rte_flow_error_set(error, EINVAL,
1215                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1216                                           "no fate action is found");
1217         return 0;
1218 }
1219
1220 /**
1221  * Calculate the required bytes that are needed for the action part of the verbs
1222  * flow.
1223  *
1224  * @param[in] actions
1225  *   Pointer to the list of actions.
1226  *
1227  * @return
1228  *   The size of the memory needed for all actions.
1229  */
1230 static int
1231 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1232 {
1233         int size = 0;
1234
1235         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1236                 switch (actions->type) {
1237                 case RTE_FLOW_ACTION_TYPE_VOID:
1238                         break;
1239                 case RTE_FLOW_ACTION_TYPE_FLAG:
1240                         size += sizeof(struct ibv_flow_spec_action_tag);
1241                         break;
1242                 case RTE_FLOW_ACTION_TYPE_MARK:
1243                         size += sizeof(struct ibv_flow_spec_action_tag);
1244                         break;
1245                 case RTE_FLOW_ACTION_TYPE_DROP:
1246                         size += sizeof(struct ibv_flow_spec_action_drop);
1247                         break;
1248                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1249                         break;
1250                 case RTE_FLOW_ACTION_TYPE_RSS:
1251                         break;
1252                 case RTE_FLOW_ACTION_TYPE_COUNT:
1253 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1254         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1255                         size += sizeof(struct ibv_flow_spec_counter_action);
1256 #endif
1257                         break;
1258                 default:
1259                         break;
1260                 }
1261         }
1262         return size;
1263 }
1264
1265 /**
1266  * Calculate the required bytes that are needed for the item part of the verbs
1267  * flow.
1268  *
1269  * @param[in] items
1270  *   Pointer to the list of items.
1271  *
1272  * @return
1273  *   The size of the memory needed for all items.
1274  */
1275 static int
1276 flow_verbs_get_items_size(const struct rte_flow_item items[])
1277 {
1278         int size = 0;
1279
1280         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1281                 switch (items->type) {
1282                 case RTE_FLOW_ITEM_TYPE_VOID:
1283                         break;
1284                 case RTE_FLOW_ITEM_TYPE_ETH:
1285                         size += sizeof(struct ibv_flow_spec_eth);
1286                         break;
1287                 case RTE_FLOW_ITEM_TYPE_VLAN:
1288                         size += sizeof(struct ibv_flow_spec_eth);
1289                         break;
1290                 case RTE_FLOW_ITEM_TYPE_IPV4:
1291                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1292                         break;
1293                 case RTE_FLOW_ITEM_TYPE_IPV6:
1294                         size += sizeof(struct ibv_flow_spec_ipv6);
1295                         break;
1296                 case RTE_FLOW_ITEM_TYPE_UDP:
1297                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1298                         break;
1299                 case RTE_FLOW_ITEM_TYPE_TCP:
1300                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1301                         break;
1302                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1303                         size += sizeof(struct ibv_flow_spec_tunnel);
1304                         break;
1305                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1306                         size += sizeof(struct ibv_flow_spec_tunnel);
1307                         break;
1308 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1309                 case RTE_FLOW_ITEM_TYPE_GRE:
1310                         size += sizeof(struct ibv_flow_spec_gre);
1311                         break;
1312                 case RTE_FLOW_ITEM_TYPE_MPLS:
1313                         size += sizeof(struct ibv_flow_spec_mpls);
1314                         break;
1315 #else
1316                 case RTE_FLOW_ITEM_TYPE_GRE:
1317                         size += sizeof(struct ibv_flow_spec_tunnel);
1318                         break;
1319 #endif
1320                 default:
1321                         break;
1322                 }
1323         }
1324         return size;
1325 }
1326
1327 /**
1328  * Internal preparation function. Allocate mlx5_flow with the required size.
1329  * The required size is calculate based on the actions and items. This function
1330  * also returns the detected actions and items for later use.
1331  *
1332  * @param[in] attr
1333  *   Pointer to the flow attributes.
1334  * @param[in] items
1335  *   Pointer to the list of items.
1336  * @param[in] actions
1337  *   Pointer to the list of actions.
1338  * @param[out] error
1339  *   Pointer to the error structure.
1340  *
1341  * @return
1342  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1343  *   is set.
1344  */
1345 static struct mlx5_flow *
1346 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1347                    const struct rte_flow_item items[],
1348                    const struct rte_flow_action actions[],
1349                    struct rte_flow_error *error)
1350 {
1351         uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1352         struct mlx5_flow *flow;
1353
1354         size += flow_verbs_get_actions_size(actions);
1355         size += flow_verbs_get_items_size(items);
1356         flow = rte_calloc(__func__, 1, size, 0);
1357         if (!flow) {
1358                 rte_flow_error_set(error, ENOMEM,
1359                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1360                                    "not enough memory to create flow");
1361                 return NULL;
1362         }
1363         flow->verbs.attr = (void *)(flow + 1);
1364         flow->verbs.specs =
1365                 (uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1366         return flow;
1367 }
1368
1369 /**
1370  * Fill the flow with verb spec.
1371  *
1372  * @param[in] dev
1373  *   Pointer to Ethernet device.
1374  * @param[in, out] dev_flow
1375  *   Pointer to the mlx5 flow.
1376  * @param[in] attr
1377  *   Pointer to the flow attributes.
1378  * @param[in] items
1379  *   Pointer to the list of items.
1380  * @param[in] actions
1381  *   Pointer to the list of actions.
1382  * @param[out] error
1383  *   Pointer to the error structure.
1384  *
1385  * @return
1386  *   0 on success, a negative errno value otherwise and rte_errno is set.
1387  */
1388 static int
1389 flow_verbs_translate(struct rte_eth_dev *dev,
1390                      struct mlx5_flow *dev_flow,
1391                      const struct rte_flow_attr *attr,
1392                      const struct rte_flow_item items[],
1393                      const struct rte_flow_action actions[],
1394                      struct rte_flow_error *error)
1395 {
1396         struct rte_flow *flow = dev_flow->flow;
1397         uint64_t item_flags = 0;
1398         uint64_t action_flags = 0;
1399         uint64_t priority = attr->priority;
1400         uint32_t subpriority = 0;
1401         struct priv *priv = dev->data->dev_private;
1402
1403         if (priority == MLX5_FLOW_PRIO_RSVD)
1404                 priority = priv->config.flow_prio - 1;
1405         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1406                 int ret;
1407
1408                 switch (actions->type) {
1409                 case RTE_FLOW_ACTION_TYPE_VOID:
1410                         break;
1411                 case RTE_FLOW_ACTION_TYPE_FLAG:
1412                         flow_verbs_translate_action_flag(dev_flow, actions);
1413                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1414                         break;
1415                 case RTE_FLOW_ACTION_TYPE_MARK:
1416                         flow_verbs_translate_action_mark(dev_flow, actions);
1417                         action_flags |= MLX5_FLOW_ACTION_MARK;
1418                         break;
1419                 case RTE_FLOW_ACTION_TYPE_DROP:
1420                         flow_verbs_translate_action_drop(dev_flow, actions);
1421                         action_flags |= MLX5_FLOW_ACTION_DROP;
1422                         break;
1423                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1424                         flow_verbs_translate_action_queue(dev_flow, actions);
1425                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1426                         break;
1427                 case RTE_FLOW_ACTION_TYPE_RSS:
1428                         flow_verbs_translate_action_rss(dev_flow, actions);
1429                         action_flags |= MLX5_FLOW_ACTION_RSS;
1430                         break;
1431                 case RTE_FLOW_ACTION_TYPE_COUNT:
1432                         ret = flow_verbs_translate_action_count(dev_flow,
1433                                                                 actions,
1434                                                                 dev, error);
1435                         if (ret < 0)
1436                                 return ret;
1437                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1438                         break;
1439                 default:
1440                         return rte_flow_error_set(error, ENOTSUP,
1441                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1442                                                   actions,
1443                                                   "action not supported");
1444                 }
1445         }
1446         flow->actions = action_flags;
1447         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1448                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1449
1450                 switch (items->type) {
1451                 case RTE_FLOW_ITEM_TYPE_VOID:
1452                         break;
1453                 case RTE_FLOW_ITEM_TYPE_ETH:
1454                         flow_verbs_translate_item_eth(dev_flow, items,
1455                                                       item_flags);
1456                         subpriority = MLX5_PRIORITY_MAP_L2;
1457                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1458                                                MLX5_FLOW_LAYER_OUTER_L2;
1459                         break;
1460                 case RTE_FLOW_ITEM_TYPE_VLAN:
1461                         flow_verbs_translate_item_vlan(dev_flow, items,
1462                                                        item_flags);
1463                         subpriority = MLX5_PRIORITY_MAP_L2;
1464                         item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1465                                                 MLX5_FLOW_LAYER_INNER_VLAN) :
1466                                                (MLX5_FLOW_LAYER_OUTER_L2 |
1467                                                 MLX5_FLOW_LAYER_OUTER_VLAN);
1468                         break;
1469                 case RTE_FLOW_ITEM_TYPE_IPV4:
1470                         flow_verbs_translate_item_ipv4(dev_flow, items,
1471                                                        item_flags);
1472                         subpriority = MLX5_PRIORITY_MAP_L3;
1473                         dev_flow->verbs.hash_fields |=
1474                                 mlx5_flow_hashfields_adjust
1475                                         (dev_flow, tunnel,
1476                                          MLX5_IPV4_LAYER_TYPES,
1477                                          MLX5_IPV4_IBV_RX_HASH);
1478                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1479                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1480                         break;
1481                 case RTE_FLOW_ITEM_TYPE_IPV6:
1482                         flow_verbs_translate_item_ipv6(dev_flow, items,
1483                                                        item_flags);
1484                         subpriority = MLX5_PRIORITY_MAP_L3;
1485                         dev_flow->verbs.hash_fields |=
1486                                 mlx5_flow_hashfields_adjust
1487                                         (dev_flow, tunnel,
1488                                          MLX5_IPV6_LAYER_TYPES,
1489                                          MLX5_IPV6_IBV_RX_HASH);
1490                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1491                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1492                         break;
1493                 case RTE_FLOW_ITEM_TYPE_TCP:
1494                         flow_verbs_translate_item_tcp(dev_flow, items,
1495                                                       item_flags);
1496                         subpriority = MLX5_PRIORITY_MAP_L4;
1497                         dev_flow->verbs.hash_fields |=
1498                                 mlx5_flow_hashfields_adjust
1499                                         (dev_flow, tunnel, ETH_RSS_TCP,
1500                                          (IBV_RX_HASH_SRC_PORT_TCP |
1501                                           IBV_RX_HASH_DST_PORT_TCP));
1502                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1503                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1504                         break;
1505                 case RTE_FLOW_ITEM_TYPE_UDP:
1506                         flow_verbs_translate_item_udp(dev_flow, items,
1507                                                       item_flags);
1508                         subpriority = MLX5_PRIORITY_MAP_L4;
1509                         dev_flow->verbs.hash_fields |=
1510                                 mlx5_flow_hashfields_adjust
1511                                         (dev_flow, tunnel, ETH_RSS_UDP,
1512                                          (IBV_RX_HASH_SRC_PORT_UDP |
1513                                           IBV_RX_HASH_DST_PORT_UDP));
1514                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1515                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1516                         break;
1517                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1518                         flow_verbs_translate_item_vxlan(dev_flow, items,
1519                                                         item_flags);
1520                         subpriority = MLX5_PRIORITY_MAP_L2;
1521                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1522                         break;
1523                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1524                         flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1525                                                             item_flags);
1526                         subpriority = MLX5_PRIORITY_MAP_L2;
1527                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1528                         break;
1529                 case RTE_FLOW_ITEM_TYPE_GRE:
1530                         flow_verbs_translate_item_gre(dev_flow, items,
1531                                                       item_flags);
1532                         subpriority = MLX5_PRIORITY_MAP_L2;
1533                         item_flags |= MLX5_FLOW_LAYER_GRE;
1534                         break;
1535                 case RTE_FLOW_ITEM_TYPE_MPLS:
1536                         flow_verbs_translate_item_mpls(dev_flow, items,
1537                                                        item_flags);
1538                         subpriority = MLX5_PRIORITY_MAP_L2;
1539                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1540                         break;
1541                 default:
1542                         return rte_flow_error_set(error, ENOTSUP,
1543                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1544                                                   NULL,
1545                                                   "item not supported");
1546                 }
1547         }
1548         dev_flow->layers = item_flags;
1549         dev_flow->verbs.attr->priority =
1550                 mlx5_flow_adjust_priority(dev, priority, subpriority);
1551         return 0;
1552 }
1553
1554 /**
1555  * Remove the flow from the NIC but keeps it in memory.
1556  *
1557  * @param[in] dev
1558  *   Pointer to the Ethernet device structure.
1559  * @param[in, out] flow
1560  *   Pointer to flow structure.
1561  */
1562 static void
1563 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1564 {
1565         struct mlx5_flow_verbs *verbs;
1566         struct mlx5_flow *dev_flow;
1567
1568         if (!flow)
1569                 return;
1570         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1571                 verbs = &dev_flow->verbs;
1572                 if (verbs->flow) {
1573                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1574                         verbs->flow = NULL;
1575                 }
1576                 if (verbs->hrxq) {
1577                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1578                                 mlx5_hrxq_drop_release(dev);
1579                         else
1580                                 mlx5_hrxq_release(dev, verbs->hrxq);
1581                         verbs->hrxq = NULL;
1582                 }
1583         }
1584 }
1585
1586 /**
1587  * Remove the flow from the NIC and the memory.
1588  *
1589  * @param[in] dev
1590  *   Pointer to the Ethernet device structure.
1591  * @param[in, out] flow
1592  *   Pointer to flow structure.
1593  */
1594 static void
1595 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1596 {
1597         struct mlx5_flow *dev_flow;
1598
1599         if (!flow)
1600                 return;
1601         flow_verbs_remove(dev, flow);
1602         while (!LIST_EMPTY(&flow->dev_flows)) {
1603                 dev_flow = LIST_FIRST(&flow->dev_flows);
1604                 LIST_REMOVE(dev_flow, next);
1605                 rte_free(dev_flow);
1606         }
1607         if (flow->counter) {
1608                 flow_verbs_counter_release(flow->counter);
1609                 flow->counter = NULL;
1610         }
1611 }
1612
1613 /**
1614  * Apply the flow to the NIC.
1615  *
1616  * @param[in] dev
1617  *   Pointer to the Ethernet device structure.
1618  * @param[in, out] flow
1619  *   Pointer to flow structure.
1620  * @param[out] error
1621  *   Pointer to error structure.
1622  *
1623  * @return
1624  *   0 on success, a negative errno value otherwise and rte_errno is set.
1625  */
1626 static int
1627 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1628                  struct rte_flow_error *error)
1629 {
1630         struct mlx5_flow_verbs *verbs;
1631         struct mlx5_flow *dev_flow;
1632         int err;
1633
1634         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1635                 verbs = &dev_flow->verbs;
1636                 if (flow->actions & MLX5_FLOW_ACTION_DROP) {
1637                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
1638                         if (!verbs->hrxq) {
1639                                 rte_flow_error_set
1640                                         (error, errno,
1641                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1642                                          "cannot get drop hash queue");
1643                                 goto error;
1644                         }
1645                 } else {
1646                         struct mlx5_hrxq *hrxq;
1647
1648                         hrxq = mlx5_hrxq_get(dev, flow->key,
1649                                              MLX5_RSS_HASH_KEY_LEN,
1650                                              verbs->hash_fields,
1651                                              (*flow->queue),
1652                                              flow->rss.queue_num);
1653                         if (!hrxq)
1654                                 hrxq = mlx5_hrxq_new(dev, flow->key,
1655                                                      MLX5_RSS_HASH_KEY_LEN,
1656                                                      verbs->hash_fields,
1657                                                      (*flow->queue),
1658                                                      flow->rss.queue_num,
1659                                                      !!(dev_flow->layers &
1660                                                       MLX5_FLOW_LAYER_TUNNEL));
1661                         if (!hrxq) {
1662                                 rte_flow_error_set
1663                                         (error, rte_errno,
1664                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1665                                          "cannot get hash queue");
1666                                 goto error;
1667                         }
1668                         verbs->hrxq = hrxq;
1669                 }
1670                 verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
1671                                                      verbs->attr);
1672                 if (!verbs->flow) {
1673                         rte_flow_error_set(error, errno,
1674                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1675                                            NULL,
1676                                            "hardware refuses to create flow");
1677                         goto error;
1678                 }
1679         }
1680         return 0;
1681 error:
1682         err = rte_errno; /* Save rte_errno before cleanup. */
1683         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1684                 verbs = &dev_flow->verbs;
1685                 if (verbs->hrxq) {
1686                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1687                                 mlx5_hrxq_drop_release(dev);
1688                         else
1689                                 mlx5_hrxq_release(dev, verbs->hrxq);
1690                         verbs->hrxq = NULL;
1691                 }
1692         }
1693         rte_errno = err; /* Restore rte_errno. */
1694         return -rte_errno;
1695 }
1696
1697 /**
1698  * Query a flow.
1699  *
1700  * @see rte_flow_query()
1701  * @see rte_flow_ops
1702  */
1703 static int
1704 flow_verbs_query(struct rte_eth_dev *dev,
1705                  struct rte_flow *flow,
1706                  const struct rte_flow_action *actions,
1707                  void *data,
1708                  struct rte_flow_error *error)
1709 {
1710         int ret = -EINVAL;
1711
1712         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1713                 switch (actions->type) {
1714                 case RTE_FLOW_ACTION_TYPE_VOID:
1715                         break;
1716                 case RTE_FLOW_ACTION_TYPE_COUNT:
1717                         ret = flow_verbs_counter_query(dev, flow, data, error);
1718                         break;
1719                 default:
1720                         return rte_flow_error_set(error, ENOTSUP,
1721                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1722                                                   actions,
1723                                                   "action not supported");
1724                 }
1725         }
1726         return ret;
1727 }
1728
1729 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1730         .validate = flow_verbs_validate,
1731         .prepare = flow_verbs_prepare,
1732         .translate = flow_verbs_translate,
1733         .apply = flow_verbs_apply,
1734         .remove = flow_verbs_remove,
1735         .destroy = flow_verbs_destroy,
1736         .query = flow_verbs_query,
1737 };