New upstream version 18.11-rc3
[deb_dpdk.git] / drivers / net / mlx5 / mlx5_flow_verbs.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_prm.h"
33 #include "mlx5_glue.h"
34 #include "mlx5_flow.h"
35
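/*
 * Select IBV_FLOW_SPEC_INNER when the item flags indicate a tunnel so that
 * the generated specification matches the inner packet headers, 0 otherwise.
 */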
36 #define VERBS_SPEC_INNER(item_flags) \
37         (!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
38
39 /**
40  * Create Verbs flow counter with Verbs library.
41  *
42  * @param[in] dev
43  *   Pointer to the Ethernet device structure.
44  * @param[in, out] counter
45  *   mlx5 flow counter object containing the counter id; the handle of
46  *   the created Verbs flow counter is returned in the cs field (if
47  *   counters are supported).
48  *
49  * @return
50  *   0 on success, a negative errno value otherwise and
51  *   rte_errno is set.
52  */
53 static int
54 flow_verbs_counter_create(struct rte_eth_dev *dev,
55                           struct mlx5_flow_counter *counter)
56 {
57 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
58         struct priv *priv = dev->data->dev_private;
59         struct ibv_counter_set_init_attr init = {
60                          .counter_set_id = counter->id};
61
62         counter->cs = mlx5_glue->create_counter_set(priv->ctx, &init);
63         if (!counter->cs) {
64                 rte_errno = ENOTSUP;
65                 return -ENOTSUP;
66         }
67         return 0;
68 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
69         struct priv *priv = dev->data->dev_private;
70         struct ibv_counters_init_attr init = {0};
71         struct ibv_counter_attach_attr attach = {0};
72         int ret;
73
74         counter->cs = mlx5_glue->create_counters(priv->ctx, &init);
75         if (!counter->cs) {
76                 rte_errno = ENOTSUP;
77                 return -ENOTSUP;
78         }
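        /* Attach two counters: index 0 accumulates packets, index 1 bytes. */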
79         attach.counter_desc = IBV_COUNTER_PACKETS;
80         attach.index = 0;
81         ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
82         if (!ret) {
83                 attach.counter_desc = IBV_COUNTER_BYTES;
84                 attach.index = 1;
85                 ret = mlx5_glue->attach_counters
86                                         (counter->cs, &attach, NULL);
87         }
88         if (ret) {
89                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
90                 counter->cs = NULL;
91                 rte_errno = ret;
92                 return -ret;
93         }
94         return 0;
95 #else
96         (void)dev;
97         (void)counter;
98         rte_errno = ENOTSUP;
99         return -ENOTSUP;
100 #endif
101 }
102
103 /**
104  * Get a flow counter.
105  *
106  * @param[in] dev
107  *   Pointer to the Ethernet device structure.
108  * @param[in] shared
109  *   Indicate if this counter is shared with other flows.
110  * @param[in] id
111  *   Counter identifier.
112  *
113  * @return
114  *   A pointer to the counter, NULL otherwise and rte_errno is set.
115  */
116 static struct mlx5_flow_counter *
117 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
118 {
119         struct priv *priv = dev->data->dev_private;
120         struct mlx5_flow_counter *cnt;
121         int ret;
122
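        /* Reuse an existing shared counter with a matching id if one exists. */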
123         LIST_FOREACH(cnt, &priv->flow_counters, next) {
124                 if (!cnt->shared || cnt->shared != shared)
125                         continue;
126                 if (cnt->id != id)
127                         continue;
128                 cnt->ref_cnt++;
129                 return cnt;
130         }
131         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
132         if (!cnt) {
133                 rte_errno = ENOMEM;
134                 return NULL;
135         }
136         cnt->id = id;
137         cnt->shared = shared;
138         cnt->ref_cnt = 1;
139         cnt->hits = 0;
140         cnt->bytes = 0;
141         /* Create counter with Verbs. */
142         ret = flow_verbs_counter_create(dev, cnt);
143         if (!ret) {
144                 LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
145                 return cnt;
146         }
147         /* Some error occurred in Verbs library. */
148         rte_free(cnt);
149         rte_errno = -ret;
150         return NULL;
151 }
152
153 /**
154  * Release a flow counter.
155  *
156  * @param[in] counter
157  *   Pointer to the counter handle.
158  */
159 static void
160 flow_verbs_counter_release(struct mlx5_flow_counter *counter)
161 {
162         if (--counter->ref_cnt == 0) {
163 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
164                 claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
165 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
166                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
167 #endif
168                 LIST_REMOVE(counter, next);
169                 rte_free(counter);
170         }
171 }
172
173 /**
174  * Query a flow counter via Verbs library call.
175  *
176  * @see rte_flow_query()
177  * @see rte_flow_ops
178  */
179 static int
180 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
181                          struct rte_flow *flow, void *data,
182                          struct rte_flow_error *error)
183 {
184 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
185         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
186         if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
187                 struct rte_flow_query_count *qc = data;
188                 uint64_t counters[2] = {0, 0};
189 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
190                 struct ibv_query_counter_set_attr query_cs_attr = {
191                         .cs = flow->counter->cs,
192                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
193                 };
194                 struct ibv_counter_set_data query_out = {
195                         .out = counters,
196                         .outlen = 2 * sizeof(uint64_t),
197                 };
198                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
199                                                        &query_out);
200 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
201                 int err = mlx5_glue->query_counters
202                                (flow->counter->cs, counters,
203                                 RTE_DIM(counters),
204                                 IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
205 #endif
206                 if (err)
207                         return rte_flow_error_set
208                                 (error, err,
209                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
210                                  NULL,
211                                  "cannot read counter");
212                 qc->hits_set = 1;
213                 qc->bytes_set = 1;
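                /* Report hits/bytes relative to the values saved at the last reset. */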
214                 qc->hits = counters[0] - flow->counter->hits;
215                 qc->bytes = counters[1] - flow->counter->bytes;
216                 if (qc->reset) {
217                         flow->counter->hits = counters[0];
218                         flow->counter->bytes = counters[1];
219                 }
220                 return 0;
221         }
222         return rte_flow_error_set(error, EINVAL,
223                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
224                                   NULL,
225                                   "flow does not have counter");
226 #else
227         (void)flow;
228         (void)data;
229         return rte_flow_error_set(error, ENOTSUP,
230                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
231                                   NULL,
232                                   "counters are not available");
233 #endif
234 }
235
236 /**
237  * Add a Verbs specification into @p verbs.
238  *
239  * @param[out] verbs
240  *   Pointer to verbs structure.
241  * @param[in] src
242  *   Specification to copy.
243  * @param[in] size
244  *   Size in bytes of the specification to copy.
245  */
246 static void
247 flow_verbs_spec_add(struct mlx5_flow_verbs *verbs, void *src, unsigned int size)
248 {
249         void *dst;
250
251         if (!verbs)
252                 return;
253         assert(verbs->specs);
254         dst = (void *)(verbs->specs + verbs->size);
255         memcpy(dst, src, size);
256         ++verbs->attr->num_of_specs;
257         verbs->size += size;
258 }
259
260 /**
261  * Convert the @p item into a Verbs specification. This function assumes that
262  * the input is valid and that there is space to insert the requested item
263  * into the flow.
264  *
265  * @param[in, out] dev_flow
266  *   Pointer to dev_flow structure.
267  * @param[in] item
268  *   Item specification.
269  * @param[in] item_flags
270  *   Parsed item flags.
271  */
272 static void
273 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
274                               const struct rte_flow_item *item,
275                               uint64_t item_flags)
276 {
277         const struct rte_flow_item_eth *spec = item->spec;
278         const struct rte_flow_item_eth *mask = item->mask;
279         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
280         struct ibv_flow_spec_eth eth = {
281                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
282                 .size = size,
283         };
284
285         if (!mask)
286                 mask = &rte_flow_item_eth_mask;
287         if (spec) {
288                 unsigned int i;
289
290                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
291                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
292                 eth.val.ether_type = spec->type;
293                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
294                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
295                 eth.mask.ether_type = mask->type;
296                 /* Remove unwanted bits from values. */
297                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
298                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
299                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
300                 }
301                 eth.val.ether_type &= eth.mask.ether_type;
302         }
303         flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
304 }
305
306 /**
307  * Update the VLAN tag in the Verbs Ethernet specification.
308  * This function assumes that the input is valid and that an Ethernet
309  * specification is already present in the Verbs attributes.
310  *
311  * @param[in, out] attr
312  *   Pointer to Verbs attributes structure.
313  * @param[in] eth
314  *   Verbs structure containing the VLAN information to copy.
315  */
316 static void
317 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
318                             struct ibv_flow_spec_eth *eth)
319 {
320         unsigned int i;
321         const enum ibv_flow_spec_type search = eth->type;
322         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
323                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
324
325         for (i = 0; i != attr->num_of_specs; ++i) {
326                 if (hdr->type == search) {
327                         struct ibv_flow_spec_eth *e =
328                                 (struct ibv_flow_spec_eth *)hdr;
329
330                         e->val.vlan_tag = eth->val.vlan_tag;
331                         e->mask.vlan_tag = eth->mask.vlan_tag;
332                         e->val.ether_type = eth->val.ether_type;
333                         e->mask.ether_type = eth->mask.ether_type;
334                         break;
335                 }
336                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
337         }
338 }
339
340 /**
341  * Convert the @p item into a Verbs specification. This function assumes that
342  * the input is valid and that there is space to insert the requested item
343  * into the flow.
344  *
345  * @param[in, out] dev_flow
346  *   Pointer to dev_flow structure.
347  * @param[in] item
348  *   Item specification.
349  * @param[in] item_flags
350  *   Parsed item flags.
351  */
352 static void
353 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
354                                const struct rte_flow_item *item,
355                                uint64_t item_flags)
356 {
357         const struct rte_flow_item_vlan *spec = item->spec;
358         const struct rte_flow_item_vlan *mask = item->mask;
359         unsigned int size = sizeof(struct ibv_flow_spec_eth);
360         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
361         struct ibv_flow_spec_eth eth = {
362                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
363                 .size = size,
364         };
365         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
366                                       MLX5_FLOW_LAYER_OUTER_L2;
367
368         if (!mask)
369                 mask = &rte_flow_item_vlan_mask;
370         if (spec) {
371                 eth.val.vlan_tag = spec->tci;
372                 eth.mask.vlan_tag = mask->tci;
373                 eth.val.vlan_tag &= eth.mask.vlan_tag;
374                 eth.val.ether_type = spec->inner_type;
375                 eth.mask.ether_type = mask->inner_type;
376                 eth.val.ether_type &= eth.mask.ether_type;
377         }
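        /*
         * If an Ethernet spec was already added for this layer, merge the
         * VLAN fields into it instead of adding a second Ethernet spec.
         */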
378         if (!(item_flags & l2m))
379                 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
380         else
381                 flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
382 }
383
384 /**
385  * Convert the @p item into a Verbs specification. This function assumes that
386  * the input is valid and that there is space to insert the requested item
387  * into the flow.
388  *
389  * @param[in, out] dev_flow
390  *   Pointer to dev_flow structure.
391  * @param[in] item
392  *   Item specification.
393  * @param[in] item_flags
394  *   Parsed item flags.
395  */
396 static void
397 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
398                                const struct rte_flow_item *item,
399                                uint64_t item_flags)
400 {
401         const struct rte_flow_item_ipv4 *spec = item->spec;
402         const struct rte_flow_item_ipv4 *mask = item->mask;
403         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
404         struct ibv_flow_spec_ipv4_ext ipv4 = {
405                 .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
406                 .size = size,
407         };
408
409         if (!mask)
410                 mask = &rte_flow_item_ipv4_mask;
411         if (spec) {
412                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
413                         .src_ip = spec->hdr.src_addr,
414                         .dst_ip = spec->hdr.dst_addr,
415                         .proto = spec->hdr.next_proto_id,
416                         .tos = spec->hdr.type_of_service,
417                 };
418                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
419                         .src_ip = mask->hdr.src_addr,
420                         .dst_ip = mask->hdr.dst_addr,
421                         .proto = mask->hdr.next_proto_id,
422                         .tos = mask->hdr.type_of_service,
423                 };
424                 /* Remove unwanted bits from values. */
425                 ipv4.val.src_ip &= ipv4.mask.src_ip;
426                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
427                 ipv4.val.proto &= ipv4.mask.proto;
428                 ipv4.val.tos &= ipv4.mask.tos;
429         }
430         flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
431 }
432
433 /**
434  * Convert the @p item into a Verbs specification. This function assumes that
435  * the input is valid and that there is space to insert the requested item
436  * into the flow.
437  *
438  * @param[in, out] dev_flow
439  *   Pointer to dev_flow structure.
440  * @param[in] item
441  *   Item specification.
442  * @param[in] item_flags
443  *   Parsed item flags.
444  */
445 static void
446 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
447                                const struct rte_flow_item *item,
448                                uint64_t item_flags)
449 {
450         const struct rte_flow_item_ipv6 *spec = item->spec;
451         const struct rte_flow_item_ipv6 *mask = item->mask;
452         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
453         struct ibv_flow_spec_ipv6 ipv6 = {
454                 .type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
455                 .size = size,
456         };
457
458         if (!mask)
459                 mask = &rte_flow_item_ipv6_mask;
460         if (spec) {
461                 unsigned int i;
462                 uint32_t vtc_flow_val;
463                 uint32_t vtc_flow_mask;
464
465                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
466                        RTE_DIM(ipv6.val.src_ip));
467                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
468                        RTE_DIM(ipv6.val.dst_ip));
469                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
470                        RTE_DIM(ipv6.mask.src_ip));
471                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
472                        RTE_DIM(ipv6.mask.dst_ip));
473                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
474                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
475                 ipv6.val.flow_label =
476                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
477                                          IPV6_HDR_FL_SHIFT);
478                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
479                                          IPV6_HDR_TC_SHIFT;
480                 ipv6.val.next_hdr = spec->hdr.proto;
481                 ipv6.val.hop_limit = spec->hdr.hop_limits;
482                 ipv6.mask.flow_label =
483                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
484                                          IPV6_HDR_FL_SHIFT);
485                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
486                                           IPV6_HDR_TC_SHIFT;
487                 ipv6.mask.next_hdr = mask->hdr.proto;
488                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
489                 /* Remove unwanted bits from values. */
490                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
491                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
492                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
493                 }
494                 ipv6.val.flow_label &= ipv6.mask.flow_label;
495                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
496                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
497                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
498         }
499         flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
500 }
501
502 /**
503  * Convert the @p item into a Verbs specification. This function assumes that
504  * the input is valid and that there is space to insert the requested item
505  * into the flow.
506  *
507  * @param[in, out] dev_flow
508  *   Pointer to dev_flow structure.
509  * @param[in] item
510  *   Item specification.
511  * @param[in] item_flags
512  *   Parsed item flags.
513  */
514 static void
515 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
516                               const struct rte_flow_item *item,
517                               uint64_t item_flags __rte_unused)
518 {
519         const struct rte_flow_item_tcp *spec = item->spec;
520         const struct rte_flow_item_tcp *mask = item->mask;
521         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
522         struct ibv_flow_spec_tcp_udp tcp = {
523                 .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
524                 .size = size,
525         };
526
527         if (!mask)
528                 mask = &rte_flow_item_tcp_mask;
529         if (spec) {
530                 tcp.val.dst_port = spec->hdr.dst_port;
531                 tcp.val.src_port = spec->hdr.src_port;
532                 tcp.mask.dst_port = mask->hdr.dst_port;
533                 tcp.mask.src_port = mask->hdr.src_port;
534                 /* Remove unwanted bits from values. */
535                 tcp.val.src_port &= tcp.mask.src_port;
536                 tcp.val.dst_port &= tcp.mask.dst_port;
537         }
538         flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
539 }
540
541 /**
542  * Convert the @p item into a Verbs specification. This function assumes that
543  * the input is valid and that there is space to insert the requested item
544  * into the flow.
545  *
546  * @param[in, out] dev_flow
547  *   Pointer to dev_flow structure.
548  * @param[in] item
549  *   Item specification.
550  * @param[in] item_flags
551  *   Parsed item flags.
552  */
553 static void
554 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
555                               const struct rte_flow_item *item,
556                               uint64_t item_flags __rte_unused)
557 {
558         const struct rte_flow_item_udp *spec = item->spec;
559         const struct rte_flow_item_udp *mask = item->mask;
560         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
561         struct ibv_flow_spec_tcp_udp udp = {
562                 .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
563                 .size = size,
564         };
565
566         if (!mask)
567                 mask = &rte_flow_item_udp_mask;
568         if (spec) {
569                 udp.val.dst_port = spec->hdr.dst_port;
570                 udp.val.src_port = spec->hdr.src_port;
571                 udp.mask.dst_port = mask->hdr.dst_port;
572                 udp.mask.src_port = mask->hdr.src_port;
573                 /* Remove unwanted bits from values. */
574                 udp.val.src_port &= udp.mask.src_port;
575                 udp.val.dst_port &= udp.mask.dst_port;
576         }
577         flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
578 }
579
580 /**
581  * Convert the @p item into a Verbs specification. This function assumes that
582  * the input is valid and that there is space to insert the requested item
583  * into the flow.
584  *
585  * @param[in, out] dev_flow
586  *   Pointer to dev_flow structure.
587  * @param[in] item
588  *   Item specification.
589  * @param[in] item_flags
590  *   Parsed item flags.
591  */
592 static void
593 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
594                                 const struct rte_flow_item *item,
595                                 uint64_t item_flags __rte_unused)
596 {
597         const struct rte_flow_item_vxlan *spec = item->spec;
598         const struct rte_flow_item_vxlan *mask = item->mask;
599         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
600         struct ibv_flow_spec_tunnel vxlan = {
601                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
602                 .size = size,
603         };
604         union vni {
605                 uint32_t vlan_id;
606                 uint8_t vni[4];
607         } id = { .vlan_id = 0, };
608
609         if (!mask)
610                 mask = &rte_flow_item_vxlan_mask;
611         if (spec) {
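                /* Copy the 24-bit VNI into bytes 1..3 of the 32-bit tunnel id. */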
612                 memcpy(&id.vni[1], spec->vni, 3);
613                 vxlan.val.tunnel_id = id.vlan_id;
614                 memcpy(&id.vni[1], mask->vni, 3);
615                 vxlan.mask.tunnel_id = id.vlan_id;
616                 /* Remove unwanted bits from values. */
617                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
618         }
619         flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
620 }
621
622 /**
623  * Convert the @p item into a Verbs specification. This function assumes that
624  * the input is valid and that there is space to insert the requested item
625  * into the flow.
626  *
627  * @param[in, out] dev_flow
628  *   Pointer to dev_flow structure.
629  * @param[in] item
630  *   Item specification.
631  * @param[in] item_flags
632  *   Parsed item flags.
633  */
634 static void
635 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
636                                     const struct rte_flow_item *item,
637                                     uint64_t item_flags __rte_unused)
638 {
639         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
640         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
641         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
642         struct ibv_flow_spec_tunnel vxlan_gpe = {
643                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
644                 .size = size,
645         };
646         union vni {
647                 uint32_t vlan_id;
648                 uint8_t vni[4];
649         } id = { .vlan_id = 0, };
650
651         if (!mask)
652                 mask = &rte_flow_item_vxlan_gpe_mask;
653         if (spec) {
654                 memcpy(&id.vni[1], spec->vni, 3);
655                 vxlan_gpe.val.tunnel_id = id.vlan_id;
656                 memcpy(&id.vni[1], mask->vni, 3);
657                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
658                 /* Remove unwanted bits from values. */
659                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
660         }
661         flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
662 }
663
664 /**
665  * Update the protocol in Verbs IPv4/IPv6 spec.
666  *
667  * @param[in, out] attr
668  *   Pointer to Verbs attributes structure.
669  * @param[in] search
670  *   Specification type to search in order to update the IP protocol.
671  * @param[in] protocol
672  *   Protocol value to set if none is present in the specification.
673  */
674 static void
675 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
676                                        enum ibv_flow_spec_type search,
677                                        uint8_t protocol)
678 {
679         unsigned int i;
680         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
681                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
682
683         if (!attr)
684                 return;
685         for (i = 0; i != attr->num_of_specs; ++i) {
686                 if (hdr->type == search) {
687                         union {
688                                 struct ibv_flow_spec_ipv4_ext *ipv4;
689                                 struct ibv_flow_spec_ipv6 *ipv6;
690                         } ip;
691
692                         switch (search) {
693                         case IBV_FLOW_SPEC_IPV4_EXT:
694                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
695                                 if (!ip.ipv4->val.proto) {
696                                         ip.ipv4->val.proto = protocol;
697                                         ip.ipv4->mask.proto = 0xff;
698                                 }
699                                 break;
700                         case IBV_FLOW_SPEC_IPV6:
701                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
702                                 if (!ip.ipv6->val.next_hdr) {
703                                         ip.ipv6->val.next_hdr = protocol;
704                                         ip.ipv6->mask.next_hdr = 0xff;
705                                 }
706                                 break;
707                         default:
708                                 break;
709                         }
710                         break;
711                 }
712                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
713         }
714 }
715
716 /**
717  * Convert the @p item into a Verbs specification. This function assumes that
718  * the input is valid and that there is space to insert the requested item
719  * into the flow.
720  *
721  * @param[in, out] dev_flow
722  *   Pointer to dev_flow structure.
723  * @param[in] item
724  *   Item specification.
725  * @param[in] item_flags
726  *   Parsed item flags.
727  */
728 static void
729 flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
730                               const struct rte_flow_item *item __rte_unused,
731                               uint64_t item_flags)
732 {
733         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
734 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
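        /*
         * Without HAVE_IBV_DEVICE_MPLS_SUPPORT the GRE spec type is not
         * available; match GRE with a generic tunnel specification instead.
         */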
735         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
736         struct ibv_flow_spec_tunnel tunnel = {
737                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
738                 .size = size,
739         };
740 #else
741         const struct rte_flow_item_gre *spec = item->spec;
742         const struct rte_flow_item_gre *mask = item->mask;
743         unsigned int size = sizeof(struct ibv_flow_spec_gre);
744         struct ibv_flow_spec_gre tunnel = {
745                 .type = IBV_FLOW_SPEC_GRE,
746                 .size = size,
747         };
748
749         if (!mask)
750                 mask = &rte_flow_item_gre_mask;
751         if (spec) {
752                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
753                 tunnel.val.protocol = spec->protocol;
754                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
755                 tunnel.mask.protocol = mask->protocol;
756                 /* Remove unwanted bits from values. */
757                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
758                 tunnel.val.protocol &= tunnel.mask.protocol;
759                 tunnel.val.key &= tunnel.mask.key;
760         }
761 #endif
762         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
763                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
764                                                        IBV_FLOW_SPEC_IPV4_EXT,
765                                                        IPPROTO_GRE);
766         else
767                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
768                                                        IBV_FLOW_SPEC_IPV6,
769                                                        IPPROTO_GRE);
770         flow_verbs_spec_add(verbs, &tunnel, size);
771 }
772
773 /**
774  * Convert the @p item into a Verbs specification. This function assumes that
775  * the input is valid and that there is space to insert the requested item
776  * into the flow.
777  *
778  * @param[in, out] dev_flow
779  *   Pointer to dev_flow structure.
780  * @param[in] item
781  *   Item specification.
782  * @param[in] item_flags
783  *   Parsed item flags.
784  */
785 static void
786 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
787                                const struct rte_flow_item *item __rte_unused,
788                                uint64_t item_flags __rte_unused)
789 {
790 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
791         const struct rte_flow_item_mpls *spec = item->spec;
792         const struct rte_flow_item_mpls *mask = item->mask;
793         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
794         struct ibv_flow_spec_mpls mpls = {
795                 .type = IBV_FLOW_SPEC_MPLS,
796                 .size = size,
797         };
798
799         if (!mask)
800                 mask = &rte_flow_item_mpls_mask;
801         if (spec) {
802                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
803                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
804                 /* Remove unwanted bits from values.  */
805                 mpls.val.label &= mpls.mask.label;
806         }
807         flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
808 #endif
809 }
810
811 /**
812  * Convert the @p action into a Verbs specification. This function assumes that
813  * the input is valid and that there is space to insert the requested action
814  * into the flow.
815  *
816  * @param[in] dev_flow
817  *   Pointer to mlx5_flow.
818  * @param[in] action
819  *   Action configuration.
820  */
821 static void
822 flow_verbs_translate_action_drop
823         (struct mlx5_flow *dev_flow,
824          const struct rte_flow_action *action __rte_unused)
825 {
826         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
827         struct ibv_flow_spec_action_drop drop = {
828                         .type = IBV_FLOW_SPEC_ACTION_DROP,
829                         .size = size,
830         };
831
832         flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
833 }
834
835 /**
836  * Convert the @p action into a Verbs specification. This function assumes that
837  * the input is valid and that there is space to insert the requested action
838  * into the flow.
839  *
840  * @param[in] dev_flow
841  *   Pointer to mlx5_flow.
842  * @param[in] action
843  *   Action configuration.
844  */
845 static void
846 flow_verbs_translate_action_queue(struct mlx5_flow *dev_flow,
847                                   const struct rte_flow_action *action)
848 {
849         const struct rte_flow_action_queue *queue = action->conf;
850         struct rte_flow *flow = dev_flow->flow;
851
852         if (flow->queue)
853                 (*flow->queue)[0] = queue->index;
854         flow->rss.queue_num = 1;
855 }
856
857 /**
858  * Convert the @p action into a Verbs specification. This function assumes that
859  * the input is valid and that there is space to insert the requested action
860  * into the flow.
861  *
862  * @param[in] dev_flow
863  *   Pointer to mlx5_flow.
864  * @param[in] action
865  *   Action configuration.
868  */
869 static void
870 flow_verbs_translate_action_rss(struct mlx5_flow *dev_flow,
871                                 const struct rte_flow_action *action)
872 {
873         const struct rte_flow_action_rss *rss = action->conf;
874         const uint8_t *rss_key;
875         struct rte_flow *flow = dev_flow->flow;
876
877         if (flow->queue)
878                 memcpy((*flow->queue), rss->queue,
879                        rss->queue_num * sizeof(uint16_t));
880         flow->rss.queue_num = rss->queue_num;
881         /* NULL RSS key indicates default RSS key. */
882         rss_key = !rss->key ? rss_hash_default_key : rss->key;
883         memcpy(flow->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
884         /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
885         flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types;
886         flow->rss.level = rss->level;
887 }
888
889 /**
890  * Convert the @p action into a Verbs specification. This function assumes that
891  * the input is valid and that there is space to insert the requested action
892  * into the flow.
893  *
894  * @param[in] dev_flow
895  *   Pointer to mlx5_flow.
896  * @param[in] action
897  *   Action configuration.
898  */
899 static void
900 flow_verbs_translate_action_flag
901         (struct mlx5_flow *dev_flow,
902          const struct rte_flow_action *action __rte_unused)
903 {
904         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
905         struct ibv_flow_spec_action_tag tag = {
906                 .type = IBV_FLOW_SPEC_ACTION_TAG,
907                 .size = size,
908                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
909         };
910
911         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
912 }
913
914 /**
915  * Convert the @p action into a Verbs specification. This function assumes that
916  * the input is valid and that there is space to insert the requested action
917  * into the flow.
918  *
919  * @param[in] dev_flow
920  *   Pointer to mlx5_flow.
921  * @param[in] action
922  *   Action configuration.
923  */
924 static void
925 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
926                                  const struct rte_flow_action *action)
927 {
928         const struct rte_flow_action_mark *mark = action->conf;
929         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
930         struct ibv_flow_spec_action_tag tag = {
931                 .type = IBV_FLOW_SPEC_ACTION_TAG,
932                 .size = size,
933                 .tag_id = mlx5_flow_mark_set(mark->id),
934         };
935
936         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
937 }
938
939 /**
940  * Convert the @p action into a Verbs specification. This function assumes that
941  * the input is valid and that there is space to insert the requested action
942  * into the flow.
943  *
944  * @param[in] dev_flow
945  *   Pointer to mlx5_flow.
946  * @param[in] action
947  *   Action configuration.
948  * @param[in] dev
949  *   Pointer to the Ethernet device structure.
950  * @param[out] error
951  *   Pointer to error structure.
952  *
953  * @return
954  *   0 on success, a negative errno value otherwise and rte_errno is set.
955  */
956 static int
957 flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
958                                   const struct rte_flow_action *action,
959                                   struct rte_eth_dev *dev,
960                                   struct rte_flow_error *error)
961 {
962         const struct rte_flow_action_count *count = action->conf;
963         struct rte_flow *flow = dev_flow->flow;
964 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
965         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
966         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
967         struct ibv_flow_spec_counter_action counter = {
968                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
969                 .size = size,
970         };
971 #endif
972
973         if (!flow->counter) {
974                 flow->counter = flow_verbs_counter_new(dev, count->shared,
975                                                        count->id);
976                 if (!flow->counter)
977                         return rte_flow_error_set(error, rte_errno,
978                                                   RTE_FLOW_ERROR_TYPE_ACTION,
979                                                   action,
980                                                   "cannot get counter"
981                                                   " context.");
982         }
983 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
984         counter.counter_set_handle = flow->counter->cs->handle;
985         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
986 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
987         counter.counters = flow->counter->cs;
988         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
989 #endif
990         return 0;
991 }
992
993 /**
994  * Internal validation function. For validating both actions and items.
995  *
996  * @param[in] dev
997  *   Pointer to the Ethernet device structure.
998  * @param[in] attr
999  *   Pointer to the flow attributes.
1000  * @param[in] items
1001  *   Pointer to the list of items.
1002  * @param[in] actions
1003  *   Pointer to the list of actions.
1004  * @param[out] error
1005  *   Pointer to the error structure.
1006  *
1007  * @return
1008  *   0 on success, a negative errno value otherwise and rte_errno is set.
1009  */
1010 static int
1011 flow_verbs_validate(struct rte_eth_dev *dev,
1012                     const struct rte_flow_attr *attr,
1013                     const struct rte_flow_item items[],
1014                     const struct rte_flow_action actions[],
1015                     struct rte_flow_error *error)
1016 {
1017         int ret;
1018         uint64_t action_flags = 0;
1019         uint64_t item_flags = 0;
1020         uint8_t next_protocol = 0xff;
1021
1022         if (items == NULL)
1023                 return -1;
1024         ret = mlx5_flow_validate_attributes(dev, attr, error);
1025         if (ret < 0)
1026                 return ret;
1027         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1028                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1029                 int ret = 0;
1030
1031                 switch (items->type) {
1032                 case RTE_FLOW_ITEM_TYPE_VOID:
1033                         break;
1034                 case RTE_FLOW_ITEM_TYPE_ETH:
1035                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1036                                                           error);
1037                         if (ret < 0)
1038                                 return ret;
1039                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1040                                                MLX5_FLOW_LAYER_OUTER_L2;
1041                         break;
1042                 case RTE_FLOW_ITEM_TYPE_VLAN:
1043                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1044                                                            error);
1045                         if (ret < 0)
1046                                 return ret;
1047                         item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1048                                                 MLX5_FLOW_LAYER_INNER_VLAN) :
1049                                                (MLX5_FLOW_LAYER_OUTER_L2 |
1050                                                 MLX5_FLOW_LAYER_OUTER_VLAN);
1051                         break;
1052                 case RTE_FLOW_ITEM_TYPE_IPV4:
1053                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1054                                                            error);
1055                         if (ret < 0)
1056                                 return ret;
1057                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1058                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1059                         if (items->mask != NULL &&
1060                             ((const struct rte_flow_item_ipv4 *)
1061                              items->mask)->hdr.next_proto_id) {
1062                                 next_protocol =
1063                                         ((const struct rte_flow_item_ipv4 *)
1064                                          (items->spec))->hdr.next_proto_id;
1065                                 next_protocol &=
1066                                         ((const struct rte_flow_item_ipv4 *)
1067                                          (items->mask))->hdr.next_proto_id;
1068                         } else {
1069                                 /* Reset for inner layer. */
1070                                 next_protocol = 0xff;
1071                         }
1072                         break;
1073                 case RTE_FLOW_ITEM_TYPE_IPV6:
1074                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1075                                                            error);
1076                         if (ret < 0)
1077                                 return ret;
1078                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1079                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1080                         if (items->mask != NULL &&
1081                             ((const struct rte_flow_item_ipv6 *)
1082                              items->mask)->hdr.proto) {
1083                                 next_protocol =
1084                                         ((const struct rte_flow_item_ipv6 *)
1085                                          items->spec)->hdr.proto;
1086                                 next_protocol &=
1087                                         ((const struct rte_flow_item_ipv6 *)
1088                                          items->mask)->hdr.proto;
1089                         } else {
1090                                 /* Reset for inner layer. */
1091                                 next_protocol = 0xff;
1092                         }
1093                         break;
1094                 case RTE_FLOW_ITEM_TYPE_UDP:
1095                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1096                                                           next_protocol,
1097                                                           error);
1098                         if (ret < 0)
1099                                 return ret;
1100                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1101                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1102                         break;
1103                 case RTE_FLOW_ITEM_TYPE_TCP:
1104                         ret = mlx5_flow_validate_item_tcp
1105                                                 (items, item_flags,
1106                                                  next_protocol,
1107                                                  &rte_flow_item_tcp_mask,
1108                                                  error);
1109                         if (ret < 0)
1110                                 return ret;
1111                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1112                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1113                         break;
1114                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1115                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1116                                                             error);
1117                         if (ret < 0)
1118                                 return ret;
1119                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1120                         break;
1121                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1122                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1123                                                                 item_flags,
1124                                                                 dev, error);
1125                         if (ret < 0)
1126                                 return ret;
1127                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1128                         break;
1129                 case RTE_FLOW_ITEM_TYPE_GRE:
1130                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1131                                                           next_protocol, error);
1132                         if (ret < 0)
1133                                 return ret;
1134                         item_flags |= MLX5_FLOW_LAYER_GRE;
1135                         break;
1136                 case RTE_FLOW_ITEM_TYPE_MPLS:
1137                         ret = mlx5_flow_validate_item_mpls(items, item_flags,
1138                                                            next_protocol,
1139                                                            error);
1140                         if (ret < 0)
1141                                 return ret;
1142                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1143                         break;
1144                 default:
1145                         return rte_flow_error_set(error, ENOTSUP,
1146                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1147                                                   NULL, "item not supported");
1148                 }
1149         }
1150         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1151                 switch (actions->type) {
1152                 case RTE_FLOW_ACTION_TYPE_VOID:
1153                         break;
1154                 case RTE_FLOW_ACTION_TYPE_FLAG:
1155                         ret = mlx5_flow_validate_action_flag(action_flags,
1156                                                              attr,
1157                                                              error);
1158                         if (ret < 0)
1159                                 return ret;
1160                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1161                         break;
1162                 case RTE_FLOW_ACTION_TYPE_MARK:
1163                         ret = mlx5_flow_validate_action_mark(actions,
1164                                                              action_flags,
1165                                                              attr,
1166                                                              error);
1167                         if (ret < 0)
1168                                 return ret;
1169                         action_flags |= MLX5_FLOW_ACTION_MARK;
1170                         break;
1171                 case RTE_FLOW_ACTION_TYPE_DROP:
1172                         ret = mlx5_flow_validate_action_drop(action_flags,
1173                                                              attr,
1174                                                              error);
1175                         if (ret < 0)
1176                                 return ret;
1177                         action_flags |= MLX5_FLOW_ACTION_DROP;
1178                         break;
1179                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1180                         ret = mlx5_flow_validate_action_queue(actions,
1181                                                               action_flags, dev,
1182                                                               attr,
1183                                                               error);
1184                         if (ret < 0)
1185                                 return ret;
1186                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1187                         break;
1188                 case RTE_FLOW_ACTION_TYPE_RSS:
1189                         ret = mlx5_flow_validate_action_rss(actions,
1190                                                             action_flags, dev,
1191                                                             attr,
1192                                                             error);
1193                         if (ret < 0)
1194                                 return ret;
1195                         action_flags |= MLX5_FLOW_ACTION_RSS;
1196                         break;
1197                 case RTE_FLOW_ACTION_TYPE_COUNT:
1198                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1199                         if (ret < 0)
1200                                 return ret;
1201                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1202                         break;
1203                 default:
1204                         return rte_flow_error_set(error, ENOTSUP,
1205                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1206                                                   actions,
1207                                                   "action not supported");
1208                 }
1209         }
1210         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1211                 return rte_flow_error_set(error, EINVAL,
1212                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1213                                           "no fate action is found");
1214         return 0;
1215 }
1216
1217 /**
1218  * Calculate the size in bytes needed for the action part of the Verbs
1219  * flow.
1220  *
1221  * @param[in] actions
1222  *   Pointer to the list of actions.
1223  *
1224  * @return
1225  *   The size of the memory needed for all actions.
1226  */
1227 static int
1228 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1229 {
1230         int size = 0;
1231
1232         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1233                 switch (actions->type) {
1234                 case RTE_FLOW_ACTION_TYPE_VOID:
1235                         break;
1236                 case RTE_FLOW_ACTION_TYPE_FLAG:
1237                         size += sizeof(struct ibv_flow_spec_action_tag);
1238                         break;
1239                 case RTE_FLOW_ACTION_TYPE_MARK:
1240                         size += sizeof(struct ibv_flow_spec_action_tag);
1241                         break;
1242                 case RTE_FLOW_ACTION_TYPE_DROP:
1243                         size += sizeof(struct ibv_flow_spec_action_drop);
1244                         break;
1245                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1246                         break;
1247                 case RTE_FLOW_ACTION_TYPE_RSS:
1248                         break;
1249                 case RTE_FLOW_ACTION_TYPE_COUNT:
1250 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1251         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1252                         size += sizeof(struct ibv_flow_spec_counter_action);
1253 #endif
1254                         break;
1255                 default:
1256                         break;
1257                 }
1258         }
1259         return size;
1260 }
1261
1262 /**
1263  * Calculate the size in bytes needed for the item part of the Verbs
1264  * flow.
1265  *
1266  * @param[in] items
1267  *   Pointer to the list of items.
1268  *
1269  * @return
1270  *   The size of the memory needed for all items.
1271  */
1272 static int
1273 flow_verbs_get_items_size(const struct rte_flow_item items[])
1274 {
1275         int size = 0;
1276
1277         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1278                 switch (items->type) {
1279                 case RTE_FLOW_ITEM_TYPE_VOID:
1280                         break;
1281                 case RTE_FLOW_ITEM_TYPE_ETH:
1282                         size += sizeof(struct ibv_flow_spec_eth);
1283                         break;
1284                 case RTE_FLOW_ITEM_TYPE_VLAN:
1285                         size += sizeof(struct ibv_flow_spec_eth);
1286                         break;
1287                 case RTE_FLOW_ITEM_TYPE_IPV4:
1288                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1289                         break;
1290                 case RTE_FLOW_ITEM_TYPE_IPV6:
1291                         size += sizeof(struct ibv_flow_spec_ipv6);
1292                         break;
1293                 case RTE_FLOW_ITEM_TYPE_UDP:
1294                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1295                         break;
1296                 case RTE_FLOW_ITEM_TYPE_TCP:
1297                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1298                         break;
1299                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1300                         size += sizeof(struct ibv_flow_spec_tunnel);
1301                         break;
1302                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1303                         size += sizeof(struct ibv_flow_spec_tunnel);
1304                         break;
1305 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1306                 case RTE_FLOW_ITEM_TYPE_GRE:
1307                         size += sizeof(struct ibv_flow_spec_gre);
1308                         break;
1309                 case RTE_FLOW_ITEM_TYPE_MPLS:
1310                         size += sizeof(struct ibv_flow_spec_mpls);
1311                         break;
1312 #else
1313                 case RTE_FLOW_ITEM_TYPE_GRE:
1314                         size += sizeof(struct ibv_flow_spec_tunnel);
1315                         break;
1316 #endif
1317                 default:
1318                         break;
1319                 }
1320         }
1321         return size;
1322 }
1323
1324 /**
1325  * Internal preparation function. Allocate mlx5_flow with the required size.
1326  * The required size is calculated based on the actions and
1327  * items.
1328  *
1329  * @param[in] attr
1330  *   Pointer to the flow attributes.
1331  * @param[in] items
1332  *   Pointer to the list of items.
1333  * @param[in] actions
1334  *   Pointer to the list of actions.
1335  * @param[out] error
1336  *   Pointer to the error structure.
1337  *
1338  * @return
1339  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1340  *   is set.
1341  */
1342 static struct mlx5_flow *
1343 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1344                    const struct rte_flow_item items[],
1345                    const struct rte_flow_action actions[],
1346                    struct rte_flow_error *error)
1347 {
1348         uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1349         struct mlx5_flow *flow;
1350
1351         size += flow_verbs_get_actions_size(actions);
1352         size += flow_verbs_get_items_size(items);
1353         flow = rte_calloc(__func__, 1, size, 0);
1354         if (!flow) {
1355                 rte_flow_error_set(error, ENOMEM,
1356                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1357                                    "not enough memory to create flow");
1358                 return NULL;
1359         }
1360         flow->verbs.attr = (void *)(flow + 1);
1361         flow->verbs.specs =
1362                 (uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1363         return flow;
1364 }
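/*
 * Layout of the single allocation made above (illustrative):
 *
 *   +------------------+----------------------+--------------------------+
 *   | struct mlx5_flow | struct ibv_flow_attr | Verbs specs              |
 *   |                  | (verbs.attr)         | (verbs.specs, filled by  |
 *   |                  |                      |  flow_verbs_translate()) |
 *   +------------------+----------------------+--------------------------+
 *
 * The specs area is sized by flow_verbs_get_actions_size() and
 * flow_verbs_get_items_size(), so translation never overruns the buffer.
 */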
1365
1366 /**
1367  * Fill the flow with Verbs specs.
1368  *
1369  * @param[in] dev
1370  *   Pointer to Ethernet device.
1371  * @param[in, out] dev_flow
1372  *   Pointer to the mlx5 flow.
1373  * @param[in] attr
1374  *   Pointer to the flow attributes.
1375  * @param[in] items
1376  *   Pointer to the list of items.
1377  * @param[in] actions
1378  *   Pointer to the list of actions.
1379  * @param[out] error
1380  *   Pointer to the error structure.
1381  *
1382  * @return
1383  *   0 on success, a negative errno value otherwise and rte_errno is set.
1384  */
1385 static int
1386 flow_verbs_translate(struct rte_eth_dev *dev,
1387                      struct mlx5_flow *dev_flow,
1388                      const struct rte_flow_attr *attr,
1389                      const struct rte_flow_item items[],
1390                      const struct rte_flow_action actions[],
1391                      struct rte_flow_error *error)
1392 {
1393         struct rte_flow *flow = dev_flow->flow;
1394         uint64_t item_flags = 0;
1395         uint64_t action_flags = 0;
1396         uint64_t priority = attr->priority;
1397         uint32_t subpriority = 0;
1398         struct priv *priv = dev->data->dev_private;
1399
1400         if (priority == MLX5_FLOW_PRIO_RSVD)
1401                 priority = priv->config.flow_prio - 1;
1402         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1403                 int ret;
1404
1405                 switch (actions->type) {
1406                 case RTE_FLOW_ACTION_TYPE_VOID:
1407                         break;
1408                 case RTE_FLOW_ACTION_TYPE_FLAG:
1409                         flow_verbs_translate_action_flag(dev_flow, actions);
1410                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1411                         break;
1412                 case RTE_FLOW_ACTION_TYPE_MARK:
1413                         flow_verbs_translate_action_mark(dev_flow, actions);
1414                         action_flags |= MLX5_FLOW_ACTION_MARK;
1415                         break;
1416                 case RTE_FLOW_ACTION_TYPE_DROP:
1417                         flow_verbs_translate_action_drop(dev_flow, actions);
1418                         action_flags |= MLX5_FLOW_ACTION_DROP;
1419                         break;
1420                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1421                         flow_verbs_translate_action_queue(dev_flow, actions);
1422                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1423                         break;
1424                 case RTE_FLOW_ACTION_TYPE_RSS:
1425                         flow_verbs_translate_action_rss(dev_flow, actions);
1426                         action_flags |= MLX5_FLOW_ACTION_RSS;
1427                         break;
1428                 case RTE_FLOW_ACTION_TYPE_COUNT:
1429                         ret = flow_verbs_translate_action_count(dev_flow,
1430                                                                 actions,
1431                                                                 dev, error);
1432                         if (ret < 0)
1433                                 return ret;
1434                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1435                         break;
1436                 default:
1437                         return rte_flow_error_set(error, ENOTSUP,
1438                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1439                                                   actions,
1440                                                   "action not supported");
1441                 }
1442         }
1443         flow->actions = action_flags;
1444         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1445                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1446
1447                 switch (items->type) {
1448                 case RTE_FLOW_ITEM_TYPE_VOID:
1449                         break;
1450                 case RTE_FLOW_ITEM_TYPE_ETH:
1451                         flow_verbs_translate_item_eth(dev_flow, items,
1452                                                       item_flags);
1453                         subpriority = MLX5_PRIORITY_MAP_L2;
1454                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1455                                                MLX5_FLOW_LAYER_OUTER_L2;
1456                         break;
1457                 case RTE_FLOW_ITEM_TYPE_VLAN:
1458                         flow_verbs_translate_item_vlan(dev_flow, items,
1459                                                        item_flags);
1460                         subpriority = MLX5_PRIORITY_MAP_L2;
1461                         item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1462                                                 MLX5_FLOW_LAYER_INNER_VLAN) :
1463                                                (MLX5_FLOW_LAYER_OUTER_L2 |
1464                                                 MLX5_FLOW_LAYER_OUTER_VLAN);
1465                         break;
1466                 case RTE_FLOW_ITEM_TYPE_IPV4:
1467                         flow_verbs_translate_item_ipv4(dev_flow, items,
1468                                                        item_flags);
1469                         subpriority = MLX5_PRIORITY_MAP_L3;
1470                         dev_flow->verbs.hash_fields |=
1471                                 mlx5_flow_hashfields_adjust
1472                                         (dev_flow, tunnel,
1473                                          MLX5_IPV4_LAYER_TYPES,
1474                                          MLX5_IPV4_IBV_RX_HASH);
1475                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1476                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1477                         break;
1478                 case RTE_FLOW_ITEM_TYPE_IPV6:
1479                         flow_verbs_translate_item_ipv6(dev_flow, items,
1480                                                        item_flags);
1481                         subpriority = MLX5_PRIORITY_MAP_L3;
1482                         dev_flow->verbs.hash_fields |=
1483                                 mlx5_flow_hashfields_adjust
1484                                         (dev_flow, tunnel,
1485                                          MLX5_IPV6_LAYER_TYPES,
1486                                          MLX5_IPV6_IBV_RX_HASH);
1487                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1488                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1489                         break;
1490                 case RTE_FLOW_ITEM_TYPE_TCP:
1491                         flow_verbs_translate_item_tcp(dev_flow, items,
1492                                                       item_flags);
1493                         subpriority = MLX5_PRIORITY_MAP_L4;
1494                         dev_flow->verbs.hash_fields |=
1495                                 mlx5_flow_hashfields_adjust
1496                                         (dev_flow, tunnel, ETH_RSS_TCP,
1497                                          (IBV_RX_HASH_SRC_PORT_TCP |
1498                                           IBV_RX_HASH_DST_PORT_TCP));
1499                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1500                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1501                         break;
1502                 case RTE_FLOW_ITEM_TYPE_UDP:
1503                         flow_verbs_translate_item_udp(dev_flow, items,
1504                                                       item_flags);
1505                         subpriority = MLX5_PRIORITY_MAP_L4;
1506                         dev_flow->verbs.hash_fields |=
1507                                 mlx5_flow_hashfields_adjust
1508                                         (dev_flow, tunnel, ETH_RSS_UDP,
1509                                          (IBV_RX_HASH_SRC_PORT_UDP |
1510                                           IBV_RX_HASH_DST_PORT_UDP));
1511                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1512                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1513                         break;
1514                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1515                         flow_verbs_translate_item_vxlan(dev_flow, items,
1516                                                         item_flags);
1517                         subpriority = MLX5_PRIORITY_MAP_L2;
1518                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1519                         break;
1520                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1521                         flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1522                                                             item_flags);
1523                         subpriority = MLX5_PRIORITY_MAP_L2;
1524                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1525                         break;
1526                 case RTE_FLOW_ITEM_TYPE_GRE:
1527                         flow_verbs_translate_item_gre(dev_flow, items,
1528                                                       item_flags);
1529                         subpriority = MLX5_PRIORITY_MAP_L2;
1530                         item_flags |= MLX5_FLOW_LAYER_GRE;
1531                         break;
1532                 case RTE_FLOW_ITEM_TYPE_MPLS:
1533                         flow_verbs_translate_item_mpls(dev_flow, items,
1534                                                        item_flags);
1535                         subpriority = MLX5_PRIORITY_MAP_L2;
1536                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1537                         break;
1538                 default:
1539                         return rte_flow_error_set(error, ENOTSUP,
1540                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1541                                                   NULL,
1542                                                   "item not supported");
1543                 }
1544         }
1545         dev_flow->layers = item_flags;
1546         dev_flow->verbs.attr->priority =
1547                 mlx5_flow_adjust_priority(dev, priority, subpriority);
1548         return 0;
1549 }
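/*
 * Illustrative walk-through: for a pattern eth / ipv4 / udp with a single
 * QUEUE action the two loops above would end with
 *
 *   action_flags = MLX5_FLOW_ACTION_QUEUE;
 *   item_flags   = MLX5_FLOW_LAYER_OUTER_L2 |
 *                  MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 *                  MLX5_FLOW_LAYER_OUTER_L4_UDP;
 *   subpriority  = MLX5_PRIORITY_MAP_L4;
 *
 * so the Verbs attribute priority is refined down to the L4 level by
 * mlx5_flow_adjust_priority().
 */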
1550
1551 /**
1552  * Remove the flow from the NIC but keep it in memory.
1553  *
1554  * @param[in] dev
1555  *   Pointer to the Ethernet device structure.
1556  * @param[in, out] flow
1557  *   Pointer to flow structure.
1558  */
1559 static void
1560 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1561 {
1562         struct mlx5_flow_verbs *verbs;
1563         struct mlx5_flow *dev_flow;
1564
1565         if (!flow)
1566                 return;
1567         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1568                 verbs = &dev_flow->verbs;
1569                 if (verbs->flow) {
1570                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1571                         verbs->flow = NULL;
1572                 }
1573                 if (verbs->hrxq) {
1574                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1575                                 mlx5_hrxq_drop_release(dev);
1576                         else
1577                                 mlx5_hrxq_release(dev, verbs->hrxq);
1578                         verbs->hrxq = NULL;
1579                 }
1580         }
1581 }
1582
1583 /**
1584  * Remove the flow from the NIC and free its memory.
1585  *
1586  * @param[in] dev
1587  *   Pointer to the Ethernet device structure.
1588  * @param[in, out] flow
1589  *   Pointer to flow structure.
1590  */
1591 static void
1592 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1593 {
1594         struct mlx5_flow *dev_flow;
1595
1596         if (!flow)
1597                 return;
1598         flow_verbs_remove(dev, flow);
1599         while (!LIST_EMPTY(&flow->dev_flows)) {
1600                 dev_flow = LIST_FIRST(&flow->dev_flows);
1601                 LIST_REMOVE(dev_flow, next);
1602                 rte_free(dev_flow);
1603         }
1604         if (flow->counter) {
1605                 flow_verbs_counter_release(flow->counter);
1606                 flow->counter = NULL;
1607         }
1608 }
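/*
 * Note on the remove/destroy split above: flow_verbs_remove() only releases
 * the Verbs flow handles and hash Rx queues so the rule can be re-applied
 * later (typically across a port stop/start cycle), while
 * flow_verbs_destroy() additionally frees the device flows and releases the
 * counter, dropping the rule for good.
 */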
1609
1610 /**
1611  * Apply the flow to the NIC.
1612  *
1613  * @param[in] dev
1614  *   Pointer to the Ethernet device structure.
1615  * @param[in, out] flow
1616  *   Pointer to flow structure.
1617  * @param[out] error
1618  *   Pointer to error structure.
1619  *
1620  * @return
1621  *   0 on success, a negative errno value otherwise and rte_errno is set.
1622  */
1623 static int
1624 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1625                  struct rte_flow_error *error)
1626 {
1627         struct mlx5_flow_verbs *verbs;
1628         struct mlx5_flow *dev_flow;
1629         int err;
1630
1631         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1632                 verbs = &dev_flow->verbs;
1633                 if (flow->actions & MLX5_FLOW_ACTION_DROP) {
1634                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
1635                         if (!verbs->hrxq) {
1636                                 rte_flow_error_set
1637                                         (error, errno,
1638                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1639                                          "cannot get drop hash queue");
1640                                 goto error;
1641                         }
1642                 } else {
1643                         struct mlx5_hrxq *hrxq;
1644
1645                         hrxq = mlx5_hrxq_get(dev, flow->key,
1646                                              MLX5_RSS_HASH_KEY_LEN,
1647                                              verbs->hash_fields,
1648                                              (*flow->queue),
1649                                              flow->rss.queue_num);
1650                         if (!hrxq)
1651                                 hrxq = mlx5_hrxq_new(dev, flow->key,
1652                                                      MLX5_RSS_HASH_KEY_LEN,
1653                                                      verbs->hash_fields,
1654                                                      (*flow->queue),
1655                                                      flow->rss.queue_num,
1656                                                      !!(dev_flow->layers &
1657                                                       MLX5_FLOW_LAYER_TUNNEL));
1658                         if (!hrxq) {
1659                                 rte_flow_error_set
1660                                         (error, rte_errno,
1661                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1662                                          "cannot get hash queue");
1663                                 goto error;
1664                         }
1665                         verbs->hrxq = hrxq;
1666                 }
1667                 verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
1668                                                      verbs->attr);
1669                 if (!verbs->flow) {
1670                         rte_flow_error_set(error, errno,
1671                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1672                                            NULL,
1673                                            "hardware refuses to create flow");
1674                         goto error;
1675                 }
1676         }
1677         return 0;
1678 error:
1679         err = rte_errno; /* Save rte_errno before cleanup. */
1680         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1681                 verbs = &dev_flow->verbs;
1682                 if (verbs->hrxq) {
1683                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1684                                 mlx5_hrxq_drop_release(dev);
1685                         else
1686                                 mlx5_hrxq_release(dev, verbs->hrxq);
1687                         verbs->hrxq = NULL;
1688                 }
1689         }
1690         rte_errno = err; /* Restore rte_errno. */
1691         return -rte_errno;
1692 }
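/*
 * On the non-drop path above, mlx5_hrxq_get() looks up an existing hash Rx
 * queue matching the (key, hash_fields, queues) tuple and takes a reference
 * on it; only when no match exists is a new one created with
 * mlx5_hrxq_new(). As a result, flows sharing identical RSS settings also
 * share a single hash Rx queue object.
 */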
1693
1694 /**
1695  * Query a flow.
1696  *
1697  * @see rte_flow_query()
1698  * @see rte_flow_ops
1699  */
1700 static int
1701 flow_verbs_query(struct rte_eth_dev *dev,
1702                  struct rte_flow *flow,
1703                  const struct rte_flow_action *actions,
1704                  void *data,
1705                  struct rte_flow_error *error)
1706 {
1707         int ret = -EINVAL;
1708
1709         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1710                 switch (actions->type) {
1711                 case RTE_FLOW_ACTION_TYPE_VOID:
1712                         break;
1713                 case RTE_FLOW_ACTION_TYPE_COUNT:
1714                         ret = flow_verbs_counter_query(dev, flow, data, error);
1715                         break;
1716                 default:
1717                         return rte_flow_error_set(error, ENOTSUP,
1718                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1719                                                   actions,
1720                                                   "action not supported");
1721                 }
1722         }
1723         return ret;
1724 }
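/*
 * Usage sketch (application side, assuming the rule was created with a
 * COUNT action, counters are compiled in, and port_id/flow are the
 * application's handles):
 *
 *   struct rte_flow_query_count count = { .reset = 1 };
 *   struct rte_flow_action query[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_COUNT },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *
 *   if (rte_flow_query(port_id, flow, query, &count, &err) == 0)
 *           printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *                  count.hits, count.bytes);
 *
 * flow_verbs_counter_query() fills the hits/bytes fields from the Verbs
 * counter set attached at flow creation time.
 */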
1725
1726 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1727         .validate = flow_verbs_validate,
1728         .prepare = flow_verbs_prepare,
1729         .translate = flow_verbs_translate,
1730         .apply = flow_verbs_apply,
1731         .remove = flow_verbs_remove,
1732         .destroy = flow_verbs_destroy,
1733         .query = flow_verbs_query,
1734 };
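/*
 * These callbacks are invoked through the generic mlx5 flow layer
 * (mlx5_flow.c) when the Verbs engine is selected; a rule created via the
 * public rte_flow API goes roughly through validate -> prepare ->
 * translate -> apply. Minimal application-side sketch (port_id being the
 * application's port handle):
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *   struct rte_flow *f = rte_flow_create(port_id, &attr, pattern,
 *                                        actions, &err);
 */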