1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2017 6WIND S.A.
3  * Copyright 2017 Mellanox
4  */
5
6 /**
7  * @file
8  * Flow API operations for mlx4 driver.
9  */
10
11 #include <arpa/inet.h>
12 #include <assert.h>
13 #include <errno.h>
14 #include <stdalign.h>
15 #include <stddef.h>
16 #include <stdint.h>
17 #include <string.h>
18 #include <sys/queue.h>
19
20 /* Verbs headers do not support -pedantic. */
21 #ifdef PEDANTIC
22 #pragma GCC diagnostic ignored "-Wpedantic"
23 #endif
24 #include <infiniband/verbs.h>
25 #ifdef PEDANTIC
26 #pragma GCC diagnostic error "-Wpedantic"
27 #endif
28
29 #include <rte_byteorder.h>
30 #include <rte_errno.h>
31 #include <rte_eth_ctrl.h>
32 #include <rte_ethdev_driver.h>
33 #include <rte_ether.h>
34 #include <rte_flow.h>
35 #include <rte_flow_driver.h>
36 #include <rte_malloc.h>
37
38 /* PMD headers. */
39 #include "mlx4.h"
40 #include "mlx4_glue.h"
41 #include "mlx4_flow.h"
42 #include "mlx4_rxtx.h"
43 #include "mlx4_utils.h"
44
45 /** Static initializer for a list of subsequent item types. */
46 #define NEXT_ITEM(...) \
47         (const enum rte_flow_item_type []){ \
48                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
49         }
50
51 /** Processor structure associated with a flow item. */
52 struct mlx4_flow_proc_item {
53         /** Bit-mask for fields supported by this PMD. */
54         const void *mask_support;
55         /** Bit-mask to use when @p item->mask is not provided. */
56         const void *mask_default;
57         /** Size in bytes for @p mask_support and @p mask_default. */
58         const unsigned int mask_sz;
59         /** Merge a pattern item into a flow rule handle. */
60         int (*merge)(struct rte_flow *flow,
61                      const struct rte_flow_item *item,
62                      const struct mlx4_flow_proc_item *proc,
63                      struct rte_flow_error *error);
64         /** Size in bytes of the destination structure. */
65         const unsigned int dst_sz;
66         /** List of possible subsequent items. */
67         const enum rte_flow_item_type *const next_item;
68 };
69
70 /** Shared resources for drop flow rules. */
71 struct mlx4_drop {
72         struct ibv_qp *qp; /**< QP target. */
73         struct ibv_cq *cq; /**< CQ associated with above QP. */
74         struct priv *priv; /**< Back pointer to private data. */
75         uint32_t refcnt; /**< Reference count. */
76 };
77
78 /**
79  * Convert DPDK RSS hash fields to their Verbs equivalent.
80  *
81  * This function returns the supported (default) set when @p rss_hf has
82  * the special value (uint64_t)-1.
83  *
84  * @param priv
85  *   Pointer to private structure.
86  * @param rss_hf
87  *   Hash fields in DPDK format (see struct rte_eth_rss_conf).
88  *
89  * @return
90  *   A valid Verbs RSS hash fields mask for mlx4 on success, (uint64_t)-1
91  *   otherwise and rte_errno is set.
92  */
93 uint64_t
94 mlx4_conv_rss_hf(struct priv *priv, uint64_t rss_hf)
95 {
96         enum { IPV4, IPV6, TCP, UDP, };
97         const uint64_t in[] = {
98                 [IPV4] = (ETH_RSS_IPV4 |
99                           ETH_RSS_FRAG_IPV4 |
100                           ETH_RSS_NONFRAG_IPV4_TCP |
101                           ETH_RSS_NONFRAG_IPV4_UDP |
102                           ETH_RSS_NONFRAG_IPV4_OTHER),
103                 [IPV6] = (ETH_RSS_IPV6 |
104                           ETH_RSS_FRAG_IPV6 |
105                           ETH_RSS_NONFRAG_IPV6_TCP |
106                           ETH_RSS_NONFRAG_IPV6_UDP |
107                           ETH_RSS_NONFRAG_IPV6_OTHER |
108                           ETH_RSS_IPV6_EX |
109                           ETH_RSS_IPV6_TCP_EX |
110                           ETH_RSS_IPV6_UDP_EX),
111                 [TCP] = (ETH_RSS_NONFRAG_IPV4_TCP |
112                          ETH_RSS_NONFRAG_IPV6_TCP |
113                          ETH_RSS_IPV6_TCP_EX),
114                 [UDP] = (ETH_RSS_NONFRAG_IPV4_UDP |
115                          ETH_RSS_NONFRAG_IPV6_UDP |
116                          ETH_RSS_IPV6_UDP_EX),
117         };
118         const uint64_t out[RTE_DIM(in)] = {
119                 [IPV4] = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
120                 [IPV6] = IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6,
121                 [TCP] = IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP,
122                 [UDP] = IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP,
123         };
124         uint64_t seen = 0;
125         uint64_t conv = 0;
126         unsigned int i;
127
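        /*
         * Accumulate recognized hash fields in "seen" and their Verbs
         * equivalents in "conv"; any bit left over in rss_hf afterwards
         * means an unsupported field was requested.
         */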
128         for (i = 0; i != RTE_DIM(in); ++i)
129                 if (rss_hf & in[i]) {
130                         seen |= rss_hf & in[i];
131                         conv |= out[i];
132                 }
133         if ((conv & priv->hw_rss_sup) == conv) {
134                 if (rss_hf == (uint64_t)-1) {
135                         /* Include inner RSS by default if supported. */
136                         conv |= priv->hw_rss_sup & IBV_RX_HASH_INNER;
137                         return conv;
138                 }
139                 if (!(rss_hf & ~seen))
140                         return conv;
141         }
142         rte_errno = ENOTSUP;
143         return (uint64_t)-1;
144 }
145
146 /**
147  * Merge Ethernet pattern item into flow rule handle.
148  *
149  * Additional mlx4-specific constraints on supported fields:
150  *
151  * - No support for partial masks, except in the specific case of matching
152  *   all multicast traffic (@p spec->dst and @p mask->dst equal to
153  *   01:00:00:00:00:00).
154  * - Not providing @p item->spec or providing an empty @p mask->dst is
155  *   *only* supported if the rule doesn't specify additional matching
156  *   criteria (i.e. rule is promiscuous-like).
157  *
158  * @param[in, out] flow
159  *   Flow rule handle to update.
160  * @param[in] item
161  *   Pattern item to merge.
162  * @param[in] proc
163  *   Associated item-processing object.
164  * @param[out] error
165  *   Perform verbose error reporting if not NULL.
166  *
167  * @return
168  *   0 on success, a negative errno value otherwise and rte_errno is set.
169  */
170 static int
171 mlx4_flow_merge_eth(struct rte_flow *flow,
172                     const struct rte_flow_item *item,
173                     const struct mlx4_flow_proc_item *proc,
174                     struct rte_flow_error *error)
175 {
176         const struct rte_flow_item_eth *spec = item->spec;
177         const struct rte_flow_item_eth *mask =
178                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
179         struct ibv_flow_spec_eth *eth;
180         const char *msg;
181         unsigned int i;
182
183         if (!mask) {
184                 flow->promisc = 1;
185         } else {
186                 uint32_t sum_dst = 0;
187                 uint32_t sum_src = 0;
188
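                /*
                 * Sum the mask bytes: a nonzero source sum means source MAC
                 * matching was requested (unsupported), while the
                 * destination sum distinguishes empty (0), all-multicast
                 * (single 0x01) and full (6 * 0xff) masks.
                 */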
189                 for (i = 0; i != sizeof(mask->dst.addr_bytes); ++i) {
190                         sum_dst += mask->dst.addr_bytes[i];
191                         sum_src += mask->src.addr_bytes[i];
192                 }
193                 if (sum_src) {
194                         msg = "mlx4 does not support source MAC matching";
195                         goto error;
196                 } else if (!sum_dst) {
197                         flow->promisc = 1;
198                 } else if (sum_dst == 1 && mask->dst.addr_bytes[0] == 1) {
199                         if (!(spec->dst.addr_bytes[0] & 1)) {
200                                 msg = "mlx4 does not support the explicit"
201                                         " exclusion of all multicast traffic";
202                                 goto error;
203                         }
204                         flow->allmulti = 1;
205                 } else if (sum_dst != (UINT8_C(0xff) * ETHER_ADDR_LEN)) {
206                         msg = "mlx4 does not support matching partial"
207                                 " Ethernet fields";
208                         goto error;
209                 }
210         }
211         if (!flow->ibv_attr)
212                 return 0;
213         if (flow->promisc) {
214                 flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
215                 return 0;
216         }
217         if (flow->allmulti) {
218                 flow->ibv_attr->type = IBV_FLOW_ATTR_MC_DEFAULT;
219                 return 0;
220         }
221         ++flow->ibv_attr->num_of_specs;
222         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
223         *eth = (struct ibv_flow_spec_eth) {
224                 .type = IBV_FLOW_SPEC_ETH,
225                 .size = sizeof(*eth),
226         };
227         memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
228         memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
229         /* Remove unwanted bits from values. */
230         for (i = 0; i < ETHER_ADDR_LEN; ++i)
231                 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
233         return 0;
234 error:
235         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
236                                   item, msg);
237 }
238
239 /**
240  * Merge VLAN pattern item into flow rule handle.
241  *
242  * Additional mlx4-specific constraints on supported fields:
243  *
244  * - Matching *all* VLAN traffic by omitting @p item->spec or providing an
245  *   empty @p item->mask would also include non-VLAN traffic. Doing so is
246  *   therefore unsupported.
247  * - No support for partial masks.
248  *
249  * @param[in, out] flow
250  *   Flow rule handle to update.
251  * @param[in] item
252  *   Pattern item to merge.
253  * @param[in] proc
254  *   Associated item-processing object.
255  * @param[out] error
256  *   Perform verbose error reporting if not NULL.
257  *
258  * @return
259  *   0 on success, a negative errno value otherwise and rte_errno is set.
260  */
261 static int
262 mlx4_flow_merge_vlan(struct rte_flow *flow,
263                      const struct rte_flow_item *item,
264                      const struct mlx4_flow_proc_item *proc,
265                      struct rte_flow_error *error)
266 {
267         const struct rte_flow_item_vlan *spec = item->spec;
268         const struct rte_flow_item_vlan *mask =
269                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
270         struct ibv_flow_spec_eth *eth;
271         const char *msg;
272
273         if (!mask || !mask->tci) {
274                 msg = "mlx4 cannot match all VLAN traffic while excluding"
275                         " non-VLAN traffic; TCI VID must be specified";
276                 goto error;
277         }
278         if (mask->tci != RTE_BE16(0x0fff)) {
279                 msg = "mlx4 does not support partial TCI VID matching";
280                 goto error;
281         }
282         if (!flow->ibv_attr)
283                 return 0;
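        /*
         * VLAN items have a zero dst_sz: the TCI is folded into the
         * Ethernet specification written just before, hence the rewind by
         * sizeof(*eth) below.
         */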
284         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
285                        sizeof(*eth));
286         eth->val.vlan_tag = spec->tci;
287         eth->mask.vlan_tag = mask->tci;
288         eth->val.vlan_tag &= eth->mask.vlan_tag;
289         return 0;
290 error:
291         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
292                                   item, msg);
293 }
294
295 /**
296  * Merge IPv4 pattern item into flow rule handle.
297  *
298  * Additional mlx4-specific constraints on supported fields:
299  *
300  * - No support for partial masks.
301  *
302  * @param[in, out] flow
303  *   Flow rule handle to update.
304  * @param[in] item
305  *   Pattern item to merge.
306  * @param[in] proc
307  *   Associated item-processing object.
308  * @param[out] error
309  *   Perform verbose error reporting if not NULL.
310  *
311  * @return
312  *   0 on success, a negative errno value otherwise and rte_errno is set.
313  */
314 static int
315 mlx4_flow_merge_ipv4(struct rte_flow *flow,
316                      const struct rte_flow_item *item,
317                      const struct mlx4_flow_proc_item *proc,
318                      struct rte_flow_error *error)
319 {
320         const struct rte_flow_item_ipv4 *spec = item->spec;
321         const struct rte_flow_item_ipv4 *mask =
322                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
323         struct ibv_flow_spec_ipv4 *ipv4;
324         const char *msg;
325
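        /*
         * Only empty and full masks are supported: any other value, once
         * incremented, exceeds 1 in the checks below and is rejected as a
         * partial mask.
         */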
326         if (mask &&
327             ((uint32_t)(mask->hdr.src_addr + 1) > UINT32_C(1) ||
328              (uint32_t)(mask->hdr.dst_addr + 1) > UINT32_C(1))) {
329                 msg = "mlx4 does not support matching partial IPv4 fields";
330                 goto error;
331         }
332         if (!flow->ibv_attr)
333                 return 0;
334         ++flow->ibv_attr->num_of_specs;
335         ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
336         *ipv4 = (struct ibv_flow_spec_ipv4) {
337                 .type = IBV_FLOW_SPEC_IPV4,
338                 .size = sizeof(*ipv4),
339         };
340         if (!spec)
341                 return 0;
342         ipv4->val = (struct ibv_flow_ipv4_filter) {
343                 .src_ip = spec->hdr.src_addr,
344                 .dst_ip = spec->hdr.dst_addr,
345         };
346         ipv4->mask = (struct ibv_flow_ipv4_filter) {
347                 .src_ip = mask->hdr.src_addr,
348                 .dst_ip = mask->hdr.dst_addr,
349         };
350         /* Remove unwanted bits from values. */
351         ipv4->val.src_ip &= ipv4->mask.src_ip;
352         ipv4->val.dst_ip &= ipv4->mask.dst_ip;
353         return 0;
354 error:
355         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
356                                   item, msg);
357 }
358
359 /**
360  * Merge UDP pattern item into flow rule handle.
361  *
362  * Additional mlx4-specific constraints on supported fields:
363  *
364  * - No support for partial masks.
365  *
366  * @param[in, out] flow
367  *   Flow rule handle to update.
368  * @param[in] item
369  *   Pattern item to merge.
370  * @param[in] proc
371  *   Associated item-processing object.
372  * @param[out] error
373  *   Perform verbose error reporting if not NULL.
374  *
375  * @return
376  *   0 on success, a negative errno value otherwise and rte_errno is set.
377  */
378 static int
379 mlx4_flow_merge_udp(struct rte_flow *flow,
380                     const struct rte_flow_item *item,
381                     const struct mlx4_flow_proc_item *proc,
382                     struct rte_flow_error *error)
383 {
384         const struct rte_flow_item_udp *spec = item->spec;
385         const struct rte_flow_item_udp *mask =
386                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
387         struct ibv_flow_spec_tcp_udp *udp;
388         const char *msg;
389
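        /* Same empty-or-full mask check as in mlx4_flow_merge_ipv4(). */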
390         if (mask &&
391             ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
392              (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
393                 msg = "mlx4 does not support matching partial UDP fields";
394                 goto error;
395         }
396         if (!flow->ibv_attr)
397                 return 0;
398         ++flow->ibv_attr->num_of_specs;
399         udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
400         *udp = (struct ibv_flow_spec_tcp_udp) {
401                 .type = IBV_FLOW_SPEC_UDP,
402                 .size = sizeof(*udp),
403         };
404         if (!spec)
405                 return 0;
406         udp->val.dst_port = spec->hdr.dst_port;
407         udp->val.src_port = spec->hdr.src_port;
408         udp->mask.dst_port = mask->hdr.dst_port;
409         udp->mask.src_port = mask->hdr.src_port;
410         /* Remove unwanted bits from values. */
411         udp->val.src_port &= udp->mask.src_port;
412         udp->val.dst_port &= udp->mask.dst_port;
413         return 0;
414 error:
415         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
416                                   item, msg);
417 }
418
419 /**
420  * Merge TCP pattern item into flow rule handle.
421  *
422  * Additional mlx4-specific constraints on supported fields:
423  *
424  * - No support for partial masks.
425  *
426  * @param[in, out] flow
427  *   Flow rule handle to update.
428  * @param[in] item
429  *   Pattern item to merge.
430  * @param[in] proc
431  *   Associated item-processing object.
432  * @param[out] error
433  *   Perform verbose error reporting if not NULL.
434  *
435  * @return
436  *   0 on success, a negative errno value otherwise and rte_errno is set.
437  */
438 static int
439 mlx4_flow_merge_tcp(struct rte_flow *flow,
440                     const struct rte_flow_item *item,
441                     const struct mlx4_flow_proc_item *proc,
442                     struct rte_flow_error *error)
443 {
444         const struct rte_flow_item_tcp *spec = item->spec;
445         const struct rte_flow_item_tcp *mask =
446                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
447         struct ibv_flow_spec_tcp_udp *tcp;
448         const char *msg;
449
450         if (mask &&
451             ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
452              (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
453                 msg = "mlx4 does not support matching partial TCP fields";
454                 goto error;
455         }
456         if (!flow->ibv_attr)
457                 return 0;
458         ++flow->ibv_attr->num_of_specs;
459         tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
460         *tcp = (struct ibv_flow_spec_tcp_udp) {
461                 .type = IBV_FLOW_SPEC_TCP,
462                 .size = sizeof(*tcp),
463         };
464         if (!spec)
465                 return 0;
466         tcp->val.dst_port = spec->hdr.dst_port;
467         tcp->val.src_port = spec->hdr.src_port;
468         tcp->mask.dst_port = mask->hdr.dst_port;
469         tcp->mask.src_port = mask->hdr.src_port;
470         /* Remove unwanted bits from values. */
471         tcp->val.src_port &= tcp->mask.src_port;
472         tcp->val.dst_port &= tcp->mask.dst_port;
473         return 0;
474 error:
475         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
476                                   item, msg);
477 }
478
479 /**
480  * Perform basic sanity checks on a pattern item.
481  *
482  * @param[in] item
483  *   Item specification.
484  * @param[in] proc
485  *   Associated item-processing object.
486  * @param[out] error
487  *   Perform verbose error reporting if not NULL.
488  *
489  * @return
490  *   0 on success, a negative errno value otherwise and rte_errno is set.
491  */
492 static int
493 mlx4_flow_item_check(const struct rte_flow_item *item,
494                      const struct mlx4_flow_proc_item *proc,
495                      struct rte_flow_error *error)
496 {
497         const uint8_t *mask;
498         unsigned int i;
499
500         /* item->last and item->mask cannot exist without item->spec. */
501         if (!item->spec && (item->mask || item->last))
502                 return rte_flow_error_set
503                         (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
504                          "\"mask\" or \"last\" field provided without a"
505                          " corresponding \"spec\"");
506         /* No spec, no mask, no problem. */
507         if (!item->spec)
508                 return 0;
509         mask = item->mask ?
510                 (const uint8_t *)item->mask :
511                 (const uint8_t *)proc->mask_default;
512         assert(mask);
513         /*
514          * Single-pass check to make sure that:
515          * - Mask is supported, no bits are set outside proc->mask_support.
516          * - Both item->spec and item->last are included in mask.
517          */
518         for (i = 0; i != proc->mask_sz; ++i) {
519                 if (!mask[i])
520                         continue;
521                 if ((mask[i] | ((const uint8_t *)proc->mask_support)[i]) !=
522                     ((const uint8_t *)proc->mask_support)[i])
523                         return rte_flow_error_set
524                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
525                                  item, "unsupported field found in \"mask\"");
526                 if (item->last &&
527                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
528                     (((const uint8_t *)item->last)[i] & mask[i]))
529                         return rte_flow_error_set
530                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
531                                  item,
532                                  "range between \"spec\" and \"last\""
533                                  " is larger than \"mask\"");
534         }
535         return 0;
536 }
537
538 /** Graph of supported items and associated actions. */
539 static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
540         [RTE_FLOW_ITEM_TYPE_END] = {
541                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
542         },
543         [RTE_FLOW_ITEM_TYPE_ETH] = {
544                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
545                                        RTE_FLOW_ITEM_TYPE_IPV4),
546                 .mask_support = &(const struct rte_flow_item_eth){
547                         /* Only destination MAC can be matched. */
548                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
549                 },
550                 .mask_default = &rte_flow_item_eth_mask,
551                 .mask_sz = sizeof(struct rte_flow_item_eth),
552                 .merge = mlx4_flow_merge_eth,
553                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
554         },
555         [RTE_FLOW_ITEM_TYPE_VLAN] = {
556                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
557                 .mask_support = &(const struct rte_flow_item_vlan){
558                         /* Only TCI VID matching is supported. */
559                         .tci = RTE_BE16(0x0fff),
560                 },
561                 .mask_default = &rte_flow_item_vlan_mask,
562                 .mask_sz = sizeof(struct rte_flow_item_vlan),
563                 .merge = mlx4_flow_merge_vlan,
564                 .dst_sz = 0,
565         },
566         [RTE_FLOW_ITEM_TYPE_IPV4] = {
567                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
568                                        RTE_FLOW_ITEM_TYPE_TCP),
569                 .mask_support = &(const struct rte_flow_item_ipv4){
570                         .hdr = {
571                                 .src_addr = RTE_BE32(0xffffffff),
572                                 .dst_addr = RTE_BE32(0xffffffff),
573                         },
574                 },
575                 .mask_default = &rte_flow_item_ipv4_mask,
576                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
577                 .merge = mlx4_flow_merge_ipv4,
578                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
579         },
580         [RTE_FLOW_ITEM_TYPE_UDP] = {
581                 .mask_support = &(const struct rte_flow_item_udp){
582                         .hdr = {
583                                 .src_port = RTE_BE16(0xffff),
584                                 .dst_port = RTE_BE16(0xffff),
585                         },
586                 },
587                 .mask_default = &rte_flow_item_udp_mask,
588                 .mask_sz = sizeof(struct rte_flow_item_udp),
589                 .merge = mlx4_flow_merge_udp,
590                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
591         },
592         [RTE_FLOW_ITEM_TYPE_TCP] = {
593                 .mask_support = &(const struct rte_flow_item_tcp){
594                         .hdr = {
595                                 .src_port = RTE_BE16(0xffff),
596                                 .dst_port = RTE_BE16(0xffff),
597                         },
598                 },
599                 .mask_default = &rte_flow_item_tcp_mask,
600                 .mask_sz = sizeof(struct rte_flow_item_tcp),
601                 .merge = mlx4_flow_merge_tcp,
602                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
603         },
604 };
605
606 /**
607  * Make sure a flow rule is supported and initialize associated structure.
608  *
609  * @param priv
610  *   Pointer to private structure.
611  * @param[in] attr
612  *   Flow rule attributes.
613  * @param[in] pattern
614  *   Pattern specification (list terminated by the END pattern item).
615  * @param[in] actions
616  *   Associated actions (list terminated by the END action).
617  * @param[out] error
618  *   Perform verbose error reporting if not NULL.
619  * @param[in, out] addr
620  *   Buffer where the resulting flow rule handle pointer must be stored.
621  *   If NULL, stop processing after validation stage.
622  *
623  * @return
624  *   0 on success, a negative errno value otherwise and rte_errno is set.
625  */
626 static int
627 mlx4_flow_prepare(struct priv *priv,
628                   const struct rte_flow_attr *attr,
629                   const struct rte_flow_item pattern[],
630                   const struct rte_flow_action actions[],
631                   struct rte_flow_error *error,
632                   struct rte_flow **addr)
633 {
634         const struct rte_flow_item *item;
635         const struct rte_flow_action *action;
636         const struct mlx4_flow_proc_item *proc;
637         struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
638         struct rte_flow *flow = &temp;
639         const char *msg = NULL;
640
641         if (attr->group)
642                 return rte_flow_error_set
643                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
644                          NULL, "groups are not supported");
645         if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
646                 return rte_flow_error_set
647                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
648                          NULL, "maximum priority level is "
649                          MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
650         if (attr->egress)
651                 return rte_flow_error_set
652                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
653                          NULL, "egress is not supported");
654         if (!attr->ingress)
655                 return rte_flow_error_set
656                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
657                          NULL, "only ingress is supported");
658 fill:
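        /*
         * Two passes are made over pattern and actions: the first with the
         * temporary handle (flow == &temp) validates them and accumulates
         * ibv_attr_size, the second fills the allocated handle.
         */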
659         proc = mlx4_flow_proc_item_list;
660         /* Go over pattern. */
661         for (item = pattern; item->type; ++item) {
662                 const struct mlx4_flow_proc_item *next = NULL;
663                 unsigned int i;
664                 int err;
665
666                 if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
667                         continue;
668                 if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
669                         flow->internal = 1;
670                         continue;
671                 }
672                 if (flow->promisc || flow->allmulti) {
673                         msg = "mlx4 does not support additional matching"
674                                 " criteria combined with indiscriminate"
675                                 " matching on Ethernet headers";
676                         goto exit_item_not_supported;
677                 }
678                 for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
679                         if (proc->next_item[i] == item->type) {
680                                 next = &mlx4_flow_proc_item_list[item->type];
681                                 break;
682                         }
683                 }
684                 if (!next)
685                         goto exit_item_not_supported;
686                 proc = next;
687                 /*
688                  * Perform basic sanity checks only once, while handle is
689                  * not allocated.
690                  */
691                 if (flow == &temp) {
692                         err = mlx4_flow_item_check(item, proc, error);
693                         if (err)
694                                 return err;
695                 }
696                 if (proc->merge) {
697                         err = proc->merge(flow, item, proc, error);
698                         if (err)
699                                 return err;
700                 }
701                 flow->ibv_attr_size += proc->dst_sz;
702         }
703         /* Go over actions list. */
704         for (action = actions; action->type; ++action) {
705                 switch (action->type) {
706                         const struct rte_flow_action_queue *queue;
707                         const struct rte_flow_action_rss *rss;
708                         const struct rte_eth_rss_conf *rss_conf;
709                         unsigned int i;
710
711                 case RTE_FLOW_ACTION_TYPE_VOID:
712                         continue;
713                 case RTE_FLOW_ACTION_TYPE_DROP:
714                         flow->drop = 1;
715                         break;
716                 case RTE_FLOW_ACTION_TYPE_QUEUE:
717                         if (flow->rss)
718                                 break;
719                         queue = action->conf;
720                         if (queue->index >= priv->dev->data->nb_rx_queues) {
721                                 msg = "queue target index beyond number of"
722                                         " configured Rx queues";
723                                 goto exit_action_not_supported;
724                         }
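                        /*
                         * A QUEUE action is implemented as an RSS context
                         * with a single queue.
                         */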
725                         flow->rss = mlx4_rss_get
726                                 (priv, 0, mlx4_rss_hash_key_default, 1,
727                                  &queue->index);
728                         if (!flow->rss) {
729                                 msg = "not enough resources for additional"
730                                         " single-queue RSS context";
731                                 goto exit_action_not_supported;
732                         }
733                         break;
734                 case RTE_FLOW_ACTION_TYPE_RSS:
735                         if (flow->rss)
736                                 break;
737                         rss = action->conf;
738                         /* Default RSS configuration if none is provided. */
739                         rss_conf =
740                                 rss->rss_conf ?
741                                 rss->rss_conf :
742                                 &(struct rte_eth_rss_conf){
743                                         .rss_key = mlx4_rss_hash_key_default,
744                                         .rss_key_len = MLX4_RSS_HASH_KEY_SIZE,
745                                         .rss_hf = -1,
746                                 };
747                         /* Sanity checks. */
748                         for (i = 0; i < rss->num; ++i)
749                                 if (rss->queue[i] >=
750                                     priv->dev->data->nb_rx_queues)
751                                         break;
752                         if (i != rss->num) {
753                                 msg = "queue target index beyond number of"
754                                         " configured Rx queues";
755                                 goto exit_action_not_supported;
756                         }
757                         if (!rte_is_power_of_2(rss->num)) {
758                                 msg = "for RSS, mlx4 requires the number of"
759                                         " queues to be a power of two";
760                                 goto exit_action_not_supported;
761                         }
762                         if (rss_conf->rss_key_len !=
763                             sizeof(flow->rss->key)) {
764                                 msg = "mlx4 supports exactly one RSS hash key"
765                                         " length: "
766                                         MLX4_STR_EXPAND(MLX4_RSS_HASH_KEY_SIZE);
767                                 goto exit_action_not_supported;
768                         }
769                         for (i = 1; i < rss->num; ++i)
770                                 if (rss->queue[i] - rss->queue[i - 1] != 1)
771                                         break;
772                         if (i != rss->num) {
773                                 msg = "mlx4 requires RSS contexts to use"
774                                         " consecutive queue indices only";
775                                 goto exit_action_not_supported;
776                         }
777                         if (rss->queue[0] % rss->num) {
778                                 msg = "mlx4 requires the first queue of a RSS"
779                                         " context to be aligned on a multiple"
780                                         " of the context size";
781                                 goto exit_action_not_supported;
782                         }
783                         flow->rss = mlx4_rss_get
784                                 (priv,
785                                  mlx4_conv_rss_hf(priv, rss_conf->rss_hf),
786                                  rss_conf->rss_key, rss->num, rss->queue);
787                         if (!flow->rss) {
788                                 msg = "either invalid parameters or not enough"
789                                         " resources for additional multi-queue"
790                                         " RSS context";
791                                 goto exit_action_not_supported;
792                         }
793                         break;
794                 default:
795                         goto exit_action_not_supported;
796                 }
797         }
798         if (!flow->rss && !flow->drop)
799                 return rte_flow_error_set
800                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
801                          NULL, "no valid action");
802         /* Validation ends here. */
803         if (!addr) {
804                 if (flow->rss)
805                         mlx4_rss_put(flow->rss);
806                 return 0;
807         }
808         if (flow == &temp) {
809                 /* Allocate proper handle based on collected data. */
810                 const struct mlx4_malloc_vec vec[] = {
811                         {
812                                 .align = alignof(struct rte_flow),
813                                 .size = sizeof(*flow),
814                                 .addr = (void **)&flow,
815                         },
816                         {
817                                 .align = alignof(struct ibv_flow_attr),
818                                 .size = temp.ibv_attr_size,
819                                 .addr = (void **)&temp.ibv_attr,
820                         },
821                 };
822
823                 if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec)))
824                         return rte_flow_error_set
825                                 (error, -rte_errno,
826                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
827                                  "flow rule handle allocation failure");
828                 /* Most fields will be updated by the second pass. */
829                 *flow = (struct rte_flow){
830                         .ibv_attr = temp.ibv_attr,
831                         .ibv_attr_size = sizeof(*flow->ibv_attr),
832                         .rss = temp.rss,
833                 };
834                 *flow->ibv_attr = (struct ibv_flow_attr){
835                         .type = IBV_FLOW_ATTR_NORMAL,
836                         .size = sizeof(*flow->ibv_attr),
837                         .priority = attr->priority,
838                         .port = priv->port,
839                 };
840                 goto fill;
841         }
842         *addr = flow;
843         return 0;
844 exit_item_not_supported:
845         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
846                                   item, msg ? msg : "item not supported");
847 exit_action_not_supported:
848         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
849                                   action, msg ? msg : "action not supported");
850 }
851
852 /**
853  * Validate a flow supported by the NIC.
854  *
855  * @see rte_flow_validate()
856  * @see rte_flow_ops
857  */
858 static int
859 mlx4_flow_validate(struct rte_eth_dev *dev,
860                    const struct rte_flow_attr *attr,
861                    const struct rte_flow_item pattern[],
862                    const struct rte_flow_action actions[],
863                    struct rte_flow_error *error)
864 {
865         struct priv *priv = dev->data->dev_private;
866
867         return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
868 }
869
870 /**
871  * Get a drop flow rule resources instance.
872  *
873  * @param priv
874  *   Pointer to private structure.
875  *
876  * @return
877  *   Pointer to drop flow resources on success, NULL otherwise and rte_errno
878  *   is set.
879  */
880 static struct mlx4_drop *
881 mlx4_drop_get(struct priv *priv)
882 {
883         struct mlx4_drop *drop = priv->drop;
884
885         if (drop) {
886                 assert(drop->refcnt);
887                 assert(drop->priv == priv);
888                 ++drop->refcnt;
889                 return drop;
890         }
891         drop = rte_malloc(__func__, sizeof(*drop), 0);
892         if (!drop)
893                 goto error;
894         *drop = (struct mlx4_drop){
895                 .priv = priv,
896                 .refcnt = 1,
897         };
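        /*
         * The QP below is left in the reset state and never given receive
         * buffers; traffic steered to it is therefore silently dropped.
         */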
898         drop->cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
899         if (!drop->cq)
900                 goto error;
901         drop->qp = mlx4_glue->create_qp
902                 (priv->pd,
903                  &(struct ibv_qp_init_attr){
904                         .send_cq = drop->cq,
905                         .recv_cq = drop->cq,
906                         .qp_type = IBV_QPT_RAW_PACKET,
907                  });
908         if (!drop->qp)
909                 goto error;
910         priv->drop = drop;
911         return drop;
912 error:
913         if (drop && drop->qp)
914                 claim_zero(mlx4_glue->destroy_qp(drop->qp));
915         if (drop && drop->cq)
916                 claim_zero(mlx4_glue->destroy_cq(drop->cq));
917         if (drop)
918                 rte_free(drop);
919         rte_errno = ENOMEM;
920         return NULL;
921 }
922
923 /**
924  * Give back a drop flow rule resources instance.
925  *
926  * @param drop
927  *   Pointer to drop flow rule resources.
928  */
929 static void
930 mlx4_drop_put(struct mlx4_drop *drop)
931 {
932         assert(drop->refcnt);
933         if (--drop->refcnt)
934                 return;
935         drop->priv->drop = NULL;
936         claim_zero(mlx4_glue->destroy_qp(drop->qp));
937         claim_zero(mlx4_glue->destroy_cq(drop->cq));
938         rte_free(drop);
939 }
940
941 /**
942  * Toggle a configured flow rule.
943  *
944  * @param priv
945  *   Pointer to private structure.
946  * @param flow
947  *   Flow rule handle to toggle.
948  * @param enable
949  *   Whether associated Verbs flow must be created or removed.
950  * @param[out] error
951  *   Perform verbose error reporting if not NULL.
952  *
953  * @return
954  *   0 on success, a negative errno value otherwise and rte_errno is set.
955  */
956 static int
957 mlx4_flow_toggle(struct priv *priv,
958                  struct rte_flow *flow,
959                  int enable,
960                  struct rte_flow_error *error)
961 {
962         struct ibv_qp *qp = NULL;
963         const char *msg;
964         int err;
965
966         if (!enable) {
967                 if (!flow->ibv_flow)
968                         return 0;
969                 claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
970                 flow->ibv_flow = NULL;
971                 if (flow->drop)
972                         mlx4_drop_put(priv->drop);
973                 else if (flow->rss)
974                         mlx4_rss_detach(flow->rss);
975                 return 0;
976         }
977         assert(flow->ibv_attr);
978         if (!flow->internal &&
979             !priv->isolated &&
980             flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
981                 if (flow->ibv_flow) {
982                         claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
983                         flow->ibv_flow = NULL;
984                         if (flow->drop)
985                                 mlx4_drop_put(priv->drop);
986                         else if (flow->rss)
987                                 mlx4_rss_detach(flow->rss);
988                 }
989                 err = EACCES;
990                 msg = ("priority level "
991                        MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)
992                        " is reserved when not in isolated mode");
993                 goto error;
994         }
995         if (flow->rss) {
996                 struct mlx4_rss *rss = flow->rss;
997                 int missing = 0;
998                 unsigned int i;
999
1000                 /* Stop at the first nonexistent target queue. */
1001                 for (i = 0; i != rss->queues; ++i)
1002                         if (rss->queue_id[i] >=
1003                             priv->dev->data->nb_rx_queues ||
1004                             !priv->dev->data->rx_queues[rss->queue_id[i]]) {
1005                                 missing = 1;
1006                                 break;
1007                         }
1008                 if (flow->ibv_flow) {
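                        /*
                         * Nothing to do if the Verbs flow already matches
                         * the current state, i.e. it drops traffic if and
                         * only if a target queue is missing.
                         */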
1009                         if (missing ^ !flow->drop)
1010                                 return 0;
1011                         /* Verbs flow needs updating. */
1012                         claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
1013                         flow->ibv_flow = NULL;
1014                         if (flow->drop)
1015                                 mlx4_drop_put(priv->drop);
1016                         else
1017                                 mlx4_rss_detach(rss);
1018                 }
1019                 if (!missing) {
1020                         err = mlx4_rss_attach(rss);
1021                         if (err) {
1022                                 err = -err;
1023                                 msg = "cannot create indirection table or hash"
1024                                         " QP to associate flow rule with";
1025                                 goto error;
1026                         }
1027                         qp = rss->qp;
1028                 }
1029                 /* A missing target queue drops traffic implicitly. */
1030                 flow->drop = missing;
1031         }
1032         if (flow->drop) {
1033                 if (flow->ibv_flow)
1034                         return 0;
1035                 mlx4_drop_get(priv);
1036                 if (!priv->drop) {
1037                         err = rte_errno;
1038                         msg = "resources for drop flow rule cannot be created";
1039                         goto error;
1040                 }
1041                 qp = priv->drop->qp;
1042         }
1043         assert(qp);
1044         if (flow->ibv_flow)
1045                 return 0;
1046         flow->ibv_flow = mlx4_glue->create_flow(qp, flow->ibv_attr);
1047         if (flow->ibv_flow)
1048                 return 0;
1049         if (flow->drop)
1050                 mlx4_drop_put(priv->drop);
1051         else if (flow->rss)
1052                 mlx4_rss_detach(flow->rss);
1053         err = errno;
1054         msg = "flow rule rejected by device";
1055 error:
1056         return rte_flow_error_set
1057                 (error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
1058 }
1059
1060 /**
1061  * Create a flow.
1062  *
1063  * @see rte_flow_create()
1064  * @see rte_flow_ops
1065  */
1066 static struct rte_flow *
1067 mlx4_flow_create(struct rte_eth_dev *dev,
1068                  const struct rte_flow_attr *attr,
1069                  const struct rte_flow_item pattern[],
1070                  const struct rte_flow_action actions[],
1071                  struct rte_flow_error *error)
1072 {
1073         struct priv *priv = dev->data->dev_private;
1074         struct rte_flow *flow;
1075         int err;
1076
1077         err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
1078         if (err)
1079                 return NULL;
1080         err = mlx4_flow_toggle(priv, flow, priv->started, error);
1081         if (!err) {
1082                 struct rte_flow *curr = LIST_FIRST(&priv->flows);
1083
1084                 /* New rules are inserted after internal ones. */
1085                 if (!curr || !curr->internal) {
1086                         LIST_INSERT_HEAD(&priv->flows, flow, next);
1087                 } else {
1088                         while (LIST_NEXT(curr, next) &&
1089                                LIST_NEXT(curr, next)->internal)
1090                                 curr = LIST_NEXT(curr, next);
1091                         LIST_INSERT_AFTER(curr, flow, next);
1092                 }
1093                 return flow;
1094         }
1095         if (flow->rss)
1096                 mlx4_rss_put(flow->rss);
1097         rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1098                            error->message);
1099         rte_free(flow);
1100         return NULL;
1101 }
1102
1103 /**
1104  * Configure isolated mode.
1105  *
1106  * @see rte_flow_isolate()
1107  * @see rte_flow_ops
1108  */
1109 static int
1110 mlx4_flow_isolate(struct rte_eth_dev *dev,
1111                   int enable,
1112                   struct rte_flow_error *error)
1113 {
1114         struct priv *priv = dev->data->dev_private;
1115
1116         if (!!enable == !!priv->isolated)
1117                 return 0;
1118         priv->isolated = !!enable;
1119         if (mlx4_flow_sync(priv, error)) {
1120                 priv->isolated = !enable;
1121                 return -rte_errno;
1122         }
1123         return 0;
1124 }
1125
1126 /**
1127  * Destroy a flow rule.
1128  *
1129  * @see rte_flow_destroy()
1130  * @see rte_flow_ops
1131  */
1132 static int
1133 mlx4_flow_destroy(struct rte_eth_dev *dev,
1134                   struct rte_flow *flow,
1135                   struct rte_flow_error *error)
1136 {
1137         struct priv *priv = dev->data->dev_private;
1138         int err = mlx4_flow_toggle(priv, flow, 0, error);
1139
1140         if (err)
1141                 return err;
1142         LIST_REMOVE(flow, next);
1143         if (flow->rss)
1144                 mlx4_rss_put(flow->rss);
1145         rte_free(flow);
1146         return 0;
1147 }
1148
1149 /**
1150  * Destroy user-configured flow rules.
1151  *
1152  * This function skips internal flow rules.
1153  *
1154  * @see rte_flow_flush()
1155  * @see rte_flow_ops
1156  */
1157 static int
1158 mlx4_flow_flush(struct rte_eth_dev *dev,
1159                 struct rte_flow_error *error)
1160 {
1161         struct priv *priv = dev->data->dev_private;
1162         struct rte_flow *flow = LIST_FIRST(&priv->flows);
1163
1164         while (flow) {
1165                 struct rte_flow *next = LIST_NEXT(flow, next);
1166
1167                 if (!flow->internal)
1168                         mlx4_flow_destroy(dev, flow, error);
1169                 flow = next;
1170         }
1171         return 0;
1172 }
1173
1174 /**
1175  * Helper function to determine the next configured VLAN filter.
1176  *
1177  * @param priv
1178  *   Pointer to private structure.
1179  * @param vlan
1180  *   VLAN ID to use as a starting point.
1181  *
1182  * @return
1183  *   Next configured VLAN ID or a high value (>= 4096) if there is none.
1184  */
1185 static uint16_t
1186 mlx4_flow_internal_next_vlan(struct priv *priv, uint16_t vlan)
1187 {
1188         while (vlan < 4096) {
1189                 if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
1190                     (UINT64_C(1) << (vlan % 64)))
1191                         return vlan;
1192                 ++vlan;
1193         }
1194         return vlan;
1195 }
1196
1197 /**
1198  * Generate internal flow rules.
1199  *
1200  * Various flow rules are created depending on the mode the device is in:
1201  *
1202  * 1. Promiscuous:
1203  *       port MAC + broadcast + catch-all (VLAN filtering is ignored).
1204  * 2. All multicast:
1205  *       port MAC/VLAN + broadcast + catch-all multicast.
1206  * 3. Otherwise:
1207  *       port MAC/VLAN + broadcast MAC/VLAN.
1208  *
1209  * About MAC flow rules:
1210  *
1211  * - MAC flow rules are generated from @p dev->data->mac_addrs
1212  *   (@p priv->mac array).
1213  * - An additional flow rule for Ethernet broadcasts is also generated.
1214  * - All these are per-VLAN if @p DEV_RX_OFFLOAD_VLAN_FILTER
1215  *   is enabled and VLAN filters are configured.
1216  *
1217  * @param priv
1218  *   Pointer to private structure.
1219  * @param[out] error
1220  *   Perform verbose error reporting if not NULL.
1221  *
1222  * @return
1223  *   0 on success, a negative errno value otherwise and rte_errno is set.
1224  */
1225 static int
1226 mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
1227 {
1228         struct rte_flow_attr attr = {
1229                 .priority = MLX4_FLOW_PRIORITY_LAST,
1230                 .ingress = 1,
1231         };
1232         struct rte_flow_item_eth eth_spec;
1233         const struct rte_flow_item_eth eth_mask = {
1234                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1235         };
1236         const struct rte_flow_item_eth eth_allmulti = {
1237                 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1238         };
1239         struct rte_flow_item_vlan vlan_spec;
1240         const struct rte_flow_item_vlan vlan_mask = {
1241                 .tci = RTE_BE16(0x0fff),
1242         };
1243         struct rte_flow_item pattern[] = {
1244                 {
1245                         .type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
1246                 },
1247                 {
1248                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1249                         .spec = &eth_spec,
1250                         .mask = &eth_mask,
1251                 },
1252                 {
1253                         /* Replaced with VLAN if filtering is enabled. */
1254                         .type = RTE_FLOW_ITEM_TYPE_END,
1255                 },
1256                 {
1257                         .type = RTE_FLOW_ITEM_TYPE_END,
1258                 },
1259         };
1260         /*
1261          * Round number of queues down to their previous power of 2 to
1262          * comply with RSS context limitations. Extra queues silently do not
1263          * get RSS by default.
1264          */
1265         uint32_t queues =
1266                 rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1;
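        /*
         * Stack buffer large enough to hold the RSS action followed by its
         * flexible queue[] array.
         */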
1267         alignas(struct rte_flow_action_rss) uint8_t rss_conf_data
1268                 [offsetof(struct rte_flow_action_rss, queue) +
1269                  sizeof(((struct rte_flow_action_rss *)0)->queue[0]) * queues];
1270         struct rte_flow_action_rss *rss_conf = (void *)rss_conf_data;
1271         struct rte_flow_action actions[] = {
1272                 {
1273                         .type = RTE_FLOW_ACTION_TYPE_RSS,
1274                         .conf = rss_conf,
1275                 },
1276                 {
1277                         .type = RTE_FLOW_ACTION_TYPE_END,
1278                 },
1279         };
1280         struct ether_addr *rule_mac = &eth_spec.dst;
1281         rte_be16_t *rule_vlan =
1282                 (priv->dev->data->dev_conf.rxmode.offloads &
1283                  DEV_RX_OFFLOAD_VLAN_FILTER) &&
1284                 !priv->dev->data->promiscuous ?
1285                 &vlan_spec.tci :
1286                 NULL;
1287         uint16_t vlan = 0;
1288         struct rte_flow *flow;
1289         unsigned int i;
1290         int err = 0;
1291
1292         /* Nothing to be done if there are no Rx queues. */
1293         if (!queues)
1294                 goto error;
1295         /* Prepare default RSS configuration. */
1296         *rss_conf = (struct rte_flow_action_rss){
1297                 .rss_conf = NULL, /* Rely on default fallback settings. */
1298                 .num = queues,
1299         };
1300         for (i = 0; i != queues; ++i)
1301                 rss_conf->queue[i] = i;
1302         /*
1303          * Set up VLAN item if filtering is enabled and at least one VLAN
1304          * filter is configured.
1305          */
1306         if (rule_vlan) {
1307                 vlan = mlx4_flow_internal_next_vlan(priv, 0);
1308                 if (vlan < 4096) {
1309                         pattern[2] = (struct rte_flow_item){
1310                                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
1311                                 .spec = &vlan_spec,
1312                                 .mask = &vlan_mask,
1313                         };
1314 next_vlan:
1315                         *rule_vlan = rte_cpu_to_be_16(vlan);
1316                 } else {
1317                         rule_vlan = NULL;
1318                 }
1319         }
1320         for (i = 0; i != RTE_DIM(priv->mac) + 1; ++i) {
1321                 const struct ether_addr *mac;
1322
1323                 /* Broadcasts are handled by an extra iteration. */
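                /* eth_mask.dst (ff:ff:ff:ff:ff:ff) doubles as that address. */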
1324                 if (i < RTE_DIM(priv->mac))
1325                         mac = &priv->mac[i];
1326                 else
1327                         mac = &eth_mask.dst;
1328                 if (is_zero_ether_addr(mac))
1329                         continue;
1330                 /* Check if MAC flow rule is already present. */
1331                 for (flow = LIST_FIRST(&priv->flows);
1332                      flow && flow->internal;
1333                      flow = LIST_NEXT(flow, next)) {
1334                         const struct ibv_flow_spec_eth *eth =
1335                                 (const void *)((uintptr_t)flow->ibv_attr +
1336                                                sizeof(*flow->ibv_attr));
1337                         unsigned int j;
1338
1339                         if (!flow->mac)
1340                                 continue;
1341                         assert(flow->ibv_attr->type == IBV_FLOW_ATTR_NORMAL);
1342                         assert(flow->ibv_attr->num_of_specs == 1);
1343                         assert(eth->type == IBV_FLOW_SPEC_ETH);
1344                         assert(flow->rss);
1345                         if (rule_vlan &&
1346                             (eth->val.vlan_tag != *rule_vlan ||
1347                              eth->mask.vlan_tag != RTE_BE16(0x0fff)))
1348                                 continue;
1349                         if (!rule_vlan && eth->mask.vlan_tag)
1350                                 continue;
1351                         for (j = 0; j != sizeof(mac->addr_bytes); ++j)
1352                                 if (eth->val.dst_mac[j] != mac->addr_bytes[j] ||
1353                                     eth->mask.dst_mac[j] != UINT8_C(0xff) ||
1354                                     eth->val.src_mac[j] != UINT8_C(0x00) ||
1355                                     eth->mask.src_mac[j] != UINT8_C(0x00))
1356                                         break;
1357                         if (j != sizeof(mac->addr_bytes))
1358                                 continue;
1359                         if (flow->rss->queues != queues ||
1360                             memcmp(flow->rss->queue_id, rss_conf->queue,
1361                                    queues * sizeof(flow->rss->queue_id[0])))
1362                                 continue;
1363                         break;
1364                 }
1365                 if (!flow || !flow->internal) {
1366                         /* Not found, create a new flow rule. */
1367                         memcpy(rule_mac, mac, sizeof(*mac));
1368                         flow = mlx4_flow_create(priv->dev, &attr, pattern,
1369                                                 actions, error);
1370                         if (!flow) {
1371                                 err = -rte_errno;
1372                                 goto error;
1373                         }
1374                 }
1375                 flow->select = 1;
1376                 flow->mac = 1;
1377         }
1378         if (rule_vlan) {
1379                 vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1);
1380                 if (vlan < 4096)
1381                         goto next_vlan;
1382         }
1383         /* Take care of promiscuous and all multicast flow rules. */
1384         if (priv->dev->data->promiscuous || priv->dev->data->all_multicast) {
1385                 for (flow = LIST_FIRST(&priv->flows);
1386                      flow && flow->internal;
1387                      flow = LIST_NEXT(flow, next)) {
1388                         if (priv->dev->data->promiscuous) {
1389                                 if (flow->promisc)
1390                                         break;
1391                         } else {
1392                                 assert(priv->dev->data->all_multicast);
1393                                 if (flow->allmulti)
1394                                         break;
1395                         }
1396                 }
1397                 if (flow && flow->internal) {
1398                         assert(flow->rss);
1399                         if (flow->rss->queues != queues ||
1400                             memcmp(flow->rss->queue_id, rss_conf->queue,
1401                                    queues * sizeof(flow->rss->queue_id[0])))
1402                                 flow = NULL;
1403                 }
1404                 if (!flow || !flow->internal) {
1405                         /* Not found, create a new flow rule. */
1406                         if (priv->dev->data->promiscuous) {
1407                                 pattern[1].spec = NULL;
1408                                 pattern[1].mask = NULL;
1409                         } else {
1410                                 assert(priv->dev->data->all_multicast);
1411                                 pattern[1].spec = &eth_allmulti;
1412                                 pattern[1].mask = &eth_allmulti;
1413                         }
1414                         pattern[2] = pattern[3];
1415                         flow = mlx4_flow_create(priv->dev, &attr, pattern,
1416                                                 actions, error);
1417                         if (!flow) {
1418                                 err = -rte_errno;
1419                                 goto error;
1420                         }
1421                 }
1422                 assert(flow->promisc || flow->allmulti);
1423                 flow->select = 1;
1424         }
1425 error:
1426         /* Clear selection and clean up stale internal flow rules. */
1427         flow = LIST_FIRST(&priv->flows);
1428         while (flow && flow->internal) {
1429                 struct rte_flow *next = LIST_NEXT(flow, next);
1430
1431                 if (!flow->select)
1432                         claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
1433                 else
1434                         flow->select = 0;
1435                 flow = next;
1436         }
1437         return err;
1438 }
1439
1440 /**
1441  * Synchronize flow rules.
1442  *
1443  * This function synchronizes flow rules with the state of the device by
1444  * taking into account isolated mode and whether target queues are
1445  * configured.
1446  *
1447  * @param priv
1448  *   Pointer to private structure.
1449  * @param[out] error
1450  *   Perform verbose error reporting if not NULL.
1451  *
1452  * @return
1453  *   0 on success, a negative errno value otherwise and rte_errno is set.
1454  */
1455 int
1456 mlx4_flow_sync(struct priv *priv, struct rte_flow_error *error)
1457 {
1458         struct rte_flow *flow;
1459         int ret;
1460
1461         /* Internal flow rules are guaranteed to come first in the list. */
1462         if (priv->isolated) {
1463                 /*
1464                  * Get rid of them in isolated mode, stop at the first
1465                  * non-internal rule found.
1466                  */
1467                 for (flow = LIST_FIRST(&priv->flows);
1468                      flow && flow->internal;
1469                      flow = LIST_FIRST(&priv->flows))
1470                         claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
1471         } else {
1472                 /* Refresh internal rules. */
1473                 ret = mlx4_flow_internal(priv, error);
1474                 if (ret)
1475                         return ret;
1476         }
1477         /* Toggle the remaining flow rules. */
1478         LIST_FOREACH(flow, &priv->flows, next) {
1479                 ret = mlx4_flow_toggle(priv, flow, priv->started, error);
1480                 if (ret)
1481                         return ret;
1482         }
1483         if (!priv->started)
1484                 assert(!priv->drop);
1485         return 0;
1486 }
1487
1488 /**
1489  * Clean up all flow rules.
1490  *
1491  * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
1492  * rules regardless of whether they are internal or user-configured.
1493  *
1494  * @param priv
1495  *   Pointer to private structure.
1496  */
1497 void
1498 mlx4_flow_clean(struct priv *priv)
1499 {
1500         struct rte_flow *flow;
1501
1502         while ((flow = LIST_FIRST(&priv->flows)))
1503                 mlx4_flow_destroy(priv->dev, flow, NULL);
1504         assert(LIST_EMPTY(&priv->rss));
1505 }
1506
1507 static const struct rte_flow_ops mlx4_flow_ops = {
1508         .validate = mlx4_flow_validate,
1509         .create = mlx4_flow_create,
1510         .destroy = mlx4_flow_destroy,
1511         .flush = mlx4_flow_flush,
1512         .isolate = mlx4_flow_isolate,
1513 };
1514
1515 /**
1516  * Manage filter operations.
1517  *
1518  * @param dev
1519  *   Pointer to Ethernet device structure.
1520  * @param filter_type
1521  *   Filter type.
1522  * @param filter_op
1523  *   Operation to perform.
1524  * @param arg
1525  *   Pointer to operation-specific structure.
1526  *
1527  * @return
1528  *   0 on success, negative errno value otherwise and rte_errno is set.
1529  */
1530 int
1531 mlx4_filter_ctrl(struct rte_eth_dev *dev,
1532                  enum rte_filter_type filter_type,
1533                  enum rte_filter_op filter_op,
1534                  void *arg)
1535 {
1536         switch (filter_type) {
1537         case RTE_ETH_FILTER_GENERIC:
1538                 if (filter_op != RTE_ETH_FILTER_GET)
1539                         break;
1540                 *(const void **)arg = &mlx4_flow_ops;
1541                 return 0;
1542         default:
1543                 ERROR("%p: filter type (%d) not supported",
1544                       (void *)dev, filter_type);
1545                 break;
1546         }
1547         rte_errno = ENOTSUP;
1548         return -rte_errno;
1549 }