New upstream version 18.08
[deb_dpdk.git] / drivers / net / mlx5 / mlx5_nl_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
17 #include <stdalign.h>
18 #include <stdbool.h>
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <sys/socket.h>
23
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
27 #include <rte_flow.h>
28
29 #include "mlx5.h"
30 #include "mlx5_autoconf.h"
31
#ifdef HAVE_TC_ACT_VLAN

#include <linux/tc_act/tc_vlan.h>

#else /* HAVE_TC_ACT_VLAN */

/*
 * Fallback definitions for kernels whose headers do not provide
 * linux/tc_act/tc_vlan.h. These values mirror the kernel netlink UAPI
 * and must not be modified.
 */
#define TCA_VLAN_ACT_POP 1
#define TCA_VLAN_ACT_PUSH 2
#define TCA_VLAN_ACT_MODIFY 3
#define TCA_VLAN_PARMS 2
#define TCA_VLAN_PUSH_VLAN_ID 3
#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
#define TCA_VLAN_PAD 5
#define TCA_VLAN_PUSH_VLAN_PRIORITY 6

/* Minimal stand-in for struct tc_vlan from linux/tc_act/tc_vlan.h. */
struct tc_vlan {
	tc_gen; /* Common TC action header fields (tc_gen macro). */
	int v_action; /* One of the TCA_VLAN_ACT_* values above. */
};

#endif /* HAVE_TC_ACT_VLAN */
53
/*
 * Fallback definitions for netlink/TC symbols missing from older kernel
 * headers. Each numeric value is part of the kernel UAPI and must match
 * it exactly; the HAVE_* guards come from mlx5_autoconf.h.
 */

/* Normally found in linux/netlink.h. */
#ifndef NETLINK_CAP_ACK
#define NETLINK_CAP_ACK 10
#endif

/* Normally found in linux/pkt_sched.h. */
#ifndef TC_H_MIN_INGRESS
#define TC_H_MIN_INGRESS 0xfff2u
#endif

/* Normally found in linux/pkt_cls.h. */
#ifndef TCA_CLS_FLAGS_SKIP_SW
#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
#endif
#ifndef HAVE_TCA_FLOWER_ACT
#define TCA_FLOWER_ACT 3
#endif
#ifndef HAVE_TCA_FLOWER_FLAGS
#define TCA_FLOWER_FLAGS 22
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
#define TCA_FLOWER_KEY_ETH_TYPE 8
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
#define TCA_FLOWER_KEY_ETH_DST 4
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
#define TCA_FLOWER_KEY_ETH_DST_MASK 5
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
#define TCA_FLOWER_KEY_ETH_SRC 6
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
#define TCA_FLOWER_KEY_IP_PROTO 9
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
#define TCA_FLOWER_KEY_IPV4_SRC 10
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
#define TCA_FLOWER_KEY_IPV4_DST 12
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
#define TCA_FLOWER_KEY_IPV6_SRC 14
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
#define TCA_FLOWER_KEY_IPV6_DST 16
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
#define TCA_FLOWER_KEY_TCP_SRC 18
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
#define TCA_FLOWER_KEY_TCP_DST 19
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
#define TCA_FLOWER_KEY_TCP_DST_MASK 36
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
#define TCA_FLOWER_KEY_UDP_SRC 20
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
#define TCA_FLOWER_KEY_UDP_DST 21
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
#define TCA_FLOWER_KEY_UDP_DST_MASK 38
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
#define TCA_FLOWER_KEY_VLAN_ID 23
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
#define TCA_FLOWER_KEY_VLAN_PRIO 24
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
#endif
149
/**
 * Parser state definitions for mlx5_nl_flow_trans[].
 *
 * Enumerator order is significant: values index the transition table
 * below, so new states must be appended carefully.
 */
enum mlx5_nl_flow_trans {
	INVALID, /* List terminator; reached when no other state matched. */
	BACK, /* Resume scanning from the saved "back" state list. */
	ATTR, /* Flow rule attributes. */
	PATTERN, /* Start of the pattern item list. */
	/* Pattern item states. */
	ITEM_VOID,
	ITEM_PORT_ID,
	ITEM_ETH,
	ITEM_VLAN,
	ITEM_IPV4,
	ITEM_IPV6,
	ITEM_TCP,
	ITEM_UDP,
	ACTIONS, /* Start of the action list. */
	/* Action states. */
	ACTION_VOID,
	ACTION_PORT_ID,
	ACTION_DROP,
	ACTION_OF_POP_VLAN,
	ACTION_OF_PUSH_VLAN,
	ACTION_OF_SET_VLAN_VID,
	ACTION_OF_SET_VLAN_PCP,
	END, /* Final state, reachable only after a fate action. */
};
174
/**
 * Expand to a compound literal array of parser states terminated by
 * INVALID, suitable as an entry of mlx5_nl_flow_trans[].
 */
#define TRANS(...) (const enum mlx5_nl_flow_trans []){ __VA_ARGS__, INVALID, }

/* States that may follow any pattern item. */
#define PATTERN_COMMON \
	ITEM_VOID, ITEM_PORT_ID, ACTIONS
/* Non-fate actions that may appear anywhere in the action list. */
#define ACTIONS_COMMON \
	ACTION_VOID, ACTION_OF_POP_VLAN, ACTION_OF_PUSH_VLAN, \
	ACTION_OF_SET_VLAN_VID, ACTION_OF_SET_VLAN_PCP
/* Actions that decide the fate of matched packets. */
#define ACTIONS_FATE \
	ACTION_PORT_ID, ACTION_DROP
184
/**
 * Parser state transitions used by mlx5_nl_flow_transpose().
 *
 * Each entry lists the states allowed to follow the current one; they
 * are tried in order until one matches the current item/action, and the
 * INVALID terminator appended by TRANS() reports an error otherwise.
 */
static const enum mlx5_nl_flow_trans *const mlx5_nl_flow_trans[] = {
	[INVALID] = NULL,
	[BACK] = NULL,
	[ATTR] = TRANS(PATTERN),
	[PATTERN] = TRANS(ITEM_ETH, PATTERN_COMMON),
	[ITEM_VOID] = TRANS(BACK),
	[ITEM_PORT_ID] = TRANS(BACK),
	[ITEM_ETH] = TRANS(ITEM_IPV4, ITEM_IPV6, ITEM_VLAN, PATTERN_COMMON),
	[ITEM_VLAN] = TRANS(ITEM_IPV4, ITEM_IPV6, PATTERN_COMMON),
	[ITEM_IPV4] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
	[ITEM_IPV6] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
	[ITEM_TCP] = TRANS(PATTERN_COMMON),
	[ITEM_UDP] = TRANS(PATTERN_COMMON),
	[ACTIONS] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
	[ACTION_VOID] = TRANS(BACK),
	/* Fate actions may only be followed by voids, then END. */
	[ACTION_PORT_ID] = TRANS(ACTION_VOID, END),
	[ACTION_DROP] = TRANS(ACTION_VOID, END),
	[ACTION_OF_POP_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
	[ACTION_OF_PUSH_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
	[ACTION_OF_SET_VLAN_VID] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
	[ACTION_OF_SET_VLAN_PCP] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
	[END] = NULL,
};
209
/**
 * Empty masks for known item types.
 *
 * Zero-initialized by virtue of static storage; returned by
 * mlx5_nl_flow_item_mask() when an item carries no specification, since
 * an all-zero mask disables matching on every field.
 */
static const union {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} mlx5_nl_flow_mask_empty;
220
/**
 * Supported masks for known item types.
 *
 * Only fields set here can be matched; mlx5_nl_flow_item_mask()
 * rejects any user mask with bits outside these templates.
 */
static const struct {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} mlx5_nl_flow_mask_supported = {
	.port_id = {
		.id = 0xffffffff,
	},
	.eth = {
		/* Ethertype and both MAC addresses, full match only. */
		.type = RTE_BE16(0xffff),
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	},
	.vlan = {
		/* PCP and VID only, no DEI. */
		.tci = RTE_BE16(0xefff),
		.inner_type = RTE_BE16(0xffff),
	},
	.ipv4.hdr = {
		.next_proto_id = 0xff,
		.src_addr = RTE_BE32(0xffffffff),
		.dst_addr = RTE_BE32(0xffffffff),
	},
	.ipv6.hdr = {
		.proto = 0xff,
		.src_addr =
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
		.dst_addr =
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
	},
	.tcp.hdr = {
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
	},
	.udp.hdr = {
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
	},
};
267
268 /**
269  * Retrieve mask for pattern item.
270  *
271  * This function does basic sanity checks on a pattern item in order to
272  * return the most appropriate mask for it.
273  *
274  * @param[in] item
275  *   Item specification.
276  * @param[in] mask_default
277  *   Default mask for pattern item as specified by the flow API.
278  * @param[in] mask_supported
279  *   Mask fields supported by the implementation.
280  * @param[in] mask_empty
281  *   Empty mask to return when there is no specification.
282  * @param[out] error
283  *   Perform verbose error reporting if not NULL.
284  *
285  * @return
286  *   Either @p item->mask or one of the mask parameters on success, NULL
287  *   otherwise and rte_errno is set.
288  */
289 static const void *
290 mlx5_nl_flow_item_mask(const struct rte_flow_item *item,
291                        const void *mask_default,
292                        const void *mask_supported,
293                        const void *mask_empty,
294                        size_t mask_size,
295                        struct rte_flow_error *error)
296 {
297         const uint8_t *mask;
298         size_t i;
299
300         /* item->last and item->mask cannot exist without item->spec. */
301         if (!item->spec && (item->mask || item->last)) {
302                 rte_flow_error_set
303                         (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
304                          "\"mask\" or \"last\" field provided without a"
305                          " corresponding \"spec\"");
306                 return NULL;
307         }
308         /* No spec, no mask, no problem. */
309         if (!item->spec)
310                 return mask_empty;
311         mask = item->mask ? item->mask : mask_default;
312         assert(mask);
313         /*
314          * Single-pass check to make sure that:
315          * - Mask is supported, no bits are set outside mask_supported.
316          * - Both item->spec and item->last are included in mask.
317          */
318         for (i = 0; i != mask_size; ++i) {
319                 if (!mask[i])
320                         continue;
321                 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
322                     ((const uint8_t *)mask_supported)[i]) {
323                         rte_flow_error_set
324                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
325                                  mask, "unsupported field found in \"mask\"");
326                         return NULL;
327                 }
328                 if (item->last &&
329                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
330                     (((const uint8_t *)item->last)[i] & mask[i])) {
331                         rte_flow_error_set
332                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_LAST,
333                                  item->last,
334                                  "range between \"spec\" and \"last\" not"
335                                  " comprised in \"mask\"");
336                         return NULL;
337                 }
338         }
339         return mask;
340 }
341
342 /**
343  * Transpose flow rule description to rtnetlink message.
344  *
345  * This function transposes a flow rule description to a traffic control
346  * (TC) filter creation message ready to be sent over Netlink.
347  *
348  * Target interface is specified as the first entry of the @p ptoi table.
349  * Subsequent entries enable this function to resolve other DPDK port IDs
350  * found in the flow rule.
351  *
352  * @param[out] buf
353  *   Output message buffer. May be NULL when @p size is 0.
354  * @param size
355  *   Size of @p buf. Message may be truncated if not large enough.
356  * @param[in] ptoi
357  *   DPDK port ID to network interface index translation table. This table
358  *   is terminated by an entry with a zero ifindex value.
359  * @param[in] attr
360  *   Flow rule attributes.
361  * @param[in] pattern
362  *   Pattern specification.
363  * @param[in] actions
364  *   Associated actions.
365  * @param[out] error
366  *   Perform verbose error reporting if not NULL.
367  *
368  * @return
369  *   A positive value representing the exact size of the message in bytes
370  *   regardless of the @p size parameter on success, a negative errno value
371  *   otherwise and rte_errno is set.
372  */
373 int
374 mlx5_nl_flow_transpose(void *buf,
375                        size_t size,
376                        const struct mlx5_nl_flow_ptoi *ptoi,
377                        const struct rte_flow_attr *attr,
378                        const struct rte_flow_item *pattern,
379                        const struct rte_flow_action *actions,
380                        struct rte_flow_error *error)
381 {
382         alignas(struct nlmsghdr)
383         uint8_t buf_tmp[mnl_nlmsg_size(sizeof(struct tcmsg) + 1024)];
384         const struct rte_flow_item *item;
385         const struct rte_flow_action *action;
386         unsigned int n;
387         uint32_t act_index_cur;
388         bool in_port_id_set;
389         bool eth_type_set;
390         bool vlan_present;
391         bool vlan_eth_type_set;
392         bool ip_proto_set;
393         struct nlattr *na_flower;
394         struct nlattr *na_flower_act;
395         struct nlattr *na_vlan_id;
396         struct nlattr *na_vlan_priority;
397         const enum mlx5_nl_flow_trans *trans;
398         const enum mlx5_nl_flow_trans *back;
399
400         if (!size)
401                 goto error_nobufs;
402 init:
403         item = pattern;
404         action = actions;
405         n = 0;
406         act_index_cur = 0;
407         in_port_id_set = false;
408         eth_type_set = false;
409         vlan_present = false;
410         vlan_eth_type_set = false;
411         ip_proto_set = false;
412         na_flower = NULL;
413         na_flower_act = NULL;
414         na_vlan_id = NULL;
415         na_vlan_priority = NULL;
416         trans = TRANS(ATTR);
417         back = trans;
418 trans:
419         switch (trans[n++]) {
420                 union {
421                         const struct rte_flow_item_port_id *port_id;
422                         const struct rte_flow_item_eth *eth;
423                         const struct rte_flow_item_vlan *vlan;
424                         const struct rte_flow_item_ipv4 *ipv4;
425                         const struct rte_flow_item_ipv6 *ipv6;
426                         const struct rte_flow_item_tcp *tcp;
427                         const struct rte_flow_item_udp *udp;
428                 } spec, mask;
429                 union {
430                         const struct rte_flow_action_port_id *port_id;
431                         const struct rte_flow_action_of_push_vlan *of_push_vlan;
432                         const struct rte_flow_action_of_set_vlan_vid *
433                                 of_set_vlan_vid;
434                         const struct rte_flow_action_of_set_vlan_pcp *
435                                 of_set_vlan_pcp;
436                 } conf;
437                 struct nlmsghdr *nlh;
438                 struct tcmsg *tcm;
439                 struct nlattr *act_index;
440                 struct nlattr *act;
441                 unsigned int i;
442
443         case INVALID:
444                 if (item->type)
445                         return rte_flow_error_set
446                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
447                                  item, "unsupported pattern item combination");
448                 else if (action->type)
449                         return rte_flow_error_set
450                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
451                                  action, "unsupported action combination");
452                 return rte_flow_error_set
453                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
454                          "flow rule lacks some kind of fate action");
455         case BACK:
456                 trans = back;
457                 n = 0;
458                 goto trans;
459         case ATTR:
460                 /*
461                  * Supported attributes: no groups, some priorities and
462                  * ingress only. Don't care about transfer as it is the
463                  * caller's problem.
464                  */
465                 if (attr->group)
466                         return rte_flow_error_set
467                                 (error, ENOTSUP,
468                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
469                                  attr, "groups are not supported");
470                 if (attr->priority > 0xfffe)
471                         return rte_flow_error_set
472                                 (error, ENOTSUP,
473                                  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
474                                  attr, "lowest priority level is 0xfffe");
475                 if (!attr->ingress)
476                         return rte_flow_error_set
477                                 (error, ENOTSUP,
478                                  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
479                                  attr, "only ingress is supported");
480                 if (attr->egress)
481                         return rte_flow_error_set
482                                 (error, ENOTSUP,
483                                  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
484                                  attr, "egress is not supported");
485                 if (size < mnl_nlmsg_size(sizeof(*tcm)))
486                         goto error_nobufs;
487                 nlh = mnl_nlmsg_put_header(buf);
488                 nlh->nlmsg_type = 0;
489                 nlh->nlmsg_flags = 0;
490                 nlh->nlmsg_seq = 0;
491                 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
492                 tcm->tcm_family = AF_UNSPEC;
493                 tcm->tcm_ifindex = ptoi[0].ifindex;
494                 /*
495                  * Let kernel pick a handle by default. A predictable handle
496                  * can be set by the caller on the resulting buffer through
497                  * mlx5_nl_flow_brand().
498                  */
499                 tcm->tcm_handle = 0;
500                 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
501                 /*
502                  * Priority cannot be zero to prevent the kernel from
503                  * picking one automatically.
504                  */
505                 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
506                                           RTE_BE16(ETH_P_ALL));
507                 break;
508         case PATTERN:
509                 if (!mnl_attr_put_strz_check(buf, size, TCA_KIND, "flower"))
510                         goto error_nobufs;
511                 na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
512                 if (!na_flower)
513                         goto error_nobufs;
514                 if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
515                                             TCA_CLS_FLAGS_SKIP_SW))
516                         goto error_nobufs;
517                 break;
518         case ITEM_VOID:
519                 if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
520                         goto trans;
521                 ++item;
522                 break;
523         case ITEM_PORT_ID:
524                 if (item->type != RTE_FLOW_ITEM_TYPE_PORT_ID)
525                         goto trans;
526                 mask.port_id = mlx5_nl_flow_item_mask
527                         (item, &rte_flow_item_port_id_mask,
528                          &mlx5_nl_flow_mask_supported.port_id,
529                          &mlx5_nl_flow_mask_empty.port_id,
530                          sizeof(mlx5_nl_flow_mask_supported.port_id), error);
531                 if (!mask.port_id)
532                         return -rte_errno;
533                 if (mask.port_id == &mlx5_nl_flow_mask_empty.port_id) {
534                         in_port_id_set = 1;
535                         ++item;
536                         break;
537                 }
538                 spec.port_id = item->spec;
539                 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
540                         return rte_flow_error_set
541                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
542                                  mask.port_id,
543                                  "no support for partial mask on"
544                                  " \"id\" field");
545                 if (!mask.port_id->id)
546                         i = 0;
547                 else
548                         for (i = 0; ptoi[i].ifindex; ++i)
549                                 if (ptoi[i].port_id == spec.port_id->id)
550                                         break;
551                 if (!ptoi[i].ifindex)
552                         return rte_flow_error_set
553                                 (error, ENODEV, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
554                                  spec.port_id,
555                                  "missing data to convert port ID to ifindex");
556                 tcm = mnl_nlmsg_get_payload(buf);
557                 if (in_port_id_set &&
558                     ptoi[i].ifindex != (unsigned int)tcm->tcm_ifindex)
559                         return rte_flow_error_set
560                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
561                                  spec.port_id,
562                                  "cannot match traffic for several port IDs"
563                                  " through a single flow rule");
564                 tcm->tcm_ifindex = ptoi[i].ifindex;
565                 in_port_id_set = 1;
566                 ++item;
567                 break;
568         case ITEM_ETH:
569                 if (item->type != RTE_FLOW_ITEM_TYPE_ETH)
570                         goto trans;
571                 mask.eth = mlx5_nl_flow_item_mask
572                         (item, &rte_flow_item_eth_mask,
573                          &mlx5_nl_flow_mask_supported.eth,
574                          &mlx5_nl_flow_mask_empty.eth,
575                          sizeof(mlx5_nl_flow_mask_supported.eth), error);
576                 if (!mask.eth)
577                         return -rte_errno;
578                 if (mask.eth == &mlx5_nl_flow_mask_empty.eth) {
579                         ++item;
580                         break;
581                 }
582                 spec.eth = item->spec;
583                 if (mask.eth->type && mask.eth->type != RTE_BE16(0xffff))
584                         return rte_flow_error_set
585                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
586                                  mask.eth,
587                                  "no support for partial mask on"
588                                  " \"type\" field");
589                 if (mask.eth->type) {
590                         if (!mnl_attr_put_u16_check(buf, size,
591                                                     TCA_FLOWER_KEY_ETH_TYPE,
592                                                     spec.eth->type))
593                                 goto error_nobufs;
594                         eth_type_set = 1;
595                 }
596                 if ((!is_zero_ether_addr(&mask.eth->dst) &&
597                      (!mnl_attr_put_check(buf, size,
598                                           TCA_FLOWER_KEY_ETH_DST,
599                                           ETHER_ADDR_LEN,
600                                           spec.eth->dst.addr_bytes) ||
601                       !mnl_attr_put_check(buf, size,
602                                           TCA_FLOWER_KEY_ETH_DST_MASK,
603                                           ETHER_ADDR_LEN,
604                                           mask.eth->dst.addr_bytes))) ||
605                     (!is_zero_ether_addr(&mask.eth->src) &&
606                      (!mnl_attr_put_check(buf, size,
607                                           TCA_FLOWER_KEY_ETH_SRC,
608                                           ETHER_ADDR_LEN,
609                                           spec.eth->src.addr_bytes) ||
610                       !mnl_attr_put_check(buf, size,
611                                           TCA_FLOWER_KEY_ETH_SRC_MASK,
612                                           ETHER_ADDR_LEN,
613                                           mask.eth->src.addr_bytes))))
614                         goto error_nobufs;
615                 ++item;
616                 break;
617         case ITEM_VLAN:
618                 if (item->type != RTE_FLOW_ITEM_TYPE_VLAN)
619                         goto trans;
620                 mask.vlan = mlx5_nl_flow_item_mask
621                         (item, &rte_flow_item_vlan_mask,
622                          &mlx5_nl_flow_mask_supported.vlan,
623                          &mlx5_nl_flow_mask_empty.vlan,
624                          sizeof(mlx5_nl_flow_mask_supported.vlan), error);
625                 if (!mask.vlan)
626                         return -rte_errno;
627                 if (!eth_type_set &&
628                     !mnl_attr_put_u16_check(buf, size,
629                                             TCA_FLOWER_KEY_ETH_TYPE,
630                                             RTE_BE16(ETH_P_8021Q)))
631                         goto error_nobufs;
632                 eth_type_set = 1;
633                 vlan_present = 1;
634                 if (mask.vlan == &mlx5_nl_flow_mask_empty.vlan) {
635                         ++item;
636                         break;
637                 }
638                 spec.vlan = item->spec;
639                 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
640                      (mask.vlan->tci & RTE_BE16(0xe000)) != RTE_BE16(0xe000)) ||
641                     (mask.vlan->tci & RTE_BE16(0x0fff) &&
642                      (mask.vlan->tci & RTE_BE16(0x0fff)) != RTE_BE16(0x0fff)) ||
643                     (mask.vlan->inner_type &&
644                      mask.vlan->inner_type != RTE_BE16(0xffff)))
645                         return rte_flow_error_set
646                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
647                                  mask.vlan,
648                                  "no support for partial masks on"
649                                  " \"tci\" (PCP and VID parts) and"
650                                  " \"inner_type\" fields");
651                 if (mask.vlan->inner_type) {
652                         if (!mnl_attr_put_u16_check
653                             (buf, size, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
654                              spec.vlan->inner_type))
655                                 goto error_nobufs;
656                         vlan_eth_type_set = 1;
657                 }
658                 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
659                      !mnl_attr_put_u8_check
660                      (buf, size, TCA_FLOWER_KEY_VLAN_PRIO,
661                       (rte_be_to_cpu_16(spec.vlan->tci) >> 13) & 0x7)) ||
662                     (mask.vlan->tci & RTE_BE16(0x0fff) &&
663                      !mnl_attr_put_u16_check
664                      (buf, size, TCA_FLOWER_KEY_VLAN_ID,
665                       rte_be_to_cpu_16(spec.vlan->tci & RTE_BE16(0x0fff)))))
666                         goto error_nobufs;
667                 ++item;
668                 break;
669         case ITEM_IPV4:
670                 if (item->type != RTE_FLOW_ITEM_TYPE_IPV4)
671                         goto trans;
672                 mask.ipv4 = mlx5_nl_flow_item_mask
673                         (item, &rte_flow_item_ipv4_mask,
674                          &mlx5_nl_flow_mask_supported.ipv4,
675                          &mlx5_nl_flow_mask_empty.ipv4,
676                          sizeof(mlx5_nl_flow_mask_supported.ipv4), error);
677                 if (!mask.ipv4)
678                         return -rte_errno;
679                 if ((!eth_type_set || !vlan_eth_type_set) &&
680                     !mnl_attr_put_u16_check(buf, size,
681                                             vlan_present ?
682                                             TCA_FLOWER_KEY_VLAN_ETH_TYPE :
683                                             TCA_FLOWER_KEY_ETH_TYPE,
684                                             RTE_BE16(ETH_P_IP)))
685                         goto error_nobufs;
686                 eth_type_set = 1;
687                 vlan_eth_type_set = 1;
688                 if (mask.ipv4 == &mlx5_nl_flow_mask_empty.ipv4) {
689                         ++item;
690                         break;
691                 }
692                 spec.ipv4 = item->spec;
693                 if (mask.ipv4->hdr.next_proto_id &&
694                     mask.ipv4->hdr.next_proto_id != 0xff)
695                         return rte_flow_error_set
696                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
697                                  mask.ipv4,
698                                  "no support for partial mask on"
699                                  " \"hdr.next_proto_id\" field");
700                 if (mask.ipv4->hdr.next_proto_id) {
701                         if (!mnl_attr_put_u8_check
702                             (buf, size, TCA_FLOWER_KEY_IP_PROTO,
703                              spec.ipv4->hdr.next_proto_id))
704                                 goto error_nobufs;
705                         ip_proto_set = 1;
706                 }
707                 if ((mask.ipv4->hdr.src_addr &&
708                      (!mnl_attr_put_u32_check(buf, size,
709                                               TCA_FLOWER_KEY_IPV4_SRC,
710                                               spec.ipv4->hdr.src_addr) ||
711                       !mnl_attr_put_u32_check(buf, size,
712                                               TCA_FLOWER_KEY_IPV4_SRC_MASK,
713                                               mask.ipv4->hdr.src_addr))) ||
714                     (mask.ipv4->hdr.dst_addr &&
715                      (!mnl_attr_put_u32_check(buf, size,
716                                               TCA_FLOWER_KEY_IPV4_DST,
717                                               spec.ipv4->hdr.dst_addr) ||
718                       !mnl_attr_put_u32_check(buf, size,
719                                               TCA_FLOWER_KEY_IPV4_DST_MASK,
720                                               mask.ipv4->hdr.dst_addr))))
721                         goto error_nobufs;
722                 ++item;
723                 break;
724         case ITEM_IPV6:
725                 if (item->type != RTE_FLOW_ITEM_TYPE_IPV6)
726                         goto trans;
727                 mask.ipv6 = mlx5_nl_flow_item_mask
728                         (item, &rte_flow_item_ipv6_mask,
729                          &mlx5_nl_flow_mask_supported.ipv6,
730                          &mlx5_nl_flow_mask_empty.ipv6,
731                          sizeof(mlx5_nl_flow_mask_supported.ipv6), error);
732                 if (!mask.ipv6)
733                         return -rte_errno;
734                 if ((!eth_type_set || !vlan_eth_type_set) &&
735                     !mnl_attr_put_u16_check(buf, size,
736                                             vlan_present ?
737                                             TCA_FLOWER_KEY_VLAN_ETH_TYPE :
738                                             TCA_FLOWER_KEY_ETH_TYPE,
739                                             RTE_BE16(ETH_P_IPV6)))
740                         goto error_nobufs;
741                 eth_type_set = 1;
742                 vlan_eth_type_set = 1;
743                 if (mask.ipv6 == &mlx5_nl_flow_mask_empty.ipv6) {
744                         ++item;
745                         break;
746                 }
747                 spec.ipv6 = item->spec;
748                 if (mask.ipv6->hdr.proto && mask.ipv6->hdr.proto != 0xff)
749                         return rte_flow_error_set
750                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
751                                  mask.ipv6,
752                                  "no support for partial mask on"
753                                  " \"hdr.proto\" field");
754                 if (mask.ipv6->hdr.proto) {
755                         if (!mnl_attr_put_u8_check
756                             (buf, size, TCA_FLOWER_KEY_IP_PROTO,
757                              spec.ipv6->hdr.proto))
758                                 goto error_nobufs;
759                         ip_proto_set = 1;
760                 }
761                 if ((!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr) &&
762                      (!mnl_attr_put_check(buf, size,
763                                           TCA_FLOWER_KEY_IPV6_SRC,
764                                           sizeof(spec.ipv6->hdr.src_addr),
765                                           spec.ipv6->hdr.src_addr) ||
766                       !mnl_attr_put_check(buf, size,
767                                           TCA_FLOWER_KEY_IPV6_SRC_MASK,
768                                           sizeof(mask.ipv6->hdr.src_addr),
769                                           mask.ipv6->hdr.src_addr))) ||
770                     (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr) &&
771                      (!mnl_attr_put_check(buf, size,
772                                           TCA_FLOWER_KEY_IPV6_DST,
773                                           sizeof(spec.ipv6->hdr.dst_addr),
774                                           spec.ipv6->hdr.dst_addr) ||
775                       !mnl_attr_put_check(buf, size,
776                                           TCA_FLOWER_KEY_IPV6_DST_MASK,
777                                           sizeof(mask.ipv6->hdr.dst_addr),
778                                           mask.ipv6->hdr.dst_addr))))
779                         goto error_nobufs;
780                 ++item;
781                 break;
782         case ITEM_TCP:
783                 if (item->type != RTE_FLOW_ITEM_TYPE_TCP)
784                         goto trans;
785                 mask.tcp = mlx5_nl_flow_item_mask
786                         (item, &rte_flow_item_tcp_mask,
787                          &mlx5_nl_flow_mask_supported.tcp,
788                          &mlx5_nl_flow_mask_empty.tcp,
789                          sizeof(mlx5_nl_flow_mask_supported.tcp), error);
790                 if (!mask.tcp)
791                         return -rte_errno;
792                 if (!ip_proto_set &&
793                     !mnl_attr_put_u8_check(buf, size,
794                                            TCA_FLOWER_KEY_IP_PROTO,
795                                            IPPROTO_TCP))
796                         goto error_nobufs;
797                 if (mask.tcp == &mlx5_nl_flow_mask_empty.tcp) {
798                         ++item;
799                         break;
800                 }
801                 spec.tcp = item->spec;
802                 if ((mask.tcp->hdr.src_port &&
803                      mask.tcp->hdr.src_port != RTE_BE16(0xffff)) ||
804                     (mask.tcp->hdr.dst_port &&
805                      mask.tcp->hdr.dst_port != RTE_BE16(0xffff)))
806                         return rte_flow_error_set
807                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
808                                  mask.tcp,
809                                  "no support for partial masks on"
810                                  " \"hdr.src_port\" and \"hdr.dst_port\""
811                                  " fields");
812                 if ((mask.tcp->hdr.src_port &&
813                      (!mnl_attr_put_u16_check(buf, size,
814                                               TCA_FLOWER_KEY_TCP_SRC,
815                                               spec.tcp->hdr.src_port) ||
816                       !mnl_attr_put_u16_check(buf, size,
817                                               TCA_FLOWER_KEY_TCP_SRC_MASK,
818                                               mask.tcp->hdr.src_port))) ||
819                     (mask.tcp->hdr.dst_port &&
820                      (!mnl_attr_put_u16_check(buf, size,
821                                               TCA_FLOWER_KEY_TCP_DST,
822                                               spec.tcp->hdr.dst_port) ||
823                       !mnl_attr_put_u16_check(buf, size,
824                                               TCA_FLOWER_KEY_TCP_DST_MASK,
825                                               mask.tcp->hdr.dst_port))))
826                         goto error_nobufs;
827                 ++item;
828                 break;
829         case ITEM_UDP:
830                 if (item->type != RTE_FLOW_ITEM_TYPE_UDP)
831                         goto trans;
832                 mask.udp = mlx5_nl_flow_item_mask
833                         (item, &rte_flow_item_udp_mask,
834                          &mlx5_nl_flow_mask_supported.udp,
835                          &mlx5_nl_flow_mask_empty.udp,
836                          sizeof(mlx5_nl_flow_mask_supported.udp), error);
837                 if (!mask.udp)
838                         return -rte_errno;
839                 if (!ip_proto_set &&
840                     !mnl_attr_put_u8_check(buf, size,
841                                            TCA_FLOWER_KEY_IP_PROTO,
842                                            IPPROTO_UDP))
843                         goto error_nobufs;
844                 if (mask.udp == &mlx5_nl_flow_mask_empty.udp) {
845                         ++item;
846                         break;
847                 }
848                 spec.udp = item->spec;
849                 if ((mask.udp->hdr.src_port &&
850                      mask.udp->hdr.src_port != RTE_BE16(0xffff)) ||
851                     (mask.udp->hdr.dst_port &&
852                      mask.udp->hdr.dst_port != RTE_BE16(0xffff)))
853                         return rte_flow_error_set
854                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
855                                  mask.udp,
856                                  "no support for partial masks on"
857                                  " \"hdr.src_port\" and \"hdr.dst_port\""
858                                  " fields");
859                 if ((mask.udp->hdr.src_port &&
860                      (!mnl_attr_put_u16_check(buf, size,
861                                               TCA_FLOWER_KEY_UDP_SRC,
862                                               spec.udp->hdr.src_port) ||
863                       !mnl_attr_put_u16_check(buf, size,
864                                               TCA_FLOWER_KEY_UDP_SRC_MASK,
865                                               mask.udp->hdr.src_port))) ||
866                     (mask.udp->hdr.dst_port &&
867                      (!mnl_attr_put_u16_check(buf, size,
868                                               TCA_FLOWER_KEY_UDP_DST,
869                                               spec.udp->hdr.dst_port) ||
870                       !mnl_attr_put_u16_check(buf, size,
871                                               TCA_FLOWER_KEY_UDP_DST_MASK,
872                                               mask.udp->hdr.dst_port))))
873                         goto error_nobufs;
874                 ++item;
875                 break;
876         case ACTIONS:
877                 if (item->type != RTE_FLOW_ITEM_TYPE_END)
878                         goto trans;
879                 assert(na_flower);
880                 assert(!na_flower_act);
881                 na_flower_act =
882                         mnl_attr_nest_start_check(buf, size, TCA_FLOWER_ACT);
883                 if (!na_flower_act)
884                         goto error_nobufs;
885                 act_index_cur = 1;
886                 break;
887         case ACTION_VOID:
888                 if (action->type != RTE_FLOW_ACTION_TYPE_VOID)
889                         goto trans;
890                 ++action;
891                 break;
892         case ACTION_PORT_ID:
893                 if (action->type != RTE_FLOW_ACTION_TYPE_PORT_ID)
894                         goto trans;
895                 conf.port_id = action->conf;
896                 if (conf.port_id->original)
897                         i = 0;
898                 else
899                         for (i = 0; ptoi[i].ifindex; ++i)
900                                 if (ptoi[i].port_id == conf.port_id->id)
901                                         break;
902                 if (!ptoi[i].ifindex)
903                         return rte_flow_error_set
904                                 (error, ENODEV, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
905                                  conf.port_id,
906                                  "missing data to convert port ID to ifindex");
907                 act_index =
908                         mnl_attr_nest_start_check(buf, size, act_index_cur++);
909                 if (!act_index ||
910                     !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "mirred"))
911                         goto error_nobufs;
912                 act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
913                 if (!act)
914                         goto error_nobufs;
915                 if (!mnl_attr_put_check(buf, size, TCA_MIRRED_PARMS,
916                                         sizeof(struct tc_mirred),
917                                         &(struct tc_mirred){
918                                                 .action = TC_ACT_STOLEN,
919                                                 .eaction = TCA_EGRESS_REDIR,
920                                                 .ifindex = ptoi[i].ifindex,
921                                         }))
922                         goto error_nobufs;
923                 mnl_attr_nest_end(buf, act);
924                 mnl_attr_nest_end(buf, act_index);
925                 ++action;
926                 break;
927         case ACTION_DROP:
928                 if (action->type != RTE_FLOW_ACTION_TYPE_DROP)
929                         goto trans;
930                 act_index =
931                         mnl_attr_nest_start_check(buf, size, act_index_cur++);
932                 if (!act_index ||
933                     !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "gact"))
934                         goto error_nobufs;
935                 act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
936                 if (!act)
937                         goto error_nobufs;
938                 if (!mnl_attr_put_check(buf, size, TCA_GACT_PARMS,
939                                         sizeof(struct tc_gact),
940                                         &(struct tc_gact){
941                                                 .action = TC_ACT_SHOT,
942                                         }))
943                         goto error_nobufs;
944                 mnl_attr_nest_end(buf, act);
945                 mnl_attr_nest_end(buf, act_index);
946                 ++action;
947                 break;
948         case ACTION_OF_POP_VLAN:
949                 if (action->type != RTE_FLOW_ACTION_TYPE_OF_POP_VLAN)
950                         goto trans;
951                 conf.of_push_vlan = NULL;
952                 i = TCA_VLAN_ACT_POP;
953                 goto action_of_vlan;
954         case ACTION_OF_PUSH_VLAN:
955                 if (action->type != RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN)
956                         goto trans;
957                 conf.of_push_vlan = action->conf;
958                 i = TCA_VLAN_ACT_PUSH;
959                 goto action_of_vlan;
960         case ACTION_OF_SET_VLAN_VID:
961                 if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID)
962                         goto trans;
963                 conf.of_set_vlan_vid = action->conf;
964                 if (na_vlan_id)
965                         goto override_na_vlan_id;
966                 i = TCA_VLAN_ACT_MODIFY;
967                 goto action_of_vlan;
968         case ACTION_OF_SET_VLAN_PCP:
969                 if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP)
970                         goto trans;
971                 conf.of_set_vlan_pcp = action->conf;
972                 if (na_vlan_priority)
973                         goto override_na_vlan_priority;
974                 i = TCA_VLAN_ACT_MODIFY;
975                 goto action_of_vlan;
976 action_of_vlan:
977                 act_index =
978                         mnl_attr_nest_start_check(buf, size, act_index_cur++);
979                 if (!act_index ||
980                     !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "vlan"))
981                         goto error_nobufs;
982                 act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
983                 if (!act)
984                         goto error_nobufs;
985                 if (!mnl_attr_put_check(buf, size, TCA_VLAN_PARMS,
986                                         sizeof(struct tc_vlan),
987                                         &(struct tc_vlan){
988                                                 .action = TC_ACT_PIPE,
989                                                 .v_action = i,
990                                         }))
991                         goto error_nobufs;
992                 if (i == TCA_VLAN_ACT_POP) {
993                         mnl_attr_nest_end(buf, act);
994                         mnl_attr_nest_end(buf, act_index);
995                         ++action;
996                         break;
997                 }
998                 if (i == TCA_VLAN_ACT_PUSH &&
999                     !mnl_attr_put_u16_check(buf, size,
1000                                             TCA_VLAN_PUSH_VLAN_PROTOCOL,
1001                                             conf.of_push_vlan->ethertype))
1002                         goto error_nobufs;
1003                 na_vlan_id = mnl_nlmsg_get_payload_tail(buf);
1004                 if (!mnl_attr_put_u16_check(buf, size, TCA_VLAN_PAD, 0))
1005                         goto error_nobufs;
1006                 na_vlan_priority = mnl_nlmsg_get_payload_tail(buf);
1007                 if (!mnl_attr_put_u8_check(buf, size, TCA_VLAN_PAD, 0))
1008                         goto error_nobufs;
1009                 mnl_attr_nest_end(buf, act);
1010                 mnl_attr_nest_end(buf, act_index);
1011                 if (action->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
1012 override_na_vlan_id:
1013                         na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
1014                         *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
1015                                 rte_be_to_cpu_16
1016                                 (conf.of_set_vlan_vid->vlan_vid);
1017                 } else if (action->type ==
1018                            RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
1019 override_na_vlan_priority:
1020                         na_vlan_priority->nla_type =
1021                                 TCA_VLAN_PUSH_VLAN_PRIORITY;
1022                         *(uint8_t *)mnl_attr_get_payload(na_vlan_priority) =
1023                                 conf.of_set_vlan_pcp->vlan_pcp;
1024                 }
1025                 ++action;
1026                 break;
1027         case END:
1028                 if (item->type != RTE_FLOW_ITEM_TYPE_END ||
1029                     action->type != RTE_FLOW_ACTION_TYPE_END)
1030                         goto trans;
1031                 if (na_flower_act)
1032                         mnl_attr_nest_end(buf, na_flower_act);
1033                 if (na_flower)
1034                         mnl_attr_nest_end(buf, na_flower);
1035                 nlh = buf;
1036                 return nlh->nlmsg_len;
1037         }
1038         back = trans;
1039         trans = mlx5_nl_flow_trans[trans[n - 1]];
1040         n = 0;
1041         goto trans;
1042 error_nobufs:
1043         if (buf != buf_tmp) {
1044                 buf = buf_tmp;
1045                 size = sizeof(buf_tmp);
1046                 goto init;
1047         }
1048         return rte_flow_error_set
1049                 (error, ENOBUFS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1050                  "generated TC message is too large");
1051 }
1052
1053 /**
1054  * Brand rtnetlink buffer with unique handle.
1055  *
1056  * This handle should be unique for a given network interface to avoid
1057  * collisions.
1058  *
1059  * @param buf
1060  *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
1061  * @param handle
1062  *   Unique 32-bit handle to use.
1063  */
1064 void
1065 mlx5_nl_flow_brand(void *buf, uint32_t handle)
1066 {
1067         struct tcmsg *tcm = mnl_nlmsg_get_payload(buf);
1068
1069         tcm->tcm_handle = handle;
1070 }
1071
/**
 * Send Netlink message with acknowledgment.
 *
 * The kernel echoes the request back inside the NLMSG_ERROR answer, hence
 * the answer buffer below is sized to hold an error header plus the payload
 * of the message being sent.
 *
 * @param nl
 *   Libmnl socket to use.
 * @param nlh
 *   Message to send. This function always raises the NLM_F_ACK flag before
 *   sending.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_nl_flow_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
{
	/* VLA: error ack (nlmsgerr) + original payload minus its header. */
	alignas(struct nlmsghdr)
	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
		    nlh->nlmsg_len - sizeof(*nlh)];
	/* Random sequence number so the reply can be matched by mnl_cb_run(). */
	uint32_t seq = random();
	int ret;

	nlh->nlmsg_flags |= NLM_F_ACK;
	nlh->nlmsg_seq = seq;
	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
	if (ret != -1)
		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
	if (ret != -1)
		/* Validates seq/portid and surfaces the kernel's error code. */
		ret = mnl_cb_run
			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
	if (!ret)
		return 0;
	/* libmnl reports failures through errno; capture it immediately. */
	rte_errno = errno;
	return -rte_errno;
}
1106
1107 /**
1108  * Create a Netlink flow rule.
1109  *
1110  * @param nl
1111  *   Libmnl socket to use.
1112  * @param buf
1113  *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
1114  * @param[out] error
1115  *   Perform verbose error reporting if not NULL.
1116  *
1117  * @return
1118  *   0 on success, a negative errno value otherwise and rte_errno is set.
1119  */
1120 int
1121 mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
1122                     struct rte_flow_error *error)
1123 {
1124         struct nlmsghdr *nlh = buf;
1125
1126         nlh->nlmsg_type = RTM_NEWTFILTER;
1127         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1128         if (!mlx5_nl_flow_nl_ack(nl, nlh))
1129                 return 0;
1130         return rte_flow_error_set
1131                 (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1132                  "netlink: failed to create TC flow rule");
1133 }
1134
1135 /**
1136  * Destroy a Netlink flow rule.
1137  *
1138  * @param nl
1139  *   Libmnl socket to use.
1140  * @param buf
1141  *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
1142  * @param[out] error
1143  *   Perform verbose error reporting if not NULL.
1144  *
1145  * @return
1146  *   0 on success, a negative errno value otherwise and rte_errno is set.
1147  */
1148 int
1149 mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
1150                      struct rte_flow_error *error)
1151 {
1152         struct nlmsghdr *nlh = buf;
1153
1154         nlh->nlmsg_type = RTM_DELTFILTER;
1155         nlh->nlmsg_flags = NLM_F_REQUEST;
1156         if (!mlx5_nl_flow_nl_ack(nl, nlh))
1157                 return 0;
1158         return rte_flow_error_set
1159                 (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1160                  "netlink: failed to destroy TC flow rule");
1161 }
1162
1163 /**
1164  * Initialize ingress qdisc of a given network interface.
1165  *
1166  * @param nl
1167  *   Libmnl socket of the @p NETLINK_ROUTE kind.
1168  * @param ifindex
1169  *   Index of network interface to initialize.
1170  * @param[out] error
1171  *   Perform verbose error reporting if not NULL.
1172  *
1173  * @return
1174  *   0 on success, a negative errno value otherwise and rte_errno is set.
1175  */
1176 int
1177 mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
1178                   struct rte_flow_error *error)
1179 {
1180         struct nlmsghdr *nlh;
1181         struct tcmsg *tcm;
1182         alignas(struct nlmsghdr)
1183         uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
1184
1185         /* Destroy existing ingress qdisc and everything attached to it. */
1186         nlh = mnl_nlmsg_put_header(buf);
1187         nlh->nlmsg_type = RTM_DELQDISC;
1188         nlh->nlmsg_flags = NLM_F_REQUEST;
1189         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1190         tcm->tcm_family = AF_UNSPEC;
1191         tcm->tcm_ifindex = ifindex;
1192         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
1193         tcm->tcm_parent = TC_H_INGRESS;
1194         /* Ignore errors when qdisc is already absent. */
1195         if (mlx5_nl_flow_nl_ack(nl, nlh) &&
1196             rte_errno != EINVAL && rte_errno != ENOENT)
1197                 return rte_flow_error_set
1198                         (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1199                          NULL, "netlink: failed to remove ingress qdisc");
1200         /* Create fresh ingress qdisc. */
1201         nlh = mnl_nlmsg_put_header(buf);
1202         nlh->nlmsg_type = RTM_NEWQDISC;
1203         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1204         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1205         tcm->tcm_family = AF_UNSPEC;
1206         tcm->tcm_ifindex = ifindex;
1207         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
1208         tcm->tcm_parent = TC_H_INGRESS;
1209         mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
1210         if (mlx5_nl_flow_nl_ack(nl, nlh))
1211                 return rte_flow_error_set
1212                         (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1213                          NULL, "netlink: failed to create ingress qdisc");
1214         return 0;
1215 }
1216
1217 /**
1218  * Create and configure a libmnl socket for Netlink flow rules.
1219  *
1220  * @return
1221  *   A valid libmnl socket object pointer on success, NULL otherwise and
1222  *   rte_errno is set.
1223  */
1224 struct mnl_socket *
1225 mlx5_nl_flow_socket_create(void)
1226 {
1227         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
1228
1229         if (nl) {
1230                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
1231                                       sizeof(int));
1232                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
1233                         return nl;
1234         }
1235         rte_errno = errno;
1236         if (nl)
1237                 mnl_socket_close(nl);
1238         return NULL;
1239 }
1240
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket previously returned by mlx5_nl_flow_socket_create().
 */
void
mlx5_nl_flow_socket_destroy(struct mnl_socket *nl)
{
	mnl_socket_close(nl);
}