[deb_dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <libmnl/libmnl.h>
9 #include <linux/gen_stats.h>
10 #include <linux/if_ether.h>
11 #include <linux/netlink.h>
12 #include <linux/pkt_cls.h>
13 #include <linux/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/tc_act/tc_gact.h>
16 #include <linux/tc_act/tc_mirred.h>
17 #include <netinet/in.h>
18 #include <stdalign.h>
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <sys/socket.h>
24
25 #include <rte_byteorder.h>
26 #include <rte_errno.h>
27 #include <rte_ether.h>
28 #include <rte_flow.h>
29 #include <rte_malloc.h>
30 #include <rte_common.h>
31
32 #include "mlx5.h"
33 #include "mlx5_flow.h"
34 #include "mlx5_autoconf.h"
35
36 #ifdef HAVE_TC_ACT_VLAN
37
38 #include <linux/tc_act/tc_vlan.h>
39
40 #else /* HAVE_TC_ACT_VLAN */
41
42 #define TCA_VLAN_ACT_POP 1
43 #define TCA_VLAN_ACT_PUSH 2
44 #define TCA_VLAN_ACT_MODIFY 3
45 #define TCA_VLAN_PARMS 2
46 #define TCA_VLAN_PUSH_VLAN_ID 3
47 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
48 #define TCA_VLAN_PAD 5
49 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
50
51 struct tc_vlan {
52         tc_gen;
53         int v_action;
54 };
55
56 #endif /* HAVE_TC_ACT_VLAN */
57
58 #ifdef HAVE_TC_ACT_PEDIT
59
60 #include <linux/tc_act/tc_pedit.h>
61
62 #else /* HAVE_TC_ACT_PEDIT */
63
64 enum {
65         TCA_PEDIT_UNSPEC,
66         TCA_PEDIT_TM,
67         TCA_PEDIT_PARMS,
68         TCA_PEDIT_PAD,
69         TCA_PEDIT_PARMS_EX,
70         TCA_PEDIT_KEYS_EX,
71         TCA_PEDIT_KEY_EX,
72         __TCA_PEDIT_MAX
73 };
74
75 enum {
76         TCA_PEDIT_KEY_EX_HTYPE = 1,
77         TCA_PEDIT_KEY_EX_CMD = 2,
78         __TCA_PEDIT_KEY_EX_MAX
79 };
80
81 enum pedit_header_type {
82         TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
83         TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
84         TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
85         TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
86         TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
87         TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
88         __PEDIT_HDR_TYPE_MAX,
89 };
90
91 enum pedit_cmd {
92         TCA_PEDIT_KEY_EX_CMD_SET = 0,
93         TCA_PEDIT_KEY_EX_CMD_ADD = 1,
94         __PEDIT_CMD_MAX,
95 };
96
97 struct tc_pedit_key {
98         __u32 mask; /* AND */
99         __u32 val; /* XOR */
100         __u32 off; /* offset */
101         __u32 at;
102         __u32 offmask;
103         __u32 shift;
104 };
105
106 __extension__
107 struct tc_pedit_sel {
108         tc_gen;
109         unsigned char nkeys;
110         unsigned char flags;
111         struct tc_pedit_key keys[0];
112 };
113
114 #endif /* HAVE_TC_ACT_PEDIT */
115
116 /* Normally found in linux/netlink.h. */
117 #ifndef NETLINK_CAP_ACK
118 #define NETLINK_CAP_ACK 10
119 #endif
120
121 /* Normally found in linux/pkt_sched.h. */
122 #ifndef TC_H_MIN_INGRESS
123 #define TC_H_MIN_INGRESS 0xfff2u
124 #endif
125
126 /* Normally found in linux/pkt_cls.h. */
127 #ifndef TCA_CLS_FLAGS_SKIP_SW
128 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
129 #endif
130 #ifndef HAVE_TCA_CHAIN
131 #define TCA_CHAIN 11
132 #endif
133 #ifndef HAVE_TCA_FLOWER_ACT
134 #define TCA_FLOWER_ACT 3
135 #endif
136 #ifndef HAVE_TCA_FLOWER_FLAGS
137 #define TCA_FLOWER_FLAGS 22
138 #endif
139 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
140 #define TCA_FLOWER_KEY_ETH_TYPE 8
141 #endif
142 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
143 #define TCA_FLOWER_KEY_ETH_DST 4
144 #endif
145 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
146 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
147 #endif
148 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
149 #define TCA_FLOWER_KEY_ETH_SRC 6
150 #endif
151 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
152 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
153 #endif
154 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
155 #define TCA_FLOWER_KEY_IP_PROTO 9
156 #endif
157 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
158 #define TCA_FLOWER_KEY_IPV4_SRC 10
159 #endif
160 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
161 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
162 #endif
163 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
164 #define TCA_FLOWER_KEY_IPV4_DST 12
165 #endif
166 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
167 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
168 #endif
169 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
170 #define TCA_FLOWER_KEY_IPV6_SRC 14
171 #endif
172 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
173 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
174 #endif
175 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
176 #define TCA_FLOWER_KEY_IPV6_DST 16
177 #endif
178 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
179 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
180 #endif
181 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
182 #define TCA_FLOWER_KEY_TCP_SRC 18
183 #endif
184 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
185 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
186 #endif
187 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
188 #define TCA_FLOWER_KEY_TCP_DST 19
189 #endif
190 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
191 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
192 #endif
193 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
194 #define TCA_FLOWER_KEY_UDP_SRC 20
195 #endif
196 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
197 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
198 #endif
199 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
200 #define TCA_FLOWER_KEY_UDP_DST 21
201 #endif
202 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
203 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
204 #endif
205 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
206 #define TCA_FLOWER_KEY_VLAN_ID 23
207 #endif
208 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
209 #define TCA_FLOWER_KEY_VLAN_PRIO 24
210 #endif
211 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
212 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
213 #endif
214 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
215 #define TCA_FLOWER_KEY_TCP_FLAGS 71
216 #endif
217 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
218 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
219 #endif
220 #ifndef HAVE_TC_ACT_GOTO_CHAIN
221 #define TC_ACT_GOTO_CHAIN 0x20000000
222 #endif
223
224 #ifndef IPV6_ADDR_LEN
225 #define IPV6_ADDR_LEN 16
226 #endif
227
228 #ifndef IPV4_ADDR_LEN
229 #define IPV4_ADDR_LEN 4
230 #endif
231
232 #ifndef TP_PORT_LEN
233 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
234 #endif
235
236 #ifndef TTL_LEN
237 #define TTL_LEN 1
238 #endif
239
240 #ifndef TCA_ACT_MAX_PRIO
241 #define TCA_ACT_MAX_PRIO 32
242 #endif
243
244 /**
245  * Structure for holding netlink context.
246  * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
247  * Using this (8KB) buffer size ensures that netlink messages will never be
248  * truncated.
249  */
250 struct mlx5_flow_tcf_context {
251         struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
252         uint32_t seq; /* Message sequence number. */
253         uint32_t buf_size; /* Message buffer size. */
254         uint8_t *buf; /* Message buffer. */
255 };
256
257 /** Structure used when extracting the values of flow counters
258  * from a netlink message.
259  */
260 struct flow_tcf_stats_basic {
261         bool valid;
262         struct gnet_stats_basic counters;
263 };
264
265 /** Empty masks for known item types. */
266 static const union {
267         struct rte_flow_item_port_id port_id;
268         struct rte_flow_item_eth eth;
269         struct rte_flow_item_vlan vlan;
270         struct rte_flow_item_ipv4 ipv4;
271         struct rte_flow_item_ipv6 ipv6;
272         struct rte_flow_item_tcp tcp;
273         struct rte_flow_item_udp udp;
274 } flow_tcf_mask_empty;
275
276 /** Supported masks for known item types. */
277 static const struct {
278         struct rte_flow_item_port_id port_id;
279         struct rte_flow_item_eth eth;
280         struct rte_flow_item_vlan vlan;
281         struct rte_flow_item_ipv4 ipv4;
282         struct rte_flow_item_ipv6 ipv6;
283         struct rte_flow_item_tcp tcp;
284         struct rte_flow_item_udp udp;
285 } flow_tcf_mask_supported = {
286         .port_id = {
287                 .id = 0xffffffff,
288         },
289         .eth = {
290                 .type = RTE_BE16(0xffff),
291                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
292                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
293         },
294         .vlan = {
295                 /* PCP and VID only, no DEI. */
296                 .tci = RTE_BE16(0xefff),
297                 .inner_type = RTE_BE16(0xffff),
298         },
299         .ipv4.hdr = {
300                 .next_proto_id = 0xff,
301                 .src_addr = RTE_BE32(0xffffffff),
302                 .dst_addr = RTE_BE32(0xffffffff),
303         },
304         .ipv6.hdr = {
305                 .proto = 0xff,
306                 .src_addr =
307                         "\xff\xff\xff\xff\xff\xff\xff\xff"
308                         "\xff\xff\xff\xff\xff\xff\xff\xff",
309                 .dst_addr =
310                         "\xff\xff\xff\xff\xff\xff\xff\xff"
311                         "\xff\xff\xff\xff\xff\xff\xff\xff",
312         },
313         .tcp.hdr = {
314                 .src_port = RTE_BE16(0xffff),
315                 .dst_port = RTE_BE16(0xffff),
316                 .tcp_flags = 0xff,
317         },
318         .udp.hdr = {
319                 .src_port = RTE_BE16(0xffff),
320                 .dst_port = RTE_BE16(0xffff),
321         },
322 };
323
324 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
325 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
326 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
327 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
328 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
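/*
 * These helpers estimate how much room a netlink attribute occupies in the
 * message: the attribute header plus payload, rounded up by MNL_ALIGN() to
 * the 4-byte netlink alignment. For example, SZ_NLATTR_TYPE_OF(uint16_t) is
 * 8 bytes: a 4-byte header plus 2 bytes of data padded to 4.
 */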
329
330 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
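/*
 * The two extra entries leave room for the device's own port (used when the
 * switch domain reports no ports) and for the zero-ifindex entry that
 * terminates the table in flow_tcf_build_ptoi_table().
 */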
331
332 /** DPDK port to network interface index (ifindex) conversion. */
333 struct flow_tcf_ptoi {
334         uint16_t port_id; /**< DPDK port ID. */
335         unsigned int ifindex; /**< Network interface index. */
336 };
337
338 /* Due to a limitation of the driver/FW. */
339 #define MLX5_TCF_GROUP_ID_MAX 3
340 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
341
342 #define MLX5_TCF_FATE_ACTIONS \
343         (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
344          MLX5_FLOW_ACTION_JUMP)
345
346 #define MLX5_TCF_VLAN_ACTIONS \
347         (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
348          MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
349
350 #define MLX5_TCF_PEDIT_ACTIONS \
351         (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
352          MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
353          MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
354          MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
355          MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
356
357 #define MLX5_TCF_CONFIG_ACTIONS \
358         (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
359          MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
360          MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
361          (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
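/*
 * Actions listed in MLX5_TCF_CONFIG_ACTIONS require a non-NULL conf pointer;
 * validation rejects them otherwise. DEC_TTL is excluded because it carries
 * no configuration.
 */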
362
363 #define MAX_PEDIT_KEYS 128
364 #define SZ_PEDIT_KEY_VAL 4
365
366 #define NUM_OF_PEDIT_KEYS(sz) \
367         (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
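/* Rounds up: e.g. NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN) is 2, since a 6-byte
 * MAC address needs two 4-byte pedit keys, and NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN)
 * is 4.
 */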
368
369 struct pedit_key_ex {
370         enum pedit_header_type htype;
371         enum pedit_cmd cmd;
372 };
373
374 struct pedit_parser {
375         struct tc_pedit_sel sel;
376         struct tc_pedit_key keys[MAX_PEDIT_KEYS];
377         struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
378 };
379
380 /**
381  * Create space for using the implicitly created TC flow counter.
382  *
383  * The function takes no parameters; the counter is created implicitly
384  * and is not added to any device counter list.
385  *
386  * @return
387  *   A pointer to the counter data structure, NULL otherwise and
388  *   rte_errno is set.
389  */
390 static struct mlx5_flow_counter *
391 flow_tcf_counter_new(void)
392 {
393         struct mlx5_flow_counter *cnt;
394
395         /*
396          * An E-Switch counter cannot be shared and its ID is unknown,
397          * so all counters are currently returned with ID 0.
398          * Switching to unique numbers may be better in the future.
399          */
400         struct mlx5_flow_counter tmpl = {
401                 .ref_cnt = 1,
402         };
403         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
404         if (!cnt) {
405                 rte_errno = ENOMEM;
406                 return NULL;
407         }
408         *cnt = tmpl;
409         /* Implicit counter, do not add to list. */
410         return cnt;
411 }
412
413 /**
414  * Set pedit key for MAC address rewrite.
415  *
416  * @param[in] actions
417  *   Pointer to the action specification.
418  * @param[in,out] p_parser
419  *   Pointer to the pedit_parser structure.
420  */
421 static void
422 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
423                            struct pedit_parser *p_parser)
424 {
425         int idx = p_parser->sel.nkeys;
426         uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
427                                         offsetof(struct ether_hdr, s_addr) :
428                                         offsetof(struct ether_hdr, d_addr);
429         const struct rte_flow_action_set_mac *conf =
430                 (const struct rte_flow_action_set_mac *)actions->conf;
431
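        /*
         * A 6-byte MAC address spans two 4-byte pedit keys. Per the field
         * comments in struct tc_pedit_key, "mask" is ANDed and "val" is
         * XORed into the packet word, so the zero mask (~UINT32_MAX) on the
         * first key replaces the whole 32-bit word, while the 0xFFFF0000
         * mask on the second key is meant to preserve the 16 bits that do
         * not belong to the address.
         */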
432         p_parser->keys[idx].off = off;
433         p_parser->keys[idx].mask = ~UINT32_MAX;
434         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
435         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
436         memcpy(&p_parser->keys[idx].val,
437                 conf->mac_addr, SZ_PEDIT_KEY_VAL);
438         idx++;
439         p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
440         p_parser->keys[idx].mask = 0xFFFF0000;
441         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
442         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
443         memcpy(&p_parser->keys[idx].val,
444                 conf->mac_addr + SZ_PEDIT_KEY_VAL,
445                 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
446         p_parser->sel.nkeys = (++idx);
447 }
448
449 /**
450  * Set pedit key for TTL decrease or set.
451  *
452  * @param[in] actions
453  *   Pointer to the action specification.
454  * @param[in,out] p_parser
455  *   Pointer to the pedit_parser structure.
456  * @param[in] item_flags
457  *   Flags of all items present in the pattern.
458  */
459 static void
460 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
461                                 struct pedit_parser *p_parser,
462                                 uint64_t item_flags)
463 {
464         int idx = p_parser->sel.nkeys;
465
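        /*
         * TTL/hop-limit is a single byte; the 0xFFFFFF00 mask is meant to
         * preserve the three bytes sharing the 32-bit word with it. DEC_TTL
         * is expressed as an ADD of 0xFF, i.e. a decrement of the byte
         * modulo 256.
         */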
466         p_parser->keys[idx].mask = 0xFFFFFF00;
467         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
468                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
469                 p_parser->keys[idx].off =
470                         offsetof(struct ipv4_hdr, time_to_live);
471         }
472         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
473                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
474                 p_parser->keys[idx].off =
475                         offsetof(struct ipv6_hdr, hop_limits);
476         }
477         if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
478                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
479                 p_parser->keys[idx].val = 0x000000FF;
480         } else {
481                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
482                 p_parser->keys[idx].val =
483                         (__u32)((const struct rte_flow_action_set_ttl *)
484                          actions->conf)->ttl_value;
485         }
486         p_parser->sel.nkeys = (++idx);
487 }
488
489 /**
490  * Set pedit key for the transport (TCP/UDP) port value.
491  *
492  * @param[in] actions
493  *   Pointer to the action specification.
494  * @param[in,out] p_parser
495  *   Pointer to the pedit_parser structure.
496  * @param[in] item_flags
497  *   Flags of all items present in the pattern.
498  */
499 static void
500 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
501                                 struct pedit_parser *p_parser,
502                                 uint64_t item_flags)
503 {
504         int idx = p_parser->sel.nkeys;
505
506         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
507                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
508         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
509                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
510         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
511         /* The offset of src/dst port is the same for TCP and UDP. */
512         p_parser->keys[idx].off =
513                 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
514                 offsetof(struct tcp_hdr, src_port) :
515                 offsetof(struct tcp_hdr, dst_port);
516         p_parser->keys[idx].mask = 0xFFFF0000;
517         p_parser->keys[idx].val =
518                 (__u32)((const struct rte_flow_action_set_tp *)
519                                 actions->conf)->port;
520         p_parser->sel.nkeys = (++idx);
521 }
522
523 /**
524  * Set pedit key for IPv6 address rewrite.
525  *
526  * @param[in] actions
527  *   Pointer to the action specification.
528  * @param[in,out] p_parser
529  *   Pointer to the pedit_parser structure.
530  */
531 static void
532 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
533                                  struct pedit_parser *p_parser)
534 {
535         int idx = p_parser->sel.nkeys;
536         int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
537         int off_base =
538                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
539                 offsetof(struct ipv6_hdr, src_addr) :
540                 offsetof(struct ipv6_hdr, dst_addr);
541         const struct rte_flow_action_set_ipv6 *conf =
542                 (const struct rte_flow_action_set_ipv6 *)actions->conf;
543
544         for (int i = 0; i < keys; i++, idx++) {
545                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
546                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
547                 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
548                 p_parser->keys[idx].mask = ~UINT32_MAX;
549                 memcpy(&p_parser->keys[idx].val,
550                         conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
551                         SZ_PEDIT_KEY_VAL);
552         }
553         p_parser->sel.nkeys += keys;
554 }
555
556 /**
557  * Set pedit key for IPv4 address rewrite.
558  *
559  * @param[in] actions
560  *   Pointer to the action specification.
561  * @param[in,out] p_parser
562  *   Pointer to the pedit_parser structure.
563  */
564 static void
565 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
566                                  struct pedit_parser *p_parser)
567 {
568         int idx = p_parser->sel.nkeys;
569
570         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
571         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
572         p_parser->keys[idx].off =
573                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
574                 offsetof(struct ipv4_hdr, src_addr) :
575                 offsetof(struct ipv4_hdr, dst_addr);
576         p_parser->keys[idx].mask = ~UINT32_MAX;
577         p_parser->keys[idx].val =
578                 ((const struct rte_flow_action_set_ipv4 *)
579                  actions->conf)->ipv4_addr;
580         p_parser->sel.nkeys = (++idx);
581 }
582
583 /**
584  * Create the pedit netlink attribute in the netlink message
585  * using the pre-allocated message buffer.
586  *
587  * @param[in,out] nl
588  *   Pointer to the pre-allocated netlink message buffer.
589  * @param[in,out] actions
590  *   Pointer to a pointer to the actions specification. On return it
591  *   points to the last pedit action consumed, so that the caller's
592  *   loop increment moves past it.
593  * @param[in] item_flags
594  *   Flags of all items present in the pattern.
595  */
596 static void
597 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
598                               const struct rte_flow_action **actions,
599                               uint64_t item_flags)
600 {
601         struct pedit_parser p_parser;
602         struct nlattr *na_act_options;
603         struct nlattr *na_pedit_keys;
604
605         memset(&p_parser, 0, sizeof(p_parser));
606         mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
607         na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
608         /* All modify-header actions should be in one TC-pedit action. */
609         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
610                 switch ((*actions)->type) {
611                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
612                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
613                         flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
614                         break;
615                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
616                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
617                         flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
618                         break;
619                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
620                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
621                         flow_tcf_pedit_key_set_tp_port(*actions,
622                                                         &p_parser, item_flags);
623                         break;
624                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
625                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
626                         flow_tcf_pedit_key_set_dec_ttl(*actions,
627                                                         &p_parser, item_flags);
628                         break;
629                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
630                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
631                         flow_tcf_pedit_key_set_mac(*actions, &p_parser);
632                         break;
633                 default:
634                         goto pedit_mnl_msg_done;
635                 }
636         }
637 pedit_mnl_msg_done:
638         p_parser.sel.action = TC_ACT_PIPE;
639         mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
640                      sizeof(p_parser.sel) +
641                      p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
642                      &p_parser);
643         na_pedit_keys =
644                 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
645         for (int i = 0; i < p_parser.sel.nkeys; i++) {
646                 struct nlattr *na_pedit_key =
647                         mnl_attr_nest_start(nl,
648                                             TCA_PEDIT_KEY_EX | NLA_F_NESTED);
649                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
650                                  p_parser.keys_ex[i].htype);
651                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
652                                  p_parser.keys_ex[i].cmd);
653                 mnl_attr_nest_end(nl, na_pedit_key);
654         }
655         mnl_attr_nest_end(nl, na_pedit_keys);
656         mnl_attr_nest_end(nl, na_act_options);
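        /*
         * Step back to the last pedit action consumed so that the caller's
         * loop increment resumes at the first action not handled here.
         */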
657         (*actions)--;
658 }
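/*
 * For reference, the pedit attributes built above roughly correspond to an
 * iproute2 rule such as (illustrative only, exact syntax depends on the
 * iproute2 version):
 *
 *   tc filter add dev <ifname> ingress protocol ip flower \
 *      action pedit ex munge ip src set 192.0.2.1 pipe \
 *      action mirred egress redirect dev <peer>
 */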
659
660 /**
661  * Calculate the maximum memory size of one TC-pedit action.
662  * One TC-pedit action can contain a set of keys, each defining
663  * a rewrite element (rte_flow action).
664  *
665  * @param[in,out] actions
666  *   Pointer to a pointer to the actions specification. On return it
667  *   points to the last pedit action accounted for.
668  * @param[in,out] action_flags
669  *   Pointer to the action flags, updated with the bits of the
670  *   pedit actions found.
671  * @return
672  *   Maximum memory size of one TC-pedit action.
673  */
674 static int
675 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
676                                 uint64_t *action_flags)
677 {
678         int pedit_size = 0;
679         int keys = 0;
680         uint64_t flags = 0;
681
682         pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
683                       SZ_NLATTR_STRZ_OF("pedit") +
684                       SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
685         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
686                 switch ((*actions)->type) {
687                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
688                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
689                         flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
690                         break;
691                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
692                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
693                         flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
694                         break;
695                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
696                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
697                         flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
698                         break;
699                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
700                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
701                         flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
702                         break;
703                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
704                         /* Port length is the same for TCP and UDP. */
705                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
706                         flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
707                         break;
708                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
709                         /* Port length is the same for TCP and UDP. */
710                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
711                         flags |= MLX5_FLOW_ACTION_SET_TP_DST;
712                         break;
713                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
714                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
715                         flags |= MLX5_FLOW_ACTION_SET_TTL;
716                         break;
717                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
718                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
719                         flags |= MLX5_FLOW_ACTION_DEC_TTL;
720                         break;
721                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
722                         keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
723                         flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
724                         break;
725                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
726                         keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
727                         flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
728                         break;
729                 default:
730                         goto get_pedit_action_size_done;
731                 }
732         }
733 get_pedit_action_size_done:
734         /* TCA_PEDIT_PARMS_EX */
735         pedit_size +=
736                 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
737                                   keys * sizeof(struct tc_pedit_key));
738         pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS_EX */
739         pedit_size += keys *
740                       /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
741                       (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
742                        SZ_NLATTR_DATA_OF(2));
743         (*action_flags) |= flags;
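        /* Step back so the caller's loop resumes after the last pedit action. */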
744         (*actions)--;
745         return pedit_size;
746 }
747
748 /**
749  * Retrieve mask for pattern item.
750  *
751  * This function does basic sanity checks on a pattern item in order to
752  * return the most appropriate mask for it.
753  *
754  * @param[in] item
755  *   Item specification.
756  * @param[in] mask_default
757  *   Default mask for pattern item as specified by the flow API.
758  * @param[in] mask_supported
759  *   Mask fields supported by the implementation.
760  * @param[in] mask_empty
761  *   Empty mask to return when there is no specification.
762  * @param[out] error
763  *   Perform verbose error reporting if not NULL.
764  *
765  * @return
766  *   Either @p item->mask or one of the mask parameters on success, NULL
767  *   otherwise and rte_errno is set.
768  */
769 static const void *
770 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
771                    const void *mask_supported, const void *mask_empty,
772                    size_t mask_size, struct rte_flow_error *error)
773 {
774         const uint8_t *mask;
775         size_t i;
776
777         /* item->last and item->mask cannot exist without item->spec. */
778         if (!item->spec && (item->mask || item->last)) {
779                 rte_flow_error_set(error, EINVAL,
780                                    RTE_FLOW_ERROR_TYPE_ITEM, item,
781                                    "\"mask\" or \"last\" field provided without"
782                                    " a corresponding \"spec\"");
783                 return NULL;
784         }
785         /* No spec, no mask, no problem. */
786         if (!item->spec)
787                 return mask_empty;
788         mask = item->mask ? item->mask : mask_default;
789         assert(mask);
790         /*
791          * Single-pass check to make sure that:
792          * - Mask is supported, no bits are set outside mask_supported.
793          * - Both item->spec and item->last are included in mask.
794          */
795         for (i = 0; i != mask_size; ++i) {
796                 if (!mask[i])
797                         continue;
798                 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
799                     ((const uint8_t *)mask_supported)[i]) {
800                         rte_flow_error_set(error, ENOTSUP,
801                                            RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
802                                            "unsupported field found"
803                                            " in \"mask\"");
804                         return NULL;
805                 }
806                 if (item->last &&
807                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
808                     (((const uint8_t *)item->last)[i] & mask[i])) {
809                         rte_flow_error_set(error, EINVAL,
810                                            RTE_FLOW_ERROR_TYPE_ITEM_LAST,
811                                            item->last,
812                                            "range between \"spec\" and \"last\""
813                                            " not comprised in \"mask\"");
814                         return NULL;
815                 }
816         }
817         return mask;
818 }
819
820 /**
821  * Build a conversion table between port ID and ifindex.
822  *
823  * @param[in] dev
824  *   Pointer to Ethernet device.
825  * @param[out] ptoi
826  *   Pointer to ptoi table.
827  * @param[in] len
828  *   Size of ptoi table provided.
829  *
830  * @return
831  *   Size of ptoi table filled.
832  */
833 static unsigned int
834 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
835                           unsigned int len)
836 {
837         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
838         uint16_t port_id[n + 1];
839         unsigned int i;
840         unsigned int own = 0;
841
842         /* At least one port is needed when no switch domain is present. */
843         if (!n) {
844                 n = 1;
845                 port_id[0] = dev->data->port_id;
846         } else {
847                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
848         }
849         if (n > len)
850                 return 0;
851         for (i = 0; i != n; ++i) {
852                 struct rte_eth_dev_info dev_info;
853
854                 rte_eth_dev_info_get(port_id[i], &dev_info);
855                 if (port_id[i] == dev->data->port_id)
856                         own = i;
857                 ptoi[i].port_id = port_id[i];
858                 ptoi[i].ifindex = dev_info.if_index;
859         }
860         /* Ensure first entry of ptoi[] is the current device. */
861         if (own) {
862                 ptoi[n] = ptoi[0];
863                 ptoi[0] = ptoi[own];
864                 ptoi[own] = ptoi[n];
865         }
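        /*
         * Note: ptoi[n] is used as scratch space for the swap above; it is
         * overwritten by the terminating entry right below.
         */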
866         /* An entry with zero ifindex terminates ptoi[]. */
867         ptoi[n].port_id = 0;
868         ptoi[n].ifindex = 0;
869         return n;
870 }
871
872 /**
873  * Verify the @p attr will be correctly understood by the E-switch.
874  *
875  * @param[in] attr
876  *   Pointer to flow attributes
877  * @param[out] error
878  *   Pointer to error structure.
879  *
880  * @return
881  *   0 on success, a negative errno value otherwise and rte_errno is set.
882  */
883 static int
884 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
885                              struct rte_flow_error *error)
886 {
887         /*
888          * Supported attributes: groups, some priorities and ingress only.
889          * A group is supported only if the kernel supports chains. Transfer
890          * is not checked here, as that is the caller's responsibility.
891          */
892         if (attr->group > MLX5_TCF_GROUP_ID_MAX)
893                 return rte_flow_error_set(error, ENOTSUP,
894                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
895                                           "group ID larger than "
896                                           RTE_STR(MLX5_TCF_GROUP_ID_MAX)
897                                           " isn't supported");
898         else if (attr->group > 0 &&
899                  attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
900                 return rte_flow_error_set(error, ENOTSUP,
901                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
902                                           attr,
903                                           "lowest priority level is "
904                                           RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
905                                           " when group is configured");
906         else if (attr->priority > 0xfffe)
907                 return rte_flow_error_set(error, ENOTSUP,
908                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
909                                           attr,
910                                           "lowest priority level is 0xfffe");
911         if (!attr->ingress)
912                 return rte_flow_error_set(error, EINVAL,
913                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
914                                           attr, "only ingress is supported");
915         if (attr->egress)
916                 return rte_flow_error_set(error, ENOTSUP,
917                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
918                                           attr, "egress is not supported");
919         return 0;
920 }
921
922 /**
923  * Validate flow for E-Switch.
924  *
925  * @param[in] dev
926  *   Pointer to the Ethernet device structure.
927  * @param[in] attr
928  *   Pointer to the flow attributes.
929  * @param[in] items
930  *   Pointer to the list of items.
931  * @param[in] actions
932  *   Pointer to the list of actions.
933  * @param[out] error
934  *   Pointer to the error structure.
935  *
936  * @return
937  *   0 on success, a negative errno value otherwise and rte_errno is set.
938  */
939 static int
940 flow_tcf_validate(struct rte_eth_dev *dev,
941                   const struct rte_flow_attr *attr,
942                   const struct rte_flow_item items[],
943                   const struct rte_flow_action actions[],
944                   struct rte_flow_error *error)
945 {
946         union {
947                 const struct rte_flow_item_port_id *port_id;
948                 const struct rte_flow_item_eth *eth;
949                 const struct rte_flow_item_vlan *vlan;
950                 const struct rte_flow_item_ipv4 *ipv4;
951                 const struct rte_flow_item_ipv6 *ipv6;
952                 const struct rte_flow_item_tcp *tcp;
953                 const struct rte_flow_item_udp *udp;
954         } spec, mask;
955         union {
956                 const struct rte_flow_action_port_id *port_id;
957                 const struct rte_flow_action_jump *jump;
958                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
959                 const struct rte_flow_action_of_set_vlan_vid *
960                         of_set_vlan_vid;
961                 const struct rte_flow_action_of_set_vlan_pcp *
962                         of_set_vlan_pcp;
963                 const struct rte_flow_action_set_ipv4 *set_ipv4;
964                 const struct rte_flow_action_set_ipv6 *set_ipv6;
965         } conf;
966         uint64_t item_flags = 0;
967         uint64_t action_flags = 0;
968         uint8_t next_protocol = -1;
969         unsigned int tcm_ifindex = 0;
970         uint8_t pedit_validated = 0;
971         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
972         struct rte_eth_dev *port_id_dev = NULL;
973         bool in_port_id_set = false;
974         int ret;
975
976         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
977                                                 PTOI_TABLE_SZ_MAX(dev)));
978         ret = flow_tcf_validate_attributes(attr, error);
979         if (ret < 0)
980                 return ret;
981         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
982                 unsigned int i;
983
984                 switch (items->type) {
985                 case RTE_FLOW_ITEM_TYPE_VOID:
986                         break;
987                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
988                         mask.port_id = flow_tcf_item_mask
989                                 (items, &rte_flow_item_port_id_mask,
990                                  &flow_tcf_mask_supported.port_id,
991                                  &flow_tcf_mask_empty.port_id,
992                                  sizeof(flow_tcf_mask_supported.port_id),
993                                  error);
994                         if (!mask.port_id)
995                                 return -rte_errno;
996                         if (mask.port_id == &flow_tcf_mask_empty.port_id) {
997                                 in_port_id_set = 1;
998                                 break;
999                         }
1000                         spec.port_id = items->spec;
1001                         if (mask.port_id->id && mask.port_id->id != 0xffffffff)
1002                                 return rte_flow_error_set
1003                                         (error, ENOTSUP,
1004                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1005                                          mask.port_id,
1006                                          "no support for partial mask on"
1007                                          " \"id\" field");
1008                         if (!mask.port_id->id)
1009                                 i = 0;
1010                         else
1011                                 for (i = 0; ptoi[i].ifindex; ++i)
1012                                         if (ptoi[i].port_id == spec.port_id->id)
1013                                                 break;
1014                         if (!ptoi[i].ifindex)
1015                                 return rte_flow_error_set
1016                                         (error, ENODEV,
1017                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1018                                          spec.port_id,
1019                                          "missing data to convert port ID to"
1020                                          " ifindex");
1021                         if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
1022                                 return rte_flow_error_set
1023                                         (error, ENOTSUP,
1024                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1025                                          spec.port_id,
1026                                          "cannot match traffic for"
1027                                          " several port IDs through"
1028                                          " a single flow rule");
1029                         tcm_ifindex = ptoi[i].ifindex;
1030                         in_port_id_set = 1;
1031                         break;
1032                 case RTE_FLOW_ITEM_TYPE_ETH:
1033                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1034                                                           error);
1035                         if (ret < 0)
1036                                 return ret;
1037                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1038                         /* TODO:
1039                          * Redundant check due to different supported mask.
1040                          * Same for the rest of items.
1041                          */
1042                         mask.eth = flow_tcf_item_mask
1043                                 (items, &rte_flow_item_eth_mask,
1044                                  &flow_tcf_mask_supported.eth,
1045                                  &flow_tcf_mask_empty.eth,
1046                                  sizeof(flow_tcf_mask_supported.eth),
1047                                  error);
1048                         if (!mask.eth)
1049                                 return -rte_errno;
1050                         if (mask.eth->type && mask.eth->type !=
1051                             RTE_BE16(0xffff))
1052                                 return rte_flow_error_set
1053                                         (error, ENOTSUP,
1054                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1055                                          mask.eth,
1056                                          "no support for partial mask on"
1057                                          " \"type\" field");
1058                         break;
1059                 case RTE_FLOW_ITEM_TYPE_VLAN:
1060                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1061                                                            error);
1062                         if (ret < 0)
1063                                 return ret;
1064                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1065                         mask.vlan = flow_tcf_item_mask
1066                                 (items, &rte_flow_item_vlan_mask,
1067                                  &flow_tcf_mask_supported.vlan,
1068                                  &flow_tcf_mask_empty.vlan,
1069                                  sizeof(flow_tcf_mask_supported.vlan),
1070                                  error);
1071                         if (!mask.vlan)
1072                                 return -rte_errno;
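                        /*
                         * The PCP (0xe000) and VID (0x0fff) parts of the TCI
                         * must each be matched either completely or not at
                         * all; partial bit masks within either field are
                         * rejected below, as is a partial "inner_type" mask.
                         */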
1073                         if ((mask.vlan->tci & RTE_BE16(0xe000) &&
1074                              (mask.vlan->tci & RTE_BE16(0xe000)) !=
1075                               RTE_BE16(0xe000)) ||
1076                             (mask.vlan->tci & RTE_BE16(0x0fff) &&
1077                              (mask.vlan->tci & RTE_BE16(0x0fff)) !=
1078                               RTE_BE16(0x0fff)) ||
1079                             (mask.vlan->inner_type &&
1080                              mask.vlan->inner_type != RTE_BE16(0xffff)))
1081                                 return rte_flow_error_set
1082                                         (error, ENOTSUP,
1083                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1084                                          mask.vlan,
1085                                          "no support for partial masks on"
1086                                          " \"tci\" (PCP and VID parts) and"
1087                                          " \"inner_type\" fields");
1088                         break;
1089                 case RTE_FLOW_ITEM_TYPE_IPV4:
1090                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1091                                                            error);
1092                         if (ret < 0)
1093                                 return ret;
1094                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1095                         mask.ipv4 = flow_tcf_item_mask
1096                                 (items, &rte_flow_item_ipv4_mask,
1097                                  &flow_tcf_mask_supported.ipv4,
1098                                  &flow_tcf_mask_empty.ipv4,
1099                                  sizeof(flow_tcf_mask_supported.ipv4),
1100                                  error);
1101                         if (!mask.ipv4)
1102                                 return -rte_errno;
1103                         if (mask.ipv4->hdr.next_proto_id &&
1104                             mask.ipv4->hdr.next_proto_id != 0xff)
1105                                 return rte_flow_error_set
1106                                         (error, ENOTSUP,
1107                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1108                                          mask.ipv4,
1109                                          "no support for partial mask on"
1110                                          " \"hdr.next_proto_id\" field");
1111                         else if (mask.ipv4->hdr.next_proto_id)
1112                                 next_protocol =
1113                                         ((const struct rte_flow_item_ipv4 *)
1114                                          (items->spec))->hdr.next_proto_id;
1115                         break;
1116                 case RTE_FLOW_ITEM_TYPE_IPV6:
1117                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1118                                                            error);
1119                         if (ret < 0)
1120                                 return ret;
1121                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1122                         mask.ipv6 = flow_tcf_item_mask
1123                                 (items, &rte_flow_item_ipv6_mask,
1124                                  &flow_tcf_mask_supported.ipv6,
1125                                  &flow_tcf_mask_empty.ipv6,
1126                                  sizeof(flow_tcf_mask_supported.ipv6),
1127                                  error);
1128                         if (!mask.ipv6)
1129                                 return -rte_errno;
1130                         if (mask.ipv6->hdr.proto &&
1131                             mask.ipv6->hdr.proto != 0xff)
1132                                 return rte_flow_error_set
1133                                         (error, ENOTSUP,
1134                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1135                                          mask.ipv6,
1136                                          "no support for partial mask on"
1137                                          " \"hdr.proto\" field");
1138                         else if (mask.ipv6->hdr.proto)
1139                                 next_protocol =
1140                                         ((const struct rte_flow_item_ipv6 *)
1141                                          (items->spec))->hdr.proto;
1142                         break;
1143                 case RTE_FLOW_ITEM_TYPE_UDP:
1144                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1145                                                           next_protocol, error);
1146                         if (ret < 0)
1147                                 return ret;
1148                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1149                         mask.udp = flow_tcf_item_mask
1150                                 (items, &rte_flow_item_udp_mask,
1151                                  &flow_tcf_mask_supported.udp,
1152                                  &flow_tcf_mask_empty.udp,
1153                                  sizeof(flow_tcf_mask_supported.udp),
1154                                  error);
1155                         if (!mask.udp)
1156                                 return -rte_errno;
1157                         break;
1158                 case RTE_FLOW_ITEM_TYPE_TCP:
1159                         ret = mlx5_flow_validate_item_tcp
1160                                              (items, item_flags,
1161                                               next_protocol,
1162                                               &flow_tcf_mask_supported.tcp,
1163                                               error);
1164                         if (ret < 0)
1165                                 return ret;
1166                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1167                         mask.tcp = flow_tcf_item_mask
1168                                 (items, &rte_flow_item_tcp_mask,
1169                                  &flow_tcf_mask_supported.tcp,
1170                                  &flow_tcf_mask_empty.tcp,
1171                                  sizeof(flow_tcf_mask_supported.tcp),
1172                                  error);
1173                         if (!mask.tcp)
1174                                 return -rte_errno;
1175                         break;
1176                 default:
1177                         return rte_flow_error_set(error, ENOTSUP,
1178                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1179                                                   NULL, "item not supported");
1180                 }
1181         }
1182         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1183                 unsigned int i;
1184                 uint64_t current_action_flag = 0;
1185
1186                 switch (actions->type) {
1187                 case RTE_FLOW_ACTION_TYPE_VOID:
1188                         break;
1189                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1190                         current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1191                         if (!actions->conf)
1192                                 break;
1193                         conf.port_id = actions->conf;
1194                         if (conf.port_id->original)
1195                                 i = 0;
1196                         else
1197                                 for (i = 0; ptoi[i].ifindex; ++i)
1198                                         if (ptoi[i].port_id == conf.port_id->id)
1199                                                 break;
1200                         if (!ptoi[i].ifindex)
1201                                 return rte_flow_error_set
1202                                         (error, ENODEV,
1203                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1204                                          conf.port_id,
1205                                          "missing data to convert port ID to"
1206                                          " ifindex");
1207                         port_id_dev = &rte_eth_devices[conf.port_id->id];
1208                         break;
1209                 case RTE_FLOW_ACTION_TYPE_JUMP:
1210                         current_action_flag = MLX5_FLOW_ACTION_JUMP;
1211                         if (!actions->conf)
1212                                 break;
1213                         conf.jump = actions->conf;
1214                         if (attr->group >= conf.jump->group)
1215                                 return rte_flow_error_set
1216                                         (error, ENOTSUP,
1217                                          RTE_FLOW_ERROR_TYPE_ACTION,
1218                                          actions,
1219                                          "can jump only to a higher group");
1220                         break;
1221                 case RTE_FLOW_ACTION_TYPE_DROP:
1222                         current_action_flag = MLX5_FLOW_ACTION_DROP;
1223                         break;
1224                 case RTE_FLOW_ACTION_TYPE_COUNT:
1225                         break;
1226                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1227                         current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1228                         break;
1229                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1230                         current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1231                         break;
1232                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1233                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1234                                 return rte_flow_error_set
1235                                         (error, ENOTSUP,
1236                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1237                                          "vlan modify is not supported,"
1238                                          " set action must follow push action");
1239                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1240                         break;
1241                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1242                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1243                                 return rte_flow_error_set
1244                                         (error, ENOTSUP,
1245                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1246                                          "vlan modify is not supported,"
1247                                          " set action must follow push action");
1248                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1249                         break;
1250                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1251                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1252                         break;
1253                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1254                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1255                         break;
1256                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1257                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1258                         break;
1259                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1260                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1261                         break;
1262                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1263                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1264                         break;
1265                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1266                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1267                         break;
1268                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1269                         current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1270                         break;
1271                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1272                         current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1273                         break;
1274                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1275                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1276                         break;
1277                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1278                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1279                         break;
1280                 default:
1281                         return rte_flow_error_set(error, ENOTSUP,
1282                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1283                                                   actions,
1284                                                   "action not supported");
1285                 }
1286                 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1287                         if (!actions->conf)
1288                                 return rte_flow_error_set(error, EINVAL,
1289                                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1290                                                 actions,
1291                                                 "action configuration not set");
1292                 }
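                /*
                 * All header-rewrite (pedit) actions must form one contiguous
                 * block so they can be translated into a single TC-pedit
                 * action: pedit_validated is set once a non-pedit action
                 * follows pedit ones, and any later pedit action is rejected.
                 */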
1293                 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1294                     pedit_validated)
1295                         return rte_flow_error_set(error, ENOTSUP,
1296                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1297                                                   actions,
1298                                                   "set actions should be "
1299                                                   "listed successively");
1300                 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1301                     (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1302                         pedit_validated = 1;
1303                 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1304                     (action_flags & MLX5_TCF_FATE_ACTIONS))
1305                         return rte_flow_error_set(error, EINVAL,
1306                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1307                                                   actions,
1308                                                   "can't have multiple fate"
1309                                                   " actions");
1310                 action_flags |= current_action_flag;
1311         }
1312         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1313             (action_flags & MLX5_FLOW_ACTION_DROP))
1314                 return rte_flow_error_set(error, ENOTSUP,
1315                                           RTE_FLOW_ERROR_TYPE_ACTION,
1316                                           actions,
1317                                           "set action is not compatible with "
1318                                           "drop action");
1319         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1320             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1321                 return rte_flow_error_set(error, ENOTSUP,
1322                                           RTE_FLOW_ERROR_TYPE_ACTION,
1323                                           actions,
1324                                           "set action must be followed by "
1325                                           "port_id action");
1326         if (action_flags &
1327            (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1328                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1329                         return rte_flow_error_set(error, EINVAL,
1330                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1331                                                   actions,
1332                                                   "no ipv4 item found in"
1333                                                   " pattern");
1334         }
1335         if (action_flags &
1336            (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1337                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1338                         return rte_flow_error_set(error, EINVAL,
1339                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1340                                                   actions,
1341                                                   "no ipv6 item found in"
1342                                                   " pattern");
1343         }
1344         if (action_flags &
1345            (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1346                 if (!(item_flags &
1347                      (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1348                       MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1349                         return rte_flow_error_set(error, EINVAL,
1350                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1351                                                   actions,
1352                                                   "no TCP/UDP item found in"
1353                                                   " pattern");
1354         }
1355         /*
1356          * FW syndrome (0xA9C090):
1357          *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
1358          *     forward to the uplink.
1359          */
1360         if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1361             (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1362             ((struct priv *)port_id_dev->data->dev_private)->representor)
1363                 return rte_flow_error_set(error, ENOTSUP,
1364                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1365                                           "vlan push can only be applied"
1366                                           " when forwarding to uplink port");
1367         /*
1368          * FW syndrome (0x294609):
1369          *     set_flow_table_entry: modify/pop/push actions in fdb flow table
1370          *     are supported only while forwarding to vport.
1371          */
1372         if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1373             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1374                 return rte_flow_error_set(error, ENOTSUP,
1375                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1376                                           "vlan actions are supported"
1377                                           " only with port_id action");
1378         if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1379                 return rte_flow_error_set(error, EINVAL,
1380                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1381                                           "no fate action is found");
1382         if (action_flags &
1383            (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
1384                 if (!(item_flags &
1385                      (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
1386                       MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
1387                         return rte_flow_error_set(error, EINVAL,
1388                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1389                                                   actions,
1390                                                   "no IP found in pattern");
1391         }
1392         if (action_flags &
1393             (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
1394                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
1395                         return rte_flow_error_set(error, ENOTSUP,
1396                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1397                                                   actions,
1398                                                   "no ethernet found in"
1399                                                   " pattern");
1400         }
1401         return 0;
1402 }
1403
1404 /**
1405  * Calculate maximum size of memory for flow items of Linux TC flower and
1406  * extract specified items.
1407  *
 * @param[in] attr
 *   Pointer to the flow attributes.
1408  * @param[in] items
1409  *   Pointer to the list of items.
1410  * @param[out] item_flags
1411  *   Pointer to the detected items.
1412  *
1413  * @return
1414  *   Maximum size of memory for items.
1415  */
1416 static int
1417 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1418                             const struct rte_flow_item items[],
1419                             uint64_t *item_flags)
1420 {
1421         int size = 0;
1422         uint64_t flags = 0;
1423
1424         size += SZ_NLATTR_STRZ_OF("flower") +
1425                 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1426                 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1427         if (attr->group > 0)
1428                 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1429         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1430                 switch (items->type) {
1431                 case RTE_FLOW_ITEM_TYPE_VOID:
1432                         break;
1433                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1434                         break;
1435                 case RTE_FLOW_ITEM_TYPE_ETH:
1436                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1437                                 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1438                                 /* dst/src MAC addr and mask. */
1439                         flags |= MLX5_FLOW_LAYER_OUTER_L2;
1440                         break;
1441                 case RTE_FLOW_ITEM_TYPE_VLAN:
1442                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1443                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1444                                 /* VLAN Ether type. */
1445                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1446                                 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1447                         flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1448                         break;
1449                 case RTE_FLOW_ITEM_TYPE_IPV4:
1450                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1451                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1452                                 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1453                                 /* dst/src IP addr and mask. */
1454                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1455                         break;
1456                 case RTE_FLOW_ITEM_TYPE_IPV6:
1457                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1458                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1459                                 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1460                                 /* dst/src IP addr and mask. */
1461                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1462                         break;
1463                 case RTE_FLOW_ITEM_TYPE_UDP:
1464                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1465                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1466                                 /* dst/src port and mask. */
1467                         flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1468                         break;
1469                 case RTE_FLOW_ITEM_TYPE_TCP:
1470                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1471                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1472                                 /* dst/src port and mask. */
1473                         flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1474                         break;
1475                 default:
1476                         DRV_LOG(WARNING,
1477                                 "unsupported item %p type %d,"
1478                                 " items must be validated before flow creation",
1479                                 (const void *)items, items->type);
1480                         break;
1481                 }
1482         }
1483         *item_flags = flags;
1484         return size;
1485 }
1486
1487 /**
1488  * Calculate maximum size of memory for flow actions of Linux TC flower and
1489  * extract specified actions.
1490  *
1491  * @param[in] actions
1492  *   Pointer to the list of actions.
1493  * @param[out] action_flags
1494  *   Pointer to the detected actions.
1495  *
1496  * @return
1497  *   Maximum size of memory for actions.
1498  */
1499 static int
1500 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1501                               uint64_t *action_flags)
1502 {
1503         int size = 0;
1504         uint64_t flags = 0;
1505
1506         size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1507         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1508                 switch (actions->type) {
1509                 case RTE_FLOW_ACTION_TYPE_VOID:
1510                         break;
1511                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1512                         size += SZ_NLATTR_NEST + /* na_act_index. */
1513                                 SZ_NLATTR_STRZ_OF("mirred") +
1514                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1515                                 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1516                         flags |= MLX5_FLOW_ACTION_PORT_ID;
1517                         break;
1518                 case RTE_FLOW_ACTION_TYPE_JUMP:
1519                         size += SZ_NLATTR_NEST + /* na_act_index. */
1520                                 SZ_NLATTR_STRZ_OF("gact") +
1521                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1522                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1523                         flags |= MLX5_FLOW_ACTION_JUMP;
1524                         break;
1525                 case RTE_FLOW_ACTION_TYPE_DROP:
1526                         size += SZ_NLATTR_NEST + /* na_act_index. */
1527                                 SZ_NLATTR_STRZ_OF("gact") +
1528                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1529                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1530                         flags |= MLX5_FLOW_ACTION_DROP;
1531                         break;
1532                 case RTE_FLOW_ACTION_TYPE_COUNT:
1533                         break;
1534                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1535                         flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1536                         goto action_of_vlan;
1537                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1538                         flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1539                         goto action_of_vlan;
1540                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1541                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1542                         goto action_of_vlan;
1543                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1544                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1545                         goto action_of_vlan;
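                        /*
                         * All OF_*_VLAN actions map to the same TC "vlan"
                         * action; account for the worst case (push)
                         * attribute set.
                         */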
1546 action_of_vlan:
1547                         size += SZ_NLATTR_NEST + /* na_act_index. */
1548                                 SZ_NLATTR_STRZ_OF("vlan") +
1549                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1550                                 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1551                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1552                                 /* VLAN protocol. */
1553                                 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1554                                 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1555                         break;
1556                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1557                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1558                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1559                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1560                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1561                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1562                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1563                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1564                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1565                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1566                         size += flow_tcf_get_pedit_actions_size(&actions,
1567                                                                 &flags);
1568                         break;
1569                 default:
1570                         DRV_LOG(WARNING,
1571                                 "unsupported action %p type %d,"
1572                                 " actions must be validated before flow creation",
1573                                 (const void *)actions, actions->type);
1574                         break;
1575                 }
1576         }
1577         *action_flags = flags;
1578         return size;
1579 }
1580
1581 /**
1582  * Brand rtnetlink buffer with unique handle.
1583  *
1584  * This handle should be unique for a given network interface to avoid
1585  * collisions.
1586  *
1587  * @param nlh
1588  *   Pointer to Netlink message.
1589  * @param handle
1590  *   Unique 32-bit handle to use.
1591  */
1592 static void
1593 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1594 {
1595         struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1596
1597         tcm->tcm_handle = handle;
1598         DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1599                 (void *)nlh, handle);
1600 }
1601
1602 /**
1603  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1604  * memory required, allocates the memory, initializes Netlink message headers
1605  * and sets a unique TC message handle.
1606  *
1607  * @param[in] attr
1608  *   Pointer to the flow attributes.
1609  * @param[in] items
1610  *   Pointer to the list of items.
1611  * @param[in] actions
1612  *   Pointer to the list of actions.
1613  * @param[out] item_flags
1614  *   Pointer to bit mask of all items detected.
1615  * @param[out] action_flags
1616  *   Pointer to bit mask of all actions detected.
1617  * @param[out] error
1618  *   Pointer to the error structure.
1619  *
1620  * @return
1621  *   Pointer to mlx5_flow object on success,
1622  *   otherwise NULL and rte_errno is set.
1623  */
1624 static struct mlx5_flow *
1625 flow_tcf_prepare(const struct rte_flow_attr *attr,
1626                  const struct rte_flow_item items[],
1627                  const struct rte_flow_action actions[],
1628                  uint64_t *item_flags, uint64_t *action_flags,
1629                  struct rte_flow_error *error)
1630 {
1631         size_t size = sizeof(struct mlx5_flow) +
1632                       MNL_ALIGN(sizeof(struct nlmsghdr)) +
1633                       MNL_ALIGN(sizeof(struct tcmsg));
1634         struct mlx5_flow *dev_flow;
1635         struct nlmsghdr *nlh;
1636         struct tcmsg *tcm;
1637
1638         size += flow_tcf_get_items_and_size(attr, items, item_flags);
1639         size += flow_tcf_get_actions_and_size(actions, action_flags);
1640         dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1641         if (!dev_flow) {
1642                 rte_flow_error_set(error, ENOMEM,
1643                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1644                                    "not enough memory to create E-Switch flow");
1645                 return NULL;
1646         }
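        /* The Netlink message buffer follows the mlx5_flow structure. */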
1647         nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1648         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1649         *dev_flow = (struct mlx5_flow){
1650                 .tcf = (struct mlx5_flow_tcf){
1651                         .nlh = nlh,
1652                         .tcm = tcm,
1653                 },
1654         };
1655         /*
1656          * Generate a reasonably unique handle based on the address of the
1657          * target buffer.
1658          *
1659          * This is straightforward on 32-bit systems where the flow pointer can
1660          * be used directly. Otherwise, its least significant part is taken
1661          * after shifting it by the previous power of two of the pointed buffer
1662          * size.
1663          */
1664         if (sizeof(dev_flow) <= 4)
1665                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1666         else
1667                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1668                                        rte_log2_u32(rte_align32prevpow2(size)));
1669         return dev_flow;
1670 }
1671
1672 /**
1673  * Make adjustments for supporting count actions.
1674  *
1675  * @param[in] dev
1676  *   Pointer to the Ethernet device structure.
1677  * @param[in] dev_flow
1678  *   Pointer to mlx5_flow.
1679  * @param[out] error
1680  *   Pointer to error structure.
1681  *
1682  * @return
1683  *   0 on success, a negative errno value otherwise and rte_errno is set.
1684  */
1685 static int
1686 flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
1687                                   struct mlx5_flow *dev_flow,
1688                                   struct rte_flow_error *error)
1689 {
1690         struct rte_flow *flow = dev_flow->flow;
1691
1692         if (!flow->counter) {
1693                 flow->counter = flow_tcf_counter_new();
1694                 if (!flow->counter)
1695                         return rte_flow_error_set(error, rte_errno,
1696                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1697                                                   NULL,
1698                                                   "cannot get counter"
1699                                                   " context.");
1700         }
1701         return 0;
1702 }
1703
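/*
 * Illustration only: a flow rule such as
 *   pattern eth / ipv4 src is 192.0.2.1 / end
 *   actions port_id id 1 / end
 * is translated below into a Netlink request roughly equivalent to the
 * following TC command (interface names and priority are placeholders):
 *   tc filter add dev <ifname> ingress protocol all pref <prio> flower \
 *      skip_sw src_ip 192.0.2.1 action mirred egress redirect dev <peer>
 */
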
1704 /**
1705  * Translate flow for Linux TC flower and construct Netlink message.
1706  *
1707  * @param[in] dev
1708  *   Pointer to the Ethernet device structure.
1709  * @param[in, out] dev_flow
1710  *   Pointer to the sub flow.
1711  * @param[in] attr
1712  *   Pointer to the flow attributes.
1713  * @param[in] items
1714  *   Pointer to the list of items.
1715  * @param[in] actions
1716  *   Pointer to the list of actions.
1717  * @param[out] error
1718  *   Pointer to the error structure.
1719  *
1720  * @return
1721  *   0 on success, a negative errno value otherwise and rte_errno is set.
1722  */
1723 static int
1724 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1725                    const struct rte_flow_attr *attr,
1726                    const struct rte_flow_item items[],
1727                    const struct rte_flow_action actions[],
1728                    struct rte_flow_error *error)
1729 {
1730         union {
1731                 const struct rte_flow_item_port_id *port_id;
1732                 const struct rte_flow_item_eth *eth;
1733                 const struct rte_flow_item_vlan *vlan;
1734                 const struct rte_flow_item_ipv4 *ipv4;
1735                 const struct rte_flow_item_ipv6 *ipv6;
1736                 const struct rte_flow_item_tcp *tcp;
1737                 const struct rte_flow_item_udp *udp;
1738         } spec, mask;
1739         union {
1740                 const struct rte_flow_action_port_id *port_id;
1741                 const struct rte_flow_action_jump *jump;
1742                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1743                 const struct rte_flow_action_of_set_vlan_vid *
1744                         of_set_vlan_vid;
1745                 const struct rte_flow_action_of_set_vlan_pcp *
1746                         of_set_vlan_pcp;
1747         } conf;
1748         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1749         struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1750         struct tcmsg *tcm = dev_flow->tcf.tcm;
1751         uint32_t na_act_index_cur;
1752         bool eth_type_set = 0;
1753         bool vlan_present = 0;
1754         bool vlan_eth_type_set = 0;
1755         bool ip_proto_set = 0;
1756         struct nlattr *na_flower;
1757         struct nlattr *na_flower_act;
1758         struct nlattr *na_vlan_id = NULL;
1759         struct nlattr *na_vlan_priority = NULL;
1760         uint64_t item_flags = 0;
1761         int ret;
1762
1763         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1764                                                 PTOI_TABLE_SZ_MAX(dev)));
1765         nlh = dev_flow->tcf.nlh;
1766         tcm = dev_flow->tcf.tcm;
1767         /* Prepare API must have been called beforehand. */
1768         assert(nlh != NULL && tcm != NULL);
1769         tcm->tcm_family = AF_UNSPEC;
1770         tcm->tcm_ifindex = ptoi[0].ifindex;
1771         tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1772         /*
1773          * Priority cannot be zero to prevent the kernel from picking one
1774          * automatically.
1775          */
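        /* tcm_info: priority in the upper 16 bits, protocol in the lower. */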
1776         tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1777                                   RTE_BE16(ETH_P_ALL));
1778         if (attr->group > 0)
1779                 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1780         mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1781         na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1782         mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1783         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1784                 unsigned int i;
1785
1786                 switch (items->type) {
1787                 case RTE_FLOW_ITEM_TYPE_VOID:
1788                         break;
1789                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1790                         mask.port_id = flow_tcf_item_mask
1791                                 (items, &rte_flow_item_port_id_mask,
1792                                  &flow_tcf_mask_supported.port_id,
1793                                  &flow_tcf_mask_empty.port_id,
1794                                  sizeof(flow_tcf_mask_supported.port_id),
1795                                  error);
1796                         assert(mask.port_id);
1797                         if (mask.port_id == &flow_tcf_mask_empty.port_id)
1798                                 break;
1799                         spec.port_id = items->spec;
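                        /*
                         * Map the DPDK port ID onto the kernel ifindex via
                         * the ptoi[] table; an unmasked port ID falls back
                         * to entry 0 (the device's own port).
                         */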
1800                         if (!mask.port_id->id)
1801                                 i = 0;
1802                         else
1803                                 for (i = 0; ptoi[i].ifindex; ++i)
1804                                         if (ptoi[i].port_id == spec.port_id->id)
1805                                                 break;
1806                         assert(ptoi[i].ifindex);
1807                         tcm->tcm_ifindex = ptoi[i].ifindex;
1808                         break;
1809                 case RTE_FLOW_ITEM_TYPE_ETH:
1810                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1811                         mask.eth = flow_tcf_item_mask
1812                                 (items, &rte_flow_item_eth_mask,
1813                                  &flow_tcf_mask_supported.eth,
1814                                  &flow_tcf_mask_empty.eth,
1815                                  sizeof(flow_tcf_mask_supported.eth),
1816                                  error);
1817                         assert(mask.eth);
1818                         if (mask.eth == &flow_tcf_mask_empty.eth)
1819                                 break;
1820                         spec.eth = items->spec;
1821                         if (mask.eth->type) {
1822                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1823                                                  spec.eth->type);
1824                                 eth_type_set = 1;
1825                         }
1826                         if (!is_zero_ether_addr(&mask.eth->dst)) {
1827                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1828                                              ETHER_ADDR_LEN,
1829                                              spec.eth->dst.addr_bytes);
1830                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1831                                              ETHER_ADDR_LEN,
1832                                              mask.eth->dst.addr_bytes);
1833                         }
1834                         if (!is_zero_ether_addr(&mask.eth->src)) {
1835                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1836                                              ETHER_ADDR_LEN,
1837                                              spec.eth->src.addr_bytes);
1838                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1839                                              ETHER_ADDR_LEN,
1840                                              mask.eth->src.addr_bytes);
1841                         }
1842                         break;
1843                 case RTE_FLOW_ITEM_TYPE_VLAN:
1844                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1845                         mask.vlan = flow_tcf_item_mask
1846                                 (items, &rte_flow_item_vlan_mask,
1847                                  &flow_tcf_mask_supported.vlan,
1848                                  &flow_tcf_mask_empty.vlan,
1849                                  sizeof(flow_tcf_mask_supported.vlan),
1850                                  error);
1851                         assert(mask.vlan);
1852                         if (!eth_type_set)
1853                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1854                                                  RTE_BE16(ETH_P_8021Q));
1855                         eth_type_set = 1;
1856                         vlan_present = 1;
1857                         if (mask.vlan == &flow_tcf_mask_empty.vlan)
1858                                 break;
1859                         spec.vlan = items->spec;
1860                         if (mask.vlan->inner_type) {
1861                                 mnl_attr_put_u16(nlh,
1862                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1863                                                  spec.vlan->inner_type);
1864                                 vlan_eth_type_set = 1;
1865                         }
1866                         if (mask.vlan->tci & RTE_BE16(0xe000))
1867                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1868                                                 (rte_be_to_cpu_16
1869                                                  (spec.vlan->tci) >> 13) & 0x7);
1870                         if (mask.vlan->tci & RTE_BE16(0x0fff))
1871                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1872                                                  rte_be_to_cpu_16
1873                                                  (spec.vlan->tci &
1874                                                   RTE_BE16(0x0fff)));
1875                         break;
1876                 case RTE_FLOW_ITEM_TYPE_IPV4:
1877                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1878                         mask.ipv4 = flow_tcf_item_mask
1879                                 (items, &rte_flow_item_ipv4_mask,
1880                                  &flow_tcf_mask_supported.ipv4,
1881                                  &flow_tcf_mask_empty.ipv4,
1882                                  sizeof(flow_tcf_mask_supported.ipv4),
1883                                  error);
1884                         assert(mask.ipv4);
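                        /*
                         * With a VLAN tag present the L3 protocol belongs in
                         * TCA_FLOWER_KEY_VLAN_ETH_TYPE, otherwise in
                         * TCA_FLOWER_KEY_ETH_TYPE, unless either was already
                         * set by the ETH/VLAN items above.
                         */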
1885                         if (!eth_type_set || !vlan_eth_type_set)
1886                                 mnl_attr_put_u16(nlh,
1887                                                  vlan_present ?
1888                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1889                                                  TCA_FLOWER_KEY_ETH_TYPE,
1890                                                  RTE_BE16(ETH_P_IP));
1891                         eth_type_set = 1;
1892                         vlan_eth_type_set = 1;
1893                         if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1894                                 break;
1895                         spec.ipv4 = items->spec;
1896                         if (mask.ipv4->hdr.next_proto_id) {
1897                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1898                                                 spec.ipv4->hdr.next_proto_id);
1899                                 ip_proto_set = 1;
1900                         }
1901                         if (mask.ipv4->hdr.src_addr) {
1902                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1903                                                  spec.ipv4->hdr.src_addr);
1904                                 mnl_attr_put_u32(nlh,
1905                                                  TCA_FLOWER_KEY_IPV4_SRC_MASK,
1906                                                  mask.ipv4->hdr.src_addr);
1907                         }
1908                         if (mask.ipv4->hdr.dst_addr) {
1909                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1910                                                  spec.ipv4->hdr.dst_addr);
1911                                 mnl_attr_put_u32(nlh,
1912                                                  TCA_FLOWER_KEY_IPV4_DST_MASK,
1913                                                  mask.ipv4->hdr.dst_addr);
1914                         }
1915                         break;
1916                 case RTE_FLOW_ITEM_TYPE_IPV6:
1917                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1918                         mask.ipv6 = flow_tcf_item_mask
1919                                 (items, &rte_flow_item_ipv6_mask,
1920                                  &flow_tcf_mask_supported.ipv6,
1921                                  &flow_tcf_mask_empty.ipv6,
1922                                  sizeof(flow_tcf_mask_supported.ipv6),
1923                                  error);
1924                         assert(mask.ipv6);
1925                         if (!eth_type_set || !vlan_eth_type_set)
1926                                 mnl_attr_put_u16(nlh,
1927                                                  vlan_present ?
1928                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1929                                                  TCA_FLOWER_KEY_ETH_TYPE,
1930                                                  RTE_BE16(ETH_P_IPV6));
1931                         eth_type_set = 1;
1932                         vlan_eth_type_set = 1;
1933                         if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1934                                 break;
1935                         spec.ipv6 = items->spec;
1936                         if (mask.ipv6->hdr.proto) {
1937                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1938                                                 spec.ipv6->hdr.proto);
1939                                 ip_proto_set = 1;
1940                         }
1941                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1942                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1943                                              sizeof(spec.ipv6->hdr.src_addr),
1944                                              spec.ipv6->hdr.src_addr);
1945                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1946                                              sizeof(mask.ipv6->hdr.src_addr),
1947                                              mask.ipv6->hdr.src_addr);
1948                         }
1949                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1950                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1951                                              sizeof(spec.ipv6->hdr.dst_addr),
1952                                              spec.ipv6->hdr.dst_addr);
1953                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1954                                              sizeof(mask.ipv6->hdr.dst_addr),
1955                                              mask.ipv6->hdr.dst_addr);
1956                         }
1957                         break;
1958                 case RTE_FLOW_ITEM_TYPE_UDP:
1959                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1960                         mask.udp = flow_tcf_item_mask
1961                                 (items, &rte_flow_item_udp_mask,
1962                                  &flow_tcf_mask_supported.udp,
1963                                  &flow_tcf_mask_empty.udp,
1964                                  sizeof(flow_tcf_mask_supported.udp),
1965                                  error);
1966                         assert(mask.udp);
1967                         if (!ip_proto_set)
1968                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1969                                                 IPPROTO_UDP);
1970                         if (mask.udp == &flow_tcf_mask_empty.udp)
1971                                 break;
1972                         spec.udp = items->spec;
1973                         if (mask.udp->hdr.src_port) {
1974                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1975                                                  spec.udp->hdr.src_port);
1976                                 mnl_attr_put_u16(nlh,
1977                                                  TCA_FLOWER_KEY_UDP_SRC_MASK,
1978                                                  mask.udp->hdr.src_port);
1979                         }
1980                         if (mask.udp->hdr.dst_port) {
1981                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1982                                                  spec.udp->hdr.dst_port);
1983                                 mnl_attr_put_u16(nlh,
1984                                                  TCA_FLOWER_KEY_UDP_DST_MASK,
1985                                                  mask.udp->hdr.dst_port);
1986                         }
1987                         break;
1988                 case RTE_FLOW_ITEM_TYPE_TCP:
1989                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1990                         mask.tcp = flow_tcf_item_mask
1991                                 (items, &rte_flow_item_tcp_mask,
1992                                  &flow_tcf_mask_supported.tcp,
1993                                  &flow_tcf_mask_empty.tcp,
1994                                  sizeof(flow_tcf_mask_supported.tcp),
1995                                  error);
1996                         assert(mask.tcp);
1997                         if (!ip_proto_set)
1998                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1999                                                 IPPROTO_TCP);
2000                         if (mask.tcp == &flow_tcf_mask_empty.tcp)
2001                                 break;
2002                         spec.tcp = items->spec;
2003                         if (mask.tcp->hdr.src_port) {
2004                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
2005                                                  spec.tcp->hdr.src_port);
2006                                 mnl_attr_put_u16(nlh,
2007                                                  TCA_FLOWER_KEY_TCP_SRC_MASK,
2008                                                  mask.tcp->hdr.src_port);
2009                         }
2010                         if (mask.tcp->hdr.dst_port) {
2011                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
2012                                                  spec.tcp->hdr.dst_port);
2013                                 mnl_attr_put_u16(nlh,
2014                                                  TCA_FLOWER_KEY_TCP_DST_MASK,
2015                                                  mask.tcp->hdr.dst_port);
2016                         }
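                        /*
                         * TCA_FLOWER_KEY_TCP_FLAGS is a 16-bit big-endian
                         * field, hence the 8-bit TCP flags are widened
                         * before the byte-order conversion.
                         */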
2017                         if (mask.tcp->hdr.tcp_flags) {
2018                                 mnl_attr_put_u16
2019                                         (nlh,
2020                                          TCA_FLOWER_KEY_TCP_FLAGS,
2021                                          rte_cpu_to_be_16
2022                                                 (spec.tcp->hdr.tcp_flags));
2023                                 mnl_attr_put_u16
2024                                         (nlh,
2025                                          TCA_FLOWER_KEY_TCP_FLAGS_MASK,
2026                                          rte_cpu_to_be_16
2027                                                 (mask.tcp->hdr.tcp_flags));
2028                         }
2029                         break;
2030                 default:
2031                         return rte_flow_error_set(error, ENOTSUP,
2032                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2033                                                   NULL, "item not supported");
2034                 }
2035         }
2036         na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
2037         na_act_index_cur = 1;
2038         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2039                 struct nlattr *na_act_index;
2040                 struct nlattr *na_act;
2041                 unsigned int vlan_act;
2042                 unsigned int i;
2043
2044                 switch (actions->type) {
2045                 case RTE_FLOW_ACTION_TYPE_VOID:
2046                         break;
2047                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
2048                         conf.port_id = actions->conf;
2049                         if (conf.port_id->original)
2050                                 i = 0;
2051                         else
2052                                 for (i = 0; ptoi[i].ifindex; ++i)
2053                                         if (ptoi[i].port_id == conf.port_id->id)
2054                                                 break;
2055                         assert(ptoi[i].ifindex);
2056                         na_act_index =
2057                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2058                         assert(na_act_index);
2059                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
2060                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2061                         assert(na_act);
2062                         mnl_attr_put(nlh, TCA_MIRRED_PARMS,
2063                                      sizeof(struct tc_mirred),
2064                                      &(struct tc_mirred){
2065                                         .action = TC_ACT_STOLEN,
2066                                         .eaction = TCA_EGRESS_REDIR,
2067                                         .ifindex = ptoi[i].ifindex,
2068                                      });
2069                         mnl_attr_nest_end(nlh, na_act);
2070                         mnl_attr_nest_end(nlh, na_act_index);
2071                         break;
2072                 case RTE_FLOW_ACTION_TYPE_JUMP:
2073                         conf.jump = actions->conf;
2074                         na_act_index =
2075                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2076                         assert(na_act_index);
2077                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2078                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2079                         assert(na_act);
2080                         mnl_attr_put(nlh, TCA_GACT_PARMS,
2081                                      sizeof(struct tc_gact),
2082                                      &(struct tc_gact){
2083                                         .action = TC_ACT_GOTO_CHAIN |
2084                                                   conf.jump->group,
2085                                      });
2086                         mnl_attr_nest_end(nlh, na_act);
2087                         mnl_attr_nest_end(nlh, na_act_index);
2088                         break;
2089                 case RTE_FLOW_ACTION_TYPE_DROP:
2090                         na_act_index =
2091                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2092                         assert(na_act_index);
2093                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2094                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2095                         assert(na_act);
2096                         mnl_attr_put(nlh, TCA_GACT_PARMS,
2097                                      sizeof(struct tc_gact),
2098                                      &(struct tc_gact){
2099                                         .action = TC_ACT_SHOT,
2100                                      });
2101                         mnl_attr_nest_end(nlh, na_act);
2102                         mnl_attr_nest_end(nlh, na_act_index);
2103                         break;
2104                 case RTE_FLOW_ACTION_TYPE_COUNT:
2105                         /*
2106                          * Driver adds the count action implicitly for
2107                          * each rule it creates.
2108                          */
2109                         ret = flow_tcf_translate_action_count(dev,
2110                                                               dev_flow, error);
2111                         if (ret < 0)
2112                                 return ret;
2113                         break;
2114                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2115                         conf.of_push_vlan = NULL;
2116                         vlan_act = TCA_VLAN_ACT_POP;
2117                         goto action_of_vlan;
2118                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2119                         conf.of_push_vlan = actions->conf;
2120                         vlan_act = TCA_VLAN_ACT_PUSH;
2121                         goto action_of_vlan;
2122                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2123                         conf.of_set_vlan_vid = actions->conf;
2124                         if (na_vlan_id)
2125                                 goto override_na_vlan_id;
2126                         vlan_act = TCA_VLAN_ACT_MODIFY;
2127                         goto action_of_vlan;
2128                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2129                         conf.of_set_vlan_pcp = actions->conf;
2130                         if (na_vlan_priority)
2131                                 goto override_na_vlan_priority;
2132                         vlan_act = TCA_VLAN_ACT_MODIFY;
2133                         goto action_of_vlan;
2134 action_of_vlan:
2135                         na_act_index =
2136                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2137                         assert(na_act_index);
2138                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
2139                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2140                         assert(na_act);
2141                         mnl_attr_put(nlh, TCA_VLAN_PARMS,
2142                                      sizeof(struct tc_vlan),
2143                                      &(struct tc_vlan){
2144                                         .action = TC_ACT_PIPE,
2145                                         .v_action = vlan_act,
2146                                      });
2147                         if (vlan_act == TCA_VLAN_ACT_POP) {
2148                                 mnl_attr_nest_end(nlh, na_act);
2149                                 mnl_attr_nest_end(nlh, na_act_index);
2150                                 break;
2151                         }
2152                         if (vlan_act == TCA_VLAN_ACT_PUSH)
2153                                 mnl_attr_put_u16(nlh,
2154                                                  TCA_VLAN_PUSH_VLAN_PROTOCOL,
2155                                                  conf.of_push_vlan->ethertype);
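                        /*
                         * Append TCA_VLAN_PAD placeholders for VLAN ID and
                         * priority and remember their locations so that the
                         * override_na_vlan_* labels below can rewrite the
                         * attribute type and payload in place.
                         */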
2156                         na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
2157                         mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
2158                         na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
2159                         mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
2160                         mnl_attr_nest_end(nlh, na_act);
2161                         mnl_attr_nest_end(nlh, na_act_index);
2162                         if (actions->type ==
2163                             RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
2164 override_na_vlan_id:
2165                                 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
2166                                 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
2167                                         rte_be_to_cpu_16
2168                                         (conf.of_set_vlan_vid->vlan_vid);
2169                         } else if (actions->type ==
2170                                    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
2171 override_na_vlan_priority:
2172                                 na_vlan_priority->nla_type =
2173                                         TCA_VLAN_PUSH_VLAN_PRIORITY;
2174                                 *(uint8_t *)mnl_attr_get_payload
2175                                         (na_vlan_priority) =
2176                                         conf.of_set_vlan_pcp->vlan_pcp;
2177                         }
2178                         break;
2179                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2180                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2181                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2182                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2183                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2184                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2185                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2186                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2187                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2188                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2189                         na_act_index =
2190                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2191                         flow_tcf_create_pedit_mnl_msg(nlh,
2192                                                       &actions, item_flags);
2193                         mnl_attr_nest_end(nlh, na_act_index);
2194                         break;
2195                 default:
2196                         return rte_flow_error_set(error, ENOTSUP,
2197                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2198                                                   actions,
2199                                                   "action not supported");
2200                 }
2201         }
2202         assert(na_flower);
2203         assert(na_flower_act);
2204         mnl_attr_nest_end(nlh, na_flower_act);
2205         mnl_attr_nest_end(nlh, na_flower);
2206         return 0;
2207 }
2208
2209 /**
2210  * Send Netlink message with acknowledgment.
2211  *
2212  * @param ctx
2213  *   Flow context to use.
2214  * @param nlh
2215  *   Message to send. This function always raises the NLM_F_ACK flag before
2216  *   sending.
2217  *
2218  * @return
2219  *   0 on success, a negative errno value otherwise and rte_errno is set.
2220  */
2221 static int
2222 flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
2223 {
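        /*
         * The reply buffer must fit a Netlink header, an nlmsgerr structure
         * and the echoed payload of the request being acknowledged.
         */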
2224         alignas(struct nlmsghdr)
2225         uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2226                     nlh->nlmsg_len - sizeof(*nlh)];
2227         uint32_t seq = ctx->seq++;
2228         struct mnl_socket *nl = ctx->nl;
2229         int ret;
2230
2231         nlh->nlmsg_flags |= NLM_F_ACK;
2232         nlh->nlmsg_seq = seq;
2233         ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2234         if (ret != -1)
2235                 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
2236         if (ret != -1)
2237                 ret = mnl_cb_run
2238                         (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
2239         if (ret > 0)
2240                 return 0;
2241         rte_errno = errno;
2242         return -rte_errno;
2243 }
2244
2245 /**
2246  * Apply flow to E-Switch by sending Netlink message.
2247  *
2248  * @param[in] dev
2249  *   Pointer to Ethernet device.
2250  * @param[in, out] flow
2251  *   Pointer to the sub flow.
2252  * @param[out] error
2253  *   Pointer to the error structure.
2254  *
2255  * @return
2256  *   0 on success, a negative errno value otherwise and rte_errno is set.
2257  */
2258 static int
2259 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2260                struct rte_flow_error *error)
2261 {
2262         struct priv *priv = dev->data->dev_private;
2263         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2264         struct mlx5_flow *dev_flow;
2265         struct nlmsghdr *nlh;
2266
2267         dev_flow = LIST_FIRST(&flow->dev_flows);
2268         /* E-Switch flow can't be expanded. */
2269         assert(!LIST_NEXT(dev_flow, next));
2270         nlh = dev_flow->tcf.nlh;
2271         nlh->nlmsg_type = RTM_NEWTFILTER;
2272         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2273         if (!flow_tcf_nl_ack(ctx, nlh))
2274                 return 0;
2275         return rte_flow_error_set(error, rte_errno,
2276                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2277                                   "netlink: failed to create TC flow rule");
2278 }
2279
2280 /**
2281  * Remove flow from E-Switch by sending Netlink message.
2282  *
2283  * @param[in] dev
2284  *   Pointer to Ethernet device.
2285  * @param[in, out] flow
2286  *   Pointer to the sub flow.
2287  */
2288 static void
2289 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2290 {
2291         struct priv *priv = dev->data->dev_private;
2292         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2293         struct mlx5_flow *dev_flow;
2294         struct nlmsghdr *nlh;
2295
2296         if (!flow)
2297                 return;
2298         if (flow->counter) {
2299                 if (--flow->counter->ref_cnt == 0) {
2300                         rte_free(flow->counter);
2301                         flow->counter = NULL;
2302                 }
2303         }
2304         dev_flow = LIST_FIRST(&flow->dev_flows);
2305         if (!dev_flow)
2306                 return;
2307         /* E-Switch flow can't be expanded. */
2308         assert(!LIST_NEXT(dev_flow, next));
2309         nlh = dev_flow->tcf.nlh;
2310         nlh->nlmsg_type = RTM_DELTFILTER;
2311         nlh->nlmsg_flags = NLM_F_REQUEST;
2312         flow_tcf_nl_ack(ctx, nlh);
2313 }
2314
2315 /**
2316  * Remove flow from E-Switch and release resources of the device flow.
2317  *
2318  * @param[in] dev
2319  *   Pointer to Ethernet device.
2320  * @param[in, out] flow
2321  *   Pointer to the sub flow.
2322  */
2323 static void
2324 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2325 {
2326         struct mlx5_flow *dev_flow;
2327
2328         if (!flow)
2329                 return;
2330         flow_tcf_remove(dev, flow);
2331         dev_flow = LIST_FIRST(&flow->dev_flows);
2332         if (!dev_flow)
2333                 return;
2334         /* E-Switch flow can't be expanded. */
2335         assert(!LIST_NEXT(dev_flow, next));
2336         LIST_REMOVE(dev_flow, next);
2337         rte_free(dev_flow);
2338 }
2339
2340 /**
2341  * Helper routine for figuring the space size required for a parse buffer.
2342  *
2343  * @param array
2344  *   Array of values to use.
2345  * @param idx
2346  *   Current location in array.
2347  * @param value
2348  *   Value to compare with.
2349  *
2350  * @return
2351  *   The maximum of the given value and the array value at the given index,
 *   or the given value if the index is negative.
2352  */
2353 static uint16_t
2354 flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
2355 {
2356         return idx < 0 ? (value) : RTE_MAX((array)[idx], value);
2357 }
2358
2359 /**
2360  * Parse rtnetlink message attributes filling the attribute table with the info
2361  * retrieved.
2362  *
2363  * @param[out] tb
2364  *   Attribute table to be filled.
2365  * @param max
2366  *   Maximum entry in the attribute table.
2367  * @param rta
2368  *   The attributes section in the message to be parsed.
2369  * @param len
2370  *   The length of the attributes section in the message.
2371  */
2372 static void
2373 flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
2374                          struct rtattr *rta, int len)
2375 {
2376         unsigned short type;
2377         memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
2378         while (RTA_OK(rta, len)) {
2379                 type = rta->rta_type;
2380                 if (type <= max && !tb[type])
2381                         tb[type] = rta;
2382                 rta = RTA_NEXT(rta, len);
2383         }
2384 }
2385
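/*
 * The rta_type[] arrays passed to the helpers below list the attribute
 * types from the innermost one outwards, e.g. (illustrative only)
 *   uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS, ... };
 * The helpers walk one nesting level of the received message per entry,
 * decrementing idx as they descend.
 */
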
2386 /**
2387  * Extract flow counters from flower action.
2388  *
2389  * @param rta
2390  *   flower action stats properties in the Netlink message received.
2391  * @param rta_type
2392  *   The sequence of rta_types to traverse in order to reach the requested
2393  *   object, listed from the innermost attribute outwards.
2394  * @param idx
2395  *   Current location in rta_type table.
2396  * @param[out] data
2397  *   data holding the count statistics of the rte_flow retrieved from
2398  *   the message.
2399  *
2400  * @return
2401  *   0 if data was found and retrieved, -1 otherwise.
2402  */
2403 static int
2404 flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
2405                                        uint16_t rta_type[], int idx,
2406                                        struct gnet_stats_basic *data)
2407 {
2408         int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
2409                                                  TCA_STATS_BASIC);
2410         struct rtattr *tbs[tca_stats_max + 1];
2411
2412         if (rta == NULL || idx < 0)
2413                 return -1;
2414         flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
2415                                  RTA_DATA(rta), RTA_PAYLOAD(rta));
2416         switch (rta_type[idx]) {
2417         case TCA_STATS_BASIC:
2418                 if (tbs[TCA_STATS_BASIC]) {
2419                         memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
2420                                RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
2421                                sizeof(*data)));
2422                         return 0;
2423                 }
2424                 break;
2425         default:
2426                 break;
2427         }
2428         return -1;
2429 }
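
/*
 * Note: struct gnet_stats_basic (linux/gen_stats.h) carries a __u64 byte
 * counter and a __u32 packet counter; these are the values exposed later
 * as the rte_flow_query_count bytes/hits by flow_tcf_query_count().
 */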
2430
2431 /**
2432  * Parse flower single action retrieving the requested action attribute,
2433  * if found.
2434  *
2435  * @param arg
2436  *   Flower action properties in the Netlink message received.
2437  * @param rta_type
2438  *   The backward sequence of rta_types, as written in the attribute table,
2439  *   that we need to traverse in order to reach the requested object.
2440  * @param idx
2441  *   Current location in rta_type table.
2442  * @param[out] data
2443  *   Count statistics retrieved from the message query.
2444  *
2445  * @return
2446  *   0 if data was found and retrieved, -1 otherwise.
2447  */
2448 static int
2449 flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
2450                                      uint16_t rta_type[], int idx, void *data)
2451 {
2452         int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
2453         struct rtattr *tb[tca_act_max + 1];
2454
2455         if (arg == NULL || idx < 0)
2456                 return -1;
2457         flow_tcf_nl_parse_rtattr(tb, tca_act_max,
2458                                  RTA_DATA(arg), RTA_PAYLOAD(arg));
2459         if (tb[TCA_ACT_KIND] == NULL)
2460                 return -1;
2461         switch (rta_type[idx]) {
2462         case TCA_ACT_STATS:
2463                 if (tb[TCA_ACT_STATS])
2464                         return flow_tcf_nl_action_stats_parse_and_get
2465                                         (tb[TCA_ACT_STATS],
2466                                          rta_type, --idx,
2467                                          (struct gnet_stats_basic *)data);
2468                 break;
2469         default:
2470                 break;
2471         }
2472         return -1;
2473 }
2474
2475 /**
2476  * Parse flower action section in the message retrieving the requested
2477  * attribute from the first action that provides it.
2478  *
2479  * @param arg
2480  *   Flower action section in the Netlink message received.
2481  * @param rta_type
2482  *   The backward sequence of rta_types, as written in the attribute table,
2483  *   that we need to traverse in order to reach the requested object.
2484  * @param idx
2485  *   Current location in rta_type table.
2486  * @param[out] data
2487  *   Data retrieved from the message query.
2488  *
2489  * @return
2490  *   0 if data was found and retrieved, -1 otherwise.
2491  */
2492 static int
2493 flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
2494                                  uint16_t rta_type[], int idx, void *data)
2495 {
2496         struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
2497         int i;
2498
2499         if (arg == NULL || idx < 0)
2500                 return -1;
2501         flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
2502                                  RTA_DATA(arg), RTA_PAYLOAD(arg));
2503         switch (rta_type[idx]) {
2504         /*
2505          * Flow counters are stored in the actions defined by the flow
2506          * and not in the flow itself, therefore we need to traverse the
2507          * flower chain of actions in search of them.
2508          *
2509          * Note that the index is not decremented here.
2510          */
2511         case TCA_ACT_STATS:
2512                 for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
2513                         if (tb[i] &&
2514                             !flow_tcf_nl_parse_one_action_and_get
2515                                                 (tb[i], rta_type,
2516                                                  idx, data))
2517                                 return 0;
2518                 }
2519                 break;
2520         default:
2521                 break;
2522         }
2523         return -1;
2524 }
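
/*
 * Note: action entries inside a TCA_FLOWER_ACT nest are keyed by their
 * priority, which the kernel numbers from 1 up to TCA_ACT_MAX_PRIO, so
 * the 0..TCA_ACT_MAX_PRIO scan above simply finds nothing at index 0.
 */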
2525
2526 /**
2527  * Parse flower classifier options in the message, retrieving the requested
2528  * attribute if found.
2529  *
2530  * @param opt
2531  *   Flower options section in the Netlink message received.
2532  * @param rta_type
2533  *   The backward sequence of rta_types, as written in the attribute table,
2534  *   that we need to traverse in order to reach the requested object.
2535  * @param idx
2536  *   Current location in rta_type table.
2537  * @param[out] data
2538  *   Data retrieved from the message query.
2539  *
2540  * @return
2541  *   0 if data was found and retrieved, -1 otherwise.
2542  */
2543 static int
2544 flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
2545                                uint16_t rta_type[], int idx, void *data)
2546 {
2547         int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
2548                                                   TCA_FLOWER_ACT);
2549         struct rtattr *tb[tca_flower_max + 1];
2550
2551         if (!opt || idx < 0)
2552                 return -1;
2553         flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
2554                                  RTA_DATA(opt), RTA_PAYLOAD(opt));
2555         switch (rta_type[idx]) {
2556         case TCA_FLOWER_ACT:
2557                 if (tb[TCA_FLOWER_ACT])
2558                         return flow_tcf_nl_action_parse_and_get
2559                                                         (tb[TCA_FLOWER_ACT],
2560                                                          rta_type, --idx, data);
2561                 break;
2562         default:
2563                 break;
2564         }
2565         return -1;
2566 }
2567
2568 /**
2569  * Parse Netlink reply on filter query, retrieving the flow counters.
2570  *
2571  * @param cnlh
2572  *   Message received from Netlink.
2573  * @param rta_type
2574  *   The backward sequence of rta_types, as written in the attribute table,
2575  *   that we need to traverse in order to reach the requested object.
2576  * @param idx
2577  *   Current location in rta_type table.
2578  * @param[out] data
2579  *   Data retrieved from the message query.
2580  *
2581  * @return
2582  *   0 if data was found and retrieved, -1 otherwise.
2583  */
2584 static int
2585 flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
2586                                  uint16_t rta_type[], int idx, void *data)
2587 {
2588         struct nlmsghdr *nlh = cnlh;
2589         struct tcmsg *t = NLMSG_DATA(nlh);
2590         int len = nlh->nlmsg_len;
2591         int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
2592         struct rtattr *tb[tca_max + 1];
2593
2594         if (idx < 0)
2595                 return -1;
2596         if (nlh->nlmsg_type != RTM_NEWTFILTER &&
2597             nlh->nlmsg_type != RTM_GETTFILTER &&
2598             nlh->nlmsg_type != RTM_DELTFILTER)
2599                 return -1;
2600         len -= NLMSG_LENGTH(sizeof(*t));
2601         if (len < 0)
2602                 return -1;
2603         flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
2604         /* Not a TC flower flow - bail out */
2605         if (!tb[TCA_KIND] ||
2606             strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
2607                 return -1;
2608         switch (rta_type[idx]) {
2609         case TCA_OPTIONS:
2610                 if (tb[TCA_OPTIONS])
2611                         return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
2612                                                               rta_type,
2613                                                               --idx, data);
2614                 break;
2615         default:
2616                 break;
2617         }
2618         return -1;
2619 }
2620
2621 /**
2622  * A callback to parse Netlink reply on TC flower query.
2623  *
2624  * @param nlh
2625  *   Message received from Netlink.
2626  * @param[out] data
2627  *   Pointer to data area to be filled by the parsing routine.
2628  *   Assumed to be a pointer to struct flow_tcf_stats_basic.
2629  *
2630  * @return
2631  *   MNL_CB_OK value.
2632  */
2633 static int
2634 flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
2635 {
2636         /*
2637          * The backward sequence of rta_types to pass in order to get
2638          * to the counters.
2639          */
2640         uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
2641                                 TCA_FLOWER_ACT, TCA_OPTIONS };
2642         struct flow_tcf_stats_basic *sb_data = data;
2643         union {
2644                 const struct nlmsghdr *c;
2645                 struct nlmsghdr *nc;
2646         } tnlh = { .c = nlh };
2647
2648         if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
2649                                               RTE_DIM(rta_type) - 1,
2650                                               (void *)&sb_data->counters))
2651                 sb_data->valid = true;
2652         return MNL_CB_OK;
2653 }
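
/*
 * This callback is driven by mnl_cb_run() from flow_tcf_query_count()
 * below. The rta_type[] array above lists, innermost first, the
 * attribute nesting path TCA_OPTIONS -> TCA_FLOWER_ACT -> TCA_ACT_STATS
 * -> TCA_STATS_BASIC that the parse-and-get helpers descend to reach
 * the byte/packet counters.
 */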
2654
2655 /**
2656  * Query a TC flower rule for its statistics via Netlink.
2657  *
2658  * @param[in] dev
2659  *   Pointer to Ethernet device.
2660  * @param[in] flow
2661  *   Pointer to the sub flow.
2662  * @param[out] data
2663  *   Data retrieved by the query.
2664  * @param[out] error
2665  *   Perform verbose error reporting if not NULL.
2666  *
2667  * @return
2668  *   0 on success, a negative errno value otherwise and rte_errno is set.
2669  */
2670 static int
2671 flow_tcf_query_count(struct rte_eth_dev *dev,
2672                           struct rte_flow *flow,
2673                           void *data,
2674                           struct rte_flow_error *error)
2675 {
2676         struct flow_tcf_stats_basic sb_data = { 0 };
2677         struct rte_flow_query_count *qc = data;
2678         struct priv *priv = dev->data->dev_private;
2679         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2680         struct mnl_socket *nl = ctx->nl;
2681         struct mlx5_flow *dev_flow;
2682         struct nlmsghdr *nlh;
2683         uint32_t seq = priv->tcf_context->seq++;
2684         ssize_t ret;
2685
2686         assert(qc);
2687         dev_flow = LIST_FIRST(&flow->dev_flows);
2688         /* E-Switch flow can't be expanded. */
2689         assert(!LIST_NEXT(dev_flow, next));
2690         if (!dev_flow->flow->counter)
2691                 goto notsup_exit;
2692         nlh = dev_flow->tcf.nlh;
2693         nlh->nlmsg_type = RTM_GETTFILTER;
2694         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
2695         nlh->nlmsg_seq = seq;
2696         if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
2697                 goto error_exit;
2698         do {
2699                 ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
2700                 if (ret <= 0)
2701                         break;
2702                 ret = mnl_cb_run(ctx->buf, ret, seq,
2703                                  mnl_socket_get_portid(nl),
2704                                  flow_tcf_nl_message_get_stats_basic,
2705                                  (void *)&sb_data);
2706         } while (ret > 0);
2708         if (sb_data.valid) {
2709                 /* Return the delta from last reset. */
2710                 qc->hits_set = 1;
2711                 qc->bytes_set = 1;
2712                 qc->hits = sb_data.counters.packets - flow->counter->hits;
2713                 qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
2714                 if (qc->reset) {
2715                         flow->counter->hits = sb_data.counters.packets;
2716                         flow->counter->bytes = sb_data.counters.bytes;
2717                 }
2718                 return 0;
2719         }
2720         return rte_flow_error_set(error, EINVAL,
2721                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2722                                   NULL,
2723                                   "flow does not have counter");
2724 error_exit:
2725         return rte_flow_error_set
2726                         (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2727                          NULL, "netlink: failed to read flow rule counters");
2728 notsup_exit:
2729         return rte_flow_error_set
2730                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2731                          NULL, "counters are not available.");
2732 }
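
/*
 * Consumer-side sketch (placeholder code, not part of the driver): an
 * application that created the flow with a COUNT action would typically
 * consume the structure filled in above as follows.
 *
 *   struct rte_flow_query_count qc = { .reset = 1 };
 *
 *   (query the flow through the generic rte_flow API)
 *   if (qc.hits_set && qc.bytes_set)
 *           printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *                  qc.hits, qc.bytes);
 */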
2733
2734 /**
2735  * Query a flow.
2736  *
2737  * @see rte_flow_query()
2738  * @see rte_flow_ops
2739  */
2740 static int
2741 flow_tcf_query(struct rte_eth_dev *dev,
2742                struct rte_flow *flow,
2743                const struct rte_flow_action *actions,
2744                void *data,
2745                struct rte_flow_error *error)
2746 {
2747         int ret = -EINVAL;
2748
2749         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2750                 switch (actions->type) {
2751                 case RTE_FLOW_ACTION_TYPE_VOID:
2752                         break;
2753                 case RTE_FLOW_ACTION_TYPE_COUNT:
2754                         ret = flow_tcf_query_count(dev, flow, data, error);
2755                         break;
2756                 default:
2757                         return rte_flow_error_set(error, ENOTSUP,
2758                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2759                                                   actions,
2760                                                   "action not supported");
2761                 }
2762         }
2763         return ret;
2764 }
2765
2766 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2767         .validate = flow_tcf_validate,
2768         .prepare = flow_tcf_prepare,
2769         .translate = flow_tcf_translate,
2770         .apply = flow_tcf_apply,
2771         .remove = flow_tcf_remove,
2772         .destroy = flow_tcf_destroy,
2773         .query = flow_tcf_query,
2774 };
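
/*
 * These callbacks are registered with the generic mlx5 flow layer
 * (mlx5_flow.c) and are selected when a rule has to be offloaded through
 * TC flower on the E-Switch.
 */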
2775
2776 /**
2777  * Create and configure a libmnl socket for Netlink flow rules.
2778  *
2779  * @return
2780  *   A valid libmnl socket object pointer on success, NULL otherwise and
2781  *   rte_errno is set.
2782  */
2783 static struct mnl_socket *
2784 flow_tcf_mnl_socket_create(void)
2785 {
2786         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2787
2788         if (nl) {
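                /*
                 * NETLINK_CAP_ACK requests acknowledgements that carry
                 * only the error code instead of echoing the whole
                 * original request, keeping replies small.
                 */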
2789                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2790                                       sizeof(int));
2791                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2792                         return nl;
2793         }
2794         rte_errno = errno;
2795         if (nl)
2796                 mnl_socket_close(nl);
2797         return NULL;
2798 }
2799
2800 /**
2801  * Destroy a libmnl socket.
2802  *
2803  * @param nl
2804  *   Libmnl socket of the @p NETLINK_ROUTE kind.
2805  */
2806 static void
2807 flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
2808 {
2809         if (nl)
2810                 mnl_socket_close(nl);
2811 }
2812
2813 /**
2814  * Initialize ingress qdisc of a given network interface.
2815  *
2816  * @param ctx
2817  *   Pointer to tc-flower context to use.
2818  * @param ifindex
2819  *   Index of network interface to initialize.
2820  * @param[out] error
2821  *   Perform verbose error reporting if not NULL.
2822  *
2823  * @return
2824  *   0 on success, a negative errno value otherwise and rte_errno is set.
2825  */
2826 int
2827 mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
2828                    unsigned int ifindex, struct rte_flow_error *error)
2829 {
2830         struct nlmsghdr *nlh;
2831         struct tcmsg *tcm;
2832         alignas(struct nlmsghdr)
2833         uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2834
2835         /* Destroy existing ingress qdisc and everything attached to it. */
2836         nlh = mnl_nlmsg_put_header(buf);
2837         nlh->nlmsg_type = RTM_DELQDISC;
2838         nlh->nlmsg_flags = NLM_F_REQUEST;
2839         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2840         tcm->tcm_family = AF_UNSPEC;
2841         tcm->tcm_ifindex = ifindex;
2842         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2843         tcm->tcm_parent = TC_H_INGRESS;
2844         /* Ignore errors when qdisc is already absent. */
2845         if (flow_tcf_nl_ack(ctx, nlh) &&
2846             rte_errno != EINVAL && rte_errno != ENOENT)
2847                 return rte_flow_error_set(error, rte_errno,
2848                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2849                                           "netlink: failed to remove ingress"
2850                                           " qdisc");
2851         /* Create fresh ingress qdisc. */
2852         nlh = mnl_nlmsg_put_header(buf);
2853         nlh->nlmsg_type = RTM_NEWQDISC;
2854         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2855         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2856         tcm->tcm_family = AF_UNSPEC;
2857         tcm->tcm_ifindex = ifindex;
2858         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2859         tcm->tcm_parent = TC_H_INGRESS;
2860         mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2861         if (flow_tcf_nl_ack(ctx, nlh))
2862                 return rte_flow_error_set(error, rte_errno,
2863                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2864                                           "netlink: failed to create ingress"
2865                                           " qdisc");
2866         return 0;
2867 }
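
/*
 * The two requests issued above are roughly equivalent to running
 * (illustrative, <ifname> standing for the interface with the given
 * ifindex):
 *
 *   tc qdisc del dev <ifname> ingress
 *   tc qdisc add dev <ifname> ingress
 *
 * with the deletion step allowed to fail when no ingress qdisc exists
 * yet.
 */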
2868
2869 /**
2870  * Create libmnl context for Netlink flow rules.
2871  *
2872  * @return
2873  *   A valid libmnl socket object pointer on success, NULL otherwise and
2874  *   rte_errno is set.
2875  */
2876 struct mlx5_flow_tcf_context *
2877 mlx5_flow_tcf_context_create(void)
2878 {
2879         struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
2880                                                         sizeof(*ctx),
2881                                                         sizeof(uint32_t));
2882         if (!ctx)
2883                 goto error;
2884         ctx->nl = flow_tcf_mnl_socket_create();
2885         if (!ctx->nl)
2886                 goto error;
2887         ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
2888         ctx->buf = rte_zmalloc(__func__,
2889                                ctx->buf_size, sizeof(uint32_t));
2890         if (!ctx->buf)
2891                 goto error;
2892         ctx->seq = random();
2893         return ctx;
2894 error:
2895         mlx5_flow_tcf_context_destroy(ctx);
2896         return NULL;
2897 }
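
/*
 * Typical call order (sketch): mlx5_flow_tcf_context_create() once at
 * probe time, mlx5_flow_tcf_init() for the ifindex whose ingress qdisc
 * must be (re)created, the rule callbacks from mlx5_flow_tcf_drv_ops at
 * runtime, and finally mlx5_flow_tcf_context_destroy().
 */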
2898
2899 /**
2900  * Destroy a libmnl context.
2901  *
2902  * @param ctx
2903  *   Libmnl socket of the @p NETLINK_ROUTE kind.
2904  */
2905 void
2906 mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
2907 {
2908         if (!ctx)
2909                 return;
2910         flow_tcf_mnl_socket_destroy(ctx->nl);
2911         rte_free(ctx->buf);
2912         rte_free(ctx);
2913 }