src/plugins/dpdk/device/flow.c
/*
 * Copyright (c) 2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vnet/vnet.h>
#include <vppinfra/vec.h>
#include <vppinfra/format.h>
#include <assert.h>

#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ethernet/arp_packet.h>
#include <vxlan/vxlan.h>
#include <dpdk/device/dpdk.h>
#include <dpdk/device/dpdk_priv.h>
#include <vppinfra/error.h>

#define FLOW_IS_ETHERNET_CLASS(f) \
  (f->type == VNET_FLOW_TYPE_ETHERNET)

#define FLOW_IS_IPV4_CLASS(f)                                                 \
  ((f->type == VNET_FLOW_TYPE_IP4) ||                                         \
   (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) ||                                 \
   (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) ||                          \
   (f->type == VNET_FLOW_TYPE_IP4_VXLAN) ||                                   \
   (f->type == VNET_FLOW_TYPE_IP4_GTPC) ||                                    \
   (f->type == VNET_FLOW_TYPE_IP4_GTPU) ||                                    \
   (f->type == VNET_FLOW_TYPE_IP4_L2TPV3OIP) ||                               \
   (f->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP) ||                               \
   (f->type == VNET_FLOW_TYPE_IP4_IPSEC_AH) ||                                \
   (f->type == VNET_FLOW_TYPE_IP4_IP4) ||                                     \
   (f->type == VNET_FLOW_TYPE_IP4_IP6) ||                                     \
   (f->type == VNET_FLOW_TYPE_IP4_IP4_N_TUPLE) ||                             \
   (f->type == VNET_FLOW_TYPE_IP4_IP6_N_TUPLE))

#define FLOW_IS_IPV6_CLASS(f)                                                 \
  ((f->type == VNET_FLOW_TYPE_IP6) ||                                         \
   (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) ||                                 \
   (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) ||                          \
   (f->type == VNET_FLOW_TYPE_IP6_VXLAN) ||                                   \
   (f->type == VNET_FLOW_TYPE_IP6_IP4) ||                                     \
   (f->type == VNET_FLOW_TYPE_IP6_IP6) ||                                     \
   (f->type == VNET_FLOW_TYPE_IP6_IP4_N_TUPLE) ||                             \
   (f->type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE))

/* check if flow is VLAN sensitive */
#define FLOW_HAS_VLAN_TAG(f) \
  ((f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
    (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED))

/* check if flow is L3 type */
#define FLOW_IS_L3_TYPE(f) \
  ((f->type == VNET_FLOW_TYPE_IP4) || \
    (f->type == VNET_FLOW_TYPE_IP6))

/* check if flow is L4 type */
#define FLOW_IS_L4_TYPE(f) \
  ((f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
    (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
    (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
    (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED))

/* check if flow is L4 tunnel type */
#define FLOW_IS_L4_TUNNEL_TYPE(f) \
  ((f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
    (f->type == VNET_FLOW_TYPE_IP6_VXLAN) || \
    (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
    (f->type == VNET_FLOW_TYPE_IP4_GTPU))

/* check if flow has an inner TCP/UDP header */
#define FLOW_HAS_INNER_N_TUPLE(f)                                             \
  ((f->type == VNET_FLOW_TYPE_IP4_IP4_N_TUPLE) ||                             \
   (f->type == VNET_FLOW_TYPE_IP4_IP6_N_TUPLE) ||                             \
   (f->type == VNET_FLOW_TYPE_IP6_IP4_N_TUPLE) ||                             \
   (f->type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE))

/* constant structs */
static const struct rte_flow_attr ingress = {.ingress = 1 };

static inline bool
mac_address_is_all_zero (const u8 addr[6])
{
  int i = 0;

  for (i = 0; i < 6; i++)
    if (addr[i] != 0)
      return false;

  return true;
}

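/* translate the vnet flow RSS type bitmap into the DPDK RSS hash-field
   bitmask, using the foreach_dpdk_rss_hf mapping */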
static inline void
dpdk_flow_convert_rss_types (u64 type, u64 * dpdk_rss_type)
{
#define BIT_IS_SET(v, b) \
  ((v) & (u64)1<<(b))

  *dpdk_rss_type = 0;

#undef _
#define _(n, f, s) \
      if (n != -1 && BIT_IS_SET(type, n)) \
        *dpdk_rss_type |= f;

  foreach_dpdk_rss_hf
#undef _
    return;
}

/** Maximum number of queue indices in struct rte_flow_action_rss. */
#define ACTION_RSS_QUEUE_NUM 128

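/* build the rte_flow_action_rss queue list from the contiguous range
   [queue_index, queue_index + queue_num) requested by the flow */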
static inline void
dpdk_flow_convert_rss_queues (u32 queue_index, u32 queue_num,
                              struct rte_flow_action_rss *rss)
{
  u16 *queues = clib_mem_alloc (sizeof (*queues) * ACTION_RSS_QUEUE_NUM);
  int i;

  for (i = 0; i < queue_num; i++)
    queues[i] = queue_index++;

  rss->queue_num = queue_num;
  rss->queue = queues;

  return;
}

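/* map a vnet RSS hash function to its DPDK counterpart;
   returns RTE_ETH_HASH_FUNCTION_MAX for unsupported functions */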
static inline enum rte_eth_hash_function
dpdk_flow_convert_rss_func (vnet_rss_function_t func)
{
  enum rte_eth_hash_function rss_func;

  switch (func)
    {
    case VNET_RSS_FUNC_DEFAULT:
      rss_func = RTE_ETH_HASH_FUNCTION_DEFAULT;
      break;
    case VNET_RSS_FUNC_TOEPLITZ:
      rss_func = RTE_ETH_HASH_FUNCTION_TOEPLITZ;
      break;
    case VNET_RSS_FUNC_SIMPLE_XOR:
      rss_func = RTE_ETH_HASH_FUNCTION_SIMPLE_XOR;
      break;
    case VNET_RSS_FUNC_SYMMETRIC_TOEPLITZ:
      rss_func = RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ;
      break;
    default:
      rss_func = RTE_ETH_HASH_FUNCTION_MAX;
      break;
    }

  return rss_func;
}

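/* translate a vnet_flow_t into an rte_flow item/action list, validate it
   against the device and, if accepted, create the flow and remember its
   handle in the flow entry */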
static int
dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
{
  struct rte_flow_item_eth eth[2] = { };
  struct rte_flow_item_ipv4 ip4[2] = {}, in_ip4[2] = {};
  struct rte_flow_item_ipv6 ip6[2] = {}, in_ip6[2] = {};
  struct rte_flow_item_udp udp[2] = {}, in_UDP[2] = {};
  struct rte_flow_item_tcp tcp[2] = {}, in_TCP[2] = {};
  struct rte_flow_item_gtp gtp[2] = { };
  struct rte_flow_item_l2tpv3oip l2tp[2] = { };
  struct rte_flow_item_esp esp[2] = { };
  struct rte_flow_item_ah ah[2] = { };
  struct rte_flow_item_raw generic[2] = {};
  struct rte_flow_action_mark mark = { 0 };
  struct rte_flow_action_queue queue = { 0 };
  struct rte_flow_action_rss rss = { 0 };
  struct rte_flow_item *item, *items = 0;
  struct rte_flow_action *action, *actions = 0;
  bool fate = false;

  enum
  {
    vxlan_hdr_sz = sizeof (vxlan_header_t),
    raw_sz = sizeof (struct rte_flow_item_raw)
  };

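  /* backing store for the VXLAN raw spec/mask items: the
     rte_flow_item_raw header is immediately followed by the VXLAN
     header bytes it points at */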
  union
  {
    struct rte_flow_item_raw item;
    u8 val[raw_sz + vxlan_hdr_sz];
  } raw[2];

  u16 src_port = 0, dst_port = 0, src_port_mask = 0, dst_port_mask = 0;
  u8 protocol = IP_PROTOCOL_RESERVED;
  int rv = 0;

  /* Handle generic flow first */
  if (f->type == VNET_FLOW_TYPE_GENERIC)
    {
      generic[0].pattern = f->generic.pattern.spec;
      generic[1].pattern = f->generic.pattern.mask;

      vec_add2 (items, item, 1);
      item->type = RTE_FLOW_ITEM_TYPE_RAW;
      item->spec = generic;
      item->mask = generic + 1;

      goto pattern_end;
    }

  enum
  {
    FLOW_UNKNOWN_CLASS,
    FLOW_ETHERNET_CLASS,
    FLOW_IPV4_CLASS,
    FLOW_IPV6_CLASS,
  } flow_class = FLOW_UNKNOWN_CLASS;

  if (FLOW_IS_ETHERNET_CLASS (f))
    flow_class = FLOW_ETHERNET_CLASS;
  else if (FLOW_IS_IPV4_CLASS (f))
    flow_class = FLOW_IPV4_CLASS;
  else if (FLOW_IS_IPV6_CLASS (f))
    flow_class = FLOW_IPV6_CLASS;
  else
    return VNET_FLOW_ERROR_NOT_SUPPORTED;

  if (f->actions & (~xd->supported_flow_actions))
    return VNET_FLOW_ERROR_NOT_SUPPORTED;

  /* Match items */
  /* Layer 2, Ethernet */
  vec_add2 (items, item, 1);
  item->type = RTE_FLOW_ITEM_TYPE_ETH;

  if (flow_class == FLOW_ETHERNET_CLASS)
    {
      vnet_flow_ethernet_t *te = &f->ethernet;

      clib_memset (&eth[0], 0, sizeof (eth[0]));
      clib_memset (&eth[1], 0, sizeof (eth[1]));

      /* check if SMAC/DMAC/Ether_type assigned */
      if (!mac_address_is_all_zero (te->eth_hdr.dst_address))
        {
          clib_memcpy_fast (&eth[0].dst, &te->eth_hdr.dst_address,
                            sizeof (eth[0].dst));
          clib_memset (&eth[1].dst, 0xFF, sizeof (eth[1].dst));
        }

      if (!mac_address_is_all_zero (te->eth_hdr.src_address))
        {
          clib_memcpy_fast (&eth[0].src, &te->eth_hdr.src_address,
                            sizeof (eth[0].src));
          clib_memset (&eth[1].src, 0xFF, sizeof (eth[1].src));
        }

      if (te->eth_hdr.type)
        {
          eth[0].type = clib_host_to_net_u16 (te->eth_hdr.type);
          eth[1].type = clib_host_to_net_u16 (0xFFFF);
        }

      item->spec = eth;
      item->mask = eth + 1;
    }
  else
    {
      item->spec = NULL;
      item->mask = NULL;
    }

  /* currently only a single empty VLAN tag is supported */
  if (FLOW_HAS_VLAN_TAG (f))
    {
      vec_add2 (items, item, 1);
      item->type = RTE_FLOW_ITEM_TYPE_VLAN;
      item->spec = NULL;
      item->mask = NULL;
    }

  if (FLOW_IS_ETHERNET_CLASS (f))
    goto pattern_end;

  /* Layer 3, IP */
  vec_add2 (items, item, 1);
  if (flow_class == FLOW_IPV4_CLASS)
    {
      vnet_flow_ip4_t *ip4_ptr = &f->ip4;

      item->type = RTE_FLOW_ITEM_TYPE_IPV4;
      if ((!ip4_ptr->src_addr.mask.as_u32) &&
          (!ip4_ptr->dst_addr.mask.as_u32) && (!ip4_ptr->protocol.mask))
        {
          item->spec = NULL;
          item->mask = NULL;
        }
      else
        {
          ip4[0].hdr.src_addr = ip4_ptr->src_addr.addr.as_u32;
          ip4[1].hdr.src_addr = ip4_ptr->src_addr.mask.as_u32;
          ip4[0].hdr.dst_addr = ip4_ptr->dst_addr.addr.as_u32;
          ip4[1].hdr.dst_addr = ip4_ptr->dst_addr.mask.as_u32;
          ip4[0].hdr.next_proto_id = ip4_ptr->protocol.prot;
          ip4[1].hdr.next_proto_id = ip4_ptr->protocol.mask;

          item->spec = ip4;
          item->mask = ip4 + 1;
        }

      if (FLOW_IS_L4_TYPE (f) || FLOW_IS_L4_TUNNEL_TYPE (f))
        {
          vnet_flow_ip4_n_tuple_t *ip4_n_ptr = &f->ip4_n_tuple;

          src_port = ip4_n_ptr->src_port.port;
          dst_port = ip4_n_ptr->dst_port.port;
          src_port_mask = ip4_n_ptr->src_port.mask;
          dst_port_mask = ip4_n_ptr->dst_port.mask;
        }

      protocol = ip4_ptr->protocol.prot;
    }
  else if (flow_class == FLOW_IPV6_CLASS)
    {
      vnet_flow_ip6_t *ip6_ptr = &f->ip6;

      item->type = RTE_FLOW_ITEM_TYPE_IPV6;

      if ((ip6_ptr->src_addr.mask.as_u64[0] == 0) &&
          (ip6_ptr->src_addr.mask.as_u64[1] == 0) &&
          (ip6_ptr->dst_addr.mask.as_u64[0] == 0) &&
          (ip6_ptr->dst_addr.mask.as_u64[1] == 0) && (!ip6_ptr->protocol.mask))
        {
          item->spec = NULL;
          item->mask = NULL;
        }
      else
        {
          clib_memcpy (ip6[0].hdr.src_addr, &ip6_ptr->src_addr.addr,
                       ARRAY_LEN (ip6_ptr->src_addr.addr.as_u8));
          clib_memcpy (ip6[1].hdr.src_addr, &ip6_ptr->src_addr.mask,
                       ARRAY_LEN (ip6_ptr->src_addr.mask.as_u8));
          clib_memcpy (ip6[0].hdr.dst_addr, &ip6_ptr->dst_addr.addr,
                       ARRAY_LEN (ip6_ptr->dst_addr.addr.as_u8));
          clib_memcpy (ip6[1].hdr.dst_addr, &ip6_ptr->dst_addr.mask,
                       ARRAY_LEN (ip6_ptr->dst_addr.mask.as_u8));
          ip6[0].hdr.proto = ip6_ptr->protocol.prot;
          ip6[1].hdr.proto = ip6_ptr->protocol.mask;

          item->spec = ip6;
          item->mask = ip6 + 1;
        }

      if (FLOW_IS_L4_TYPE (f) || FLOW_IS_L4_TUNNEL_TYPE (f))
        {
          vnet_flow_ip6_n_tuple_t *ip6_n_ptr = &f->ip6_n_tuple;

          src_port = ip6_n_ptr->src_port.port;
          dst_port = ip6_n_ptr->dst_port.port;
          src_port_mask = ip6_n_ptr->src_port.mask;
          dst_port_mask = ip6_n_ptr->dst_port.mask;
        }

      protocol = ip6_ptr->protocol.prot;
    }

  if (FLOW_IS_L3_TYPE (f))
    goto pattern_end;

  /* Layer 4 / tunnel header, selected by the IP protocol */
  vec_add2 (items, item, 1);
  switch (protocol)
    {
    case IP_PROTOCOL_L2TP:
      item->type = RTE_FLOW_ITEM_TYPE_L2TPV3OIP;
      l2tp[0].session_id = clib_host_to_net_u32 (f->ip4_l2tpv3oip.session_id);
      l2tp[1].session_id = ~0;

      item->spec = l2tp;
      item->mask = l2tp + 1;
      break;

    case IP_PROTOCOL_IPSEC_ESP:
      item->type = RTE_FLOW_ITEM_TYPE_ESP;
      esp[0].hdr.spi = clib_host_to_net_u32 (f->ip4_ipsec_esp.spi);
      esp[1].hdr.spi = ~0;

      item->spec = esp;
      item->mask = esp + 1;
      break;

    case IP_PROTOCOL_IPSEC_AH:
      item->type = RTE_FLOW_ITEM_TYPE_AH;
      ah[0].spi = clib_host_to_net_u32 (f->ip4_ipsec_ah.spi);
      ah[1].spi = ~0;

      item->spec = ah;
      item->mask = ah + 1;
      break;
    case IP_PROTOCOL_TCP:
      item->type = RTE_FLOW_ITEM_TYPE_TCP;
      if ((src_port_mask == 0) && (dst_port_mask == 0))
        {
          item->spec = NULL;
          item->mask = NULL;
        }
      else
        {
          tcp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
          tcp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
          tcp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
          tcp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
          item->spec = tcp;
          item->mask = tcp + 1;
        }
      break;

    case IP_PROTOCOL_UDP:
      item->type = RTE_FLOW_ITEM_TYPE_UDP;
      if ((src_port_mask == 0) && (dst_port_mask == 0))
        {
          item->spec = NULL;
          item->mask = NULL;
        }
      else
        {
          udp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
          udp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
          udp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
          udp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
          item->spec = udp;
          item->mask = udp + 1;
        }

      /* handle the UDP tunnels */
      if (f->type == VNET_FLOW_TYPE_IP4_GTPC)
        {
          gtp[0].teid = clib_host_to_net_u32 (f->ip4_gtpc.teid);
          gtp[1].teid = ~0;

          vec_add2 (items, item, 1);
          item->type = RTE_FLOW_ITEM_TYPE_GTPC;
          item->spec = gtp;
          item->mask = gtp + 1;
        }
      else if (f->type == VNET_FLOW_TYPE_IP4_GTPU)
        {
          gtp[0].teid = clib_host_to_net_u32 (f->ip4_gtpu.teid);
          gtp[1].teid = ~0;

          vec_add2 (items, item, 1);
          item->type = RTE_FLOW_ITEM_TYPE_GTPU;
          item->spec = gtp;
          item->mask = gtp + 1;
        }
      else if (f->type == VNET_FLOW_TYPE_IP4_VXLAN)
        {
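          /* match the VXLAN header (I flag + VNI) as a raw pattern
             relative to the end of the preceding UDP item */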
          u32 vni = f->ip4_vxlan.vni;

          vxlan_header_t spec_hdr = {
            .flags = VXLAN_FLAGS_I,
            .vni_reserved = clib_host_to_net_u32 (vni << 8)
          };
          vxlan_header_t mask_hdr = {
            .flags = 0xff,
            .vni_reserved = clib_host_to_net_u32 (((u32) - 1) << 8)
          };

          clib_memset (raw, 0, sizeof raw);
          raw[0].item.relative = 1;
          raw[0].item.length = vxlan_hdr_sz;

          clib_memcpy_fast (raw[0].val + raw_sz, &spec_hdr, vxlan_hdr_sz);
          raw[0].item.pattern = raw[0].val + raw_sz;
          clib_memcpy_fast (raw[1].val + raw_sz, &mask_hdr, vxlan_hdr_sz);
          raw[1].item.pattern = raw[1].val + raw_sz;

          vec_add2 (items, item, 1);
          item->type = RTE_FLOW_ITEM_TYPE_RAW;
          item->spec = raw;
          item->mask = raw + 1;
        }
      break;
    case IP_PROTOCOL_IPV6:
      item->type = RTE_FLOW_ITEM_TYPE_IPV6;

#define fill_inner_ip6_with_outer_ipv(OUTER_IP_VER)                           \
  if (f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP6 ||                     \
      f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP6_N_TUPLE)               \
    {                                                                         \
      vnet_flow_ip##OUTER_IP_VER##_ip6_t *ptr = &f->ip##OUTER_IP_VER##_ip6;   \
      if ((ptr->in_src_addr.mask.as_u64[0] == 0) &&                           \
          (ptr->in_src_addr.mask.as_u64[1] == 0) &&                           \
          (ptr->in_dst_addr.mask.as_u64[0] == 0) &&                           \
          (ptr->in_dst_addr.mask.as_u64[1] == 0) && (!ptr->in_protocol.mask)) \
        {                                                                     \
          item->spec = NULL;                                                  \
          item->mask = NULL;                                                  \
        }                                                                     \
      else                                                                    \
        {                                                                     \
          clib_memcpy (in_ip6[0].hdr.src_addr, &ptr->in_src_addr.addr,        \
                       ARRAY_LEN (ptr->in_src_addr.addr.as_u8));              \
          clib_memcpy (in_ip6[1].hdr.src_addr, &ptr->in_src_addr.mask,        \
                       ARRAY_LEN (ptr->in_src_addr.mask.as_u8));              \
          clib_memcpy (in_ip6[0].hdr.dst_addr, &ptr->in_dst_addr.addr,        \
                       ARRAY_LEN (ptr->in_dst_addr.addr.as_u8));              \
          clib_memcpy (in_ip6[1].hdr.dst_addr, &ptr->in_dst_addr.mask,        \
                       ARRAY_LEN (ptr->in_dst_addr.mask.as_u8));              \
          item->spec = in_ip6;                                                \
          item->mask = in_ip6 + 1;                                            \
        }                                                                     \
    }
      fill_inner_ip6_with_outer_ipv (6) fill_inner_ip6_with_outer_ipv (4)
#undef fill_inner_ip6_with_outer_ipv
        break;
    case IP_PROTOCOL_IP_IN_IP:
      item->type = RTE_FLOW_ITEM_TYPE_IPV4;

#define fill_inner_ip4_with_outer_ipv(OUTER_IP_VER)                           \
  if (f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP4 ||                     \
      f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP4_N_TUPLE)               \
    {                                                                         \
      vnet_flow_ip##OUTER_IP_VER##_ip4_t *ptr = &f->ip##OUTER_IP_VER##_ip4;   \
      if ((!ptr->in_src_addr.mask.as_u32) &&                                  \
          (!ptr->in_dst_addr.mask.as_u32) && (!ptr->in_protocol.mask))        \
        {                                                                     \
          item->spec = NULL;                                                  \
          item->mask = NULL;                                                  \
        }                                                                     \
      else                                                                    \
        {                                                                     \
          in_ip4[0].hdr.src_addr = ptr->in_src_addr.addr.as_u32;              \
          in_ip4[1].hdr.src_addr = ptr->in_src_addr.mask.as_u32;              \
          in_ip4[0].hdr.dst_addr = ptr->in_dst_addr.addr.as_u32;              \
          in_ip4[1].hdr.dst_addr = ptr->in_dst_addr.mask.as_u32;              \
          item->spec = in_ip4;                                                \
          item->mask = in_ip4 + 1;                                            \
        }                                                                     \
    }
      fill_inner_ip4_with_outer_ipv (6) fill_inner_ip4_with_outer_ipv (4)
#undef fill_inner_ip4_with_outer_ipv
        break;
    default:
      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
      goto done;
    }

  if (FLOW_HAS_INNER_N_TUPLE (f))
    {
      vec_add2 (items, item, 1);

#define fill_inner_n_tuple_of(proto)                                          \
  item->type = RTE_FLOW_ITEM_TYPE_##proto;                                    \
  if ((ptr->in_src_port.mask == 0) && (ptr->in_dst_port.mask == 0))           \
    {                                                                         \
      item->spec = NULL;                                                      \
      item->mask = NULL;                                                      \
    }                                                                         \
  else                                                                        \
    {                                                                         \
      in_##proto[0].hdr.src_port =                                            \
        clib_host_to_net_u16 (ptr->in_src_port.port);                         \
      in_##proto[1].hdr.src_port =                                            \
        clib_host_to_net_u16 (ptr->in_src_port.mask);                         \
      in_##proto[0].hdr.dst_port =                                            \
        clib_host_to_net_u16 (ptr->in_dst_port.port);                         \
      in_##proto[1].hdr.dst_port =                                            \
        clib_host_to_net_u16 (ptr->in_dst_port.mask);                         \
      item->spec = in_##proto;                                                \
      item->mask = in_##proto + 1;                                            \
    }

#define fill_inner_n_tuple(OUTER_IP_VER, INNER_IP_VER)                        \
  if (f->type ==                                                              \
      VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP##INNER_IP_VER##_N_TUPLE)           \
    {                                                                         \
      vnet_flow_ip##OUTER_IP_VER##_ip##INNER_IP_VER##_n_tuple_t *ptr =        \
        &f->ip##OUTER_IP_VER##_ip##INNER_IP_VER##_n_tuple;                    \
      switch (ptr->in_protocol.prot)                                          \
        {                                                                     \
        case IP_PROTOCOL_UDP:                                                 \
          fill_inner_n_tuple_of (UDP) break;                                  \
        case IP_PROTOCOL_TCP:                                                 \
          fill_inner_n_tuple_of (TCP) break;                                  \
        default:                                                              \
          break;                                                              \
        }                                                                     \
    }
      fill_inner_n_tuple (6, 4) fill_inner_n_tuple (4, 4)
        fill_inner_n_tuple (6, 6) fill_inner_n_tuple (4, 6)
#undef fill_inner_n_tuple
#undef fill_inner_n_tuple_of
    }

pattern_end:
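  /* RSS over ESP additionally needs an ESP item in the pattern */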
  if ((f->actions & VNET_FLOW_ACTION_RSS) &&
      (f->rss_types & (1ULL << VNET_FLOW_RSS_TYPES_ESP)))
    {
      vec_add2 (items, item, 1);
      item->type = RTE_FLOW_ITEM_TYPE_ESP;
    }

  vec_add2 (items, item, 1);
  item->type = RTE_FLOW_ITEM_TYPE_END;

  /* Actions */
  /* Only one 'fate' can be assigned */
  if (f->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
    {
      vec_add2 (actions, action, 1);
      queue.index = f->redirect_queue;
      action->type = RTE_FLOW_ACTION_TYPE_QUEUE;
      action->conf = &queue;
      fate = true;
    }

  if (f->actions & VNET_FLOW_ACTION_DROP)
    {
      vec_add2 (actions, action, 1);
      action->type = RTE_FLOW_ACTION_TYPE_DROP;
      if (fate == true)
        {
          rv = VNET_FLOW_ERROR_INTERNAL;
          goto done;
        }
      else
        fate = true;
    }

  if (f->actions & VNET_FLOW_ACTION_RSS)
    {
      u64 rss_type = 0;

      vec_add2 (actions, action, 1);
      action->type = RTE_FLOW_ACTION_TYPE_RSS;
      action->conf = &rss;

      /* convert types to DPDK rss bitmask */
      dpdk_flow_convert_rss_types (f->rss_types, &rss_type);

      if (f->queue_num)
        /* convert rss queues to array */
        dpdk_flow_convert_rss_queues (f->queue_index, f->queue_num, &rss);

      rss.types = rss_type;
      if ((rss.func = dpdk_flow_convert_rss_func (f->rss_fun)) ==
          RTE_ETH_HASH_FUNCTION_MAX)
        {
          rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
          goto done;
        }

      if (fate == true)
        {
          rv = VNET_FLOW_ERROR_INTERNAL;
          goto done;
        }
      else
        fate = true;
    }

  if (fate == false)
    {
      vec_add2 (actions, action, 1);
      action->type = RTE_FLOW_ACTION_TYPE_PASSTHRU;
    }

  if (f->actions & VNET_FLOW_ACTION_MARK)
    {
      vec_add2 (actions, action, 1);
      mark.id = fe->mark;
      action->type = RTE_FLOW_ACTION_TYPE_MARK;
      action->conf = &mark;
    }

  vec_add2 (actions, action, 1);
  action->type = RTE_FLOW_ACTION_TYPE_END;

  rv = rte_flow_validate (xd->device_index, &ingress, items, actions,
                          &xd->last_flow_error);

  if (rv)
    {
      if (rv == -EINVAL)
        rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
      else if (rv == -EEXIST)
        rv = VNET_FLOW_ERROR_ALREADY_EXISTS;
      else
        rv = VNET_FLOW_ERROR_INTERNAL;

      goto done;
    }

  fe->handle = rte_flow_create (xd->device_index, &ingress, items, actions,
                                &xd->last_flow_error);

  if (!fe->handle)
    rv = VNET_FLOW_ERROR_NOT_SUPPORTED;

done:
  vec_free (items);
  vec_free (actions);
  return rv;
}

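/* per-device flow add/del handler: manages the flow entry pool, the
   mark -> lookup-entry table used by the input node, and turns rx flow
   offload on the device on and off as flows come and go */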
int
dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
                  u32 flow_index, uword * private_data)
{
  vlib_main_t *vm = vlib_get_main ();
  dpdk_main_t *dm = &dpdk_main;
  vnet_flow_t *flow = vnet_get_flow (flow_index);
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
  dpdk_flow_entry_t *fe;
  dpdk_flow_lookup_entry_t *fle = 0;
  int rv;

  /* recycle old flow lookup entries only after the main loop counter
     increases - i.e. previously DMA'ed packets were handled */
  if (vec_len (xd->parked_lookup_indexes) > 0 &&
      xd->parked_loop_count != vm->main_loop_count)
    {
      u32 *fl_index;

      vec_foreach (fl_index, xd->parked_lookup_indexes)
        pool_put_index (xd->flow_lookup_entries, *fl_index);
      vec_reset_length (xd->parked_lookup_indexes);
    }

  if (op == VNET_FLOW_DEV_OP_DEL_FLOW)
    {
      fe = vec_elt_at_index (xd->flow_entries, *private_data);

      if ((rv = rte_flow_destroy (xd->device_index, fe->handle,
                                  &xd->last_flow_error)))
        return VNET_FLOW_ERROR_INTERNAL;

      if (fe->mark)
        {
          /* make sure no action is taken for in-flight (marked) packets */
          fle = pool_elt_at_index (xd->flow_lookup_entries, fe->mark);
          clib_memset (fle, -1, sizeof (*fle));
          vec_add1 (xd->parked_lookup_indexes, fe->mark);
          xd->parked_loop_count = vm->main_loop_count;
        }

      clib_memset (fe, 0, sizeof (*fe));
      pool_put (xd->flow_entries, fe);

      goto disable_rx_offload;
    }

  if (op != VNET_FLOW_DEV_OP_ADD_FLOW)
    return VNET_FLOW_ERROR_NOT_SUPPORTED;

  pool_get (xd->flow_entries, fe);
  fe->flow_index = flow->index;

  if (flow->actions == 0)
    {
      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
      goto done;
    }

  /* if we need to mark packets, assign one mark */
  if (flow->actions & (VNET_FLOW_ACTION_MARK |
                       VNET_FLOW_ACTION_REDIRECT_TO_NODE |
                       VNET_FLOW_ACTION_BUFFER_ADVANCE))
    {
      /* reserve slot 0 */
      if (xd->flow_lookup_entries == 0)
        pool_get_aligned (xd->flow_lookup_entries, fle,
                          CLIB_CACHE_LINE_BYTES);
      pool_get_aligned (xd->flow_lookup_entries, fle, CLIB_CACHE_LINE_BYTES);
      fe->mark = fle - xd->flow_lookup_entries;

      /* install entry in the lookup table */
      clib_memset (fle, -1, sizeof (*fle));
      if (flow->actions & VNET_FLOW_ACTION_MARK)
        fle->flow_id = flow->mark_flow_id;
      if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_NODE)
        fle->next_index = flow->redirect_device_input_next_index;
      if (flow->actions & VNET_FLOW_ACTION_BUFFER_ADVANCE)
        fle->buffer_advance = flow->buffer_advance;
    }
  else
    fe->mark = 0;

  if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) == 0)
    {
      xd->flags |= DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD;
      dpdk_device_setup (xd);
    }

  switch (flow->type)
    {
    case VNET_FLOW_TYPE_ETHERNET:
    case VNET_FLOW_TYPE_IP4:
    case VNET_FLOW_TYPE_IP6:
    case VNET_FLOW_TYPE_IP4_N_TUPLE:
    case VNET_FLOW_TYPE_IP6_N_TUPLE:
    case VNET_FLOW_TYPE_IP4_VXLAN:
    case VNET_FLOW_TYPE_IP4_GTPC:
    case VNET_FLOW_TYPE_IP4_GTPU:
    case VNET_FLOW_TYPE_IP4_L2TPV3OIP:
    case VNET_FLOW_TYPE_IP4_IPSEC_ESP:
    case VNET_FLOW_TYPE_IP4_IPSEC_AH:
    case VNET_FLOW_TYPE_IP4_IP4:
    case VNET_FLOW_TYPE_IP4_IP4_N_TUPLE:
    case VNET_FLOW_TYPE_IP4_IP6:
    case VNET_FLOW_TYPE_IP4_IP6_N_TUPLE:
    case VNET_FLOW_TYPE_IP6_IP4:
    case VNET_FLOW_TYPE_IP6_IP4_N_TUPLE:
    case VNET_FLOW_TYPE_IP6_IP6:
    case VNET_FLOW_TYPE_IP6_IP6_N_TUPLE:
    case VNET_FLOW_TYPE_GENERIC:
      if ((rv = dpdk_flow_add (xd, flow, fe)))
        goto done;
      break;
    default:
      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
      goto done;
    }

  *private_data = fe - xd->flow_entries;

done:
  if (rv)
    {
      clib_memset (fe, 0, sizeof (*fe));
      pool_put (xd->flow_entries, fe);
      if (fle)
        {
          clib_memset (fle, -1, sizeof (*fle));
          pool_put (xd->flow_lookup_entries, fle);
        }
    }
disable_rx_offload:
  if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) != 0
      && pool_elts (xd->flow_entries) == 0)
    {
      xd->flags &= ~DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD;
      dpdk_device_setup (xd);
    }

  return rv;
}

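/* format flow details for a device: device-wide information when
   flow_index is ~0, otherwise the mark assigned to the given flow */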
u8 *
format_dpdk_flow (u8 * s, va_list * args)
{
  u32 dev_instance = va_arg (*args, u32);
  u32 flow_index = va_arg (*args, u32);
  uword private_data = va_arg (*args, uword);
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
  dpdk_flow_entry_t *fe;

  if (flow_index == ~0)
    {
      s = format (s, "%-25s: %U\n", "supported flow actions",
                  format_flow_actions, xd->supported_flow_actions);
      s = format (s, "%-25s: %d\n", "last DPDK error type",
                  xd->last_flow_error.type);
      s = format (s, "%-25s: %s\n", "last DPDK error message",
                  xd->last_flow_error.message ? xd->last_flow_error.message :
                  "n/a");
      return s;
    }

  if (private_data >= vec_len (xd->flow_entries))
    return format (s, "unknown flow");

  fe = vec_elt_at_index (xd->flow_entries, private_data);
  s = format (s, "mark %u", fe->mark);
  return s;
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */