2 * Copyright (c) 2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vnet/vnet.h>
17 #include <vppinfra/vec.h>
18 #include <vppinfra/format.h>
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/ethernet/arp_packet.h>
24 #include <vnet/vxlan/vxlan.h>
25 #include <dpdk/device/dpdk.h>
26 #include <dpdk/device/dpdk_priv.h>
27 #include <vppinfra/error.h>
/*
 * True when the flow's type is VNET_FLOW_TYPE_ETHERNET.
 * `f` is a pointer to an object with a `type` member.  The argument is
 * parenthesized so pointer expressions such as `p + 1` expand correctly.
 */
#define FLOW_IS_ETHERNET_CLASS(f) \
  ((f)->type == VNET_FLOW_TYPE_ETHERNET)
/*
 * True when the flow's type is any of the IPv4-based flow types
 * (plain IP4, n-tuple, tagged n-tuple, VXLAN, GTP-C, GTP-U, L2TPv3oIP,
 * IPsec ESP, IPsec AH).
 * `f` is a pointer to an object with a `type` member.  The argument is
 * parenthesized so pointer expressions expand correctly; it is evaluated
 * more than once, so it must not have side effects.
 */
#define FLOW_IS_IPV4_CLASS(f) \
  (((f)->type == VNET_FLOW_TYPE_IP4) || \
   ((f)->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
   ((f)->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
   ((f)->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
   ((f)->type == VNET_FLOW_TYPE_IP4_GTPC) || \
   ((f)->type == VNET_FLOW_TYPE_IP4_GTPU) || \
   ((f)->type == VNET_FLOW_TYPE_IP4_L2TPV3OIP) || \
   ((f)->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP) || \
   ((f)->type == VNET_FLOW_TYPE_IP4_IPSEC_AH))
/*
 * True when the flow's type is any of the IPv6-based flow types
 * (plain IP6, n-tuple, tagged n-tuple, VXLAN).
 * `f` is a pointer to an object with a `type` member.  The argument is
 * parenthesized so pointer expressions expand correctly; it is evaluated
 * more than once, so it must not have side effects.
 */
#define FLOW_IS_IPV6_CLASS(f) \
  (((f)->type == VNET_FLOW_TYPE_IP6) || \
   ((f)->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
   ((f)->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) || \
   ((f)->type == VNET_FLOW_TYPE_IP6_VXLAN))
/*
 * Check if the flow is VLAN sensitive, i.e. its type is one of the
 * "tagged" n-tuple types (IPv4 or IPv6).
 * `f` is a pointer to an object with a `type` member.  The argument is
 * parenthesized so pointer expressions expand correctly; it is evaluated
 * more than once, so it must not have side effects.
 */
#define FLOW_HAS_VLAN_TAG(f) \
  (((f)->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
   ((f)->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED))
/*
 * Check if the flow is a pure L3 type (plain IP4 or plain IP6).
 * `f` is a pointer to an object with a `type` member.  The argument is
 * parenthesized so pointer expressions expand correctly; it is evaluated
 * more than once, so it must not have side effects.
 */
#define FLOW_IS_L3_TYPE(f) \
  (((f)->type == VNET_FLOW_TYPE_IP4) || \
   ((f)->type == VNET_FLOW_TYPE_IP6))
/*
 * Check if the flow is an L4 type: one of the IPv4/IPv6 n-tuple types,
 * tagged or untagged.
 * `f` is a pointer to an object with a `type` member.  The argument is
 * parenthesized so pointer expressions expand correctly; it is evaluated
 * more than once, so it must not have side effects.
 */
#define FLOW_IS_L4_TYPE(f) \
  (((f)->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
   ((f)->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
   ((f)->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
   ((f)->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED))
/*
 * Check if the flow is an L4 tunnel type: IPv4/IPv6 VXLAN, or IPv4
 * GTP-C / GTP-U.
 * `f` is a pointer to an object with a `type` member.  The argument is
 * parenthesized so pointer expressions expand correctly; it is evaluated
 * more than once, so it must not have side effects.
 */
#define FLOW_IS_L4_TUNNEL_TYPE(f) \
  (((f)->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
   ((f)->type == VNET_FLOW_TYPE_IP6_VXLAN) || \
   ((f)->type == VNET_FLOW_TYPE_IP4_GTPC) || \
   ((f)->type == VNET_FLOW_TYPE_IP4_GTPU))
73 /* constant structs */
/* Flow attributes with only the `ingress` bit set; this one object is the
   attribute argument of both rte_flow_validate() and rte_flow_create()
   further down in this file. */
74 static const struct rte_flow_attr ingress = {.ingress = 1 };
/* Helper taking a 6-byte MAC address and walking all six bytes.
   Callers in this file negate its result to decide whether a source or
   destination MAC was supplied for matching.
   NOTE(review): the return type, loop body and return statements are not
   present in this listing; presumably it yields true only when every byte
   is zero, as the name says -- confirm against the full file. */
77 mac_address_is_all_zero (const u8 addr[6])
81 for (i = 0; i < 6; i++)
/* Converts an RSS type bitmask (`type`) and reports the result through
   `dpdk_rss_type`; the caller in this file passes f->rss_types and stores
   the output in the rte_flow RSS action's `types` field.
   NOTE(review): only the signature and two fragments of local helper
   macros are present in this listing; the bit-by-bit mapping itself is not
   visible here. */
89 dpdk_flow_convert_rss_types (u64 type, u64 * dpdk_rss_type)
/* Local bit-test helper; its expansion line is missing from this listing. */
91 #define BIT_IS_SET(v, b) \
/* Fragment of a multi-line macro: acts only when `n` is not -1 and bit `n`
   is set in `type`. */
98 if (n != -1 && BIT_IS_SET(type, n)) \
/* Maps a vnet RSS hash function selector onto the matching
   RTE_ETH_HASH_FUNCTION_* value.  The caller in this file treats a result
   of RTE_ETH_HASH_FUNCTION_MAX as "not supported".
   NOTE(review): the switch header, break statements, the label guarding
   the RTE_ETH_HASH_FUNCTION_MAX assignment and the final return are not
   present in this listing. */
106 static inline enum rte_eth_hash_function
107 dpdk_flow_convert_rss_func (vnet_rss_function_t func)
109 enum rte_eth_hash_function rss_func;
113 case VNET_RSS_FUNC_DEFAULT:
114 rss_func = RTE_ETH_HASH_FUNCTION_DEFAULT;
116 case VNET_RSS_FUNC_TOEPLITZ:
117 rss_func = RTE_ETH_HASH_FUNCTION_TOEPLITZ;
119 case VNET_RSS_FUNC_SIMPLE_XOR:
120 rss_func = RTE_ETH_HASH_FUNCTION_SIMPLE_XOR;
122 case VNET_RSS_FUNC_SYMMETRIC_TOEPLITZ:
123 rss_func = RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ;
/* Value the caller checks for to reject the flow; presumably assigned in
   the default branch -- the label is not visible here. */
126 rss_func = RTE_ETH_HASH_FUNCTION_MAX;
/* Translates the vnet flow `f` into an rte_flow rule on device `xd`.
   It builds an item (pattern) vector and an action vector, checks them
   with rte_flow_validate(), then installs them with rte_flow_create().
   The created handle is stored in fe->handle.  Failures are reported as
   VNET_FLOW_ERROR_* codes, and DPDK error details land in
   xd->last_flow_error.
   Each spec/mask pair is kept in a two-element array: element [0] is the
   spec and element [1] the mask (see the `item->mask = x + 1` lines).
   NOTE(review): the embedded listing numbers skip (e.g. 149 -> 154), so
   braces, several declarations, most `item->spec` assignments, gotos and
   the cleanup/return tail are not present in this listing. */
134 dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
136 struct rte_flow_item_eth eth[2] = { };
137 struct rte_flow_item_ipv4 ip4[2] = { };
138 struct rte_flow_item_ipv6 ip6[2] = { };
139 struct rte_flow_item_udp udp[2] = { };
140 struct rte_flow_item_tcp tcp[2] = { };
141 struct rte_flow_item_gtp gtp[2] = { };
142 struct rte_flow_item_l2tpv3oip l2tp[2] = { };
143 struct rte_flow_item_esp esp[2] = { };
144 struct rte_flow_item_ah ah[2] = { };
145 struct rte_flow_action_mark mark = { 0 };
146 struct rte_flow_action_queue queue = { 0 };
147 struct rte_flow_action_rss rss = { 0 };
/* Vectors grown with vec_add2(); `item` / `action` point at the element
   most recently appended. */
148 struct rte_flow_item *item, *items = 0;
149 struct rte_flow_action *action, *actions = 0;
154 vxlan_hdr_sz = sizeof (vxlan_header_t),
155 raw_sz = sizeof (struct rte_flow_item_raw)
/* Storage for the RAW item used by the VXLAN match: `val` is sized to
   hold a raw item header plus a VXLAN header. */
160 struct rte_flow_item_raw item;
161 u8 val[raw_sz + vxlan_hdr_sz];
164 u16 src_port = 0, dst_port = 0, src_port_mask = 0, dst_port_mask = 0;
165 u8 protocol = IP_PROTOCOL_RESERVED;
174 } flow_class = FLOW_UNKNOWN_CLASS;
/* Classify the flow as Ethernet, IPv4 or IPv6. */
176 if (FLOW_IS_ETHERNET_CLASS (f))
177 flow_class = FLOW_ETHERNET_CLASS;
178 else if (FLOW_IS_IPV4_CLASS (f))
179 flow_class = FLOW_IPV4_CLASS;
180 else if (FLOW_IS_IPV6_CLASS (f))
181 flow_class = FLOW_IPV6_CLASS;
183 return VNET_FLOW_ERROR_NOT_SUPPORTED;
/* Reject flows requesting any action bit outside
   xd->supported_flow_actions. */
185 if (f->actions & (~xd->supported_flow_actions))
186 return VNET_FLOW_ERROR_NOT_SUPPORTED;
189 /* Layer 2, Ethernet */
190 vec_add2 (items, item, 1);
191 item->type = RTE_FLOW_ITEM_TYPE_ETH;
193 if (flow_class == FLOW_ETHERNET_CLASS)
195 vnet_flow_ethernet_t *te = &f->ethernet;
/* NOTE(review): `ð` in the statements below looks like a mis-decoded
   "&eth" (HTML entity); the intended operands are presumably &eth[0] and
   &eth[1] -- restore from the upstream file. */
197 clib_memset (ð[0], 0, sizeof (eth[0]));
198 clib_memset (ð[1], 0, sizeof (eth[1]));
200 /* check if SMAC/DMAC/Ether_type assigned */
201 if (!mac_address_is_all_zero (te->eth_hdr.dst_address))
203 clib_memcpy_fast (ð[0].dst, &te->eth_hdr.dst_address,
204 sizeof (eth[0].dst));
205 clib_memset (ð[1].dst, 0xFF, sizeof (eth[1].dst));
208 if (!mac_address_is_all_zero (te->eth_hdr.src_address))
210 clib_memcpy_fast (ð[0].src, &te->eth_hdr.src_address,
211 sizeof (eth[0].src));
212 clib_memset (ð[1].src, 0xFF, sizeof (eth[1].src));
/* A non-zero ether type is matched exactly (mask 0xFFFF). */
215 if (te->eth_hdr.type)
217 eth[0].type = clib_host_to_net_u16 (te->eth_hdr.type);
218 eth[1].type = clib_host_to_net_u16 (0xFFFF);
222 item->mask = eth + 1;
230 /* currently only single empty vlan tag is supported */
231 if (FLOW_HAS_VLAN_TAG (f))
233 vec_add2 (items, item, 1);
234 item->type = RTE_FLOW_ITEM_TYPE_VLAN;
/* NOTE(review): the statement guarded by this test is not visible;
   presumably Ethernet-class flows skip the L3/L4 items -- confirm. */
239 if (FLOW_IS_ETHERNET_CLASS (f))
/* Layer 3 item: IPv4 or IPv6 addresses and protocol, each with its mask. */
243 vec_add2 (items, item, 1);
244 if (flow_class == FLOW_IPV4_CLASS)
246 vnet_flow_ip4_t *ip4_ptr = &f->ip4;
248 item->type = RTE_FLOW_ITEM_TYPE_IPV4;
/* All-zero masks are tested separately; the branch taken in that case is
   not visible in this listing. */
249 if ((!ip4_ptr->src_addr.mask.as_u32) &&
250 (!ip4_ptr->dst_addr.mask.as_u32) && (!ip4_ptr->protocol.mask))
257 ip4[0].hdr.src_addr = ip4_ptr->src_addr.addr.as_u32;
258 ip4[1].hdr.src_addr = ip4_ptr->src_addr.mask.as_u32;
259 ip4[0].hdr.dst_addr = ip4_ptr->dst_addr.addr.as_u32;
260 ip4[1].hdr.dst_addr = ip4_ptr->dst_addr.mask.as_u32;
261 ip4[0].hdr.next_proto_id = ip4_ptr->protocol.prot;
262 ip4[1].hdr.next_proto_id = ip4_ptr->protocol.mask;
265 item->mask = ip4 + 1;
/* L4 and L4-tunnel flow types also carry ports; remember them for the
   TCP/UDP item built later. */
268 if (FLOW_IS_L4_TYPE (f) || FLOW_IS_L4_TUNNEL_TYPE (f))
270 vnet_flow_ip4_n_tuple_t *ip4_n_ptr = &f->ip4_n_tuple;
272 src_port = ip4_n_ptr->src_port.port;
273 dst_port = ip4_n_ptr->dst_port.port;
274 src_port_mask = ip4_n_ptr->src_port.mask;
275 dst_port_mask = ip4_n_ptr->dst_port.mask;
278 protocol = ip4_ptr->protocol.prot;
280 else if (flow_class == FLOW_IPV6_CLASS)
282 vnet_flow_ip6_t *ip6_ptr = &f->ip6;
284 item->type = RTE_FLOW_ITEM_TYPE_IPV6;
/* NOTE(review): this "nothing to match" test inspects only the source
   address mask and the protocol mask, not the destination address mask
   (the IPv4 branch checks both) -- confirm this is intended. */
286 if ((ip6_ptr->src_addr.mask.as_u64[0] == 0) &&
287 (ip6_ptr->src_addr.mask.as_u64[1] == 0) &&
288 (!ip6_ptr->protocol.mask))
295 clib_memcpy (ip6[0].hdr.src_addr, &ip6_ptr->src_addr.addr,
296 ARRAY_LEN (ip6_ptr->src_addr.addr.as_u8));
297 clib_memcpy (ip6[1].hdr.src_addr, &ip6_ptr->src_addr.mask,
298 ARRAY_LEN (ip6_ptr->src_addr.mask.as_u8));
299 clib_memcpy (ip6[0].hdr.dst_addr, &ip6_ptr->dst_addr.addr,
300 ARRAY_LEN (ip6_ptr->dst_addr.addr.as_u8));
301 clib_memcpy (ip6[1].hdr.dst_addr, &ip6_ptr->dst_addr.mask,
302 ARRAY_LEN (ip6_ptr->dst_addr.mask.as_u8));
303 ip6[0].hdr.proto = ip6_ptr->protocol.prot;
304 ip6[1].hdr.proto = ip6_ptr->protocol.mask;
307 item->mask = ip6 + 1;
310 if (FLOW_IS_L4_TYPE (f) || FLOW_IS_L4_TUNNEL_TYPE (f))
312 vnet_flow_ip6_n_tuple_t *ip6_n_ptr = &f->ip6_n_tuple;
314 src_port = ip6_n_ptr->src_port.port;
315 dst_port = ip6_n_ptr->dst_port.port;
316 src_port_mask = ip6_n_ptr->src_port.mask;
317 dst_port_mask = ip6_n_ptr->dst_port.mask;
320 protocol = ip6_ptr->protocol.prot;
/* NOTE(review): the statement guarded by this test is not visible;
   presumably pure L3 flows skip the next-layer item -- confirm. */
323 if (FLOW_IS_L3_TYPE (f))
/* Next-layer item.  NOTE(review): the switch header is not visible; the
   case labels are IP protocol numbers, so it presumably switches on
   `protocol` -- confirm. */
327 vec_add2 (items, item, 1);
330 case IP_PROTOCOL_L2TP:
331 item->type = RTE_FLOW_ITEM_TYPE_L2TPV3OIP;
332 l2tp[0].session_id = clib_host_to_net_u32 (f->ip4_l2tpv3oip.session_id);
333 l2tp[1].session_id = ~0;
336 item->mask = l2tp + 1;
339 case IP_PROTOCOL_IPSEC_ESP:
340 item->type = RTE_FLOW_ITEM_TYPE_ESP;
341 esp[0].hdr.spi = clib_host_to_net_u32 (f->ip4_ipsec_esp.spi);
345 item->mask = esp + 1;
348 case IP_PROTOCOL_IPSEC_AH:
349 item->type = RTE_FLOW_ITEM_TYPE_AH;
350 ah[0].spi = clib_host_to_net_u32 (f->ip4_ipsec_ah.spi);
356 case IP_PROTOCOL_TCP:
357 item->type = RTE_FLOW_ITEM_TYPE_TCP;
358 if ((src_port_mask == 0) && (dst_port_mask == 0))
/* Ports and port masks are converted to network byte order. */
365 tcp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
366 tcp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
367 tcp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
368 tcp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
370 item->mask = tcp + 1;
374 case IP_PROTOCOL_UDP:
375 item->type = RTE_FLOW_ITEM_TYPE_UDP;
376 if ((src_port_mask == 0) && (dst_port_mask == 0))
383 udp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
384 udp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
385 udp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
386 udp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
388 item->mask = udp + 1;
391 /* handle the UDP tunnels */
392 if (f->type == VNET_FLOW_TYPE_IP4_GTPC)
394 gtp[0].teid = clib_host_to_net_u32 (f->ip4_gtpc.teid);
397 vec_add2 (items, item, 1);
398 item->type = RTE_FLOW_ITEM_TYPE_GTPC;
400 item->mask = gtp + 1;
402 else if (f->type == VNET_FLOW_TYPE_IP4_GTPU)
404 gtp[0].teid = clib_host_to_net_u32 (f->ip4_gtpu.teid);
407 vec_add2 (items, item, 1);
408 item->type = RTE_FLOW_ITEM_TYPE_GTPU;
410 item->mask = gtp + 1;
412 else if (f->type == VNET_FLOW_TYPE_IP4_VXLAN)
414 u32 vni = f->ip4_vxlan.vni;
/* The VNI is shifted left by 8 inside the 32-bit vni_reserved word, for
   both the spec value and the all-ones mask. */
416 vxlan_header_t spec_hdr = {
417 .flags = VXLAN_FLAGS_I,
418 .vni_reserved = clib_host_to_net_u32 (vni << 8)
420 vxlan_header_t mask_hdr = {
422 .vni_reserved = clib_host_to_net_u32 (((u32) - 1) << 8)
/* VXLAN is matched with a RAW item: the header bytes are copied to offset
   raw_sz inside each element's `val`, and `pattern` points at that copy.
   raw[0] is the spec, raw[1] the mask. */
425 clib_memset (raw, 0, sizeof raw);
426 raw[0].item.relative = 1;
427 raw[0].item.length = vxlan_hdr_sz;
429 clib_memcpy_fast (raw[0].val + raw_sz, &spec_hdr, vxlan_hdr_sz);
430 raw[0].item.pattern = raw[0].val + raw_sz;
431 clib_memcpy_fast (raw[1].val + raw_sz, &mask_hdr, vxlan_hdr_sz);
432 raw[1].item.pattern = raw[1].val + raw_sz;
434 vec_add2 (items, item, 1);
435 item->type = RTE_FLOW_ITEM_TYPE_RAW;
437 item->mask = raw + 1;
442 rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
/* Terminate the pattern. */
447 vec_add2 (items, item, 1);
448 item->type = RTE_FLOW_ITEM_TYPE_END;
451 /* Only one 'fate' can be assigned */
452 if (f->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
454 vec_add2 (actions, action, 1);
455 queue.index = f->redirect_queue;
456 action->type = RTE_FLOW_ACTION_TYPE_QUEUE;
457 action->conf = &queue;
461 if (f->actions & VNET_FLOW_ACTION_DROP)
463 vec_add2 (actions, action, 1);
464 action->type = RTE_FLOW_ACTION_TYPE_DROP;
/* NOTE(review): the condition guarding this error is not visible;
   presumably it fires when a fate action was already chosen -- confirm. */
467 rv = VNET_FLOW_ERROR_INTERNAL;
474 if (f->actions & VNET_FLOW_ACTION_RSS)
478 vec_add2 (actions, action, 1);
479 action->type = RTE_FLOW_ACTION_TYPE_RSS;
482 /* convert types to DPDK rss bitmask */
483 dpdk_flow_convert_rss_types (f->rss_types, &rss_type);
485 rss.types = rss_type;
/* RTE_ETH_HASH_FUNCTION_MAX from the converter means the requested hash
   function has no DPDK equivalent. */
486 if ((rss.func = dpdk_flow_convert_rss_func (f->rss_fun)) ==
487 RTE_ETH_HASH_FUNCTION_MAX)
489 rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
495 rv = VNET_FLOW_ERROR_INTERNAL;
/* NOTE(review): the condition guarding PASSTHRU is not visible;
   presumably it is added when no fate action was selected -- confirm. */
504 vec_add2 (actions, action, 1);
505 action->type = RTE_FLOW_ACTION_TYPE_PASSTHRU;
508 if (f->actions & VNET_FLOW_ACTION_MARK)
510 vec_add2 (actions, action, 1);
512 action->type = RTE_FLOW_ACTION_TYPE_MARK;
513 action->conf = &mark;
/* Terminate the action list. */
516 vec_add2 (actions, action, 1);
517 action->type = RTE_FLOW_ACTION_TYPE_END;
/* Ask the driver whether the rule is acceptable before creating it, and
   map the failure onto VNET_FLOW_ERROR_* codes. */
519 rv = rte_flow_validate (xd->device_index, &ingress, items, actions,
520 &xd->last_flow_error);
525 rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
526 else if (rv == -EEXIST)
527 rv = VNET_FLOW_ERROR_ALREADY_EXISTS;
529 rv = VNET_FLOW_ERROR_INTERNAL;
/* Install the rule and keep its handle in the flow entry. */
534 fe->handle = rte_flow_create (xd->device_index, &ingress, items, actions,
535 &xd->last_flow_error);
538 rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
/* Flow operation entry point for the DPDK device `dev_instance`.
   It handles VNET_FLOW_DEV_OP_DEL_FLOW and VNET_FLOW_DEV_OP_ADD_FLOW; any
   other op returns VNET_FLOW_ERROR_NOT_SUPPORTED.
   `private_data` carries the index into xd->flow_entries: it is written
   on add and read back on delete.
   NOTE(review): braces, labels (e.g. the `disable_rx_offload` target),
   some declarations and the final return are not present in this
   listing. */
547 dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
548 u32 flow_index, uword * private_data)
550 dpdk_main_t *dm = &dpdk_main;
551 vnet_flow_t *flow = vnet_get_flow (flow_index);
552 dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
553 dpdk_flow_entry_t *fe;
554 dpdk_flow_lookup_entry_t *fle = 0;
557 /* recycle old flow lookup entries only after the main loop counter
558 increases - i.e. previously DMA'ed packets were handled */
559 if (vec_len (xd->parked_lookup_indexes) > 0 &&
560 xd->parked_loop_count != dm->vlib_main->main_loop_count)
564 vec_foreach (fl_index, xd->parked_lookup_indexes)
565 pool_put_index (xd->flow_lookup_entries, *fl_index);
566 vec_reset_length (xd->parked_lookup_indexes);
/* Delete: destroy the rte_flow rule, park its lookup entry, free the
   flow entry. */
569 if (op == VNET_FLOW_DEV_OP_DEL_FLOW)
571 fe = vec_elt_at_index (xd->flow_entries, *private_data);
573 if ((rv = rte_flow_destroy (xd->device_index, fe->handle,
574 &xd->last_flow_error)))
575 return VNET_FLOW_ERROR_INTERNAL;
/* The lookup entry is reset and its index parked, not freed at once; the
   recycle step above frees it after the main loop counter has changed.
   NOTE(review): any guard around this section (e.g. on fe->mark) is not
   visible in this listing. */
579 /* make sure no action is taken for in-flight (marked) packets */
580 fle = pool_elt_at_index (xd->flow_lookup_entries, fe->mark);
581 clib_memset (fle, -1, sizeof (*fle));
582 vec_add1 (xd->parked_lookup_indexes, fe->mark);
583 xd->parked_loop_count = dm->vlib_main->main_loop_count;
586 clib_memset (fe, 0, sizeof (*fe));
587 pool_put (xd->flow_entries, fe);
589 goto disable_rx_offload;
592 if (op != VNET_FLOW_DEV_OP_ADD_FLOW)
593 return VNET_FLOW_ERROR_NOT_SUPPORTED;
/* Add: allocate a flow entry for this flow. */
595 pool_get (xd->flow_entries, fe);
596 fe->flow_index = flow->index;
/* A flow with no actions at all is rejected. */
598 if (flow->actions == 0)
600 rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
604 /* if we need to mark packets, assign one mark */
605 if (flow->actions & (VNET_FLOW_ACTION_MARK |
606 VNET_FLOW_ACTION_REDIRECT_TO_NODE |
607 VNET_FLOW_ACTION_BUFFER_ADVANCE))
/* While the lookup pool is still unallocated one extra entry is taken
   first, so the entry used for this flow is never the pool's very first
   allocation -- presumably to keep mark 0 unused; confirm. */
610 if (xd->flow_lookup_entries == 0)
611 pool_get_aligned (xd->flow_lookup_entries, fle,
612 CLIB_CACHE_LINE_BYTES);
613 pool_get_aligned (xd->flow_lookup_entries, fle, CLIB_CACHE_LINE_BYTES);
/* The mark is the lookup entry's index within the pool. */
614 fe->mark = fle - xd->flow_lookup_entries;
616 /* install entry in the lookup table */
617 clib_memset (fle, -1, sizeof (*fle));
618 if (flow->actions & VNET_FLOW_ACTION_MARK)
619 fle->flow_id = flow->mark_flow_id;
620 if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_NODE)
621 fle->next_index = flow->redirect_device_input_next_index;
622 if (flow->actions & VNET_FLOW_ACTION_BUFFER_ADVANCE)
623 fle->buffer_advance = flow->buffer_advance;
/* RX flow-offload flag not yet set: set it and re-run
   dpdk_device_setup(). */
628 if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) == 0)
630 xd->flags |= DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD;
631 dpdk_device_setup (xd);
/* Flow types handed to dpdk_flow_add().
   NOTE(review): the visible case list omits the *_N_TUPLE_TAGGED and
   *_VXLAN types that dpdk_flow_add() has code for; presumably those reach
   the NOT_SUPPORTED assignment below -- confirm whether that is
   intended. */
636 case VNET_FLOW_TYPE_ETHERNET:
637 case VNET_FLOW_TYPE_IP4:
638 case VNET_FLOW_TYPE_IP6:
639 case VNET_FLOW_TYPE_IP4_N_TUPLE:
640 case VNET_FLOW_TYPE_IP6_N_TUPLE:
641 case VNET_FLOW_TYPE_IP4_GTPC:
642 case VNET_FLOW_TYPE_IP4_GTPU:
643 case VNET_FLOW_TYPE_IP4_L2TPV3OIP:
644 case VNET_FLOW_TYPE_IP4_IPSEC_ESP:
645 case VNET_FLOW_TYPE_IP4_IPSEC_AH:
646 if ((rv = dpdk_flow_add (xd, flow, fe)))
650 rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
/* Hand the flow entry index back to the caller; the delete path reads
   it. */
654 *private_data = fe - xd->flow_entries;
/* Release the flow entry and the lookup entry.
   NOTE(review): the conditions guarding these statements are not visible;
   presumably they run only on failure -- confirm. */
659 clib_memset (fe, 0, sizeof (*fe));
660 pool_put (xd->flow_entries, fe);
663 clib_memset (fle, -1, sizeof (*fle));
664 pool_put (xd->flow_lookup_entries, fle);
/* Flag set but no flow entries remain: clear it and re-run
   dpdk_device_setup(). */
668 if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) != 0
669 && pool_elts (xd->flow_entries) == 0)
671 xd->flags &= ~DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD;
672 dpdk_device_setup (xd);
/* format() callback printing DPDK flow information.
   The va_list supplies, in order: u32 dev_instance, u32 flow_index and
   uword private_data.
   With flow_index == ~0 it prints device-level information: supported
   flow actions plus the last DPDK flow error type and message.
   Otherwise private_data is used as an index into xd->flow_entries: an
   out-of-range index yields "unknown flow", a valid one prints the
   entry's mark.
   NOTE(review): braces, the fallback string for a NULL error message and
   the return statements are not present in this listing. */
679 format_dpdk_flow (u8 * s, va_list * args)
681 u32 dev_instance = va_arg (*args, u32);
682 u32 flow_index = va_arg (*args, u32);
683 uword private_data = va_arg (*args, uword);
684 dpdk_main_t *dm = &dpdk_main;
685 dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
686 dpdk_flow_entry_t *fe;
/* Device-level summary when no specific flow is requested. */
688 if (flow_index == ~0)
690 s = format (s, "%-25s: %U\n", "supported flow actions",
691 format_flow_actions, xd->supported_flow_actions);
692 s = format (s, "%-25s: %d\n", "last DPDK error type",
693 xd->last_flow_error.type);
694 s = format (s, "%-25s: %s\n", "last DPDK error message",
695 xd->last_flow_error.message ? xd->last_flow_error.message :
/* Bounds-check the caller-supplied index before dereferencing it. */
700 if (private_data >= vec_len (xd->flow_entries))
701 return format (s, "unknown flow");
703 fe = vec_elt_at_index (xd->flow_entries, private_data);
704 s = format (s, "mark %u", fe->mark);
709 * fd.io coding-style-patch-verification: ON
712 * eval: (c-set-style "gnu")