/*
 * Copyright (c) 2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vnet/vnet.h>
#include <vppinfra/vec.h>
#include <vppinfra/format.h>
#include <vlib/unix/cj.h>

#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ethernet/arp_packet.h>
#include <vnet/vxlan/vxlan.h>
#include <dpdk/device/dpdk.h>

#include <dpdk/device/dpdk_priv.h>
#include <vppinfra/error.h>
/* constant structs */
static const struct rte_flow_attr ingress = {.ingress = 1 };
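/*
 * Translate a vnet_flow_t into a DPDK rte_flow pattern (items) and action
 * list, validate it against the device and, if accepted, create the flow.
 * The rte_flow handle is stored in the flow entry for later deletion.
 */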
static int
dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
{
struct rte_flow_item_ipv4 ip4[2] = { };
struct rte_flow_item_ipv4 inner_ip4[2] = { };
struct rte_flow_item_ipv6 ip6[2] = { };
struct rte_flow_item_ipv6 inner_ip6[2] = { };
struct rte_flow_item_udp udp[2] = { };
struct rte_flow_item_tcp tcp[2] = { };
struct rte_flow_item_gtp gtp[2] = { };
struct rte_flow_action_mark mark = { 0 };
struct rte_flow_action_queue queue = { 0 };
struct rte_flow_item *item, *items = 0;
struct rte_flow_action *action, *actions = 0;
enum
{
vxlan_hdr_sz = sizeof (vxlan_header_t),
raw_sz = sizeof (struct rte_flow_item_raw)
};

union
{
struct rte_flow_item_raw item;
u8 val[raw_sz + vxlan_hdr_sz];
} raw[2];
u16 src_port, dst_port, src_port_mask, dst_port_mask;
u8 protocol;
int rv = 0;
if (f->actions & (~xd->supported_flow_actions))
return VNET_FLOW_ERROR_NOT_SUPPORTED;
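/* Match items: start with an Ethernet item that places no constraint on
   the L2 header; the more specific items below narrow the match */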
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_ETH;
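/* VLAN item: only added for the plain n-tuple flow types */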
if ((f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) ||
(f->type == VNET_FLOW_TYPE_IP6_N_TUPLE))
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_VLAN;
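/* L3 item: IPv4 or IPv6 header, depending on the flow type */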
vec_add2 (items, item, 1);
if ((f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) ||
(f->type == VNET_FLOW_TYPE_IP6_GTPC) ||
(f->type == VNET_FLOW_TYPE_IP6_GTPU) ||
(f->type == VNET_FLOW_TYPE_IP6_GTPU_IP4) ||
(f->type == VNET_FLOW_TYPE_IP6_GTPU_IP6))
vnet_flow_ip6_n_tuple_t *t6 = &f->ip6_n_tuple;
item->type = RTE_FLOW_ITEM_TYPE_IPV6;
if (!clib_memcmp (&t6->src_addr.mask, &zero_addr, 16) &&
!clib_memcmp (&t6->dst_addr.mask, &zero_addr, 16))
clib_memcpy_fast (ip6[0].hdr.src_addr, &t6->src_addr.addr, 16);
clib_memcpy_fast (ip6[1].hdr.src_addr, &t6->src_addr.mask, 16);
clib_memcpy_fast (ip6[0].hdr.dst_addr, &t6->dst_addr.addr, 16);
clib_memcpy_fast (ip6[1].hdr.dst_addr, &t6->dst_addr.mask, 16);
item->spec = ip6;
item->mask = ip6 + 1;
src_port = t6->src_port.port;
dst_port = t6->dst_port.port;
src_port_mask = t6->src_port.mask;
dst_port_mask = t6->dst_port.mask;
protocol = t6->protocol;
else if ((f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) ||
(f->type == VNET_FLOW_TYPE_IP4_GTPC) ||
(f->type == VNET_FLOW_TYPE_IP4_GTPU) ||
(f->type == VNET_FLOW_TYPE_IP4_GTPU_IP4) ||
(f->type == VNET_FLOW_TYPE_IP4_GTPU_IP6))
vnet_flow_ip4_n_tuple_t *t4 = &f->ip4_n_tuple;
item->type = RTE_FLOW_ITEM_TYPE_IPV4;
if (!t4->src_addr.mask.as_u32 && !t4->dst_addr.mask.as_u32)
ip4[0].hdr.src_addr = t4->src_addr.addr.as_u32;
ip4[1].hdr.src_addr = t4->src_addr.mask.as_u32;
ip4[0].hdr.dst_addr = t4->dst_addr.addr.as_u32;
ip4[1].hdr.dst_addr = t4->dst_addr.mask.as_u32;
item->spec = ip4;
item->mask = ip4 + 1;
src_port = t4->src_port.port;
dst_port = t4->dst_port.port;
src_port_mask = t4->src_port.mask;
dst_port_mask = t4->dst_port.mask;
protocol = t4->protocol;
else if (f->type == VNET_FLOW_TYPE_IP4_VXLAN)
vnet_flow_ip4_vxlan_t *v4 = &f->ip4_vxlan;
ip4[0].hdr.src_addr = v4->src_addr.as_u32;
ip4[1].hdr.src_addr = -1;
ip4[0].hdr.dst_addr = v4->dst_addr.as_u32;
ip4[1].hdr.dst_addr = -1;
item->type = RTE_FLOW_ITEM_TYPE_IPV4;
item->spec = ip4;
item->mask = ip4 + 1;
dst_port = v4->dst_port;
dst_port_mask = -1;
src_port = 0;
src_port_mask = 0;
protocol = IP_PROTOCOL_UDP;
rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
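/* Layer 4: UDP or TCP, using the ports and protocol collected above */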
vec_add2 (items, item, 1);
if (protocol == IP_PROTOCOL_UDP)
item->type = RTE_FLOW_ITEM_TYPE_UDP;
if ((src_port_mask == 0) && (dst_port_mask == 0))
udp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
udp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
udp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
udp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
item->spec = udp;
item->mask = udp + 1;
else if (protocol == IP_PROTOCOL_TCP)
item->type = RTE_FLOW_ITEM_TYPE_TCP;
if ((src_port_mask == 0) && (dst_port_mask == 0))
tcp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
tcp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
tcp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
tcp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
item->spec = tcp;
item->mask = tcp + 1;
rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
/* Tunnel header match */
if (f->type == VNET_FLOW_TYPE_IP4_VXLAN)
u32 vni = f->ip4_vxlan.vni;
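/* The VXLAN header is matched as a raw item relative to the end of the
   UDP header: raw[0] carries the spec, raw[1] the mask */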
vxlan_header_t spec_hdr = {
.flags = VXLAN_FLAGS_I,
.vni_reserved = clib_host_to_net_u32 (vni << 8)
};
vxlan_header_t mask_hdr = {
.vni_reserved = clib_host_to_net_u32 (((u32) - 1) << 8)
};
clib_memset (raw, 0, sizeof raw);
raw[0].item.relative = 1;
raw[0].item.length = vxlan_hdr_sz;
clib_memcpy_fast (raw[0].val + raw_sz, &spec_hdr, vxlan_hdr_sz);
raw[0].item.pattern = raw[0].val + raw_sz;
clib_memcpy_fast (raw[1].val + raw_sz, &mask_hdr, vxlan_hdr_sz);
raw[1].item.pattern = raw[1].val + raw_sz;
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_RAW;
item->spec = raw;
item->mask = raw + 1;
else if (f->type == VNET_FLOW_TYPE_IP4_GTPC)
vnet_flow_ip4_gtpc_t *gc = &f->ip4_gtpc;
gtp[0].teid = clib_host_to_net_u32 (gc->teid);
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_GTPC;
item->spec = gtp;
item->mask = gtp + 1;
else if (f->type == VNET_FLOW_TYPE_IP4_GTPU)
vnet_flow_ip4_gtpu_t *gu = &f->ip4_gtpu;
gtp[0].teid = clib_host_to_net_u32 (gu->teid);
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_GTPU;
item->spec = gtp;
item->mask = gtp + 1;
else if ((f->type == VNET_FLOW_TYPE_IP4_GTPU_IP4) ||
(f->type == VNET_FLOW_TYPE_IP4_GTPU_IP6))
vnet_flow_ip4_gtpu_t *gu = &f->ip4_gtpu;
gtp[0].teid = clib_host_to_net_u32 (gu->teid);
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_GTPU;
item->spec = gtp;
item->mask = gtp + 1;
/* inner IP4 header */
if (f->type == VNET_FLOW_TYPE_IP4_GTPU_IP4)
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_IPV4;
vnet_flow_ip4_gtpu_ip4_t *gu4 = &f->ip4_gtpu_ip4;
if (!gu4->inner_src_addr.mask.as_u32 &&
!gu4->inner_dst_addr.mask.as_u32)
inner_ip4[0].hdr.src_addr = gu4->inner_src_addr.addr.as_u32;
inner_ip4[1].hdr.src_addr = gu4->inner_src_addr.mask.as_u32;
inner_ip4[0].hdr.dst_addr = gu4->inner_dst_addr.addr.as_u32;
inner_ip4[1].hdr.dst_addr = gu4->inner_dst_addr.mask.as_u32;
item->spec = inner_ip4;
item->mask = inner_ip4 + 1;
else if (f->type == VNET_FLOW_TYPE_IP4_GTPU_IP6)
ip6_address_t zero_addr;
vnet_flow_ip4_gtpu_ip6_t *gu6 = &f->ip4_gtpu_ip6;
clib_memset (&zero_addr, 0, sizeof (ip6_address_t));
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_IPV6;
if (!clib_memcmp (&gu6->inner_src_addr.mask, &zero_addr, 16) &&
!clib_memcmp (&gu6->inner_dst_addr.mask, &zero_addr, 16))
clib_memcpy_fast (inner_ip6[0].hdr.src_addr,
&gu6->inner_src_addr.addr, 16);
clib_memcpy_fast (inner_ip6[1].hdr.src_addr,
&gu6->inner_src_addr.mask, 16);
clib_memcpy_fast (inner_ip6[0].hdr.dst_addr,
&gu6->inner_dst_addr.addr, 16);
clib_memcpy_fast (inner_ip6[1].hdr.dst_addr,
&gu6->inner_dst_addr.mask, 16);
item->spec = inner_ip6;
item->mask = inner_ip6 + 1;
else if (f->type == VNET_FLOW_TYPE_IP6_GTPC)
vnet_flow_ip6_gtpc_t *gc = &f->ip6_gtpc;
gtp[0].teid = clib_host_to_net_u32 (gc->teid);
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_GTPC;
item->spec = gtp;
item->mask = gtp + 1;
else if (f->type == VNET_FLOW_TYPE_IP6_GTPU)
vnet_flow_ip6_gtpu_t *gu = &f->ip6_gtpu;
gtp[0].teid = clib_host_to_net_u32 (gu->teid);
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_GTPU;
item->spec = gtp;
item->mask = gtp + 1;
else if ((f->type == VNET_FLOW_TYPE_IP6_GTPU_IP4) ||
(f->type == VNET_FLOW_TYPE_IP6_GTPU_IP6))
vnet_flow_ip6_gtpu_t *gu = &f->ip6_gtpu;
gtp[0].teid = clib_host_to_net_u32 (gu->teid);
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_GTPU;
item->spec = gtp;
item->mask = gtp + 1;
/* inner IP4 header */
if (f->type == VNET_FLOW_TYPE_IP6_GTPU_IP4)
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_IPV4;
vnet_flow_ip6_gtpu_ip4_t *gu4 = &f->ip6_gtpu_ip4;
if (!gu4->inner_src_addr.mask.as_u32 &&
!gu4->inner_dst_addr.mask.as_u32)
inner_ip4[0].hdr.src_addr = gu4->inner_src_addr.addr.as_u32;
inner_ip4[1].hdr.src_addr = gu4->inner_src_addr.mask.as_u32;
inner_ip4[0].hdr.dst_addr = gu4->inner_dst_addr.addr.as_u32;
inner_ip4[1].hdr.dst_addr = gu4->inner_dst_addr.mask.as_u32;
item->spec = inner_ip4;
item->mask = inner_ip4 + 1;
if (f->type == VNET_FLOW_TYPE_IP6_GTPU_IP6)
ip6_address_t zero_addr;
vnet_flow_ip6_gtpu_ip6_t *gu6 = &f->ip6_gtpu_ip6;
clib_memset (&zero_addr, 0, sizeof (ip6_address_t));
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_IPV6;
if (!clib_memcmp (&gu6->inner_src_addr.mask, &zero_addr, 16) &&
!clib_memcmp (&gu6->inner_dst_addr.mask, &zero_addr, 16))
clib_memcpy_fast (inner_ip6[0].hdr.src_addr,
&gu6->inner_src_addr.addr, 16);
clib_memcpy_fast (inner_ip6[1].hdr.src_addr,
&gu6->inner_src_addr.mask, 16);
clib_memcpy_fast (inner_ip6[0].hdr.dst_addr,
&gu6->inner_dst_addr.addr, 16);
clib_memcpy_fast (inner_ip6[1].hdr.dst_addr,
&gu6->inner_dst_addr.mask, 16);
item->spec = inner_ip6;
item->mask = inner_ip6 + 1;
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_END;
/* Only one 'fate' can be assigned */
if (f->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
vec_add2 (actions, action, 1);
queue.index = f->redirect_queue;
action->type = RTE_FLOW_ACTION_TYPE_QUEUE;
action->conf = &queue;
if (f->actions & VNET_FLOW_ACTION_DROP)
vec_add2 (actions, action, 1);
action->type = RTE_FLOW_ACTION_TYPE_DROP;
rv = VNET_FLOW_ERROR_INTERNAL;
vec_add2 (actions, action, 1);
action->type = RTE_FLOW_ACTION_TYPE_PASSTHRU;
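/* MARK action: tag matched packets with this flow's mark id so the
   receive path can map them back to the flow lookup entry */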
if (f->actions & VNET_FLOW_ACTION_MARK)
vec_add2 (actions, action, 1);
mark.id = fe->mark;
action->type = RTE_FLOW_ACTION_TYPE_MARK;
action->conf = &mark;
vec_add2 (actions, action, 1);
action->type = RTE_FLOW_ACTION_TYPE_END;
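/* Validate the pattern/action list first so that driver errors can be
   mapped to VNET_FLOW_ERROR_* codes before the flow is actually created */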
rv = rte_flow_validate (xd->device_index, &ingress, items, actions,
&xd->last_flow_error);
if (rv)
{
if (rv == -EINVAL)
rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
else if (rv == -EEXIST)
rv = VNET_FLOW_ERROR_ALREADY_EXISTS;
else
rv = VNET_FLOW_ERROR_INTERNAL;
goto done;
}
fe->handle = rte_flow_create (xd->device_index, &ingress, items, actions,
&xd->last_flow_error);
if (!fe->handle)
rv = VNET_FLOW_ERROR_NOT_SUPPORTED;

done:
vec_free (items);
vec_free (actions);
return rv;
}
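/*
 * Device-level flow handler for DPDK interfaces: called by the vnet flow
 * infrastructure to add or delete a flow on a given device instance.
 * Manages the per-device flow entry and flow lookup entry pools and
 * enables/disables RX flow offload on the device as needed.
 */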
int
dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
u32 flow_index, uword * private_data)
{
dpdk_main_t *dm = &dpdk_main;
vnet_flow_t *flow = vnet_get_flow (flow_index);
dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
dpdk_flow_entry_t *fe;
dpdk_flow_lookup_entry_t *fle = 0;
int rv = 0;
/* recycle old flow lookup entries only after the main loop counter
   increases - i.e. previously DMA'ed packets were handled */
if (vec_len (xd->parked_lookup_indexes) > 0 &&
xd->parked_loop_count != dm->vlib_main->main_loop_count)
u32 *fl_index;

vec_foreach (fl_index, xd->parked_lookup_indexes)
pool_put_index (xd->flow_lookup_entries, *fl_index);
vec_reset_length (xd->parked_lookup_indexes);
if (op == VNET_FLOW_DEV_OP_DEL_FLOW)
fe = vec_elt_at_index (xd->flow_entries, *private_data);
if ((rv = rte_flow_destroy (xd->device_index, fe->handle,
&xd->last_flow_error)))
return VNET_FLOW_ERROR_INTERNAL;
/* make sure no action is taken for in-flight (marked) packets */
fle = pool_elt_at_index (xd->flow_lookup_entries, fe->mark);
clib_memset (fle, -1, sizeof (*fle));
vec_add1 (xd->parked_lookup_indexes, fe->mark);
xd->parked_loop_count = dm->vlib_main->main_loop_count;
clib_memset (fe, 0, sizeof (*fe));
pool_put (xd->flow_entries, fe);
goto disable_rx_offload;
if (op != VNET_FLOW_DEV_OP_ADD_FLOW)
return VNET_FLOW_ERROR_NOT_SUPPORTED;
pool_get (xd->flow_entries, fe);
fe->flow_index = flow->index;
if (flow->actions == 0)
rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
/* if we need to mark packets, assign one mark */
if (flow->actions & (VNET_FLOW_ACTION_MARK |
VNET_FLOW_ACTION_REDIRECT_TO_NODE |
VNET_FLOW_ACTION_BUFFER_ADVANCE))
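/* entry 0 of the lookup table is kept as a placeholder so that no flow
   ever gets mark 0 */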
if (xd->flow_lookup_entries == 0)
pool_get_aligned (xd->flow_lookup_entries, fle,
CLIB_CACHE_LINE_BYTES);
pool_get_aligned (xd->flow_lookup_entries, fle, CLIB_CACHE_LINE_BYTES);
fe->mark = fle - xd->flow_lookup_entries;
/* install entry in the lookup table */
clib_memset (fle, -1, sizeof (*fle));
if (flow->actions & VNET_FLOW_ACTION_MARK)
fle->flow_id = flow->mark_flow_id;
if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_NODE)
fle->next_index = flow->redirect_device_input_next_index;
if (flow->actions & VNET_FLOW_ACTION_BUFFER_ADVANCE)
fle->buffer_advance = flow->buffer_advance;
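/* enable RX flow offload on the device the first time it is needed */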
if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) == 0)
xd->flags |= DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD;
dpdk_device_setup (xd);
switch (flow->type)
{
case VNET_FLOW_TYPE_IP4_N_TUPLE:
case VNET_FLOW_TYPE_IP6_N_TUPLE:
case VNET_FLOW_TYPE_IP4_VXLAN:
case VNET_FLOW_TYPE_IP4_GTPC:
case VNET_FLOW_TYPE_IP4_GTPU:
case VNET_FLOW_TYPE_IP4_GTPU_IP4:
case VNET_FLOW_TYPE_IP4_GTPU_IP6:
case VNET_FLOW_TYPE_IP6_GTPC:
case VNET_FLOW_TYPE_IP6_GTPU:
case VNET_FLOW_TYPE_IP6_GTPU_IP4:
case VNET_FLOW_TYPE_IP6_GTPU_IP6:
if ((rv = dpdk_flow_add (xd, flow, fe)))
goto done;
break;
default:
rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
goto done;
}
*private_data = fe - xd->flow_entries;
done:
if (rv)
{
clib_memset (fe, 0, sizeof (*fe));
pool_put (xd->flow_entries, fe);
if (fle)
{
clib_memset (fle, -1, sizeof (*fle));
pool_put (xd->flow_lookup_entries, fle);
}
}
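/* once the last flow entry is gone, turn RX flow offload back off */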
disable_rx_offload:
if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) != 0
&& pool_elts (xd->flow_entries) == 0)
{
xd->flags &= ~DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD;
dpdk_device_setup (xd);
}

return rv;
}
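/*
 * Format helper for "show flow" output: with flow_index == ~0 it prints the
 * device's supported flow actions and the last rte_flow error, otherwise it
 * prints the mark assigned to the given flow entry.
 */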
u8 *
format_dpdk_flow (u8 * s, va_list * args)
{
u32 dev_instance = va_arg (*args, u32);
u32 flow_index = va_arg (*args, u32);
uword private_data = va_arg (*args, uword);
dpdk_main_t *dm = &dpdk_main;
dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
dpdk_flow_entry_t *fe;
if (flow_index == ~0)
s = format (s, "%-25s: %U\n", "supported flow actions",
format_flow_actions, xd->supported_flow_actions);
s = format (s, "%-25s: %d\n", "last DPDK error type",
xd->last_flow_error.type);
s = format (s, "%-25s: %s\n", "last DPDK error message",
xd->last_flow_error.message ? xd->last_flow_error.message :
if (private_data >= vec_len (xd->flow_entries))
return format (s, "unknown flow");

fe = vec_elt_at_index (xd->flow_entries, private_data);
s = format (s, "mark %u", fe->mark);
return s;
}
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */