/*
 * Copyright (c) 2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #include <vnet/vnet.h>
17 #include <vppinfra/vec.h>
18 #include <vppinfra/format.h>
19 #include <vlib/unix/cj.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/ethernet/arp_packet.h>
25 #include <vnet/vxlan/vxlan.h>
26 #include <dpdk/device/dpdk.h>
28 #include <dpdk/device/dpdk_priv.h>
29 #include <vppinfra/error.h>
31 /* constant structs */
32 static const struct rte_flow_attr ingress = {.ingress = 1 };
33 static const struct rte_flow_item_eth any_eth[2] = { };
34 static const struct rte_flow_item_vlan any_vlan[2] = { };
/*
 * Build an rte_flow pattern and action list from vnet flow `f`, check it
 * with rte_flow_validate () and install it on port xd->device_index with
 * rte_flow_create (); the resulting handle is stored in fe->handle.
 *
 * Result codes assigned in the visible code: VNET_FLOW_ERROR_NOT_SUPPORTED,
 * VNET_FLOW_ERROR_ALREADY_EXISTS and VNET_FLOW_ERROR_INTERNAL.
 *
 * NOTE(review): several short lines of this function (braces, a few
 * assignments, the return type and the cleanup/return path) are not
 * visible here; the comments below describe only the visible statements.
 */
37 dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
/* Two-element arrays: element [0] holds the values to match, element [1]
   the corresponding mask. */
39 struct rte_flow_item_ipv4 ip4[2] = { };
40 struct rte_flow_item_ipv6 ip6[2] = { };
41 struct rte_flow_item_udp udp[2] = { };
42 struct rte_flow_item_tcp tcp[2] = { };
/* Configuration structs referenced by the MARK and QUEUE actions. */
43 struct rte_flow_action_mark mark = { 0 };
44 struct rte_flow_action_queue queue = { 0 };
/* Growing vectors of pattern items / actions; `item` and `action` point at
   the element most recently appended with vec_add2 (). */
45 struct rte_flow_item *item, *items = 0;
46 struct rte_flow_action *action, *actions = 0;
/* Byte sizes of the VXLAN header and of the raw item descriptor. */
51 vxlan_hdr_sz = sizeof (vxlan_header_t),
52 raw_sz = sizeof (struct rte_flow_item_raw)
/* Raw-item storage: a descriptor plus a byte buffer of
   raw_sz + vxlan_hdr_sz bytes.
   NOTE(review): the enclosing aggregate (used below as raw[0] / raw[1])
   is declared on lines not visible here. */
57 struct rte_flow_item_raw item;
58 u8 val[raw_sz + vxlan_hdr_sz];
/* L4 ports and masks, set per flow type and consumed by the UDP/TCP item. */
61 u16 src_port, dst_port, src_port_mask, dst_port_mask;
/* Reject the flow if it requests any action bit that is not present in
   xd->supported_flow_actions. */
65 if (f->actions & (~xd->supported_flow_actions))
66 return VNET_FLOW_ERROR_NOT_SUPPORTED;
/* Ethernet item; its mask is the second, zero-filled element of any_eth. */
70 vec_add2 (items, item, 1);
71 item->type = RTE_FLOW_ITEM_TYPE_ETH;
73 item->mask = any_eth + 1;
/* VLAN item with zero-filled spec and mask, tied to the flow type not
   being IP4_VXLAN. */
76 if (f->type != VNET_FLOW_TYPE_IP4_VXLAN)
78 vec_add2 (items, item, 1);
79 item->type = RTE_FLOW_ITEM_TYPE_VLAN;
80 item->spec = any_vlan;
81 item->mask = any_vlan + 1;
/* IP item: append the slot first, then fill it according to the flow type. */
85 vec_add2 (items, item, 1);
86 if (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE)
/* IPv6 n-tuple: copy the 16-byte source/destination addresses into ip6[0]
   and their masks into ip6[1]. */
88 vnet_flow_ip6_n_tuple_t *t6 = &f->ip6_n_tuple;
89 clib_memcpy_fast (ip6[0].hdr.src_addr, &t6->src_addr.addr, 16);
90 clib_memcpy_fast (ip6[1].hdr.src_addr, &t6->src_addr.mask, 16);
91 clib_memcpy_fast (ip6[0].hdr.dst_addr, &t6->dst_addr.addr, 16);
92 clib_memcpy_fast (ip6[1].hdr.dst_addr, &t6->dst_addr.mask, 16);
93 item->type = RTE_FLOW_ITEM_TYPE_IPV6;
/* Keep ports, port masks and protocol for the L4 item built further down. */
97 src_port = t6->src_port.port;
98 dst_port = t6->dst_port.port;
99 src_port_mask = t6->src_port.mask;
100 dst_port_mask = t6->dst_port.mask;
101 protocol = t6->protocol;
103 else if (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE)
/* IPv4 n-tuple: addresses go to ip4[0], masks to ip4[1]; the item mask
   points at ip4[1]. */
105 vnet_flow_ip4_n_tuple_t *t4 = &f->ip4_n_tuple;
106 ip4[0].hdr.src_addr = t4->src_addr.addr.as_u32;
107 ip4[1].hdr.src_addr = t4->src_addr.mask.as_u32;
108 ip4[0].hdr.dst_addr = t4->dst_addr.addr.as_u32;
109 ip4[1].hdr.dst_addr = t4->dst_addr.mask.as_u32;
110 item->type = RTE_FLOW_ITEM_TYPE_IPV4;
112 item->mask = ip4 + 1;
/* Keep ports, port masks and protocol for the L4 item built further down. */
114 src_port = t4->src_port.port;
115 dst_port = t4->dst_port.port;
116 src_port_mask = t4->src_port.mask;
117 dst_port_mask = t4->dst_port.mask;
118 protocol = t4->protocol;
120 else if (f->type == VNET_FLOW_TYPE_IP4_VXLAN)
/* IPv4 VXLAN: both addresses are matched with an all-ones (-1) mask. */
122 vnet_flow_ip4_vxlan_t *v4 = &f->ip4_vxlan;
123 ip4[0].hdr.src_addr = v4->src_addr.as_u32;
124 ip4[1].hdr.src_addr = -1;
125 ip4[0].hdr.dst_addr = v4->dst_addr.as_u32;
126 ip4[1].hdr.dst_addr = -1;
127 item->type = RTE_FLOW_ITEM_TYPE_IPV4;
129 item->mask = ip4 + 1;
/* The protocol is forced to UDP for this flow type.
   NOTE(review): only dst_port and protocol are assigned in the visible
   lines of this branch; confirm that src_port, src_port_mask and
   dst_port_mask are set before the L4 item reads them. */
131 dst_port = v4->dst_port;
135 protocol = IP_PROTOCOL_UDP;
/* NOTE(review): presumably the fallback for any other flow type; the
   enclosing branch is not visible. */
139 rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
/* L4 item: ports and port masks are converted from host to network byte
   order; [0] carries the values, [1] the masks. */
144 vec_add2 (items, item, 1);
145 if (protocol == IP_PROTOCOL_UDP)
147 udp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
148 udp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
149 udp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
150 udp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
151 item->type = RTE_FLOW_ITEM_TYPE_UDP;
153 item->mask = udp + 1;
155 else if (protocol == IP_PROTOCOL_TCP)
157 tcp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
158 tcp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
159 tcp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
160 tcp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
161 item->type = RTE_FLOW_ITEM_TYPE_TCP;
163 item->mask = tcp + 1;
/* NOTE(review): presumably reached for protocols other than UDP and TCP;
   the enclosing branch is not visible. */
167 rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
171 /* Tunnel header match */
172 if (f->type == VNET_FLOW_TYPE_IP4_VXLAN)
/* Spec header: VXLAN_FLAGS_I set, and the VNI shifted left by 8 bits and
   stored in network byte order. */
174 u32 vni = f->ip4_vxlan.vni;
175 vxlan_header_t spec_hdr = {
176 .flags = VXLAN_FLAGS_I,
177 .vni_reserved = clib_host_to_net_u32 (vni << 8)
/* Mask header: vni_reserved is all-ones shifted left by 8 bits (before
   byte-order conversion), i.e. the low 8 bits are excluded. */
179 vxlan_header_t mask_hdr = {
181 .vni_reserved = clib_host_to_net_u32 (((u32) - 1) << 8)
/* Zero both raw elements, then flag raw[0] as relative with a length of
   vxlan_hdr_sz bytes. */
184 clib_memset (raw, 0, sizeof raw);
185 raw[0].item.relative = 1;
186 raw[0].item.length = vxlan_hdr_sz;
/* The header bytes are copied into val at offset raw_sz and item.pattern is
   pointed at them, once for the spec (raw[0]) and once for the mask
   (raw[1]). */
188 clib_memcpy_fast (raw[0].val + raw_sz, &spec_hdr, vxlan_hdr_sz);
189 raw[0].item.pattern = raw[0].val + raw_sz;
190 clib_memcpy_fast (raw[1].val + raw_sz, &mask_hdr, vxlan_hdr_sz);
191 raw[1].item.pattern = raw[1].val + raw_sz;
/* Append the RAW item; its mask is raw[1]. */
193 vec_add2 (items, item, 1);
194 item->type = RTE_FLOW_ITEM_TYPE_RAW;
196 item->mask = raw + 1;
/* Terminate the pattern list. */
199 vec_add2 (items, item, 1);
200 item->type = RTE_FLOW_ITEM_TYPE_END;
203 /* Only one 'fate' can be assigned */
/* QUEUE action: steer matching packets to queue index f->redirect_queue. */
204 if (f->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
206 vec_add2 (actions, action, 1);
207 queue.index = f->redirect_queue;
208 action->type = RTE_FLOW_ACTION_TYPE_QUEUE;
209 action->conf = &queue;
/* DROP action.
   NOTE(review): VNET_FLOW_ERROR_INTERNAL is assigned right after it; the
   guarding condition is not visible - presumably a second fate was
   requested. */
212 if (f->actions & VNET_FLOW_ACTION_DROP)
214 vec_add2 (actions, action, 1);
215 action->type = RTE_FLOW_ACTION_TYPE_DROP;
218 rv = VNET_FLOW_ERROR_INTERNAL;
/* PASSTHRU action.
   NOTE(review): the condition under which it is appended is not visible. */
226 vec_add2 (actions, action, 1);
227 action->type = RTE_FLOW_ACTION_TYPE_PASSTHRU;
/* Optional MARK action, configured through the local `mark` struct.
   NOTE(review): no assignment to `mark` is visible between its
   zero-initialisation and this use - confirm where its value is set. */
230 if (f->actions & VNET_FLOW_ACTION_MARK)
232 vec_add2 (actions, action, 1);
234 action->type = RTE_FLOW_ACTION_TYPE_MARK;
235 action->conf = &mark;
/* Terminate the action list. */
238 vec_add2 (actions, action, 1);
239 action->type = RTE_FLOW_ACTION_TYPE_END;
/* Validate the rule before creating it; error details are written to
   xd->last_flow_error. */
241 rv = rte_flow_validate (xd->device_index, &ingress, items, actions,
242 &xd->last_flow_error);
/* Translate the validate result into a vnet flow error; -EEXIST maps to
   ALREADY_EXISTS.
   NOTE(review): the tests selecting NOT_SUPPORTED and INTERNAL are not
   visible. */
247 rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
248 else if (rv == -EEXIST)
249 rv = VNET_FLOW_ERROR_ALREADY_EXISTS;
251 rv = VNET_FLOW_ERROR_INTERNAL;
/* Create the rule and keep its handle in the flow entry. */
255 fe->handle = rte_flow_create (xd->device_index, &ingress, items, actions,
256 &xd->last_flow_error);
/* NOTE(review): presumably taken when rte_flow_create () returned no
   handle; the condition is not visible. */
259 rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
268 dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
269 u32 flow_index, uword * private_data)
271 dpdk_main_t *dm = &dpdk_main;
272 vnet_flow_t *flow = vnet_get_flow (flow_index);
273 dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
274 dpdk_flow_entry_t *fe;
275 dpdk_flow_lookup_entry_t *fle = 0;
278 /* recycle old flow lookup entries only after the main loop counter
279 increases - i.e. previously DMA'ed packets were handled */
280 if (vec_len (xd->parked_lookup_indexes) > 0 &&
281 xd->parked_loop_count != dm->vlib_main->main_loop_count)
285 vec_foreach (fl_index, xd->parked_lookup_indexes)
286 pool_put_index (xd->flow_lookup_entries, *fl_index);
287 vec_reset_length (xd->flow_lookup_entries);
290 if (op == VNET_FLOW_DEV_OP_DEL_FLOW)
292 ASSERT (*private_data >= vec_len (xd->flow_entries));
294 fe = vec_elt_at_index (xd->flow_entries, *private_data);
296 if ((rv = rte_flow_destroy (xd->device_index, fe->handle,
297 &xd->last_flow_error)))
298 return VNET_FLOW_ERROR_INTERNAL;
302 /* make sure no action is taken for in-flight (marked) packets */
303 fle = pool_elt_at_index (xd->flow_lookup_entries, fe->mark);
304 clib_memset (fle, -1, sizeof (*fle));
305 vec_add1 (xd->parked_lookup_indexes, fe->mark);
306 xd->parked_loop_count = dm->vlib_main->main_loop_count;
309 clib_memset (fe, 0, sizeof (*fe));
310 pool_put (xd->flow_entries, fe);
312 goto disable_rx_offload;
315 if (op != VNET_FLOW_DEV_OP_ADD_FLOW)
316 return VNET_FLOW_ERROR_NOT_SUPPORTED;
318 pool_get (xd->flow_entries, fe);
319 fe->flow_index = flow->index;
321 if (flow->actions == 0)
323 rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
327 /* if we need to mark packets, assign one mark */
328 if (flow->actions & (VNET_FLOW_ACTION_MARK |
329 VNET_FLOW_ACTION_REDIRECT_TO_NODE |
330 VNET_FLOW_ACTION_BUFFER_ADVANCE))
333 if (xd->flow_lookup_entries == 0)
334 pool_get_aligned (xd->flow_lookup_entries, fle,
335 CLIB_CACHE_LINE_BYTES);
336 pool_get_aligned (xd->flow_lookup_entries, fle, CLIB_CACHE_LINE_BYTES);
337 fe->mark = fle - xd->flow_lookup_entries;
339 /* install entry in the lookup table */
340 clib_memset (fle, -1, sizeof (*fle));
341 if (flow->actions & VNET_FLOW_ACTION_MARK)
342 fle->flow_id = flow->mark_flow_id;
343 if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_NODE)
344 fle->next_index = flow->redirect_device_input_next_index;
345 if (flow->actions & VNET_FLOW_ACTION_BUFFER_ADVANCE)
346 fle->buffer_advance = flow->buffer_advance;
351 if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) == 0)
353 xd->flags |= DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD;
354 dpdk_device_setup (xd);
359 case VNET_FLOW_TYPE_IP4_N_TUPLE:
360 case VNET_FLOW_TYPE_IP6_N_TUPLE:
361 case VNET_FLOW_TYPE_IP4_VXLAN:
362 if ((rv = dpdk_flow_add (xd, flow, fe)))
366 rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
370 *private_data = fe - xd->flow_entries;
375 clib_memset (fe, 0, sizeof (*fe));
376 pool_put (xd->flow_entries, fe);
379 clib_memset (fle, -1, sizeof (*fle));
380 pool_put (xd->flow_lookup_entries, fle);
384 if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) != 0
385 && pool_elts (xd->flow_entries) == 0)
387 xd->flags &= ~DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD;
388 dpdk_device_setup (xd);
/*
 * format () callback describing DPDK flow state for a device.
 *
 * Arguments taken from the va_list, in order: u32 dev_instance (index into
 * dpdk_main.devices), u32 flow_index, uword private_data (index into the
 * device's flow_entries).
 *
 * With flow_index == ~0 it appends device-wide information: the supported
 * flow actions and the type and message of the last DPDK flow error.
 * Otherwise it appends "unknown flow" when private_data is out of range,
 * or "mark <n>" for the addressed flow entry.
 *
 * NOTE(review): the return type and the return statements other than the
 * "unknown flow" one are not visible here.
 */
395 format_dpdk_flow (u8 * s, va_list * args)
397 u32 dev_instance = va_arg (*args, u32);
398 u32 flow_index = va_arg (*args, u32);
399 uword private_data = va_arg (*args, uword);
400 dpdk_main_t *dm = &dpdk_main;
401 dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
402 dpdk_flow_entry_t *fe;
/* ~0 selects the per-device summary instead of a single flow. */
404 if (flow_index == ~0)
406 s = format (s, "%-25s: %U\n", "supported flow actions",
407 format_flow_actions, xd->supported_flow_actions);
408 s = format (s, "%-25s: %d\n", "last DPDK error type",
409 xd->last_flow_error.type);
/* A null error message is replaced by a fallback string.
   NOTE(review): the fallback operand of the conditional is not visible. */
410 s = format (s, "%-25s: %s\n", "last DPDK error message",
411 xd->last_flow_error.message ? xd->last_flow_error.message :
/* Bounds-check private_data before using it as an index. */
416 if (private_data >= vec_len (xd->flow_entries))
417 return format (s, "unknown flow");
/* Print the mark stored in the flow entry. */
419 fe = vec_elt_at_index (xd->flow_entries, private_data);
420 s = format (s, "mark %u", fe->mark);
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */