/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15 #include <vnet/vnet.h>
16 #include <vppinfra/vec.h>
17 #include <vppinfra/error.h>
18 #include <vppinfra/format.h>
19 #include <vppinfra/xxhash.h>
21 #include <vnet/ethernet/ethernet.h>
22 #include <dpdk/device/dpdk.h>
23 #include <vnet/classify/vnet_classify.h>
24 #include <vnet/mpls/packet.h>
25 #include <vnet/handoff.h>
26 #include <vnet/devices/devices.h>
27 #include <vnet/feature/feature.h>
29 #include <dpdk/device/dpdk_priv.h>
31 #ifndef CLIB_MULTIARCH_VARIANT
/* Error counter strings for the dpdk-input node (referenced by the node
 * registration below).  NOTE(review): the initializer body and closing
 * brace are missing from this chunk of the listing — presumably expanded
 * from the dpdk error macro list; confirm against the full source. */
static char *dpdk_error_strings[] = {
/* dpdk_rx_next () computes the ip4 next index branchlessly as
 * IP4_INPUT - is_good, so the checksum-validated next index must sit
 * exactly one below IP4_INPUT.  Enforce that ordering at compile time. */
STATIC_ASSERT (VNET_DEVICE_INPUT_NEXT_IP4_INPUT - 1 ==
	       VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT,
	       "IP4_INPUT must follow IP4_NCS_INPUT");
/* Bit positions of the per-packet rx flags we keep in ptd->flags[].
 * Chosen to match the corresponding DPDK PKT_RX_* ol_flags bit
 * positions, verified by the STATIC_ASSERTs below.  The listing had
 * dropped the enum header/closer and the DPDK_RX_F_FDIR member (its
 * value is pinned by the `1 << DPDK_RX_F_FDIR == PKT_RX_FDIR` assert);
 * restored here. */
enum
{
  DPDK_RX_F_CKSUM_GOOD = 7,
  DPDK_RX_F_CKSUM_BAD = 4,
  DPDK_RX_F_FDIR = 2,
};
/* currently we are just copying bit positions from DPDK, but that
   might change in the future, in case we start to be interested in
   something stored in upper bytes. Currently we store only the lower
   byte for perf reasons */
53 STATIC_ASSERT (1 << DPDK_RX_F_CKSUM_GOOD == PKT_RX_IP_CKSUM_GOOD, "");
54 STATIC_ASSERT (1 << DPDK_RX_F_CKSUM_BAD == PKT_RX_IP_CKSUM_BAD, "");
55 STATIC_ASSERT (1 << DPDK_RX_F_FDIR == PKT_RX_FDIR, "");
56 STATIC_ASSERT ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | PKT_RX_FDIR) <
57 256, "dpdk flags not un lower byte, fix needed");
60 dpdk_rx_next (vlib_node_runtime_t * node, u16 etype, u8 flags)
62 if (PREDICT_TRUE (etype == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)))
64 /* keep it branchless */
65 u32 is_good = (flags >> DPDK_RX_F_CKSUM_GOOD) & 1;
66 return VNET_DEVICE_INPUT_NEXT_IP4_INPUT - is_good;
68 else if (PREDICT_TRUE (etype == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)))
69 return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
70 else if (PREDICT_TRUE (etype == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)))
71 return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
73 return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
/* Walk the segments after the first of a multi-segment mbuf and link
 * the corresponding vlib buffers into a chain via next_buffer /
 * VLIB_BUFFER_NEXT_PRESENT, accumulating the chained byte count into
 * b->total_length_not_including_first_buffer, which is also returned.
 * NOTE(review): this listing chunk is missing several structural lines
 * (function braces, the nb_seg counter init/increment, the
 * mb_seg/b_chain initialization and the loop's b_chain advance) —
 * compare against the full source before relying on it. */
static_always_inline uword
dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b,
			  struct rte_mbuf * mb, vlib_buffer_free_list_t * fl)
  struct rte_mbuf *mb_seg = 0;
  vlib_buffer_t *b_seg, *b_chain = 0;

  /* head buffer carries the valid total length of the whole chain */
  b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
  b->total_length_not_including_first_buffer = 0;

  while (nb_seg < mb->nb_segs)
      b_seg = vlib_buffer_from_rte_mbuf (mb_seg);
      vlib_buffer_init_for_free_list (b_seg, fl);

      /* fresh segment: must not already be chained */
      ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
      ASSERT (b_seg->current_data == 0);

      /*
       * The driver (e.g. virtio) may not put the packet data at the start
       * of the segment, so don't assume b_seg->current_data == 0 is correct.
       */
      b_seg->current_data =
	(mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data;

      b_seg->current_length = mb_seg->data_len;
      b->total_length_not_including_first_buffer += mb_seg->data_len;

      /* link the previous buffer in the chain to this segment */
      b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT;
      b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg);

      mb_seg = mb_seg->next;

  return b->total_length_not_including_first_buffer;
122 static_always_inline void
123 dpdk_prefetch_mbuf_x4 (struct rte_mbuf *mb[])
125 CLIB_PREFETCH (mb[0], CLIB_CACHE_LINE_BYTES, LOAD);
126 CLIB_PREFETCH (mb[1], CLIB_CACHE_LINE_BYTES, LOAD);
127 CLIB_PREFETCH (mb[2], CLIB_CACHE_LINE_BYTES, LOAD);
128 CLIB_PREFETCH (mb[3], CLIB_CACHE_LINE_BYTES, LOAD);
131 static_always_inline void
132 dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[])
135 b = vlib_buffer_from_rte_mbuf (mb[0]);
136 CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
137 b = vlib_buffer_from_rte_mbuf (mb[1]);
138 CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
139 b = vlib_buffer_from_rte_mbuf (mb[2]);
140 CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
141 b = vlib_buffer_from_rte_mbuf (mb[3]);
142 CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
145 static_always_inline void
146 dpdk_prefetch_buffer_data_x4 (struct rte_mbuf *mb[])
149 b = vlib_buffer_from_rte_mbuf (mb[0]);
150 CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
151 b = vlib_buffer_from_rte_mbuf (mb[1]);
152 CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
153 b = vlib_buffer_from_rte_mbuf (mb[2]);
154 CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
155 b = vlib_buffer_from_rte_mbuf (mb[3]);
156 CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
160 poll_rate_limit (dpdk_main_t * dm)
162 /* Limit the poll rate by sleeping for N msec between polls */
163 if (PREDICT_FALSE (dm->poll_sleep_usec != 0))
165 struct timespec ts, tsrem;
168 ts.tv_nsec = 1000 * dm->poll_sleep_usec;
170 while (nanosleep (&ts, &tsrem) < 0)
177 /** \brief Main DPDK input node
180 This is the main DPDK input node: across each assigned interface,
181 call rte_eth_rx_burst(...) or similar to obtain a vector of
182 packets to process. Derive @c vlib_buffer_t metadata from
    <code>struct rte_mbuf</code> metadata.
    Depending on the resulting metadata: adjust <code>b->current_data,
185 b->current_length </code> and dispatch directly to
186 ip4-input-no-checksum, or ip6-input. Trace the packet if required.
188 @param vm vlib_main_t corresponding to the current thread
189 @param node vlib_node_runtime_t
190 @param f vlib_frame_t input-node, not used.
192 @par Graph mechanics: buffer metadata, next index usage
195 - <code>struct rte_mbuf mb->ol_flags</code>
196 - PKT_RX_IP_CKSUM_BAD
199 - <code>b->error</code> if the packet is to be dropped immediately
200 - <code>b->current_data, b->current_length</code>
201 - adjusted as needed to skip the L2 header in direct-dispatch cases
202 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
203 - rx interface sw_if_index
204 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0</code>
205 - required by ipX-lookup
206 - <code>b->flags</code>
207 - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.
210 - Static arcs to: error-drop, ethernet-input,
211 ip4-input-no-checksum, ip6-input, mpls-input
212 - per-interface redirection, controlled by
213 <code>xd->per_interface_next_index</code>
/* Convert a run of mbuf pointers into vlib buffer indices.  Because the
 * vlib_buffer_t sits immediately after each rte_mbuf, the index can be
 * computed arithmetically from the pointer; the AVX2 path does eight
 * conversions per iteration without touching buffer memory.
 * NOTE(review): this listing chunk is missing the loop headers, the
 * #else/#endif of the CLIB_HAVE_VEC256 conditional, the v2/v3
 * declarations, pointer advances and the tail loop — compare against
 * the full source before relying on it. */
static_always_inline void
dpdk_mbufs_to_buffer_indices (vlib_main_t * vm, struct rte_mbuf **mb,
			      u32 * bi, uword n_left)
#ifdef CLIB_HAVE_VEC256
  /* permute mask gathering the low u32 of each 64-bit lane, and the
   * pointer-to-index offset splatted across four lanes */
  u32x8 mask = { 0, 2, 4, 6, 1, 3, 5, 7 };
  u64x4 off4 = u64x4_splat (buffer_main.buffer_mem_start -
			    sizeof (struct rte_mbuf));

#ifdef CLIB_HAVE_VEC256
  /* load 4 pointers into 256-bit register */
  u64x4 v0 = u64x4_load_unaligned (mb);
  u64x4 v1 = u64x4_load_unaligned (mb + 4);

  /* calculate 4 buffer indices in parallel
     vlib_buffer_t is straight after rte_mbuf so advance all 4
     pointers for size of rte_mbuf */
  v0 >>= CLIB_LOG2_CACHE_LINE_BYTES;
  v1 >>= CLIB_LOG2_CACHE_LINE_BYTES;

  /* permute 256-bit register so lower u32s of each buffer index are
   * placed into lower 128-bits */
  v2 = u32x8_permute ((u32x8) v0, mask);
  v3 = u32x8_permute ((u32x8) v1, mask);

  /* extract lower 128-bits and save them to the array of buffer indices */
  u32x4_store_unaligned (u32x8_extract_lo (v2), bi);
  u32x4_store_unaligned (u32x8_extract_lo (v3), bi + 4);

  /* equivalent non-vector implementation */
  bi[0] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[0]));
  bi[1] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[1]));
  bi[2] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[2]));
  bi[3] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[3]));
  bi[4] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[4]));
  bi[5] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[5]));
  bi[6] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[6]));
  bi[7] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[7]));

  /* tail: convert remaining mbufs one at a time */
  bi[0] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[0]));
275 static_always_inline u8
276 dpdk_ol_flags_extract (struct rte_mbuf **mb, u8 * flags, int count)
280 for (i = 0; i < count; i++)
282 /* all flags we are interested in are in lower 8 bits but
284 flags[i] = (u8) mb[i]->ol_flags;
/* Initialize vlib buffer metadata for a freshly-received burst: copy
 * the per-thread buffer template into each buffer, set l2_hdr_offset /
 * current_data / current_length from the mbuf, stash the byte offset of
 * the ethertype into ptd->next[] (consumed later by
 * dpdk_set_next_from_etype), and — when maybe_multiseg is set — chain
 * multi-segment packets.  Returns the burst's total byte count and
 * stores the OR of all per-packet rx flags in *or_flagsp.
 * NOTE(review): this listing chunk is missing the declarations of
 * b[]/next/off/n_bytes, the quad/single loop headers, pointer advances,
 * the multiseg guard and the function braces/return — compare against
 * the full source before relying on it. */
static_always_inline uword
dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd,
		       uword n_rx_packets, int maybe_multiseg, u8 * or_flagsp)
  u32 n_left = n_rx_packets;
  vlib_buffer_free_list_t *fl;
  struct rte_mbuf **mb = ptd->mbufs;
  u8 *flags, or_flags = 0;

  fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);

  /* quad loop: prefetch the next iterations' mbufs and buffers */
  CLIB_PREFETCH (mb + 8, CLIB_CACHE_LINE_BYTES, LOAD);
  dpdk_prefetch_buffer_x4 (mb + 4);

  b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
  b[1] = vlib_buffer_from_rte_mbuf (mb[1]);
  b[2] = vlib_buffer_from_rte_mbuf (mb[2]);
  b[3] = vlib_buffer_from_rte_mbuf (mb[3]);

  /* stamp the buffer template onto all four buffers at once */
  clib_memcpy64_x4 (b[0], b[1], b[2], b[3], &ptd->buffer_template);

  dpdk_prefetch_mbuf_x4 (mb + 4);

  or_flags |= dpdk_ol_flags_extract (mb, flags, 4);

  /* we temporarily store the relative offset of the ethertype into
     next[x] so we can prefetch and get it faster later */
  off = mb[0]->data_off;
  next[0] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
  off -= RTE_PKTMBUF_HEADROOM;
  vnet_buffer (b[0])->l2_hdr_offset = off;
  b[0]->current_data = off;

  off = mb[1]->data_off;
  next[1] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
  off -= RTE_PKTMBUF_HEADROOM;
  vnet_buffer (b[1])->l2_hdr_offset = off;
  b[1]->current_data = off;

  off = mb[2]->data_off;
  next[2] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
  off -= RTE_PKTMBUF_HEADROOM;
  vnet_buffer (b[2])->l2_hdr_offset = off;
  b[2]->current_data = off;

  off = mb[3]->data_off;
  next[3] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
  off -= RTE_PKTMBUF_HEADROOM;
  vnet_buffer (b[3])->l2_hdr_offset = off;
  b[3]->current_data = off;

  b[0]->current_length = mb[0]->data_len;
  b[1]->current_length = mb[1]->data_len;
  b[2]->current_length = mb[2]->data_len;
  b[3]->current_length = mb[3]->data_len;

  n_bytes += mb[0]->data_len;
  n_bytes += mb[1]->data_len;
  n_bytes += mb[2]->data_len;
  n_bytes += mb[3]->data_len;

  /* maybe_multiseg path: chain any additional segments */
  n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], fl);
  n_bytes += dpdk_process_subseq_segs (vm, b[1], mb[1], fl);
  n_bytes += dpdk_process_subseq_segs (vm, b[2], mb[2], fl);
  n_bytes += dpdk_process_subseq_segs (vm, b[3], mb[3], fl);

  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]);
  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]);
  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]);

  /* single-packet tail loop: same steps as the quad loop above */
  b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
  clib_memcpy (b[0], &ptd->buffer_template, 64);
  or_flags |= dpdk_ol_flags_extract (mb, flags, 1);

  off = mb[0]->data_off;
  next[0] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
  off -= RTE_PKTMBUF_HEADROOM;
  vnet_buffer (b[0])->l2_hdr_offset = off;
  b[0]->current_data = off;
  b[0]->current_length = mb[0]->data_len;
  n_bytes += mb[0]->data_len;

  n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], fl);
  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);

  /* report the OR of all packets' flags to the caller */
  *or_flagsp = or_flags;
/* Second pass over the burst: read each packet's ethertype at the
 * offset previously stashed in ptd->next[] by dpdk_process_rx_burst,
 * convert it to a next-node index via dpdk_rx_next (), and advance the
 * buffer past the L2 header by the per-next-node advance amount so
 * direct-dispatched packets start at their L3 header.
 * NOTE(review): this listing chunk is missing the local declarations
 * (b[], etype[], adv[]), loop headers, pointer advances and the
 * function braces — compare against the full source before relying on
 * it. */
static_always_inline void
dpdk_set_next_from_etype (vlib_main_t * vm, vlib_node_runtime_t * node,
			  dpdk_per_thread_data_t * ptd, uword n_rx_packets)
  struct rte_mbuf **mb = ptd->mbufs;
  u8 *flags = ptd->flags;
  u16 *next = ptd->next;
  u32 n_left = n_rx_packets;

  /* quad loop: prefetch two iterations ahead */
  dpdk_prefetch_buffer_data_x4 (mb + 8);
  dpdk_prefetch_buffer_x4 (mb + 8);

  b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
  b[1] = vlib_buffer_from_rte_mbuf (mb[1]);
  b[2] = vlib_buffer_from_rte_mbuf (mb[2]);
  b[3] = vlib_buffer_from_rte_mbuf (mb[3]);
  /* next[x] currently holds the ethertype offset relative to the mbuf;
     the vlib_buffer_t sits sizeof (vlib_buffer_t) after the mbuf data */
  etype[0] = *(u16 *) ((u8 *) mb[0] + next[0] + sizeof (vlib_buffer_t));
  etype[1] = *(u16 *) ((u8 *) mb[1] + next[1] + sizeof (vlib_buffer_t));
  etype[2] = *(u16 *) ((u8 *) mb[2] + next[2] + sizeof (vlib_buffer_t));
  etype[3] = *(u16 *) ((u8 *) mb[3] + next[3] + sizeof (vlib_buffer_t));
  next[0] = dpdk_rx_next (node, etype[0], flags[0]);
  next[1] = dpdk_rx_next (node, etype[1], flags[1]);
  next[2] = dpdk_rx_next (node, etype[2], flags[2]);
  next[3] = dpdk_rx_next (node, etype[3], flags[3]);
  adv[0] = device_input_next_node_advance[next[0]];
  adv[1] = device_input_next_node_advance[next[1]];
  adv[2] = device_input_next_node_advance[next[2]];
  adv[3] = device_input_next_node_advance[next[3]];
  b[0]->current_data += adv[0];
  b[1]->current_data += adv[1];
  b[2]->current_data += adv[2];
  b[3]->current_data += adv[3];
  b[0]->current_length -= adv[0];
  b[1]->current_length -= adv[1];
  b[2]->current_length -= adv[2];
  b[3]->current_length -= adv[3];

  /* single-packet tail loop */
  b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
  next[0] = *(u16 *) ((u8 *) mb[0] + next[0] + sizeof (vlib_buffer_t));
  next[0] = dpdk_rx_next (node, next[0], flags[0]);
  adv[0] = device_input_next_node_advance[next[0]];
  b[0]->current_data += adv[0];
  b[0]->current_length -= adv[0];
477 static_always_inline void
478 dpdk_process_flow_offload (dpdk_device_t * xd, dpdk_per_thread_data_t * ptd,
482 dpdk_flow_lookup_entry_t *fle;
485 /* TODO prefetch and quad-loop */
486 for (n = 0; n < n_rx_packets; n++)
488 if ((ptd->flags[n] & (1 << DPDK_RX_F_FDIR)) == 0)
491 fle = vec_elt_at_index (xd->flow_lookup_entries,
492 ptd->mbufs[n]->hash.fdir.hi);
494 if (fle->next_index != (u16) ~ 0)
495 ptd->next[n] = fle->next_index;
497 if (fle->flow_id != ~0)
499 b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
500 b0->flow_id = fle->flow_id;
503 if (fle->buffer_advance != ~0)
505 b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
506 vlib_buffer_advance (b0, fle->buffer_advance);
/* Per-device, per-queue rx path: pull up to DPDK_RX_BURST_SZ mbufs from
 * the PMD, turn them into vlib buffers, choose next nodes (per-interface
 * redirect, feature arc, or ethertype dispatch), apply flow offload and
 * ip4-checksum-error drops, enqueue to the graph, trace, and bump
 * counters.  Returns the number of packets received.
 * NOTE(review): this listing chunk is missing many structural lines
 * (local declarations for n/n_left/buffers/to_next/next/known_next/
 * or_flags/n_trace/b0, braces, early returns, the burst-loop break,
 * else arms, pointer advances and the final return) — compare against
 * the full source before relying on it. */
static_always_inline u32
dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
		   vlib_node_runtime_t * node, u32 thread_index, u16 queue_id)
  uword n_rx_packets = 0, n_rx_bytes;
  u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
  struct rte_mbuf **mb;
  dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data,
  vlib_buffer_t *bt = &ptd->buffer_template;

  /* nothing to do for administratively-down interfaces */
  if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)

  /* get up to DPDK_RX_BURST_SZ buffers from PMD */
  while (n_rx_packets < DPDK_RX_BURST_SZ)
      n = rte_eth_rx_burst (xd->device_index, queue_id,
			    ptd->mbufs + n_rx_packets,
			    DPDK_RX_BURST_SZ - n_rx_packets);

  if (n_rx_packets == 0)

  /* Update buffer template */
  vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->sw_if_index;
  bt->error = node->errors[DPDK_ERROR_NONE];
  /* as DPDK is allocating empty buffers from mempool provided before interface
     start for each queue, it is safe to store this in the template */
  bt->buffer_pool_index = xd->buffer_pool_for_queue[queue_id];

  /* per-interface redirect overrides the normal next-node selection */
  if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
      next_index = xd->per_interface_next_index;

  /* as all packets belong to the same interface, the feature arc lookup
     can be done once and the result stored in the buffer template */
  if (PREDICT_FALSE (vnet_device_input_have_features (xd->sw_if_index)))
      vnet_feature_start_device_input_x1 (xd->sw_if_index, &next_index, bt);

  if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG)
    n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 1, &or_flags);
    n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 0, &or_flags);

  if (PREDICT_FALSE (known_next))
      /* all packets go to the same known next: no ethertype dispatch */
      for (n = 0; n < n_rx_packets; n++)
	ptd->next[n] = next_index;

      vnet_buffer (bt)->feature_arc_index = 0;
      bt->current_config_index = 0;
    dpdk_set_next_from_etype (vm, node, ptd, n_rx_packets);

  /* flow offload - process if rx flow offload enabled and at least one
     packet is marked as fdir */
  if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) &&
		     (or_flags & (1 << DPDK_RX_F_FDIR))))
    dpdk_process_flow_offload (xd, ptd, n_rx_packets);

  /* is at least one packet marked as ip4 checksum bad? */
  if (PREDICT_FALSE (or_flags & (1 << DPDK_RX_F_CKSUM_BAD)))
    for (n = 0; n < n_rx_packets; n++)
	if ((ptd->flags[n] & (1 << DPDK_RX_F_CKSUM_BAD)) == 0)
	if (ptd->next[n] != VNET_DEVICE_INPUT_NEXT_IP4_INPUT)
	b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
	b0->error = node->errors[DPDK_ERROR_IP_CHECKSUM_ERROR];
	ptd->next[n] = VNET_DEVICE_INPUT_NEXT_DROP;

  /* enqueue buffers to the next node */
  dpdk_mbufs_to_buffer_indices (vm, ptd->mbufs, ptd->buffers, n_rx_packets);
  n_left = n_rx_packets;
  buffers = ptd->buffers;

  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
#ifdef CLIB_HAVE_VEC256
  /* fast path: 16 packets at once when they all share the same next */
  while (n_left >= 16 && n_left_to_next >= 16)
      u16x16 next16 = u16x16_load_unaligned (next);
      if (u16x16_is_all_equal (next16, next_index))
	  clib_memcpy (to_next, buffers, 16 * sizeof (u32));
	  n_left_to_next -= 16;
	  /* mixed nexts: fall back to validated x4 enqueue */
	  clib_memcpy (to_next, buffers, 4 * sizeof (u32));
	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
					   n_left_to_next, buffers[0],
					   buffers[1], buffers[2],
					   buffers[3], next[0], next[1],

  while (n_left >= 4 && n_left_to_next >= 4)
      clib_memcpy (to_next, buffers, 4 * sizeof (u32));
      vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
				       n_left_to_next, buffers[0],
				       buffers[1], buffers[2], buffers[3],
				       next[0], next[1], next[2],

  while (n_left && n_left_to_next)
      clib_memcpy (to_next, buffers, 1 * sizeof (u32));
      vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
				       n_left_to_next, buffers[0],

  vlib_put_next_frame (vm, node, next_index, n_left_to_next);

  /* packet trace if enabled */
  if ((n_trace = vlib_get_trace_count (vm, node)))
      n_left = n_rx_packets;
      buffers = ptd->buffers;
      while (n_trace && n_left)
	  b0 = vlib_get_buffer (vm, buffers[0]);
	  vlib_trace_buffer (vm, node, next[0], b0, /* follow_chain */ 0);

	  dpdk_rx_trace_t *t0 = vlib_add_trace (vm, node, b0, sizeof t0[0]);
	  t0->queue_index = queue_id;
	  t0->device_index = xd->device_index;
	  t0->buffer_index = vlib_get_buffer_index (vm, b0);

	  clib_memcpy (&t0->mb, mb[0], sizeof t0->mb);
	  clib_memcpy (&t0->buffer, b0, sizeof b0[0] - sizeof b0->pre_data);
	  clib_memcpy (t0->buffer.pre_data, b0->data,
		       sizeof t0->buffer.pre_data);
	  clib_memcpy (&t0->data, mb[0]->buf_addr + mb[0]->data_off,

      vlib_set_trace_count (vm, node, n_trace);

  /* update combined rx counters for this interface */
  vlib_increment_combined_counter
    (vnet_get_main ()->interface_main.combined_sw_if_counters
     + VNET_INTERFACE_COUNTER_RX, thread_index, xd->sw_if_index,
     n_rx_packets, n_rx_bytes);

  vnet_device_increment_rx_packets (thread_index, n_rx_packets);
/* Node function for dpdk-input: poll every device/queue assigned to
 * this thread, skipping bond slaves (the bond master polls them), then
 * apply the optional poll-rate limit.  Returns total packets received.
 * NOTE(review): the signature continuation (the vlib_frame_t * f
 * parameter), function braces and the return statement are missing from
 * this listing chunk. */
uword CLIB_CPU_OPTIMIZED
CLIB_MULTIARCH_FN (dpdk_input) (vlib_main_t * vm, vlib_node_runtime_t * node,
  dpdk_main_t *dm = &dpdk_main;
  uword n_rx_packets = 0;
  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
  vnet_device_and_queue_t *dq;
  u32 thread_index = node->thread_index;

  /*
   * Poll all devices on this cpu for input/interrupts.
   */
  foreach_device_and_queue (dq, rt->devices_and_queues)
    xd = vec_elt_at_index(dm->devices, dq->dev_instance);
    if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE))
      continue;	/* Do not poll slave to a bonded interface */
    n_rx_packets += dpdk_device_input (vm, dm, xd, node, thread_index,

  poll_rate_limit (dm);
756 #ifndef CLIB_MULTIARCH_VARIANT
758 VLIB_REGISTER_NODE (dpdk_input_node) = {
759 .function = dpdk_input,
760 .type = VLIB_NODE_TYPE_INPUT,
761 .name = "dpdk-input",
762 .sibling_of = "device-input",
764 /* Will be enabled if/when hardware is detected. */
765 .state = VLIB_NODE_STATE_DISABLED,
767 .format_buffer = format_ethernet_header_with_length,
768 .format_trace = format_dpdk_rx_trace,
770 .n_errors = DPDK_N_ERROR,
771 .error_strings = dpdk_error_strings,
775 vlib_node_function_t __clib_weak dpdk_input_avx512;
776 vlib_node_function_t __clib_weak dpdk_input_avx2;
779 static void __clib_constructor
780 dpdk_input_multiarch_select (void)
782 if (dpdk_input_avx512 && clib_cpu_supports_avx512f ())
783 dpdk_input_node.function = dpdk_input_avx512;
784 else if (dpdk_input_avx2 && clib_cpu_supports_avx2 ())
785 dpdk_input_node.function = dpdk_input_avx2;
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */