/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vnet/vnet.h>
#include <vppinfra/vec.h>
#include <vppinfra/error.h>
#include <vppinfra/format.h>
#include <vppinfra/xxhash.h>

#include <vnet/ethernet/ethernet.h>
#include <dpdk/device/dpdk.h>
#include <vnet/classify/vnet_classify.h>
#include <vnet/mpls/packet.h>
#include <vnet/handoff.h>
#include <vnet/devices/devices.h>
#include <vnet/feature/feature.h>

#include <dpdk/device/dpdk_priv.h>
#ifndef CLIB_MULTIARCH_VARIANT
static char *dpdk_error_strings[] = {

STATIC_ASSERT (VNET_DEVICE_INPUT_NEXT_IP4_INPUT - 1 ==
	       VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT,
	       "IP4_INPUT must follow IP4_NCS_INPUT");
  DPDK_RX_F_CKSUM_GOOD = 7,
  DPDK_RX_F_CKSUM_BAD = 4,

/* currently we are just copying bit positions from DPDK, but that
   might change in the future, in case we start to be interested in something
   stored in the upper bytes. Currently we store only the lower byte for perf
   reasons */
STATIC_ASSERT (1 << DPDK_RX_F_CKSUM_GOOD == PKT_RX_IP_CKSUM_GOOD, "");
STATIC_ASSERT (1 << DPDK_RX_F_CKSUM_BAD == PKT_RX_IP_CKSUM_BAD, "");
STATIC_ASSERT (1 << DPDK_RX_F_FDIR == PKT_RX_FDIR, "");
STATIC_ASSERT ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | PKT_RX_FDIR) <
	       256, "dpdk flags not in lower byte, fix needed");
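
/* Illustration (based on DPDK's current rte_mbuf.h bit assignments, which the
   asserts above verify): all of the flags this node cares about live in bits
   0-7 of mb->ol_flags, e.g. PKT_RX_IP_CKSUM_GOOD == 1 << 7 and
   PKT_RX_IP_CKSUM_BAD == 1 << 4, so a plain (u8) cast of ol_flags preserves
   them while keeping the per-packet flags array one byte per packet. */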
dpdk_rx_next (vlib_node_runtime_t * node, u16 etype, u8 flags)
  if (PREDICT_TRUE (etype == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)))
      /* keep it branchless */
      u32 is_good = (flags >> DPDK_RX_F_CKSUM_GOOD) & 1;
      return VNET_DEVICE_INPUT_NEXT_IP4_INPUT - is_good;
  else if (PREDICT_TRUE (etype == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)))
    return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
  else if (PREDICT_TRUE (etype == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)))
    return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
  return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
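
/* Note on the branchless ip4 path above: when the NIC reports
   PKT_RX_IP_CKSUM_GOOD, is_good is 1 and the subtraction lands on
   VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT (ip4-input-no-checksum), which the
   STATIC_ASSERT near the top of this file guarantees sits one slot below
   VNET_DEVICE_INPUT_NEXT_IP4_INPUT; otherwise is_good is 0 and ip4-input
   re-verifies the checksum in software. */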
static_always_inline uword
dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b,
			  struct rte_mbuf * mb, vlib_buffer_free_list_t * fl)
  struct rte_mbuf *mb_seg = 0;
  vlib_buffer_t *b_seg, *b_chain = 0;

  b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
  b->total_length_not_including_first_buffer = 0;

  while (nb_seg < mb->nb_segs)
      b_seg = vlib_buffer_from_rte_mbuf (mb_seg);
      vlib_buffer_init_for_free_list (b_seg, fl);

      ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
      ASSERT (b_seg->current_data == 0);

      /*
       * The driver (e.g. virtio) may not put the packet data at the start
       * of the segment, so don't assume b_seg->current_data == 0 is correct.
       */
      b_seg->current_data =
	(mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data;

      b_seg->current_length = mb_seg->data_len;
      b->total_length_not_including_first_buffer += mb_seg->data_len;

      b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT;
      b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg);

      mb_seg = mb_seg->next;

  return b->total_length_not_including_first_buffer;
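
/* In short: each rte_mbuf segment after the first becomes a chained
   vlib_buffer_t (VLIB_BUFFER_NEXT_PRESENT plus next_buffer pointing at the
   next segment's buffer index), and the sum of those extra segments' lengths
   is returned so the caller can account for the full packet size. */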
static_always_inline void
dpdk_prefetch_mbuf_x4 (struct rte_mbuf *mb[])
  CLIB_PREFETCH (mb[0], CLIB_CACHE_LINE_BYTES, LOAD);
  CLIB_PREFETCH (mb[1], CLIB_CACHE_LINE_BYTES, LOAD);
  CLIB_PREFETCH (mb[2], CLIB_CACHE_LINE_BYTES, LOAD);
  CLIB_PREFETCH (mb[3], CLIB_CACHE_LINE_BYTES, LOAD);

static_always_inline void
dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[])
  b = vlib_buffer_from_rte_mbuf (mb[0]);
  CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
  b = vlib_buffer_from_rte_mbuf (mb[1]);
  CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
  b = vlib_buffer_from_rte_mbuf (mb[2]);
  CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
  b = vlib_buffer_from_rte_mbuf (mb[3]);
  CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);

static_always_inline void
dpdk_prefetch_buffer_data_x4 (struct rte_mbuf *mb[])
  b = vlib_buffer_from_rte_mbuf (mb[0]);
  CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
  b = vlib_buffer_from_rte_mbuf (mb[1]);
  CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
  b = vlib_buffer_from_rte_mbuf (mb[2]);
  CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
  b = vlib_buffer_from_rte_mbuf (mb[3]);
  CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
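
/* These three helpers form the software prefetch pipeline used by the
   processing loops below: first the rte_mbuf header, then the vlib_buffer_t
   sitting right behind it in the same mempool element, and finally the packet
   data itself, each issued a few packets ahead of the code that actually
   touches that memory. */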
poll_rate_limit (dpdk_main_t * dm)
  /* Limit the poll rate by sleeping for N usec between polls */
  if (PREDICT_FALSE (dm->poll_sleep_usec != 0))
      struct timespec ts, tsrem;

      ts.tv_nsec = 1000 * dm->poll_sleep_usec;

      while (nanosleep (&ts, &tsrem) < 0)
/** \brief Main DPDK input node

    This is the main DPDK input node: across each assigned interface,
    call rte_eth_rx_burst(...) or similar to obtain a vector of
    packets to process. Derive @c vlib_buffer_t metadata from
    <code>struct rte_mbuf</code> metadata.
    Depending on the resulting metadata, adjust <code>b->current_data,
    b->current_length</code> and dispatch directly to
    ip4-input-no-checksum, ip6-input or mpls-input. Trace the packet if
    required.

    @param vm   vlib_main_t corresponding to the current thread
    @param node vlib_node_runtime_t
    @param f    vlib_frame_t input-node, not used.

    @par Graph mechanics: buffer metadata, next index usage

    @em Uses:
    - <code>struct rte_mbuf mb->ol_flags</code>
        - PKT_RX_IP_CKSUM_BAD

    @em Sets:
    - <code>b->error</code> if the packet is to be dropped immediately
    - <code>b->current_data, b->current_length</code>
        - adjusted as needed to skip the L2 header in direct-dispatch cases
    - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
        - rx interface sw_if_index
    - <code>vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0</code>
        - required by ipX-lookup
    - <code>b->flags</code>
        - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.

    <em>Next Nodes:</em>
    - Static arcs to: error-drop, ethernet-input,
      ip4-input-no-checksum, ip6-input, mpls-input
    - per-interface redirection, controlled by
      <code>xd->per_interface_next_index</code>
*/
static_always_inline void
dpdk_mbuf_to_buffer_index_x4 (vlib_main_t * vm, struct rte_mbuf **mb,
#ifdef CLIB_HAVE_VEC256
  vlib_buffer_main_t *bm = &buffer_main;
  u64x4 v = *(u64x4 *) mb;
  u32x8 v2, mask = { 0, 2, 4, 6, 1, 3, 5, 7 };

  /* load 4 pointers into 256-bit register */
  v = u64x4_load_unaligned (mb);

  /* vlib_buffer_t is straight after rte_mbuf so advance all 4
     pointers for size of rte_mbuf */
  v += u64x4_splat (sizeof (struct rte_mbuf));

  /* calculate 4 buffer indices in parallel */
  v = (v - u64x4_splat (bm->buffer_mem_start)) >> CLIB_LOG2_CACHE_LINE_BYTES;

  /* permute 256-bit register so lower u32s of each buffer index are
   * placed into lower 128-bits */
  v2 = u32x8_permute ((u32x8) v, mask);

  /* extract lower 128-bits and save them to the array of buffer indices */
  u32x4_store_unaligned (u32x8_extract_lo (v2), buffers);
#else
  /* equivalent non-vector implementation */
  buffers[0] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[0]));
  buffers[1] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[1]));
  buffers[2] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[2]));
  buffers[3] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[3]));
#endif
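
/* Both paths compute the same thing: a vlib_buffer_t lives at a fixed,
   cache-line-aligned offset inside buffer memory, so its index is simply
   (buffer_address - bm->buffer_mem_start) >> CLIB_LOG2_CACHE_LINE_BYTES, and
   the buffer itself sits sizeof (struct rte_mbuf) bytes after its mbuf in the
   same mempool element. The AVX2 variant just performs four of these
   subtract-and-shift operations at once. */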
static_always_inline u8
dpdk_ol_flags_extract (struct rte_mbuf **mb, u8 * flags, int count)
  for (i = 0; i < count; i++)
      /* all flags we are interested in are in the lower 8 bits,
         but that might change */
      flags[i] = (u8) mb[i]->ol_flags;
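
/* The value this helper returns is OR-ed into or_flags by its callers, so
   after a burst has been processed a single test of or_flags answers "does
   any packet in this burst have flag X set" without re-scanning ptd->flags
   (see the checksum handling in dpdk_device_input below). */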
static_always_inline uword
dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd,
		       uword n_rx_packets, int maybe_multiseg, u8 * or_flagsp)
  u32 n_left = n_rx_packets;
  vlib_buffer_free_list_t *fl;
  struct rte_mbuf **mb = ptd->mbufs;
  u8 *flags, or_flags = 0;

  fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);

      CLIB_PREFETCH (mb + 8, CLIB_CACHE_LINE_BYTES, LOAD);

      dpdk_prefetch_buffer_x4 (mb + 4);

      b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
      b[1] = vlib_buffer_from_rte_mbuf (mb[1]);
      b[2] = vlib_buffer_from_rte_mbuf (mb[2]);
      b[3] = vlib_buffer_from_rte_mbuf (mb[3]);

      clib_memcpy64_x4 (b[0], b[1], b[2], b[3], &ptd->buffer_template);

      dpdk_prefetch_mbuf_x4 (mb + 4);

      or_flags |= dpdk_ol_flags_extract (mb, flags, 4);

      /* we temporarily store the relative offset of the ethertype in next[x]
         so we can prefetch it and fetch it faster later */

      off = mb[0]->data_off;
      next[0] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
      off -= RTE_PKTMBUF_HEADROOM;
      vnet_buffer (b[0])->l2_hdr_offset = off;
      b[0]->current_data = off;

      off = mb[1]->data_off;
      next[1] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
      off -= RTE_PKTMBUF_HEADROOM;
      vnet_buffer (b[1])->l2_hdr_offset = off;
      b[1]->current_data = off;

      off = mb[2]->data_off;
      next[2] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
      off -= RTE_PKTMBUF_HEADROOM;
      vnet_buffer (b[2])->l2_hdr_offset = off;
      b[2]->current_data = off;

      off = mb[3]->data_off;
      next[3] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
      off -= RTE_PKTMBUF_HEADROOM;
      vnet_buffer (b[3])->l2_hdr_offset = off;
      b[3]->current_data = off;

      b[0]->current_length = mb[0]->data_len;
      b[1]->current_length = mb[1]->data_len;
      b[2]->current_length = mb[2]->data_len;
      b[3]->current_length = mb[3]->data_len;

      n_bytes += mb[0]->data_len;
      n_bytes += mb[1]->data_len;
      n_bytes += mb[2]->data_len;
      n_bytes += mb[3]->data_len;

      n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], fl);
      n_bytes += dpdk_process_subseq_segs (vm, b[1], mb[1], fl);
      n_bytes += dpdk_process_subseq_segs (vm, b[2], mb[2], fl);
      n_bytes += dpdk_process_subseq_segs (vm, b[3], mb[3], fl);

      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]);

      b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
      clib_memcpy (b[0], &ptd->buffer_template, 64);
      or_flags |= dpdk_ol_flags_extract (mb, flags, 1);

      off = mb[0]->data_off;
      next[0] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
      off -= RTE_PKTMBUF_HEADROOM;
      vnet_buffer (b[0])->l2_hdr_offset = off;
      b[0]->current_data = off;
      b[0]->current_length = mb[0]->data_len;
      n_bytes += mb[0]->data_len;

      n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], fl);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);

  *or_flagsp = or_flags;
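
/* On return: the buffer template has been copied into every buffer,
   ptd->flags holds the truncated ol_flags of each packet, ptd->next
   temporarily holds each packet's ethertype offset (consumed by
   dpdk_set_next_from_etype below), *or_flagsp is the OR of all flag bytes,
   and the accumulated byte count is handed back for the rx counters. */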
static_always_inline void
dpdk_set_next_from_etype (vlib_main_t * vm, vlib_node_runtime_t * node,
			  dpdk_per_thread_data_t * ptd, uword n_rx_packets)
  struct rte_mbuf **mb = ptd->mbufs;
  u8 *flags = ptd->flags;
  u16 *next = ptd->next;
  u32 n_left = n_rx_packets;

      dpdk_prefetch_buffer_data_x4 (mb + 8);
      dpdk_prefetch_buffer_x4 (mb + 8);

      b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
      b[1] = vlib_buffer_from_rte_mbuf (mb[1]);
      b[2] = vlib_buffer_from_rte_mbuf (mb[2]);
      b[3] = vlib_buffer_from_rte_mbuf (mb[3]);
      etype[0] = *(u16 *) ((u8 *) mb[0] + next[0] + sizeof (vlib_buffer_t));
      etype[1] = *(u16 *) ((u8 *) mb[1] + next[1] + sizeof (vlib_buffer_t));
      etype[2] = *(u16 *) ((u8 *) mb[2] + next[2] + sizeof (vlib_buffer_t));
      etype[3] = *(u16 *) ((u8 *) mb[3] + next[3] + sizeof (vlib_buffer_t));
      next[0] = dpdk_rx_next (node, etype[0], flags[0]);
      next[1] = dpdk_rx_next (node, etype[1], flags[1]);
      next[2] = dpdk_rx_next (node, etype[2], flags[2]);
      next[3] = dpdk_rx_next (node, etype[3], flags[3]);
      adv[0] = device_input_next_node_advance[next[0]];
      adv[1] = device_input_next_node_advance[next[1]];
      adv[2] = device_input_next_node_advance[next[2]];
      adv[3] = device_input_next_node_advance[next[3]];
      b[0]->current_data += adv[0];
      b[1]->current_data += adv[1];
      b[2]->current_data += adv[2];
      b[3]->current_data += adv[3];
      b[0]->current_length -= adv[0];
      b[1]->current_length -= adv[1];
      b[2]->current_length -= adv[2];
      b[3]->current_length -= adv[3];

      b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
      next[0] = *(u16 *) ((u8 *) mb[0] + next[0] + sizeof (vlib_buffer_t));
      next[0] = dpdk_rx_next (node, next[0], flags[0]);
      adv[0] = device_input_next_node_advance[next[0]];
      b[0]->current_data += adv[0];
      b[0]->current_length -= adv[0];
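
/* device_input_next_node_advance[] gives, per next node, how many bytes to
   advance past before handing the packet over: the L2 (ethernet) header for
   the direct ip4/ip6/mpls-input arcs and zero for ethernet-input, which is
   why current_data grows and current_length shrinks by the same amount. */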
static_always_inline u32
dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
		   vlib_node_runtime_t * node, u32 thread_index, u16 queue_id)
  uword n_rx_packets = 0, n_rx_bytes;
  u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
  struct rte_mbuf **mb;
  dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data,
  vlib_buffer_t *bt = &ptd->buffer_template;

  if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)

  /* get up to DPDK_RX_BURST_SZ buffers from PMD */
  while (n_rx_packets < DPDK_RX_BURST_SZ)
      n = rte_eth_rx_burst (xd->device_index, queue_id,
			    ptd->mbufs + n_rx_packets,
			    DPDK_RX_BURST_SZ - n_rx_packets);

  if (n_rx_packets == 0)
  /* Update buffer template */
  vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->sw_if_index;
  bt->error = node->errors[DPDK_ERROR_NONE];
  /* as DPDK allocates empty buffers from the mempool provided for each queue
     before the interface is started, it is safe to store this in the
     template */
  bt->buffer_pool_index = xd->buffer_pool_for_queue[queue_id];
  /* honour per-interface redirection of all packets to a single next node */
  if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
      next_index = xd->per_interface_next_index;
  /* as all packets belong to the same interface, the feature arc lookup
     can be done once and the result stored in the buffer template */
  if (PREDICT_FALSE (vnet_device_input_have_features (xd->sw_if_index)))
      vnet_feature_start_device_input_x1 (xd->sw_if_index, &next_index, bt);

  if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG)
    n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 1, &or_flags);
  else
    n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 0, &or_flags);

  if (PREDICT_FALSE (known_next))
      for (n = 0; n < n_rx_packets; n++)
	ptd->next[n] = next_index;

      vnet_buffer (bt)->feature_arc_index = 0;
      bt->current_config_index = 0;
  else
    dpdk_set_next_from_etype (vm, node, ptd, n_rx_packets);

  /* is at least one packet marked as ip4 checksum bad? */
  if (PREDICT_FALSE (or_flags & (1 << DPDK_RX_F_CKSUM_BAD)))
    for (n = 0; n < n_rx_packets; n++)
	if ((ptd->flags[n] & (1 << DPDK_RX_F_CKSUM_BAD)) == 0)
	  continue;
	if (ptd->next[n] != VNET_DEVICE_INPUT_NEXT_IP4_INPUT)
	  continue;

	b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
	b0->error = node->errors[DPDK_ERROR_IP_CHECKSUM_ERROR];
	ptd->next[n] = VNET_DEVICE_INPUT_NEXT_DROP;
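
/* A bad hardware ip4 checksum is only acted on here for packets whose next
   node is ip4-input: those are rewritten to error-drop before enqueue, while
   packets on other arcs (including feature arcs and redirected interfaces)
   keep their next index. */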
  /* enqueue buffers to the next node */
  n_left = n_rx_packets;
  buffers = ptd->buffers;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
#ifdef CLIB_HAVE_VEC256
      while (n_left >= 16 && n_left_to_next >= 16)
	  u16x16 next16 = u16x16_load_unaligned (next);
	  if (u16x16_is_all_equal (next16, next_index))
	      dpdk_mbuf_to_buffer_index_x4 (vm, mb, buffers);
	      dpdk_mbuf_to_buffer_index_x4 (vm, mb + 4, buffers + 4);
	      dpdk_mbuf_to_buffer_index_x4 (vm, mb + 8, buffers + 8);
	      dpdk_mbuf_to_buffer_index_x4 (vm, mb + 12, buffers + 12);
	      clib_memcpy (to_next, buffers, 16 * sizeof (u32));
	      n_left_to_next -= 16;

	      dpdk_mbuf_to_buffer_index_x4 (vm, mb, buffers);
	      clib_memcpy (to_next, buffers, 4 * sizeof (u32));

	      vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
					       n_left_to_next, buffers[0],
					       buffers[1], buffers[2],
					       buffers[3], next[0], next[1],

      while (n_left >= 4 && n_left_to_next >= 4)
	  dpdk_mbuf_to_buffer_index_x4 (vm, mb, buffers);
	  clib_memcpy (to_next, buffers, 4 * sizeof (u32));

	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
					   n_left_to_next, buffers[0],
					   buffers[1], buffers[2], buffers[3],
					   next[0], next[1], next[2],

      while (n_left && n_left_to_next)
	  to_next[0] = buffers[0] =
	    vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[0]));

	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
					   n_left_to_next, buffers[0],

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
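
/* Enqueue strategy: when AVX2 is available and all 16 next indices in a group
   equal the frame's next_index, buffer indices are computed four at a time
   and copied into the frame wholesale; otherwise the generic
   vlib_validate_buffer_enqueue_x4/x1 helpers take over, since they know how
   to split off packets whose next index differs from the current frame. */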
  /* packet trace if enabled */
  if ((n_trace = vlib_get_trace_count (vm, node)))
      n_left = n_rx_packets;
      buffers = ptd->buffers;

      while (n_trace && n_left)
	  b0 = vlib_get_buffer (vm, buffers[0]);
	  vlib_trace_buffer (vm, node, next[0], b0, /* follow_chain */ 0);

	  dpdk_rx_trace_t *t0 = vlib_add_trace (vm, node, b0, sizeof t0[0]);
	  t0->queue_index = queue_id;
	  t0->device_index = xd->device_index;
	  t0->buffer_index = vlib_get_buffer_index (vm, b0);

	  clib_memcpy (&t0->mb, mb[0], sizeof t0->mb);
	  clib_memcpy (&t0->buffer, b0, sizeof b0[0] - sizeof b0->pre_data);
	  clib_memcpy (t0->buffer.pre_data, b0->data,
		       sizeof t0->buffer.pre_data);
	  clib_memcpy (&t0->data, mb[0]->buf_addr + mb[0]->data_off,

      vlib_set_trace_count (vm, node, n_trace);

  vlib_increment_combined_counter
    (vnet_get_main ()->interface_main.combined_sw_if_counters
     + VNET_INTERFACE_COUNTER_RX, thread_index, xd->sw_if_index,
     n_rx_packets, n_rx_bytes);

  vnet_device_increment_rx_packets (thread_index, n_rx_packets);
uword CLIB_CPU_OPTIMIZED
CLIB_MULTIARCH_FN (dpdk_input) (vlib_main_t * vm, vlib_node_runtime_t * node,
  dpdk_main_t *dm = &dpdk_main;
  uword n_rx_packets = 0;
  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
  vnet_device_and_queue_t *dq;
  u32 thread_index = node->thread_index;

  /*
   * Poll all devices on this cpu for input/interrupts.
   */
  foreach_device_and_queue (dq, rt->devices_and_queues)
    xd = vec_elt_at_index (dm->devices, dq->dev_instance);
    if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE))
      continue;		/* Do not poll slave to a bonded interface */
    n_rx_packets += dpdk_device_input (vm, dm, xd, node, thread_index,

  poll_rate_limit (dm);
#ifndef CLIB_MULTIARCH_VARIANT
VLIB_REGISTER_NODE (dpdk_input_node) = {
  .function = dpdk_input,
  .type = VLIB_NODE_TYPE_INPUT,
  .name = "dpdk-input",
  .sibling_of = "device-input",

  /* Will be enabled if/when hardware is detected. */
  .state = VLIB_NODE_STATE_DISABLED,

  .format_buffer = format_ethernet_header_with_length,
  .format_trace = format_dpdk_rx_trace,

  .n_errors = DPDK_N_ERROR,
  .error_strings = dpdk_error_strings,
vlib_node_function_t __clib_weak dpdk_input_avx512;
vlib_node_function_t __clib_weak dpdk_input_avx2;

static void __clib_constructor
dpdk_input_multiarch_select (void)
  if (dpdk_input_avx512 && clib_cpu_supports_avx512f ())
    dpdk_input_node.function = dpdk_input_avx512;
  else if (dpdk_input_avx2 && clib_cpu_supports_avx2 ())
    dpdk_input_node.function = dpdk_input_avx2;
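
/* This constructor runs when the plugin is loaded: if an AVX-512 or AVX2
   variant of dpdk_input was built (the weak symbols above are non-NULL) and
   the CPU supports the corresponding instruction set, the node's function
   pointer is patched to the fastest usable variant before packet processing
   starts. */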
/*
 * fd.io coding-style-patch-verification: ON
 * eval: (c-set-style "gnu")
 */