/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vnet/vnet.h>
#include <vppinfra/vec.h>
#include <vppinfra/error.h>
#include <vppinfra/format.h>
#include <vppinfra/xxhash.h>

#include <vnet/ethernet/ethernet.h>
#include <dpdk/device/dpdk.h>
#include <vnet/classify/vnet_classify.h>
#include <vnet/mpls/packet.h>
#include <vnet/handoff.h>
#include <vnet/devices/devices.h>
#include <vnet/feature/feature.h>

#include <dpdk/device/dpdk_priv.h>
#ifndef CLIB_MULTIARCH_VARIANT
static char *dpdk_error_strings[] = {
#define _(n,s) s,
  foreach_dpdk_error
#undef _
};
#endif

STATIC_ASSERT (VNET_DEVICE_INPUT_NEXT_IP4_INPUT - 1 ==
	       VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT,
	       "IP4_INPUT must follow IP4_NCS_INPUT");
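/* The assert above is what allows dpdk_rx_next() below to be branchless
   for IPv4: when the NIC has already validated the IP checksum,
   subtracting 1 from VNET_DEVICE_INPUT_NEXT_IP4_INPUT selects
   VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT (ip4-input-no-checksum) instead. */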
enum
{
  DPDK_RX_F_CKSUM_GOOD = 7,
  DPDK_RX_F_CKSUM_BAD = 4,
  DPDK_RX_F_FDIR = 2,
};
/* currently we are just copying bit positions from DPDK, but that
   might change in the future, in case we start to be interested in something
   stored in the upper bytes. Currently we store only the lower byte for perf
   reasons */
STATIC_ASSERT (1 << DPDK_RX_F_CKSUM_GOOD == PKT_RX_IP_CKSUM_GOOD, "");
STATIC_ASSERT (1 << DPDK_RX_F_CKSUM_BAD == PKT_RX_IP_CKSUM_BAD, "");
STATIC_ASSERT (1 << DPDK_RX_F_FDIR == PKT_RX_FDIR, "");
STATIC_ASSERT ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | PKT_RX_FDIR) <
	       256, "dpdk flags not in lower byte, fix needed");
static_always_inline u32
dpdk_rx_next (vlib_node_runtime_t * node, u16 etype, u8 flags)
{
  if (PREDICT_TRUE (etype == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)))
    {
      /* keep it branchless */
      u32 is_good = (flags >> DPDK_RX_F_CKSUM_GOOD) & 1;
      return VNET_DEVICE_INPUT_NEXT_IP4_INPUT - is_good;
    }
  else if (PREDICT_TRUE (etype == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)))
    return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
  else if (PREDICT_TRUE (etype == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)))
    return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
  else
    return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
}
static_always_inline uword
dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b,
			  struct rte_mbuf * mb, vlib_buffer_free_list_t * fl)
{
  u8 nb_seg = 1;
  struct rte_mbuf *mb_seg = 0;
  vlib_buffer_t *b_seg, *b_chain = 0;
  mb_seg = mb->next;
  b_chain = b;

  if (mb->nb_segs < 2)
    return 0;

  b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
  b->total_length_not_including_first_buffer = 0;

  while (nb_seg < mb->nb_segs)
    {
      b_seg = vlib_buffer_from_rte_mbuf (mb_seg);
      vlib_buffer_init_for_free_list (b_seg, fl);

      ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
      ASSERT (b_seg->current_data == 0);

      /*
       * The driver (e.g. virtio) may not put the packet data at the start
       * of the segment, so don't assume b_seg->current_data == 0 is correct.
       */
      b_seg->current_data =
	(mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data;

      b_seg->current_length = mb_seg->data_len;
      b->total_length_not_including_first_buffer += mb_seg->data_len;

      b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT;
      b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg);

      b_chain = b_seg;
      mb_seg = mb_seg->next;
      nb_seg++;
    }
  return b->total_length_not_including_first_buffer;
}
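/* Prefetch helpers used by the burst-processing loops below: the rte_mbuf
   header, the vlib_buffer_t that follows it and the packet data live in
   separate cache lines, so the loops prefetch them a few packets ahead of
   the ones currently being processed. */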
static_always_inline void
dpdk_prefetch_mbuf_x4 (struct rte_mbuf *mb[])
{
  CLIB_PREFETCH (mb[0], CLIB_CACHE_LINE_BYTES, LOAD);
  CLIB_PREFETCH (mb[1], CLIB_CACHE_LINE_BYTES, LOAD);
  CLIB_PREFETCH (mb[2], CLIB_CACHE_LINE_BYTES, LOAD);
  CLIB_PREFETCH (mb[3], CLIB_CACHE_LINE_BYTES, LOAD);
}

static_always_inline void
dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[])
{
  vlib_buffer_t *b;
  b = vlib_buffer_from_rte_mbuf (mb[0]);
  CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
  b = vlib_buffer_from_rte_mbuf (mb[1]);
  CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
  b = vlib_buffer_from_rte_mbuf (mb[2]);
  CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
  b = vlib_buffer_from_rte_mbuf (mb[3]);
  CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
}

static_always_inline void
dpdk_prefetch_buffer_data_x4 (struct rte_mbuf *mb[])
{
  vlib_buffer_t *b;
  b = vlib_buffer_from_rte_mbuf (mb[0]);
  CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
  b = vlib_buffer_from_rte_mbuf (mb[1]);
  CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
  b = vlib_buffer_from_rte_mbuf (mb[2]);
  CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
  b = vlib_buffer_from_rte_mbuf (mb[3]);
  CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
}
static inline void
poll_rate_limit (dpdk_main_t * dm)
{
  /* Limit the poll rate by sleeping for N usec between polls */
  if (PREDICT_FALSE (dm->poll_sleep_usec != 0))
    {
      struct timespec ts, tsrem;
      ts.tv_sec = 0;
      ts.tv_nsec = 1000 * dm->poll_sleep_usec;
      while (nanosleep (&ts, &tsrem) < 0)
	ts = tsrem;	/* retry with the remaining time if interrupted */
    }
}
/** \brief Main DPDK input node

    @node dpdk-input

    This is the main DPDK input node: across each assigned interface,
    call rte_eth_rx_burst(...) or similar to obtain a vector of
    packets to process. Derive @c vlib_buffer_t metadata from
    <code>struct rte_mbuf</code> metadata.
    Depending on the resulting metadata, adjust <code>b->current_data,
    b->current_length</code> and dispatch directly to
    ip4-input-no-checksum or ip6-input. Trace the packet if required.

    @param vm   vlib_main_t corresponding to the current thread
    @param node vlib_node_runtime_t
    @param f    vlib_frame_t input-node, not used.

    @par Graph mechanics: buffer metadata, next index usage

    @em Uses:
    - <code>struct rte_mbuf mb->ol_flags</code>
        - PKT_RX_IP_CKSUM_BAD

    @em Sets:
    - <code>b->error</code> if the packet is to be dropped immediately
    - <code>b->current_data, b->current_length</code>
        - adjusted as needed to skip the L2 header in direct-dispatch cases
    - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
        - rx interface sw_if_index
    - <code>vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0</code>
        - required by ipX-lookup
    - <code>b->flags</code>
        - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.

    <em>Next Nodes:</em>
    - Static arcs to: error-drop, ethernet-input,
      ip4-input-no-checksum, ip6-input, mpls-input
    - per-interface redirection, controlled by
      <code>xd->per_interface_next_index</code>
*/
static_always_inline void
dpdk_mbufs_to_buffer_indices (vlib_main_t * vm, struct rte_mbuf **mb,
			      u32 * bi, uword n_left)
{
#ifdef CLIB_HAVE_VEC256
  u32x8 mask = { 0, 2, 4, 6, 1, 3, 5, 7 };
  u64x4 off4 = u64x4_splat (buffer_main.buffer_mem_start -
			    sizeof (struct rte_mbuf));
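  /* A vlib buffer index is the offset of the vlib_buffer_t from
     buffer_mem_start, counted in cache lines -- this is what
     vlib_get_buffer_index() computes in the scalar fallback below.
     Because the vlib_buffer_t sits immediately after the rte_mbuf,
     subtracting off4 from an mbuf pointer yields exactly that byte
     offset before the shift by CLIB_LOG2_CACHE_LINE_BYTES. */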
#endif

  while (n_left >= 8)
    {
#ifdef CLIB_HAVE_VEC256
      /* load 4 pointers into 256-bit register */
      u64x4 v0 = u64x4_load_unaligned (mb);
      u64x4 v1 = u64x4_load_unaligned (mb + 4);
      u32x8 v2, v3;

      /* calculate 4 buffer indices in parallel
         vlib_buffer_t is straight after rte_mbuf so advance all 4
         pointers for size of rte_mbuf */
      v0 -= off4;
      v1 -= off4;

      v0 >>= CLIB_LOG2_CACHE_LINE_BYTES;
      v1 >>= CLIB_LOG2_CACHE_LINE_BYTES;

      /* permute 256-bit register so lower u32s of each buffer index are
       * placed into lower 128-bits */
      v2 = u32x8_permute ((u32x8) v0, mask);
      v3 = u32x8_permute ((u32x8) v1, mask);

      /* extract lower 128-bits and save them to the array of buffer indices */
      u32x4_store_unaligned (u32x8_extract_lo (v2), bi);
      u32x4_store_unaligned (u32x8_extract_lo (v3), bi + 4);
#else
      /* equivalent non-vector implementation */
      bi[0] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[0]));
      bi[1] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[1]));
      bi[2] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[2]));
      bi[3] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[3]));
      bi[4] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[4]));
      bi[5] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[5]));
      bi[6] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[6]));
      bi[7] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[7]));
#endif
      mb += 8;
      bi += 8;
      n_left -= 8;
    }

  while (n_left)
    {
      bi[0] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[0]));
      mb += 1, bi += 1, n_left -= 1;
    }
}
static_always_inline u8
dpdk_ol_flags_extract (struct rte_mbuf **mb, u8 * flags, int count)
{
  u8 rv = 0;
  int i;
  for (i = 0; i < count; i++)
    {
      /* all flags we are interested in are in the lower 8 bits,
         but that might change */
      flags[i] = (u8) mb[i]->ol_flags;
      rv |= flags[i];
    }
  return rv;
}
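/* Convert a burst of mbufs into initialized vlib buffers: copy the
   per-thread buffer template into each vlib_buffer_t, set current_data
   and current_length from the mbuf, stash the byte offset of the
   ethertype in ptd->next[] for dpdk_set_next_from_etype() to use later,
   chain any extra segments when maybe_multiseg is set, and accumulate
   the total byte count (returned) plus the OR of the per-packet offload
   flags (*or_flagsp). */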
static_always_inline uword
dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd,
		       uword n_rx_packets, int maybe_multiseg, u8 * or_flagsp)
{
  u32 n_left = n_rx_packets;
  vlib_buffer_free_list_t *fl;
  struct rte_mbuf **mb = ptd->mbufs;
  u8 *flags, or_flags = 0;

  fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
      CLIB_PREFETCH (mb + 8, CLIB_CACHE_LINE_BYTES, LOAD);

      dpdk_prefetch_buffer_x4 (mb + 4);

      b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
      b[1] = vlib_buffer_from_rte_mbuf (mb[1]);
      b[2] = vlib_buffer_from_rte_mbuf (mb[2]);
      b[3] = vlib_buffer_from_rte_mbuf (mb[3]);

      clib_memcpy64_x4 (b[0], b[1], b[2], b[3], &ptd->buffer_template);

      dpdk_prefetch_mbuf_x4 (mb + 4);

      or_flags |= dpdk_ol_flags_extract (mb, flags, 4);

      /* we temporarily store the relative offset of the ethertype in
         next[x] so we can prefetch it and read it faster later */

      off = mb[0]->data_off;
      next[0] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
      off -= RTE_PKTMBUF_HEADROOM;
      vnet_buffer (b[0])->l2_hdr_offset = off;
      b[0]->current_data = off;

      off = mb[1]->data_off;
      next[1] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
      off -= RTE_PKTMBUF_HEADROOM;
      vnet_buffer (b[1])->l2_hdr_offset = off;
      b[1]->current_data = off;

      off = mb[2]->data_off;
      next[2] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
      off -= RTE_PKTMBUF_HEADROOM;
      vnet_buffer (b[2])->l2_hdr_offset = off;
      b[2]->current_data = off;

      off = mb[3]->data_off;
      next[3] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
      off -= RTE_PKTMBUF_HEADROOM;
      vnet_buffer (b[3])->l2_hdr_offset = off;
      b[3]->current_data = off;

      b[0]->current_length = mb[0]->data_len;
      b[1]->current_length = mb[1]->data_len;
      b[2]->current_length = mb[2]->data_len;
      b[3]->current_length = mb[3]->data_len;

      n_bytes += mb[0]->data_len;
      n_bytes += mb[1]->data_len;
      n_bytes += mb[2]->data_len;
      n_bytes += mb[3]->data_len;

      if (maybe_multiseg)
	{
	  n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], fl);
	  n_bytes += dpdk_process_subseq_segs (vm, b[1], mb[1], fl);
	  n_bytes += dpdk_process_subseq_segs (vm, b[2], mb[2], fl);
	  n_bytes += dpdk_process_subseq_segs (vm, b[3], mb[3], fl);
	}

      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]);
      b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
      clib_memcpy (b[0], &ptd->buffer_template, 64);
      or_flags |= dpdk_ol_flags_extract (mb, flags, 1);

      off = mb[0]->data_off;
      next[0] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
      off -= RTE_PKTMBUF_HEADROOM;
      vnet_buffer (b[0])->l2_hdr_offset = off;
      b[0]->current_data = off;
      b[0]->current_length = mb[0]->data_len;
      n_bytes += mb[0]->data_len;

      if (maybe_multiseg)
	n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], fl);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
  *or_flagsp = or_flags;
  return n_bytes;
}
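/* Resolve the next node for each packet from its ethertype: read the
   ethertype through the offset stashed in ptd->next[] by
   dpdk_process_rx_burst(), map it to a next node with dpdk_rx_next(),
   then advance current_data and shrink current_length by the
   per-next-node advance so the ip4/ip6/mpls input nodes see their
   header directly. */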
static_always_inline void
dpdk_set_next_from_etype (vlib_main_t * vm, vlib_node_runtime_t * node,
			  dpdk_per_thread_data_t * ptd, uword n_rx_packets)
{
  struct rte_mbuf **mb = ptd->mbufs;
  u8 *flags = ptd->flags;
  u16 *next = ptd->next;
  u32 n_left = n_rx_packets;

      dpdk_prefetch_buffer_data_x4 (mb + 8);
      dpdk_prefetch_buffer_x4 (mb + 8);

      b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
      b[1] = vlib_buffer_from_rte_mbuf (mb[1]);
      b[2] = vlib_buffer_from_rte_mbuf (mb[2]);
      b[3] = vlib_buffer_from_rte_mbuf (mb[3]);
      etype[0] = *(u16 *) ((u8 *) mb[0] + next[0] + sizeof (vlib_buffer_t));
      etype[1] = *(u16 *) ((u8 *) mb[1] + next[1] + sizeof (vlib_buffer_t));
      etype[2] = *(u16 *) ((u8 *) mb[2] + next[2] + sizeof (vlib_buffer_t));
      etype[3] = *(u16 *) ((u8 *) mb[3] + next[3] + sizeof (vlib_buffer_t));
      next[0] = dpdk_rx_next (node, etype[0], flags[0]);
      next[1] = dpdk_rx_next (node, etype[1], flags[1]);
      next[2] = dpdk_rx_next (node, etype[2], flags[2]);
      next[3] = dpdk_rx_next (node, etype[3], flags[3]);
      adv[0] = device_input_next_node_advance[next[0]];
      adv[1] = device_input_next_node_advance[next[1]];
      adv[2] = device_input_next_node_advance[next[2]];
      adv[3] = device_input_next_node_advance[next[3]];
      b[0]->current_data += adv[0];
      b[1]->current_data += adv[1];
      b[2]->current_data += adv[2];
      b[3]->current_data += adv[3];
      b[0]->current_length -= adv[0];
      b[1]->current_length -= adv[1];
      b[2]->current_length -= adv[2];
      b[3]->current_length -= adv[3];

      b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
      next[0] = *(u16 *) ((u8 *) mb[0] + next[0] + sizeof (vlib_buffer_t));
      next[0] = dpdk_rx_next (node, next[0], flags[0]);
      adv[0] = device_input_next_node_advance[next[0]];
      b[0]->current_data += adv[0];
      b[0]->current_length -= adv[0];
static_always_inline u32
dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
		   vlib_node_runtime_t * node, u32 thread_index, u16 queue_id)
{
  uword n_rx_packets = 0, n_rx_bytes;
  u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
  struct rte_mbuf **mb;
  dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data,
						  thread_index);
  vlib_buffer_t *bt = &ptd->buffer_template;

  if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
    return 0;
  /* get up to DPDK_RX_BURST_SZ buffers from PMD */
  while (n_rx_packets < DPDK_RX_BURST_SZ)
    {
      n = rte_eth_rx_burst (xd->device_index, queue_id,
			    ptd->mbufs + n_rx_packets,
			    DPDK_RX_BURST_SZ - n_rx_packets);
      n_rx_packets += n;

      if (n < 32)
	break;
    }

  if (n_rx_packets == 0)
    return 0;
  /* Update buffer template */
  vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->sw_if_index;
  bt->error = node->errors[DPDK_ERROR_NONE];
  /* as DPDK allocates the empty buffers for each queue from the mempool
     provided before the interface is started, it is safe to store the
     pool index in the template */
  bt->buffer_pool_index = xd->buffer_pool_for_queue[queue_id];

  /* use the per-interface redirect next node if one is configured */
  if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
    {
      known_next = 1;
      next_index = xd->per_interface_next_index;
    }
  /* as all packets belong to the same interface, the feature arc lookup
     can be done once and the result stored in the buffer template */
  if (PREDICT_FALSE (vnet_device_input_have_features (xd->sw_if_index)))
    {
      vnet_feature_start_device_input_x1 (xd->sw_if_index, &next_index, bt);
      known_next = 1;
    }

  if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG)
    n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 1, &or_flags);
  else
    n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 0, &or_flags);
  if (PREDICT_FALSE (known_next))
    {
      for (n = 0; n < n_rx_packets; n++)
	ptd->next[n] = next_index;

      vnet_buffer (bt)->feature_arc_index = 0;
      bt->current_config_index = 0;
    }
  else
    dpdk_set_next_from_etype (vm, node, ptd, n_rx_packets);
  /* is at least one packet marked as ip4 checksum bad? */
  if (PREDICT_FALSE (or_flags & (1 << DPDK_RX_F_CKSUM_BAD)))
    for (n = 0; n < n_rx_packets; n++)
      {
	if ((ptd->flags[n] & (1 << DPDK_RX_F_CKSUM_BAD)) == 0)
	  continue;

	if (ptd->next[n] != VNET_DEVICE_INPUT_NEXT_IP4_INPUT)
	  continue;

	b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
	b0->error = node->errors[DPDK_ERROR_IP_CHECKSUM_ERROR];
	ptd->next[n] = VNET_DEVICE_INPUT_NEXT_DROP;
      }
  /* enqueue buffers to the next node */
  dpdk_mbufs_to_buffer_indices (vm, ptd->mbufs, ptd->buffers, n_rx_packets);
  n_left = n_rx_packets;
  next = ptd->next;
  buffers = ptd->buffers;

  while (n_left)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
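      /* Fast path: with AVX2, if the next 16 entries of next[] all match
	 next_index, the 16 buffer indices are copied into the frame
	 wholesale; otherwise fall back to the validating x4/x1 enqueue
	 helpers, which fix up any packet whose next index differs. */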
#ifdef CLIB_HAVE_VEC256
      while (n_left >= 16 && n_left_to_next >= 16)
	{
	  u16x16 next16 = u16x16_load_unaligned (next);
	  if (u16x16_is_all_equal (next16, next_index))
	    {
	      clib_memcpy (to_next, buffers, 16 * sizeof (u32));
	      to_next += 16, n_left_to_next -= 16;
	      buffers += 16, next += 16, n_left -= 16;
	    }
	  else
	    {
	      clib_memcpy (to_next, buffers, 4 * sizeof (u32));
	      to_next += 4, n_left_to_next -= 4;
	      vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
					       n_left_to_next, buffers[0],
					       buffers[1], buffers[2],
					       buffers[3], next[0], next[1],
					       next[2], next[3]);
	      buffers += 4, next += 4, n_left -= 4;
	    }
	}
#endif
      while (n_left >= 4 && n_left_to_next >= 4)
	{
	  clib_memcpy (to_next, buffers, 4 * sizeof (u32));
	  to_next += 4, n_left_to_next -= 4;
	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
					   n_left_to_next, buffers[0],
					   buffers[1], buffers[2], buffers[3],
					   next[0], next[1], next[2],
					   next[3]);
	  buffers += 4, next += 4, n_left -= 4;
	}
      while (n_left && n_left_to_next)
	{
	  clib_memcpy (to_next, buffers, 1 * sizeof (u32));
	  to_next += 1, n_left_to_next -= 1;
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
					   n_left_to_next, buffers[0],
					   next[0]);
	  buffers += 1, next += 1, n_left -= 1;
	}
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  /* packet trace if enabled */
  if ((n_trace = vlib_get_trace_count (vm, node)))
    {
      n_left = n_rx_packets;
      buffers = ptd->buffers;
      mb = ptd->mbufs;
      next = ptd->next;

      while (n_trace && n_left)
	{
	  b0 = vlib_get_buffer (vm, buffers[0]);
	  vlib_trace_buffer (vm, node, next[0], b0, /* follow_chain */ 0);

	  dpdk_rx_trace_t *t0 = vlib_add_trace (vm, node, b0, sizeof t0[0]);
	  t0->queue_index = queue_id;
	  t0->device_index = xd->device_index;
	  t0->buffer_index = vlib_get_buffer_index (vm, b0);

	  clib_memcpy (&t0->mb, mb[0], sizeof t0->mb);
	  clib_memcpy (&t0->buffer, b0, sizeof b0[0] - sizeof b0->pre_data);
	  clib_memcpy (t0->buffer.pre_data, b0->data,
		       sizeof t0->buffer.pre_data);
	  clib_memcpy (&t0->data, mb[0]->buf_addr + mb[0]->data_off,
		       sizeof t0->data);

	  n_trace--, n_left--, buffers++, mb++, next++;
	}
      vlib_set_trace_count (vm, node, n_trace);
    }
  vlib_increment_combined_counter
    (vnet_get_main ()->interface_main.combined_sw_if_counters
     + VNET_INTERFACE_COUNTER_RX, thread_index, xd->sw_if_index,
     n_rx_packets, n_rx_bytes);

  vnet_device_increment_rx_packets (thread_index, n_rx_packets);

  return n_rx_packets;
}
uword CLIB_CPU_OPTIMIZED
CLIB_MULTIARCH_FN (dpdk_input) (vlib_main_t * vm, vlib_node_runtime_t * node,
				vlib_frame_t * f)
{
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd;
  uword n_rx_packets = 0;
  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
  vnet_device_and_queue_t *dq;
  u32 thread_index = node->thread_index;
  /*
   * Poll all devices on this cpu for input/interrupts.
   */
  foreach_device_and_queue (dq, rt->devices_and_queues)
    {
      xd = vec_elt_at_index (dm->devices, dq->dev_instance);
      if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE))
	continue;	/* Do not poll slave to a bonded interface */
      n_rx_packets += dpdk_device_input (vm, dm, xd, node, thread_index,
					 dq->queue_id);
    }

  poll_rate_limit (dm);

  return n_rx_packets;
}
#ifndef CLIB_MULTIARCH_VARIANT
VLIB_REGISTER_NODE (dpdk_input_node) = {
  .function = dpdk_input,
  .type = VLIB_NODE_TYPE_INPUT,
  .name = "dpdk-input",
  .sibling_of = "device-input",

  /* Will be enabled if/when hardware is detected. */
  .state = VLIB_NODE_STATE_DISABLED,

  .format_buffer = format_ethernet_header_with_length,
  .format_trace = format_dpdk_rx_trace,

  .n_errors = DPDK_N_ERROR,
  .error_strings = dpdk_error_strings,
};

vlib_node_function_t __clib_weak dpdk_input_avx512;
vlib_node_function_t __clib_weak dpdk_input_avx2;
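/* These symbols are weak, so they are NULL unless the corresponding
   multiarch variant of this file was built; the constructor below probes
   CPU support at load time and patches the node function pointer
   accordingly. */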
static void __clib_constructor
dpdk_input_multiarch_select (void)
{
  if (dpdk_input_avx512 && clib_cpu_supports_avx512f ())
    dpdk_input_node.function = dpdk_input_avx512;
  else if (dpdk_input_avx2 && clib_cpu_supports_avx2 ())
    dpdk_input_node.function = dpdk_input_avx2;
}
#endif

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */