+/* Translate an array of rte_mbuf pointers into vlib buffer indices.
+ *
+ * vm      vlib main, handed to vlib_get_buffer_index () on the scalar paths
+ * mb      input: n_left mbuf pointers
+ * bi      output: receives n_left buffer indices, bi[i] belongs to mb[i]
+ * n_left  number of elements to convert
+ *
+ * The main loop converts eight elements per iteration, using 256-bit vector
+ * arithmetic when CLIB_HAVE_VEC256 is defined and eight scalar conversions
+ * otherwise.  Whatever is left (fewer than eight) is converted one by one. */
+static_always_inline void
+dpdk_mbufs_to_buffer_indices (vlib_main_t * vm, struct rte_mbuf **mb,
+                              u32 * bi, uword n_left)
+{
+#ifdef CLIB_HAVE_VEC256
+  /* permutation which gathers the even-numbered u32 lanes (0, 2, 4, 6)
+     into the lower 128 bits */
+  u32x8 mask = { 0, 2, 4, 6, 1, 3, 5, 7 };
+  /* vlib_buffer_t is straight after rte_mbuf, so subtracting
+     (buffer_mem_start - sizeof (struct rte_mbuf)) from an mbuf address
+     gives the byte offset of its vlib_buffer_t from buffer_mem_start */
+  u64x4 off4 = u64x4_splat (buffer_main.buffer_mem_start -
+                            sizeof (struct rte_mbuf));
+#endif
+
+  while (n_left >= 8)
+    {
+#ifdef CLIB_HAVE_VEC256
+      /* load 2 x 4 pointers into 256-bit registers */
+      u64x4 v0 = u64x4_load_unaligned (mb);
+      u64x4 v1 = u64x4_load_unaligned (mb + 4);
+      u32x8 v2, v3;
+
+      /* calculate 2 x 4 buffer indices in parallel:
+         byte offset from buffer_mem_start ... */
+      v0 -= off4;
+      v1 -= off4;
+
+      /* ... scaled down to cache-line units */
+      v0 >>= CLIB_LOG2_CACHE_LINE_BYTES;
+      v1 >>= CLIB_LOG2_CACHE_LINE_BYTES;
+
+      /* permute 256-bit register so lower u32s of each buffer index are
+       * placed into lower 128-bits */
+      v2 = u32x8_permute ((u32x8) v0, mask);
+      v3 = u32x8_permute ((u32x8) v1, mask);
+
+      /* extract lower 128-bits and save them to the array of buffer indices */
+      u32x4_store_unaligned (u32x8_extract_lo (v2), bi);
+      u32x4_store_unaligned (u32x8_extract_lo (v3), bi + 4);
+#else
+      /* equivalent non-vector implementation; every bi[i] must be derived
+         from the mbuf with the same index */
+      bi[0] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[0]));
+      bi[1] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[1]));
+      bi[2] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[2]));
+      bi[3] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[3]));
+      bi[4] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[4]));
+      bi[5] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[5]));
+      bi[6] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[6]));
+      bi[7] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[7]));
+#endif
+      bi += 8;
+      mb += 8;
+      n_left -= 8;
+    }
+
+  /* remainder, one element at a time */
+  while (n_left)
+    {
+      bi[0] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[0]));
+      bi += 1;
+      mb += 1;
+      n_left -= 1;
+    }
+}
+
+/* Copy the low byte of ol_flags of `count` mbufs into flags[] and return the
+ * bitwise OR of all copied bytes.
+ *
+ * mb     input: at least `count` mbuf pointers
+ * flags  output: receives `count` bytes, flags[i] belongs to mb[i]
+ * count  number of mbufs to look at; nothing is written when it is <= 0 */
+static_always_inline u8
+dpdk_ol_flags_extract (struct rte_mbuf **mb, u8 * flags, int count)
+{
+  u8 combined = 0;
+  int idx = 0;
+
+  while (idx < count)
+    {
+      /* deliberate truncation: every flag of interest currently sits in
+         the lower 8 bits, which is not guaranteed to stay that way */
+      flags[idx] = (u8) mb[idx]->ol_flags;
+      combined |= flags[idx];
+      idx++;
+    }
+
+  return combined;
+}
+
+/* Fill in vlib buffer metadata for a burst of received mbufs.
+ *
+ * vm              vlib main
+ * ptd             per-thread data; ptd->mbufs and ptd->buffer_template are
+ *                 read, ptd->flags and ptd->next are written
+ * n_rx_packets    number of valid entries in ptd->mbufs
+ * maybe_multiseg  when non-zero dpdk_process_subseq_segs () is called for
+ *                 every packet and its return value added to the byte count
+ * or_flagsp       out: bitwise OR of the low byte of ol_flags of all packets
+ *
+ * Returns the accumulated byte count: data_len of each mbuf plus whatever
+ * dpdk_process_subseq_segs () returned.
+ *
+ * Note that on return ptd->next[i] is not a next index yet.  It temporarily
+ * holds data_off + offset of the ethertype field of packet i, which
+ * dpdk_set_next_from_etype () later uses to read the ethertype. */
+static_always_inline uword
+dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd,
+                       uword n_rx_packets, int maybe_multiseg, u8 * or_flagsp)
+{
+  u32 n_left = n_rx_packets;
+  vlib_buffer_t *b[4];
+  vlib_buffer_free_list_t *fl;
+  struct rte_mbuf **mb = ptd->mbufs;
+  uword n_bytes = 0;
+  i16 off;
+  u8 *flags = ptd->flags, or_flags = 0;
+  u16 *next = ptd->next;
+
+  fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+  /* four packets per iteration; eight must remain so that the prefetches
+     of mb[4] .. mb[7] below only touch entries of this burst */
+  while (n_left >= 8)
+    {
+      CLIB_PREFETCH (mb + 8, CLIB_CACHE_LINE_BYTES, LOAD);
+
+      dpdk_prefetch_buffer_x4 (mb + 4);
+
+      b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
+      b[1] = vlib_buffer_from_rte_mbuf (mb[1]);
+      b[2] = vlib_buffer_from_rte_mbuf (mb[2]);
+      b[3] = vlib_buffer_from_rte_mbuf (mb[3]);
+
+      /* start every buffer header from the per-thread template */
+      clib_memcpy64_x4 (b[0], b[1], b[2], b[3], &ptd->buffer_template);
+
+      dpdk_prefetch_mbuf_x4 (mb + 4);
+
+      or_flags |= dpdk_ol_flags_extract (mb, flags, 4);
+      flags += 4;
+
+      /* we temporarily store the relative offset of the ethertype into
+         next[x] so we can prefetch and get it faster later; each packet
+         must use the data_off of its own mbuf */
+
+      off = mb[0]->data_off;
+      next[0] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
+      off -= RTE_PKTMBUF_HEADROOM;
+      vnet_buffer (b[0])->l2_hdr_offset = off;
+      b[0]->current_data = off;
+
+      off = mb[1]->data_off;
+      next[1] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
+      off -= RTE_PKTMBUF_HEADROOM;
+      vnet_buffer (b[1])->l2_hdr_offset = off;
+      b[1]->current_data = off;
+
+      off = mb[2]->data_off;
+      next[2] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
+      off -= RTE_PKTMBUF_HEADROOM;
+      vnet_buffer (b[2])->l2_hdr_offset = off;
+      b[2]->current_data = off;
+
+      off = mb[3]->data_off;
+      next[3] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
+      off -= RTE_PKTMBUF_HEADROOM;
+      vnet_buffer (b[3])->l2_hdr_offset = off;
+      b[3]->current_data = off;
+
+      b[0]->current_length = mb[0]->data_len;
+      b[1]->current_length = mb[1]->data_len;
+      b[2]->current_length = mb[2]->data_len;
+      b[3]->current_length = mb[3]->data_len;
+
+      n_bytes += mb[0]->data_len;
+      n_bytes += mb[1]->data_len;
+      n_bytes += mb[2]->data_len;
+      n_bytes += mb[3]->data_len;
+
+      if (maybe_multiseg)
+        {
+          n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], fl);
+          n_bytes += dpdk_process_subseq_segs (vm, b[1], mb[1], fl);
+          n_bytes += dpdk_process_subseq_segs (vm, b[2], mb[2], fl);
+          n_bytes += dpdk_process_subseq_segs (vm, b[3], mb[3], fl);
+        }
+
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]);
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]);
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]);
+
+      /* next */
+      mb += 4;
+      n_left -= 4;
+      next += 4;
+    }
+
+  /* remainder (up to seven packets), same steps one packet at a time */
+  while (n_left)
+    {
+      b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
+      clib_memcpy (b[0], &ptd->buffer_template, 64);
+      or_flags |= dpdk_ol_flags_extract (mb, flags, 1);
+      flags += 1;
+
+      off = mb[0]->data_off;
+      next[0] = off + STRUCT_OFFSET_OF (ethernet_header_t, type);
+      off -= RTE_PKTMBUF_HEADROOM;
+      vnet_buffer (b[0])->l2_hdr_offset = off;
+      b[0]->current_data = off;
+      b[0]->current_length = mb[0]->data_len;
+      n_bytes += mb[0]->data_len;
+      if (maybe_multiseg)
+        n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], fl);
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
+
+      /* next */
+      mb += 1;
+      n_left -= 1;
+      next += 1;
+    }
+
+  *or_flagsp = or_flags;
+  return n_bytes;
+}
+
+/* Pick the next node of every packet of a burst from its ethertype.
+ *
+ * vm            vlib main (not referenced in the body)
+ * node          node runtime, handed to dpdk_rx_next ()
+ * ptd           per-thread data; ptd->mbufs and ptd->flags are read,
+ *               ptd->next is read and then overwritten
+ * n_rx_packets  number of packets to process
+ *
+ * On entry ptd->next[i] holds the offset used below to locate the ethertype
+ * of packet i; on exit it holds the value returned by dpdk_rx_next ().
+ * current_data / current_length of each buffer are then moved by the
+ * device_input_next_node_advance[] entry of the chosen next. */
+static_always_inline void
+dpdk_set_next_from_etype (vlib_main_t * vm, vlib_node_runtime_t * node,
+                          dpdk_per_thread_data_t * ptd, uword n_rx_packets)
+{
+  struct rte_mbuf **mbufs = ptd->mbufs;
+  u16 *nexts = ptd->next;
+  u8 *pkt_flags = ptd->flags;
+  u32 remaining = n_rx_packets;
+  vlib_buffer_t *buf[4];
+  u16 ethertype[4];
+  i16 advance[4];
+  int lane;
+
+  /* four packets per iteration, processed stage by stage; twelve must
+     remain because packets 8 .. 11 are prefetched */
+  for (; remaining >= 12; remaining -= 4)
+    {
+      dpdk_prefetch_buffer_data_x4 (mbufs + 8);
+      dpdk_prefetch_buffer_x4 (mbufs + 8);
+
+      for (lane = 0; lane < 4; lane++)
+        buf[lane] = vlib_buffer_from_rte_mbuf (mbufs[lane]);
+
+      /* nexts[] still holds the ethertype offset at this point */
+      for (lane = 0; lane < 4; lane++)
+        ethertype[lane] = *(u16 *) ((u8 *) mbufs[lane] + nexts[lane] +
+                                    sizeof (vlib_buffer_t));
+
+      for (lane = 0; lane < 4; lane++)
+        nexts[lane] = dpdk_rx_next (node, ethertype[lane], pkt_flags[lane]);
+
+      for (lane = 0; lane < 4; lane++)
+        advance[lane] = device_input_next_node_advance[nexts[lane]];
+
+      for (lane = 0; lane < 4; lane++)
+        buf[lane]->current_data += advance[lane];
+
+      for (lane = 0; lane < 4; lane++)
+        buf[lane]->current_length -= advance[lane];
+
+      /* step to the following quad */
+      nexts += 4;
+      mbufs += 4;
+      pkt_flags += 4;
+    }
+
+  /* leftovers, one packet per iteration */
+  for (; remaining; remaining -= 1)
+    {
+      u16 et;
+
+      buf[0] = vlib_buffer_from_rte_mbuf (mbufs[0]);
+      et = *(u16 *) ((u8 *) mbufs[0] + nexts[0] + sizeof (vlib_buffer_t));
+      nexts[0] = dpdk_rx_next (node, et, pkt_flags[0]);
+      advance[0] = device_input_next_node_advance[nexts[0]];
+      buf[0]->current_data += advance[0];
+      buf[0]->current_length -= advance[0];
+
+      /* step to the following packet */
+      nexts += 1;
+      mbufs += 1;
+      pkt_flags += 1;
+    }
+}
+
+static_always_inline u32
+dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
+ vlib_node_runtime_t * node, u32 thread_index, u16 queue_id)
+{ /* Receive path for RX queue queue_id of device xd, run on thread
+     thread_index: pull a burst of mbufs from the PMD, initialise the
+     corresponding vlib buffers, choose a next node per packet, enqueue the
+     buffers, optionally trace them and update the RX counters.
+     Returns the number of packets received; 0 when the device is not
+     admin-up or the PMD returned nothing. */
+ uword n_rx_packets = 0, n_rx_bytes;
+ u32 n_left, n_trace;
+ u32 *buffers;
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ struct rte_mbuf **mb;
+ vlib_buffer_t *b0;
+ int known_next = 0;
+ u16 *next;
+ u8 or_flags;
+ u32 n;
+
+ dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data,
+ thread_index);
+ vlib_buffer_t *bt = &ptd->buffer_template;
+
+ if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
+ return 0;
+
+ /* get up to DPDK_RX_BURST_SZ buffers from PMD; each call appends to
+    ptd->mbufs after the packets already collected */
+ while (n_rx_packets < DPDK_RX_BURST_SZ)
+ {
+ n = rte_eth_rx_burst (xd->device_index, queue_id,
+ ptd->mbufs + n_rx_packets,
+ DPDK_RX_BURST_SZ - n_rx_packets);
+ n_rx_packets += n;
+
+ if (n < 32) /* a short burst ends the polling for this call; presumably
+                it means the queue is (nearly) empty - TODO confirm the
+                choice of 32 */
+ break;
+ }
+
+ if (n_rx_packets == 0)
+ return 0;
+
+ /* Update buffer template */
+ vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->sw_if_index;
+ bt->error = node->errors[DPDK_ERROR_NONE];
+ /* as DPDK is allocating empty buffers from mempool provided before interface
+ start for each queue, it is safe to store this in the template */
+ bt->buffer_pool_index = xd->buffer_pool_for_queue[queue_id];
+
+ /* a per-interface next index, when configured (i.e. not ~0), replaces the
+    ethertype based next node selection for the whole burst */
+ if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
+ {
+ known_next = 1;
+ next_index = xd->per_interface_next_index;
+ }
+
+ /* as all packets belong to the same interface, the feature arc lookup
+ can be done once and the result stored in the buffer template */
+ if (PREDICT_FALSE (vnet_device_input_have_features (xd->sw_if_index)))
+ {
+ vnet_feature_start_device_input_x1 (xd->sw_if_index, &next_index, bt);
+ known_next = 1;
+ }
+
+ if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) /* maybe_multiseg is passed
+    as a literal in both calls, presumably so that the always-inline callee
+    is specialised per case - TODO confirm */
+ n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 1, &or_flags);
+ else
+ n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 0, &or_flags);
+
+ if (PREDICT_FALSE (known_next))
+ {
+ for (n = 0; n < n_rx_packets; n++)
+ ptd->next[n] = next_index;
+
+ vnet_buffer (bt)->feature_arc_index = 0; /* clear the template fields
+    again now that the burst has been initialised from it; presumably these
+    are the ones written by vnet_feature_start_device_input_x1 () above */
+ bt->current_config_index = 0;
+ }
+ else
+ dpdk_set_next_from_etype (vm, node, ptd, n_rx_packets);
+
+ /* is at least one packet marked as ip4 checksum bad? if so, every flagged
+    packet that is headed to ip4-input gets the checksum error and is
+    redirected to drop; other packets are left alone */
+ if (PREDICT_FALSE (or_flags & (1 << DPDK_RX_F_CKSUM_BAD)))
+ for (n = 0; n < n_rx_packets; n++)
+ {
+ if ((ptd->flags[n] & (1 << DPDK_RX_F_CKSUM_BAD)) == 0)
+ continue;
+ if (ptd->next[n] != VNET_DEVICE_INPUT_NEXT_IP4_INPUT)
+ continue;
+
+ b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
+ b0->error = node->errors[DPDK_ERROR_IP_CHECKSUM_ERROR];
+ ptd->next[n] = VNET_DEVICE_INPUT_NEXT_DROP;
+ }
+
+ /* enqueue buffers to the next node: first convert the mbuf pointers to
+    buffer indices, then walk the burst; mb is advanced in step with
+    buffers / next in the loops below but not otherwise read there */
+ dpdk_mbufs_to_buffer_indices (vm, ptd->mbufs, ptd->buffers, n_rx_packets);
+ n_left = n_rx_packets;
+ next = ptd->next;
+ buffers = ptd->buffers;
+ mb = ptd->mbufs;
+ while (n_left)
+ {
+ u32 n_left_to_next;
+ u32 *to_next;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+#ifdef CLIB_HAVE_VEC256
+ while (n_left >= 16 && n_left_to_next >= 16)
+ {
+ u16x16 next16 = u16x16_load_unaligned (next);
+ if (u16x16_is_all_equal (next16, next_index)) /* all 16 packets go to
+    the current next_index: plain bulk copy, no per-packet validation */
+ {
+ clib_memcpy (to_next, buffers, 16 * sizeof (u32));
+ to_next += 16;
+ n_left_to_next -= 16;
+ buffers += 16;
+ n_left -= 16;
+ next += 16;
+ mb += 16;
+ }
+ else
+ {
+ clib_memcpy (to_next, buffers, 4 * sizeof (u32)); /* mixed nexts: copy
+    four indices into the current frame first, then hand each packet's own
+    next index to the validate macro */
+ to_next += 4;
+ n_left_to_next -= 4;
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, buffers[0],
+ buffers[1], buffers[2],
+ buffers[3], next[0], next[1],
+ next[2], next[3]);
+ /* advance to the following four packets */
+ buffers += 4;
+ n_left -= 4;
+ next += 4;
+ mb += 4;
+ }
+ }
+#endif
+ while (n_left >= 4 && n_left_to_next >= 4)
+ {
+ clib_memcpy (to_next, buffers, 4 * sizeof (u32));
+ to_next += 4;
+ n_left_to_next -= 4;
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, buffers[0],
+ buffers[1], buffers[2], buffers[3],
+ next[0], next[1], next[2],
+ next[3]);
+ /* advance to the following four packets */
+ buffers += 4;
+ n_left -= 4;
+ next += 4;
+ mb += 4;
+ }
+ while (n_left && n_left_to_next)
+ {
+ clib_memcpy (to_next, buffers, 1 * sizeof (u32));
+ to_next += 1;
+ n_left_to_next -= 1;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, buffers[0],
+ next[0]);
+ /* advance to the following packet */
+ buffers += 1;
+ n_left -= 1;
+ next += 1;
+ mb += 1;
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* packet trace if enabled: walk the burst again from its start and trace
+    packets until either the trace budget or the burst is exhausted */
+ if ((n_trace = vlib_get_trace_count (vm, node)))
+ {
+ n_left = n_rx_packets;
+ buffers = ptd->buffers;
+ mb = ptd->mbufs;
+ next = ptd->next;
+ while (n_trace && n_left)
+ {
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ vlib_trace_buffer (vm, node, next[0], b0, /* follow_chain */ 0);
+
+ dpdk_rx_trace_t *t0 = vlib_add_trace (vm, node, b0, sizeof t0[0]);
+ t0->queue_index = queue_id;
+ t0->device_index = xd->device_index;
+ t0->buffer_index = vlib_get_buffer_index (vm, b0);
+
+ clib_memcpy (&t0->mb, mb[0], sizeof t0->mb);
+ clib_memcpy (&t0->buffer, b0, sizeof b0[0] - sizeof b0->pre_data); /* the
+    buffer header only, pre_data excluded */
+ clib_memcpy (t0->buffer.pre_data, b0->data,
+ sizeof t0->buffer.pre_data); /* the pre_data area of the trace copy is
+    filled from the start of b0->data instead */
+ clib_memcpy (&t0->data, mb[0]->buf_addr + mb[0]->data_off,
+ sizeof t0->data);
+ n_trace--;
+ n_left--;
+ buffers++;
+ mb++;
+ next++;
+ }
+ vlib_set_trace_count (vm, node, n_trace); /* store the remaining budget */
+ }
+
+ vlib_increment_combined_counter
+ (vnet_get_main ()->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX, thread_index, xd->sw_if_index,
+ n_rx_packets, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (thread_index, n_rx_packets);
+
+ return n_rx_packets;
+}
+
+uword CLIB_CPU_OPTIMIZED