#include <dpdk/device/dpdk_priv.h>
+#ifndef CLIB_MULTIARCH_VARIANT
/*
 * Error strings for this node: the _(n,s) X-macro keeps only the string
 * argument of each foreach_dpdk_error entry. Referenced as .error_strings
 * in the dpdk_input_node registration.
 * NOTE(review): the guard presumably keeps this table out of per-CPU
 * variant builds of this file -- confirm against the build rules.
 */
static char *dpdk_error_strings[] = {
#define _(n,s) s,
foreach_dpdk_error
#undef _
};
-
-always_inline int
-vlib_buffer_is_ip4 (vlib_buffer_t * b)
-{
- ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b);
- return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4));
-}
-
-always_inline int
-vlib_buffer_is_ip6 (vlib_buffer_t * b)
-{
- ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b);
- return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6));
-}
-
-always_inline int
-vlib_buffer_is_mpls (vlib_buffer_t * b)
-{
- ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b);
- return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST));
-}
+#endif /* CLIB_MULTIARCH_VARIANT */
/*
 * Pick the next node for a received packet from its ethertype.
 *
 * The ethernet header is read directly from the mbuf data, so the
 * vlib_buffer_t argument of the previous version is no longer needed.
 *
 * Returns:
 *   IP4  -> VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT when PKT_RX_IP_CKSUM_GOOD
 *           is set in mb->ol_flags, else VNET_DEVICE_INPUT_NEXT_IP4_INPUT
 *   IP6  -> VNET_DEVICE_INPUT_NEXT_IP6_INPUT
 *   MPLS -> VNET_DEVICE_INPUT_NEXT_MPLS_INPUT
 *   anything else -> VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT
 */
always_inline u32
-dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0)
+dpdk_rx_next_from_etype (struct rte_mbuf *mb)
{
- if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0)))
+ ethernet_header_t *h = rte_pktmbuf_mtod (mb, ethernet_header_t *);
+ if (PREDICT_TRUE (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)))
{
if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0))
return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
else
return VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
}
- else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0)))
+ else if (PREDICT_TRUE (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)))
return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
- else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
+ else
+ if (PREDICT_TRUE (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)))
return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
else
return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
*error = DPDK_ERROR_NONE;
}
-void
+static void
dpdk_rx_trace (dpdk_main_t * dm,
vlib_node_runtime_t * node,
dpdk_device_t * xd,
if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
next0 = xd->per_interface_next_index;
else
- next0 = dpdk_rx_next_from_etype (mb, b0);
+ next0 = dpdk_rx_next_from_etype (mb);
dpdk_rx_error_from_mb (mb, &next0, &error0);
mb_seg = mb->next;
b_chain = b;
- while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs))
+ if (mb->nb_segs < 2)	/* fewer than two segments: nothing further to walk */
+ return;
+
+ b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;	/* only reached for multi-segment packets */
+ b->total_length_not_including_first_buffer = 0;
+
+ while (nb_seg < mb->nb_segs)	/* the old nb_segs > 1 test is covered by the early return */
{
ASSERT (mb_seg != 0);
CLIB_CACHE_LINE_BYTES, LOAD);
}
-
-/*
- This function should fill 1st cacheline of vlib_buffer_t metadata with data
- from buffer template. Instead of filling field by field, we construct
- template and then use 128/256 bit vector instruction to copy data.
- This code first loads whole cacheline into 4 128-bit registers (xmm)
- or two 256 bit registers (ymm) and then stores data into all 4 buffers
- efectively saving on register load operations.
-*/
-
-static_always_inline void
-dpdk_buffer_init_from_template (void *d0, void *d1, void *d2, void *d3,
- void *s)
-{
- int i;
- for (i = 0; i < 2; i++)
- {
- *(u8x32 *) (((u8 *) d0) + i * 32) =
- *(u8x32 *) (((u8 *) d1) + i * 32) =
- *(u8x32 *) (((u8 *) d2) + i * 32) =
- *(u8x32 *) (((u8 *) d3) + i * 32) = *(u8x32 *) (((u8 *) s) + i * 32);
- }
-}
-
/*
* This function is used when there are no worker threads.
* The main thread performs IO and forwards the packets.
*/
static_always_inline u32
dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
- vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id,
+ vlib_node_runtime_t * node, u32 thread_index, u16 queue_id,
int maybe_multiseg)
{
u32 n_buffers;
uword n_rx_bytes = 0;
u32 n_trace, trace_cnt __attribute__ ((unused));
vlib_buffer_free_list_t *fl;
- vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, cpu_index);
+ vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, thread_index);
if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
return 0;
return 0;
}
- vec_reset_length (xd->d_trace_buffers[cpu_index]);
+ vec_reset_length (xd->d_trace_buffers[thread_index]);
trace_cnt = n_trace = vlib_get_trace_count (vm, node);
if (n_trace > 0)
{
struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++];
vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
- vec_add1 (xd->d_trace_buffers[cpu_index],
+ vec_add1 (xd->d_trace_buffers[thread_index],
vlib_get_buffer_index (vm, b));
}
}
/* Update buffer template */
vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
bt->error = node->errors[DPDK_ERROR_NONE];
+ /* as DPDK is allocating empty buffers from mempool provided before interface
+ start for each queue, it is safe to store this in the template */
+ bt->buffer_pool_index = xd->buffer_pool_for_queue[queue_id];
mb_index = 0;
u32 bi2, next2;
u32 bi3, next3;
u8 error0, error1, error2, error3;
+ i16 offset0, offset1, offset2, offset3;	/* per-packet device_input_next_node_advance[] value */
u64 or_ol_flags;
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
b2 = vlib_buffer_from_rte_mbuf (mb2);
b3 = vlib_buffer_from_rte_mbuf (mb3);
- dpdk_buffer_init_from_template (b0, b1, b2, b3, bt);
-
dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 9]);
dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 5]);
- /* current_data must be set to -RTE_PKTMBUF_HEADROOM in template */
- b0->current_data += mb0->data_off;
- b1->current_data += mb1->data_off;
- b2->current_data += mb2->data_off;
- b3->current_data += mb3->data_off;
-
- b0->current_length = mb0->data_len;
- b1->current_length = mb1->data_len;
- b2->current_length = mb2->data_len;
- b3->current_length = mb3->data_len;
+ clib_memcpy64_x4 (b0, b1, b2, b3, bt);	/* template copy; takes over from dpdk_buffer_init_from_template */
dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 10]);
- dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 7]);
+ dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 6]);
bi0 = vlib_get_buffer_index (vm, b0);
bi1 = vlib_get_buffer_index (vm, b1);
}
else
{
- next0 = dpdk_rx_next_from_etype (mb0, b0);
- next1 = dpdk_rx_next_from_etype (mb1, b1);
- next2 = dpdk_rx_next_from_etype (mb2, b2);
- next3 = dpdk_rx_next_from_etype (mb3, b3);
+ next0 = dpdk_rx_next_from_etype (mb0);
+ next1 = dpdk_rx_next_from_etype (mb1);
+ next2 = dpdk_rx_next_from_etype (mb2);
+ next3 = dpdk_rx_next_from_etype (mb3);
}
dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 11]);
- dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 6]);
+ dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 7]);
or_ol_flags = (mb0->ol_flags | mb1->ol_flags |
mb2->ol_flags | mb3->ol_flags);
b3->error = node->errors[error3];
}
- vlib_buffer_advance (b0, device_input_next_node_advance[next0]);
- vlib_buffer_advance (b1, device_input_next_node_advance[next1]);
- vlib_buffer_advance (b2, device_input_next_node_advance[next2]);
- vlib_buffer_advance (b3, device_input_next_node_advance[next3]);
-
/*
 * Buffer metadata is now derived from the mbuf in one step, instead of
 * template value + data_off followed by vlib_buffer_advance():
 *   l2_hdr_offset  = data_off - RTE_PKTMBUF_HEADROOM
 *   current_data   = that position plus the chosen next node's advance
 *   l3_hdr_offset  = current_data
 *   current_length = data_len minus the advance
 * The same sequence is repeated for b1..b3.
 */
+ offset0 = device_input_next_node_advance[next0];
+ b0->current_data = mb0->data_off + offset0 - RTE_PKTMBUF_HEADROOM;
+ b0->flags |= device_input_next_node_flags[next0];
+ vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
+ vnet_buffer (b0)->l2_hdr_offset =
+ mb0->data_off - RTE_PKTMBUF_HEADROOM;
+ b0->current_length = mb0->data_len - offset0;
n_rx_bytes += mb0->pkt_len;
+
+ offset1 = device_input_next_node_advance[next1];
+ b1->current_data = mb1->data_off + offset1 - RTE_PKTMBUF_HEADROOM;
+ b1->flags |= device_input_next_node_flags[next1];
+ vnet_buffer (b1)->l3_hdr_offset = b1->current_data;
+ vnet_buffer (b1)->l2_hdr_offset =
+ mb1->data_off - RTE_PKTMBUF_HEADROOM;
+ b1->current_length = mb1->data_len - offset1;
n_rx_bytes += mb1->pkt_len;
+
+ offset2 = device_input_next_node_advance[next2];
+ b2->current_data = mb2->data_off + offset2 - RTE_PKTMBUF_HEADROOM;
+ b2->flags |= device_input_next_node_flags[next2];
+ vnet_buffer (b2)->l3_hdr_offset = b2->current_data;
+ vnet_buffer (b2)->l2_hdr_offset =
+ mb2->data_off - RTE_PKTMBUF_HEADROOM;
+ b2->current_length = mb2->data_len - offset2;
n_rx_bytes += mb2->pkt_len;
+
+ offset3 = device_input_next_node_advance[next3];
+ b3->current_data = mb3->data_off + offset3 - RTE_PKTMBUF_HEADROOM;
+ b3->flags |= device_input_next_node_flags[next3];
+ vnet_buffer (b3)->l3_hdr_offset = b3->current_data;
+ vnet_buffer (b3)->l2_hdr_offset =
+ mb3->data_off - RTE_PKTMBUF_HEADROOM;
+ b3->current_length = mb3->data_len - offset3;
n_rx_bytes += mb3->pkt_len;
+
/* Process subsequent segments of multi-segment packets */
if (maybe_multiseg)
{
clib_memcpy (b0, bt, CLIB_CACHE_LINE_BYTES);
- ASSERT (b0->current_data == -RTE_PKTMBUF_HEADROOM);
- b0->current_data += mb0->data_off;
- b0->current_length = mb0->data_len;
-
bi0 = vlib_get_buffer_index (vm, b0);
to_next[0] = bi0;
if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
next0 = xd->per_interface_next_index;
else
- next0 = dpdk_rx_next_from_etype (mb0, b0);
+ next0 = dpdk_rx_next_from_etype (mb0);
dpdk_rx_error_from_mb (mb0, &next0, &error0);
-
- vlib_buffer_advance (b0, device_input_next_node_advance[next0]);
-
+ b0->error = node->errors[error0];
+
/* Same derivation as in the four-packet loop above. */
+ offset0 = device_input_next_node_advance[next0];
+ b0->current_data = mb0->data_off + offset0 - RTE_PKTMBUF_HEADROOM;
+ b0->flags |= device_input_next_node_flags[next0];
+ vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
+ vnet_buffer (b0)->l2_hdr_offset =
+ mb0->data_off - RTE_PKTMBUF_HEADROOM;
+ b0->current_length = mb0->data_len - offset0;
n_rx_bytes += mb0->pkt_len;
/* Process subsequent segments of multi-segment packets */
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
- if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0))
+ if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[thread_index]) > 0))
{
- dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index],
- vec_len (xd->d_trace_buffers[cpu_index]));
- vlib_set_trace_count (vm, node, n_trace -
- vec_len (xd->d_trace_buffers[cpu_index]));
+ dpdk_rx_trace (dm, node, xd, queue_id,
+ xd->d_trace_buffers[thread_index],
+ vec_len (xd->d_trace_buffers[thread_index]));
+ vlib_set_trace_count (vm, node,
+ n_trace -
+ vec_len (xd->d_trace_buffers[thread_index]));
}
vlib_increment_combined_counter
(vnet_get_main ()->interface_main.combined_sw_if_counters
+ VNET_INTERFACE_COUNTER_RX,
- cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes);
+ thread_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes);
- vnet_device_increment_rx_packets (cpu_index, mb_index);
+ vnet_device_increment_rx_packets (thread_index, mb_index);
return mb_index;
}
<code>xd->per_interface_next_index</code>
*/
-static uword
-dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+uword
+CLIB_MULTIARCH_FN (dpdk_input) (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * f)
{
dpdk_main_t *dm = &dpdk_main;
dpdk_device_t *xd;
uword n_rx_packets = 0;
- dpdk_device_and_queue_t *dq;
- u32 cpu_index = os_get_cpu_number ();
+ vnet_device_input_runtime_t *rt = (void *) node->runtime_data;	/* supplies the device/queue list polled below */
+ vnet_device_and_queue_t *dq;
+ u32 thread_index = node->thread_index;	/* taken from the node runtime, not os_get_cpu_number () */
/*
* Poll all devices on this cpu for input/interrupts.
*/
/* *INDENT-OFF* */
- vec_foreach (dq, dm->devices_by_cpu[cpu_index])
+ foreach_device_and_queue (dq, rt->devices_and_queues)
{
- xd = vec_elt_at_index(dm->devices, dq->device);
+ xd = vec_elt_at_index(dm->devices, dq->dev_instance);
+ if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE))
+ continue; /* Do not poll slave to a bonded interface */
/* maybe_multiseg is passed as a literal 0/1 in each call rather than as a runtime value */
if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG)
- n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 1);
+ n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 1);
else
- n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 0);
+ n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 0);
}
/* *INDENT-ON* */
return n_rx_packets;
}
+#ifndef CLIB_MULTIARCH_VARIANT
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dpdk_input_node) = {
.function = dpdk_input,
.n_errors = DPDK_N_ERROR,
.error_strings = dpdk_error_strings,
};
-
-VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input);
/* *INDENT-ON* */
/*
 * Optional CPU-specific variants of dpdk_input. They are declared weak and
 * tested for non-null before use, so the node keeps .function = dpdk_input
 * when a variant is absent or the CPU lacks the feature.
 * NOTE(review): __clib_constructor presumably runs the selector at load
 * time, before the node is first dispatched -- confirm.
 */
+vlib_node_function_t __clib_weak dpdk_input_avx512;
+vlib_node_function_t __clib_weak dpdk_input_avx2;
+
+#if __x86_64__
+static void __clib_constructor
+dpdk_input_multiarch_select (void)
+{
+ if (dpdk_input_avx512 && clib_cpu_supports_avx512f ())	/* AVX-512 variant is tried first */
+ dpdk_input_node.function = dpdk_input_avx512;
+ else if (dpdk_input_avx2 && clib_cpu_supports_avx2 ())
+ dpdk_input_node.function = dpdk_input_avx2;
+}
+#endif /* __x86_64__ */
+#endif /* CLIB_MULTIARCH_VARIANT */
+
+
/*
* fd.io coding-style-patch-verification: ON
*