X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fdpdk%2Fdevice%2Fnode.c;h=3b2cfe360f39fac5b77d13f99b5cbd854caeddb0;hb=a332c46a51f9b4e13963340dfee1318e7513c124;hp=e8d502ca692f84232dfc38ca40dfd3ccb92be4d3;hpb=25f635852aee76255f7210c43d43668a80fdccce;p=vpp.git diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index e8d502ca692..3b2cfe360f3 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -28,46 +28,29 @@ #include +#ifndef CLIB_MULTIARCH_VARIANT static char *dpdk_error_strings[] = { #define _(n,s) s, foreach_dpdk_error #undef _ }; - -always_inline int -vlib_buffer_is_ip4 (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b); - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); -} - -always_inline int -vlib_buffer_is_ip6 (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b); - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); -} - -always_inline int -vlib_buffer_is_mpls (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b); - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); -} +#endif always_inline u32 -dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0) +dpdk_rx_next_from_etype (struct rte_mbuf *mb) { - if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0))) + ethernet_header_t *h = rte_pktmbuf_mtod (mb, ethernet_header_t *); + if (PREDICT_TRUE (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4))) { if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0)) return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; else return VNET_DEVICE_INPUT_NEXT_IP4_INPUT; } - else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0))) + else if (PREDICT_TRUE (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6))) return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; - else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) + else + if (PREDICT_TRUE (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS))) return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; else return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; @@ -85,7 +68,7 @@ dpdk_rx_error_from_mb (struct rte_mbuf *mb, u32 * next, u8 * error) *error = DPDK_ERROR_NONE; } -void +static void dpdk_rx_trace (dpdk_main_t * dm, vlib_node_runtime_t * node, dpdk_device_t * xd, @@ -115,7 +98,7 @@ dpdk_rx_trace (dpdk_main_t * dm, if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) next0 = xd->per_interface_next_index; else - next0 = dpdk_rx_next_from_etype (mb, b0); + next0 = dpdk_rx_next_from_etype (mb); dpdk_rx_error_from_mb (mb, &next0, &error0); @@ -179,7 +162,13 @@ dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, mb_seg = mb->next; b_chain = b; - while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) + if (mb->nb_segs < 2) + return; + + b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + b->total_length_not_including_first_buffer = 0; + + while (nb_seg < mb->nb_segs) { ASSERT (mb_seg != 0); @@ -224,37 +213,13 @@ dpdk_prefetch_ethertype (struct rte_mbuf *mb) CLIB_CACHE_LINE_BYTES, LOAD); } - -/* - This function should fill 1st cacheline of vlib_buffer_t metadata with data - from buffer template. Instead of filling field by field, we construct - template and then use 128/256 bit vector instruction to copy data. - This code first loads whole cacheline into 4 128-bit registers (xmm) - or two 256 bit registers (ymm) and then stores data into all 4 buffers - efectively saving on register load operations. -*/ - -static_always_inline void -dpdk_buffer_init_from_template (void *d0, void *d1, void *d2, void *d3, - void *s) -{ - int i; - for (i = 0; i < 2; i++) - { - *(u8x32 *) (((u8 *) d0) + i * 32) = - *(u8x32 *) (((u8 *) d1) + i * 32) = - *(u8x32 *) (((u8 *) d2) + i * 32) = - *(u8x32 *) (((u8 *) d3) + i * 32) = *(u8x32 *) (((u8 *) s) + i * 32); - } -} - /* * This function is used when there are no worker threads. * The main thread performs IO and forwards the packets. */ static_always_inline u32 dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, - vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id, + vlib_node_runtime_t * node, u32 thread_index, u16 queue_id, int maybe_multiseg) { u32 n_buffers; @@ -265,7 +230,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, uword n_rx_bytes = 0; u32 n_trace, trace_cnt __attribute__ ((unused)); vlib_buffer_free_list_t *fl; - vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, cpu_index); + vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, thread_index); if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) return 0; @@ -277,7 +242,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, return 0; } - vec_reset_length (xd->d_trace_buffers[cpu_index]); + vec_reset_length (xd->d_trace_buffers[thread_index]); trace_cnt = n_trace = vlib_get_trace_count (vm, node); if (n_trace > 0) @@ -289,7 +254,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, { struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++]; vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); - vec_add1 (xd->d_trace_buffers[cpu_index], + vec_add1 (xd->d_trace_buffers[thread_index], vlib_get_buffer_index (vm, b)); } } @@ -299,6 +264,9 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, /* Update buffer template */ vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; bt->error = node->errors[DPDK_ERROR_NONE]; + /* as DPDK is allocating empty buffers from mempool provided before interface + start for each queue, it is safe to store this in the template */ + bt->buffer_pool_index = xd->buffer_pool_for_queue[queue_id]; mb_index = 0; @@ -310,6 +278,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, u32 bi2, next2; u32 bi3, next3; u8 error0, error1, error2, error3; + i16 offset0, offset1, offset2, offset3; u64 or_ol_flags; vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); @@ -350,24 +319,13 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, b2 = vlib_buffer_from_rte_mbuf (mb2); b3 = vlib_buffer_from_rte_mbuf (mb3); - dpdk_buffer_init_from_template (b0, b1, b2, b3, bt); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 9]); dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 5]); - /* current_data must be set to -RTE_PKTMBUF_HEADROOM in template */ - b0->current_data += mb0->data_off; - b1->current_data += mb1->data_off; - b2->current_data += mb2->data_off; - b3->current_data += mb3->data_off; - - b0->current_length = mb0->data_len; - b1->current_length = mb1->data_len; - b2->current_length = mb2->data_len; - b3->current_length = mb3->data_len; + clib_memcpy64_x4 (b0, b1, b2, b3, bt); dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 10]); - dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 7]); + dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 6]); bi0 = vlib_get_buffer_index (vm, b0); bi1 = vlib_get_buffer_index (vm, b1); @@ -387,14 +345,14 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, } else { - next0 = dpdk_rx_next_from_etype (mb0, b0); - next1 = dpdk_rx_next_from_etype (mb1, b1); - next2 = dpdk_rx_next_from_etype (mb2, b2); - next3 = dpdk_rx_next_from_etype (mb3, b3); + next0 = dpdk_rx_next_from_etype (mb0); + next1 = dpdk_rx_next_from_etype (mb1); + next2 = dpdk_rx_next_from_etype (mb2); + next3 = dpdk_rx_next_from_etype (mb3); } dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 11]); - dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 6]); + dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 7]); or_ol_flags = (mb0->ol_flags | mb1->ol_flags | mb2->ol_flags | mb3->ol_flags); @@ -410,16 +368,43 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, b3->error = node->errors[error3]; } - vlib_buffer_advance (b0, device_input_next_node_advance[next0]); - vlib_buffer_advance (b1, device_input_next_node_advance[next1]); - vlib_buffer_advance (b2, device_input_next_node_advance[next2]); - vlib_buffer_advance (b3, device_input_next_node_advance[next3]); - + offset0 = device_input_next_node_advance[next0]; + b0->current_data = mb0->data_off + offset0 - RTE_PKTMBUF_HEADROOM; + b0->flags |= device_input_next_node_flags[next0]; + vnet_buffer (b0)->l3_hdr_offset = b0->current_data; + vnet_buffer (b0)->l2_hdr_offset = + mb0->data_off - RTE_PKTMBUF_HEADROOM; + b0->current_length = mb0->data_len - offset0; n_rx_bytes += mb0->pkt_len; + + offset1 = device_input_next_node_advance[next1]; + b1->current_data = mb1->data_off + offset1 - RTE_PKTMBUF_HEADROOM; + b1->flags |= device_input_next_node_flags[next1]; + vnet_buffer (b1)->l3_hdr_offset = b1->current_data; + vnet_buffer (b1)->l2_hdr_offset = + mb1->data_off - RTE_PKTMBUF_HEADROOM; + b1->current_length = mb1->data_len - offset1; n_rx_bytes += mb1->pkt_len; + + offset2 = device_input_next_node_advance[next2]; + b2->current_data = mb2->data_off + offset2 - RTE_PKTMBUF_HEADROOM; + b2->flags |= device_input_next_node_flags[next2]; + vnet_buffer (b2)->l3_hdr_offset = b2->current_data; + vnet_buffer (b2)->l2_hdr_offset = + mb2->data_off - RTE_PKTMBUF_HEADROOM; + b2->current_length = mb2->data_len - offset2; n_rx_bytes += mb2->pkt_len; + + offset3 = device_input_next_node_advance[next3]; + b3->current_data = mb3->data_off + offset3 - RTE_PKTMBUF_HEADROOM; + b3->flags |= device_input_next_node_flags[next3]; + vnet_buffer (b3)->l3_hdr_offset = b3->current_data; + vnet_buffer (b3)->l2_hdr_offset = + mb3->data_off - RTE_PKTMBUF_HEADROOM; + b3->current_length = mb3->data_len - offset3; n_rx_bytes += mb3->pkt_len; + /* Process subsequent segments of multi-segment packets */ if (maybe_multiseg) { @@ -472,10 +457,6 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, clib_memcpy (b0, bt, CLIB_CACHE_LINE_BYTES); - ASSERT (b0->current_data == -RTE_PKTMBUF_HEADROOM); - b0->current_data += mb0->data_off; - b0->current_length = mb0->data_len; - bi0 = vlib_get_buffer_index (vm, b0); to_next[0] = bi0; @@ -485,12 +466,18 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) next0 = xd->per_interface_next_index; else - next0 = dpdk_rx_next_from_etype (mb0, b0); + next0 = dpdk_rx_next_from_etype (mb0); dpdk_rx_error_from_mb (mb0, &next0, &error0); - - vlib_buffer_advance (b0, device_input_next_node_advance[next0]); - + b0->error = node->errors[error0]; + + offset0 = device_input_next_node_advance[next0]; + b0->current_data = mb0->data_off + offset0 - RTE_PKTMBUF_HEADROOM; + b0->flags |= device_input_next_node_flags[next0]; + vnet_buffer (b0)->l3_hdr_offset = b0->current_data; + vnet_buffer (b0)->l2_hdr_offset = + mb0->data_off - RTE_PKTMBUF_HEADROOM; + b0->current_length = mb0->data_len - offset0; n_rx_bytes += mb0->pkt_len; /* Process subsequent segments of multi-segment packets */ @@ -516,20 +503,22 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, vlib_put_next_frame (vm, node, next_index, n_left_to_next); } - if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0)) + if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[thread_index]) > 0)) { - dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index], - vec_len (xd->d_trace_buffers[cpu_index])); - vlib_set_trace_count (vm, node, n_trace - - vec_len (xd->d_trace_buffers[cpu_index])); + dpdk_rx_trace (dm, node, xd, queue_id, + xd->d_trace_buffers[thread_index], + vec_len (xd->d_trace_buffers[thread_index])); + vlib_set_trace_count (vm, node, + n_trace - + vec_len (xd->d_trace_buffers[thread_index])); } vlib_increment_combined_counter (vnet_get_main ()->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); + thread_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); - vnet_device_increment_rx_packets (cpu_index, mb_index); + vnet_device_increment_rx_packets (thread_index, mb_index); return mb_index; } @@ -593,26 +582,30 @@ poll_rate_limit (dpdk_main_t * dm) xd->per_interface_next_index */ -static uword -dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) +uword +CLIB_MULTIARCH_FN (dpdk_input) (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * f) { dpdk_main_t *dm = &dpdk_main; dpdk_device_t *xd; uword n_rx_packets = 0; - dpdk_device_and_queue_t *dq; - u32 cpu_index = os_get_cpu_number (); + vnet_device_input_runtime_t *rt = (void *) node->runtime_data; + vnet_device_and_queue_t *dq; + u32 thread_index = node->thread_index; /* * Poll all devices on this cpu for input/interrupts. */ /* *INDENT-OFF* */ - vec_foreach (dq, dm->devices_by_cpu[cpu_index]) + foreach_device_and_queue (dq, rt->devices_and_queues) { - xd = vec_elt_at_index(dm->devices, dq->device); + xd = vec_elt_at_index(dm->devices, dq->dev_instance); + if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE)) + continue; /* Do not poll slave to a bonded interface */ if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 1); + n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 1); else - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 0); + n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 0); } /* *INDENT-ON* */ @@ -621,6 +614,7 @@ dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) return n_rx_packets; } +#ifndef CLIB_MULTIARCH_VARIANT /* *INDENT-OFF* */ VLIB_REGISTER_NODE (dpdk_input_node) = { .function = dpdk_input, @@ -637,10 +631,23 @@ VLIB_REGISTER_NODE (dpdk_input_node) = { .n_errors = DPDK_N_ERROR, .error_strings = dpdk_error_strings, }; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input); /* *INDENT-ON* */ +vlib_node_function_t __clib_weak dpdk_input_avx512; +vlib_node_function_t __clib_weak dpdk_input_avx2; + +#if __x86_64__ +static void __clib_constructor +dpdk_input_multiarch_select (void) +{ + if (dpdk_input_avx512 && clib_cpu_supports_avx512f ()) + dpdk_input_node.function = dpdk_input_avx512; + else if (dpdk_input_avx2 && clib_cpu_supports_avx2 ()) + dpdk_input_node.function = dpdk_input_avx2; +} +#endif +#endif + /* * fd.io coding-style-patch-verification: ON *