X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fdpdk%2Fdevice%2Fnode.c;h=602891dbabe0907f38985632cbb2ef6da12f0da6;hb=af86a48;hp=7bb1fb3c9c190731614706458dfebd978d2bf6fa;hpb=af05bc018a465a993e795d7c81c2eb12d5b77e44;p=vpp.git diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index 7bb1fb3c9c1..602891dbabe 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -28,7 +28,7 @@ #include -#ifndef CLIB_MULTIARCH_VARIANT +#ifndef CLIB_MARCH_VARIANT static char *dpdk_error_strings[] = { #define _(n,s) s, foreach_dpdk_error @@ -213,65 +213,6 @@ poll_rate_limit (dpdk_main_t * dm) xd->per_interface_next_index */ -static_always_inline void -dpdk_mbufs_to_buffer_indices (vlib_main_t * vm, struct rte_mbuf **mb, - u32 * bi, uword n_left) -{ -#ifdef CLIB_HAVE_VEC256 - u32x8 mask = { 0, 2, 4, 6, 1, 3, 5, 7 }; - u64x4 off4 = u64x4_splat (buffer_main.buffer_mem_start - - sizeof (struct rte_mbuf)); -#endif - - while (n_left >= 8) - { -#ifdef CLIB_HAVE_VEC256 - /* load 4 pointers into 256-bit register */ - u64x4 v0 = u64x4_load_unaligned (mb); - u64x4 v1 = u64x4_load_unaligned (mb + 4); - u32x8 v2, v3; - - /* calculate 4 buffer indices in parallel - vlib_buffer_t is straight after rte_mbuf so advance all 4 - pointers for size of rte_mbuf */ - v0 -= off4; - v1 -= off4; - - v0 >>= CLIB_LOG2_CACHE_LINE_BYTES; - v1 >>= CLIB_LOG2_CACHE_LINE_BYTES; - - /* permute 256-bit register so lower u32s of each buffer index are - * placed into lower 128-bits */ - v2 = u32x8_permute ((u32x8) v0, mask); - v3 = u32x8_permute ((u32x8) v1, mask); - - /* extract lower 128-bits and save them to the array of buffer indices */ - u32x4_store_unaligned (u32x8_extract_lo (v2), bi); - u32x4_store_unaligned (u32x8_extract_lo (v3), bi + 4); -#else - /* equivalent non-nector implementation */ - bi[0] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[0])); - bi[1] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[1])); - bi[2] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[2])); - bi[3] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[3])); - bi[4] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[4])); - bi[5] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[5])); - bi[6] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[6])); - bi[7] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[7])); -#endif - bi += 8; - mb += 8; - n_left -= 8; - } - while (n_left) - { - bi[0] = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (mb[0])); - bi += 1; - mb += 1; - n_left -= 1; - } -} - static_always_inline u8 dpdk_ol_flags_extract (struct rte_mbuf **mb, u8 * flags, int count) { @@ -474,6 +415,40 @@ dpdk_set_next_from_etype (vlib_main_t * vm, vlib_node_runtime_t * node, } } +static_always_inline void +dpdk_process_flow_offload (dpdk_device_t * xd, dpdk_per_thread_data_t * ptd, + uword n_rx_packets) +{ + uword n; + dpdk_flow_lookup_entry_t *fle; + vlib_buffer_t *b0; + + /* TODO prefetch and quad-loop */ + for (n = 0; n < n_rx_packets; n++) + { + if ((ptd->flags[n] & (1 << DPDK_RX_F_FDIR)) == 0) + continue; + + fle = vec_elt_at_index (xd->flow_lookup_entries, + ptd->mbufs[n]->hash.fdir.hi); + + if (fle->next_index != (u16) ~ 0) + ptd->next[n] = fle->next_index; + + if (fle->flow_id != ~0) + { + b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]); + b0->flow_id = fle->flow_id; + } + + if (fle->buffer_advance != ~0) + { + b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]); + vlib_buffer_advance (b0, fle->buffer_advance); + } + } +} + static_always_inline u32 dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, vlib_node_runtime_t * node, u32 thread_index, u16 queue_id) @@ -499,7 +474,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, /* get up to DPDK_RX_BURST_SZ buffers from PMD */ while (n_rx_packets < DPDK_RX_BURST_SZ) { - n = rte_eth_rx_burst (xd->device_index, queue_id, + n = rte_eth_rx_burst (xd->port_id, queue_id, ptd->mbufs + n_rx_packets, DPDK_RX_BURST_SZ - n_rx_packets); n_rx_packets += n; @@ -525,7 +500,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, next_index = xd->per_interface_next_index; } - /* as all packets belong to thr same interface feature arc lookup + /* as all packets belong to the same interface feature arc lookup can be don once and result stored in the buffer template */ if (PREDICT_FALSE (vnet_device_input_have_features (xd->sw_if_index))) { @@ -549,6 +524,12 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, else dpdk_set_next_from_etype (vm, node, ptd, n_rx_packets); + /* flow offload - process if rx flow offlaod enabled and at least one packet + is marked */ + if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) && + (or_flags & (1 << DPDK_RX_F_FDIR)))) + dpdk_process_flow_offload (xd, ptd, n_rx_packets); + /* is at least one packet marked as ip4 checksum bad? */ if (PREDICT_FALSE (or_flags & (1 << DPDK_RX_F_CKSUM_BAD))) for (n = 0; n < n_rx_packets; n++) @@ -564,85 +545,15 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, } /* enqueue buffers to the next node */ - dpdk_mbufs_to_buffer_indices (vm, ptd->mbufs, ptd->buffers, n_rx_packets); - n_left = n_rx_packets; - next = ptd->next; - buffers = ptd->buffers; - mb = ptd->mbufs; - while (n_left) - { - u32 n_left_to_next; - u32 *to_next; - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); -#ifdef CLIB_HAVE_VEC256 - while (n_left >= 16 && n_left_to_next >= 16) - { - u16x16 next16 = u16x16_load_unaligned (next); - if (u16x16_is_all_equal (next16, next_index)) - { - clib_memcpy (to_next, buffers, 16 * sizeof (u32)); - to_next += 16; - n_left_to_next -= 16; - buffers += 16; - n_left -= 16; - next += 16; - mb += 16; - } - else - { - clib_memcpy (to_next, buffers, 4 * sizeof (u32)); - to_next += 4; - n_left_to_next -= 4; - - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, - n_left_to_next, buffers[0], - buffers[1], buffers[2], - buffers[3], next[0], next[1], - next[2], next[3]); - /* next */ - buffers += 4; - n_left -= 4; - next += 4; - mb += 4; - } - } -#endif - while (n_left >= 4 && n_left_to_next >= 4) - { - clib_memcpy (to_next, buffers, 4 * sizeof (u32)); - to_next += 4; - n_left_to_next -= 4; - - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, - n_left_to_next, buffers[0], - buffers[1], buffers[2], buffers[3], - next[0], next[1], next[2], - next[3]); - /* next */ - buffers += 4; - n_left -= 4; - next += 4; - mb += 4; - } - while (n_left && n_left_to_next) - { - clib_memcpy (to_next, buffers, 1 * sizeof (u32)); - to_next += 1; - n_left_to_next -= 1; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, buffers[0], - next[0]); - /* next */ - buffers += 1; - n_left -= 1; - next += 1; - mb += 1; - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } + vlib_get_buffer_indices_with_offset (vm, (void **) ptd->mbufs, ptd->buffers, + n_rx_packets, + sizeof (struct rte_mbuf)); + + vlib_buffer_enqueue_to_next (vm, node, ptd->buffers, ptd->next, + n_rx_packets); /* packet trace if enabled */ - if ((n_trace = vlib_get_trace_count (vm, node))) + if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)))) { n_left = n_rx_packets; buffers = ptd->buffers; @@ -673,6 +584,48 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, vlib_set_trace_count (vm, node, n_trace); } + /* rx pcap capture if enabled */ + if (PREDICT_FALSE (dm->pcap[VLIB_RX].pcap_enable)) + { + u32 bi0; + n_left = n_rx_packets; + buffers = ptd->buffers; + while (n_left) + { + bi0 = buffers[0]; + b0 = vlib_get_buffer (vm, bi0); + buffers++; + + if (dm->pcap[VLIB_RX].pcap_sw_if_index == 0 || + dm->pcap[VLIB_RX].pcap_sw_if_index + == vnet_buffer (b0)->sw_if_index[VLIB_RX]) + { + struct rte_mbuf *mb; + i16 data_start; + i32 temp_advance; + + /* + * Note: current_data will have advanced + * when we skip ethernet input. + * Temporarily back up to the original DMA + * target, so we capture a valid ethernet frame + */ + mb = rte_mbuf_from_vlib_buffer (b0); + + /* Figure out the original data_start */ + data_start = (mb->buf_addr + mb->data_off) - (void *) b0->data; + /* Back up that far */ + temp_advance = b0->current_data - data_start; + vlib_buffer_advance (b0, -temp_advance); + /* Trace the packet */ + pcap_add_buffer (&dm->pcap[VLIB_RX].pcap_main, vm, bi0, 512); + /* and advance again */ + vlib_buffer_advance (b0, temp_advance); + } + n_left--; + } + } + vlib_increment_combined_counter (vnet_get_main ()->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, thread_index, xd->sw_if_index, @@ -683,8 +636,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, return n_rx_packets; } -uword CLIB_CPU_OPTIMIZED -CLIB_MULTIARCH_FN (dpdk_input) (vlib_main_t * vm, vlib_node_runtime_t * node, +VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) { dpdk_main_t *dm = &dpdk_main; @@ -713,10 +665,9 @@ CLIB_MULTIARCH_FN (dpdk_input) (vlib_main_t * vm, vlib_node_runtime_t * node, return n_rx_packets; } -#ifndef CLIB_MULTIARCH_VARIANT +#ifndef CLIB_MARCH_VARIANT /* *INDENT-OFF* */ VLIB_REGISTER_NODE (dpdk_input_node) = { - .function = dpdk_input, .type = VLIB_NODE_TYPE_INPUT, .name = "dpdk-input", .sibling_of = "device-input", @@ -731,20 +682,6 @@ VLIB_REGISTER_NODE (dpdk_input_node) = { .error_strings = dpdk_error_strings, }; /* *INDENT-ON* */ - -vlib_node_function_t __clib_weak dpdk_input_avx512; -vlib_node_function_t __clib_weak dpdk_input_avx2; - -#if __x86_64__ -static void __clib_constructor -dpdk_input_multiarch_select (void) -{ - if (dpdk_input_avx512 && clib_cpu_supports_avx512f ()) - dpdk_input_node.function = dpdk_input_avx512; - else if (dpdk_input_avx2 && clib_cpu_supports_avx2 ()) - dpdk_input_node.function = dpdk_input_avx2; -} -#endif #endif /*