X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fmemif%2Fdevice.c;h=28bf8259311ca0220da8f6f3c3bc60045d591c8c;hb=6aa9f5f8dc14cec4005b8bac33b5a8697168548e;hp=112db57b4b468787340f83a9aa24c0f6a2e35f0b;hpb=5c37ce3e0264c0bec75610837c5819ff4407bd5c;p=vpp.git

diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c
index 112db57b4b4..28bf8259311 100644
--- a/src/plugins/memif/device.c
+++ b/src/plugins/memif/device.c
@@ -40,13 +40,13 @@ typedef enum
   MEMIF_TX_N_ERROR,
 } memif_tx_func_error_t;
 
-static __clib_unused char *memif_tx_func_error_strings[] = {
+static char *memif_tx_func_error_strings[] = {
 #define _(n,s) s,
   foreach_memif_tx_func_error
 #undef _
 };
 
-#ifndef CLIB_MULTIARCH_VARIANT
+#ifndef CLIB_MARCH_VARIANT
 u8 *
 format_memif_device_name (u8 * s, va_list * args)
 {
@@ -61,7 +61,7 @@ format_memif_device_name (u8 * s, va_list * args)
 }
 #endif
 
-static __clib_unused u8 *
+static u8 *
 format_memif_device (u8 * s, va_list * args)
 {
   u32 dev_instance = va_arg (*args, u32);
@@ -77,7 +77,7 @@ format_memif_device (u8 * s, va_list * args)
   return s;
 }
 
-static __clib_unused u8 *
+static u8 *
 format_memif_tx_trace (u8 * s, va_list * args)
 {
   s = format (s, "Unimplemented...");
@@ -99,35 +99,20 @@ memif_add_copy_op (memif_per_thread_data_t * ptd, void *data, u32 len,
 static_always_inline uword
 memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                            vlib_frame_t * frame, memif_if_t * mif,
-                           memif_ring_type_t type)
+                           memif_ring_type_t type, memif_queue_t * mq,
+                           memif_per_thread_data_t * ptd)
 {
-  u8 qid;
   memif_ring_t *ring;
-  u32 *buffers = vlib_frame_args (frame);
+  u32 *buffers = vlib_frame_vector_args (frame);
   u32 n_left = frame->n_vectors;
   u32 n_copy_op;
   u16 ring_size, mask, slot, free_slots;
-  u32 thread_index = vlib_get_thread_index ();
-  memif_per_thread_data_t *ptd = vec_elt_at_index (memif_main.per_thread_data,
-                                                   thread_index);
-  u8 tx_queues = vec_len (mif->tx_queues);
-  memif_queue_t *mq;
   int n_retries = 5;
   vlib_buffer_t *b0, *b1, *b2, *b3;
   memif_copy_op_t *co;
   memif_region_index_t last_region = ~0;
   void *last_region_shm = 0;
 
-  if (tx_queues < vec_len (vlib_mains))
-    {
-      ASSERT (tx_queues > 0);
-      qid = thread_index % tx_queues;
-      clib_spinlock_lock_if_init (&mif->lockp);
-    }
-  else
-    qid = thread_index;
-
-  mq = vec_elt_at_index (mif->tx_queues, qid);
   ring = mq->ring;
   ring_size = 1 << mq->log2_ring_size;
   mask = ring_size - 1;
@@ -254,14 +239,14 @@ no_free_slots:
 	  b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]);
 	  b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]);
 
-	  clib_memcpy (co[0].data, b0->data + co[0].buffer_offset,
-		       co[0].data_len);
-	  clib_memcpy (co[1].data, b1->data + co[1].buffer_offset,
-		       co[1].data_len);
-	  clib_memcpy (co[2].data, b2->data + co[2].buffer_offset,
-		       co[2].data_len);
-	  clib_memcpy (co[3].data, b3->data + co[3].buffer_offset,
-		       co[3].data_len);
+	  clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+			    co[0].data_len);
+	  clib_memcpy_fast (co[1].data, b1->data + co[1].buffer_offset,
+			    co[1].data_len);
+	  clib_memcpy_fast (co[2].data, b2->data + co[2].buffer_offset,
+			    co[2].data_len);
+	  clib_memcpy_fast (co[3].data, b3->data + co[3].buffer_offset,
+			    co[3].data_len);
 
 	  co += 4;
 	  n_copy_op -= 4;
@@ -269,8 +254,8 @@ no_free_slots:
       while (n_copy_op)
 	{
 	  b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
-	  clib_memcpy (co[0].data, b0->data + co[0].buffer_offset,
-		       co[0].data_len);
+	  clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+			    co[0].data_len);
 	  co += 1;
 	  n_copy_op -= 1;
 	}
@@ -302,27 +287,152 @@ no_free_slots:
       mq->int_count++;
     }
 
-  vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
+  vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
 
   return frame->n_vectors;
 }
 
-uword
-CLIB_MULTIARCH_FN (memif_interface_tx) (vlib_main_t * vm,
-					vlib_node_runtime_t * node,
-					vlib_frame_t * frame)
+static_always_inline uword
+memif_interface_tx_zc_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+			      vlib_frame_t * frame, memif_if_t * mif,
+			      memif_queue_t * mq,
+			      memif_per_thread_data_t * ptd)
+{
+  memif_ring_t *ring = mq->ring;
+  u32 *buffers = vlib_frame_vector_args (frame);
+  u32 n_left = frame->n_vectors;
+  u16 slot, free_slots, n_free;
+  u16 ring_size = 1 << mq->log2_ring_size;
+  u16 mask = ring_size - 1;
+  int n_retries = 5;
+  vlib_buffer_t *b0;
+
+retry:
+  n_free = ring->tail - mq->last_tail;
+  if (n_free >= 16)
+    {
+      vlib_buffer_free_from_ring_no_next (vm, mq->buffers,
+					  mq->last_tail & mask,
+					  ring_size, n_free);
+      mq->last_tail += n_free;
+    }
+
+  slot = ring->head;
+  free_slots = ring_size - ring->head + mq->last_tail;
+
+  while (n_left && free_slots)
+    {
+      u16 s0;
+      u16 slots_in_packet = 1;
+      memif_desc_t *d0;
+      u32 bi0;
+
+      CLIB_PREFETCH (&ring->desc[(slot + 8) & mask], CLIB_CACHE_LINE_BYTES,
+		     STORE);
+
+      if (PREDICT_TRUE (n_left >= 4))
+	vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);
+
+      bi0 = buffers[0];
+
+    next_in_chain:
+      s0 = slot & mask;
+      d0 = &ring->desc[s0];
+      mq->buffers[s0] = bi0;
+      b0 = vlib_get_buffer (vm, bi0);
+
+      d0->region = b0->buffer_pool_index + 1;
+      d0->offset = (void *) b0->data + b0->current_data -
+	mif->regions[d0->region].shm;
+      d0->length = b0->current_length;
+
+      free_slots--;
+      slot++;
+
+      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT))
+	{
+	  if (PREDICT_FALSE (free_slots == 0))
+	    {
+	      /* revert to last fully processed packet */
+	      free_slots += slots_in_packet;
+	      slot -= slots_in_packet;
+	      goto no_free_slots;
+	    }
+
+	  d0->flags = MEMIF_DESC_FLAG_NEXT;
+	  bi0 = b0->next_buffer;
+
+	  /* next */
+	  slots_in_packet++;
+	  goto next_in_chain;
+	}
+
+      d0->flags = 0;
+
+      /* next from */
+      buffers++;
+      n_left--;
+    }
+no_free_slots:
+
+  CLIB_MEMORY_STORE_BARRIER ();
+  ring->head = slot;
+
+  if (n_left && n_retries--)
+    goto retry;
+
+  clib_spinlock_unlock_if_init (&mif->lockp);
+
+  if (n_left)
+    {
+      vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS,
+			n_left);
+      vlib_buffer_free (vm, buffers, n_left);
+    }
+
+  if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1)
+    {
+      u64 b = 1;
+      CLIB_UNUSED (int r) = write (mq->int_fd, &b, sizeof (b));
+      mq->int_count++;
+    }
+
+  return frame->n_vectors;
+}
+
+VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
+					      vlib_node_runtime_t * node,
+					      vlib_frame_t * frame)
 {
   memif_main_t *nm = &memif_main;
   vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
   memif_if_t *mif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
+  memif_queue_t *mq;
+  u32 thread_index = vm->thread_index;
+  memif_per_thread_data_t *ptd = vec_elt_at_index (memif_main.per_thread_data,
+						   thread_index);
+  u8 tx_queues = vec_len (mif->tx_queues);
+
+  if (tx_queues < vec_len (vlib_mains))
+    {
+      ASSERT (tx_queues > 0);
+      mq = vec_elt_at_index (mif->tx_queues, thread_index % tx_queues);
+      clib_spinlock_lock_if_init (&mif->lockp);
+    }
+  else
+    mq = vec_elt_at_index (mif->tx_queues, thread_index);
 
-  if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
-    return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_S2M);
+  if (mif->flags & MEMIF_IF_FLAG_ZERO_COPY)
+    return memif_interface_tx_zc_inline (vm, node, frame, mif, mq, ptd);
+  else if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
+    return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_S2M,
+				      mq, ptd);
   else
-    return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_M2S);
+    return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_M2S,
+				      mq, ptd);
 }
 
-static __clib_unused void
+static void
 memif_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
 			       u32 node_index)
 {
@@ -341,13 +451,13 @@ memif_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
   vlib_node_add_next (vlib_get_main (), memif_input_node.index, node_index);
 }
 
-static __clib_unused void
+static void
 memif_clear_hw_interface_counters (u32 instance)
 {
   /* Nothing for now */
 }
 
-static __clib_unused clib_error_t *
+static clib_error_t *
 memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
 				vnet_hw_interface_rx_mode mode)
 {
@@ -364,7 +474,7 @@ memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
   return 0;
 }
 
-static __clib_unused clib_error_t *
+static clib_error_t *
 memif_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
 {
   memif_main_t *mm = &memif_main;
@@ -380,7 +490,7 @@ memif_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
   return error;
 }
 
-static __clib_unused clib_error_t *
+static clib_error_t *
 memif_subif_add_del_function (vnet_main_t * vnm,
 			      u32 hw_if_index,
 			      struct vnet_sw_interface_t *st, int is_add)
@@ -389,11 +499,9 @@ memif_subif_add_del_function (vnet_main_t * vnm,
   return 0;
 }
 
-#ifndef CLIB_MULTIARCH_VARIANT
 /* *INDENT-OFF* */
 VNET_DEVICE_CLASS (memif_device_class) = {
   .name = "memif",
-  .tx_function = memif_interface_tx,
   .format_device_name = format_memif_device_name,
   .format_device = format_memif_device,
   .format_tx_trace = format_memif_tx_trace,
@@ -406,20 +514,6 @@ VNET_DEVICE_CLASS (memif_device_class) = {
   .rx_mode_change_function = memif_interface_rx_mode_change,
 };
 
-#if __x86_64__
-vlib_node_function_t __clib_weak memif_interface_tx_avx512;
-vlib_node_function_t __clib_weak memif_interface_tx_avx2;
-static void __clib_constructor
-dpdk_interface_tx_multiarch_select (void)
-{
-  if (memif_interface_tx_avx512 && clib_cpu_supports_avx512f ())
-    memif_device_class.tx_function = memif_interface_tx_avx512;
-  else if (memif_interface_tx_avx2 && clib_cpu_supports_avx2 ())
-    memif_device_class.tx_function = memif_interface_tx_avx2;
-}
-#endif
-#endif
-
 /* *INDENT-ON* */
 
 /*
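Annotation (not part of the patch): the queue-selection policy that this change hoists out of memif_interface_tx_inline and into the VNET_DEVICE_CLASS_TX_FN entry point can be modeled in plain C. The stand-alone sketch below is hypothetical; pick_tx_queue and its parameters are illustrative names, not VPP API. It shows why the per-interface spinlock is taken only when TX queues are shared across threads.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified model of the TX queue selection done in the patched
 * VNET_DEVICE_CLASS_TX_FN: when there are fewer TX queues than worker
 * threads, queues are assigned round-robin and must be protected by the
 * per-interface spinlock; with one queue per thread, TX is lock-free. */
static uint32_t
pick_tx_queue (uint32_t thread_index, uint32_t n_tx_queues,
               uint32_t n_threads, bool * need_lock)
{
  if (n_tx_queues < n_threads)
    {
      assert (n_tx_queues > 0);
      *need_lock = true;        /* queue shared by several threads */
      return thread_index % n_tx_queues;
    }
  *need_lock = false;           /* dedicated queue per thread */
  return thread_index;
}

int
main (void)
{
  bool lock;
  /* 4 worker threads, 2 TX queues: threads 0/2 share queue 0,
   * threads 1/3 share queue 1, and both pairs must lock. */
  for (uint32_t t = 0; t < 4; t++)
    printf ("thread %u -> queue %u (lock=%d)\n", t,
            pick_tx_queue (t, 2, 4, &lock), (int) lock);
  return 0;
}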
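A second annotation, also hypothetical: the "revert to last fully processed packet" rollback in memif_interface_tx_zc_inline guarantees that a chained packet is either fully described in the ring or not published at all. The toy model below (plain C with simplified types, not VPP code; desc_t stands in for memif_desc_t) reproduces that invariant for a power-of-two ring. Its check runs before each descriptor write rather than after, as in the patch, but the net effect is the same: head never advances past a partial chain.

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8             /* must be a power of two */
#define MASK (RING_SIZE - 1)

/* Minimal stand-in for memif_desc_t: only the NEXT flag matters here. */
typedef struct { uint16_t flags; } desc_t;
#define DESC_FLAG_NEXT 1

/* Enqueue one packet spanning n_segs descriptors. If the ring fills
 * mid-chain, rewind slot and free_slots by the slots already consumed
 * for this packet, so no partial chain is ever left in the ring. */
static int
enqueue_chain (desc_t * ring, uint16_t * slot, uint16_t * free_slots,
               int n_segs)
{
  uint16_t slots_in_packet = 0;
  while (n_segs--)
    {
      if (*free_slots == 0)
        {
          /* revert to last fully processed packet */
          *free_slots += slots_in_packet;
          *slot -= slots_in_packet;
          return -1;
        }
      /* chain descriptors with the NEXT flag, clear it on the last one */
      ring[*slot & MASK].flags = n_segs ? DESC_FLAG_NEXT : 0;
      (*slot)++;
      (*free_slots)--;
      slots_in_packet++;
    }
  return 0;
}

int
main (void)
{
  desc_t ring[RING_SIZE];
  uint16_t slot = 0, free_slots = RING_SIZE;
  /* first packet fits; second needs 6 slots but only 5 remain,
   * so it is rolled back and slot stays at 3 */
  printf ("3-seg packet: %d (slot=%u)\n",
          enqueue_chain (ring, &slot, &free_slots, 3), slot);
  printf ("6-seg packet: %d (slot=%u)\n",
          enqueue_chain (ring, &slot, &free_slots, 6), slot);
  return 0;
}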