diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c
index 70bdb48ba9c..28bf8259311 100644
--- a/src/plugins/memif/device.c
+++ b/src/plugins/memif/device.c
@@ -26,10 +26,11 @@
 #include <vnet/ethernet/ethernet.h>
 
 #include <memif/memif.h>
+#include <memif/private.h>
 
 #define foreach_memif_tx_func_error \
 _(NO_FREE_SLOTS, "no free tx slots") \
-_(PENDING_MSGS, "pending msgs in tx ring")
+_(ROLLBACK, "not enough space in tx buffers")
 
 typedef enum
 {
@@ -45,22 +46,27 @@ static char *memif_tx_func_error_strings[] = {
 #undef _
 };
 
-
-static u8 *
+#ifndef CLIB_MARCH_VARIANT
+u8 *
 format_memif_device_name (u8 * s, va_list * args)
 {
-  u32 i = va_arg (*args, u32);
+  u32 dev_instance = va_arg (*args, u32);
+  memif_main_t *mm = &memif_main;
+  memif_if_t *mif = pool_elt_at_index (mm->interfaces, dev_instance);
+  memif_socket_file_t *msf;
 
-  s = format (s, "memif%u", i);
+  msf = pool_elt_at_index (mm->socket_files, mif->socket_file_index);
+  s = format (s, "memif%lu/%lu", msf->socket_id, mif->id);
   return s;
 }
+#endif
 
 static u8 *
 format_memif_device (u8 * s, va_list * args)
 {
   u32 dev_instance = va_arg (*args, u32);
   int verbose = va_arg (*args, int);
-  uword indent = format_get_indent (s);
+  u32 indent = format_get_indent (s);
 
   s = format (s, "MEMIF interface");
   if (verbose)
@@ -79,119 +85,301 @@ format_memif_tx_trace (u8 * s, va_list * args)
 }
 
 static_always_inline void
-memif_prefetch_buffer_and_data (vlib_main_t * vm, u32 bi)
+memif_add_copy_op (memif_per_thread_data_t * ptd, void *data, u32 len,
+                   u16 buffer_offset, u16 buffer_vec_index)
 {
-  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
-  vlib_prefetch_buffer_header (b, LOAD);
-  CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
+  memif_copy_op_t *co;
+  vec_add2_aligned (ptd->copy_ops, co, 1, CLIB_CACHE_LINE_BYTES);
+  co->data = data;
+  co->data_len = len;
+  co->buffer_offset = buffer_offset;
+  co->buffer_vec_index = buffer_vec_index;
 }
 
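Note that memif_add_copy_op does no copying itself; it only records a pending copy, and the TX routine below executes all recorded copies in one batched, four-wide unrolled loop after the descriptor walk is done. A minimal standalone sketch of that gather-then-execute pattern follows; the names (copy_op_t, ops, ring_slot) are illustrative and not part of the memif API:

#include <stdio.h>
#include <string.h>

typedef struct
{
  void *dst;        /* destination inside the shared ring buffer */
  const void *src;  /* source fragment in the packet buffer */
  size_t len;
} copy_op_t;

int
main (void)
{
  char ring_slot[16] = { 0 };
  const char frag_a[] = "hello ";
  const char frag_b[] = "memif";
  copy_op_t ops[8];
  int n_ops = 0;

  /* phase 1: walk descriptors/buffers and only record the copies */
  ops[n_ops++] = (copy_op_t) { ring_slot, frag_a, 6 };
  ops[n_ops++] = (copy_op_t) { ring_slot + 6, frag_b, 6 }; /* incl. NUL */

  /* phase 2: execute all recorded copies back to back */
  for (int i = 0; i < n_ops; i++)
    memcpy (ops[i].dst, ops[i].src, ops[i].len);

  puts (ring_slot); /* prints "hello memif" */
  return 0;
}

Separating the walk from the copies keeps the parsing loop small and lets the copy loop prefetch and unroll, which is why the diff processes copy ops four at a time.
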
 static_always_inline uword
 memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                            vlib_frame_t * frame, memif_if_t * mif,
-                           memif_ring_type_t type)
+                           memif_ring_type_t type, memif_queue_t * mq,
+                           memif_per_thread_data_t * ptd)
 {
-  u8 rid = 0;
-  memif_ring_t *ring = memif_get_ring (mif, type, rid);
-  u32 *buffers = vlib_frame_args (frame);
+  memif_ring_t *ring;
+  u32 *buffers = vlib_frame_vector_args (frame);
   u32 n_left = frame->n_vectors;
-  u16 ring_size = 1 << mif->log2_ring_size;
-  u16 mask = ring_size - 1;
-  u16 head, tail;
-  u16 free_slots;
-
-  clib_spinlock_lock_if_init (&mif->lockp);
-
-  /* free consumed buffers */
-
-  head = ring->head;
-  tail = ring->tail;
-
-  if (tail > head)
-    free_slots = tail - head;
-  else
-    free_slots = ring_size - head + tail;
+  u32 n_copy_op;
+  u16 ring_size, mask, slot, free_slots;
+  int n_retries = 5;
+  vlib_buffer_t *b0, *b1, *b2, *b3;
+  memif_copy_op_t *co;
+  memif_region_index_t last_region = ~0;
+  void *last_region_shm = 0;
+
+  ring = mq->ring;
+  ring_size = 1 << mq->log2_ring_size;
+  mask = ring_size - 1;
+
+retry:
+
+  free_slots = ring->tail - mq->last_tail;
+  mq->last_tail += free_slots;
+  slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
+
+  if (type == MEMIF_RING_S2M)
+    free_slots = ring_size - ring->head + mq->last_tail;
+  else
+    free_slots = ring->head - ring->tail;
 
-  while (n_left > 5 && free_slots > 1)
+  while (n_left && free_slots)
     {
-      if (PREDICT_TRUE (head + 5 < ring_size))
-        {
-          CLIB_PREFETCH (memif_get_buffer (mif, ring, head + 2),
-                         CLIB_CACHE_LINE_BYTES, STORE);
-          CLIB_PREFETCH (memif_get_buffer (mif, ring, head + 3),
-                         CLIB_CACHE_LINE_BYTES, STORE);
-          CLIB_PREFETCH (&ring->desc[head + 4], CLIB_CACHE_LINE_BYTES, STORE);
-          CLIB_PREFETCH (&ring->desc[head + 5], CLIB_CACHE_LINE_BYTES, STORE);
-        }
-      else
+      memif_desc_t *d0;
+      void *mb0;
+      i32 src_off;
+      u32 bi0, dst_off, src_left, dst_left, bytes_to_copy;
+      u32 saved_ptd_copy_ops_len = _vec_len (ptd->copy_ops);
+      u32 saved_ptd_buffers_len = _vec_len (ptd->buffers);
+      u16 saved_slot = slot;
+
+      CLIB_PREFETCH (&ring->desc[(slot + 8) & mask], CLIB_CACHE_LINE_BYTES,
+                     LOAD);
+
+      d0 = &ring->desc[slot & mask];
+      if (PREDICT_FALSE (last_region != d0->region))
         {
-          CLIB_PREFETCH (memif_get_buffer (mif, ring, (head + 2) % mask),
-                         CLIB_CACHE_LINE_BYTES, STORE);
-          CLIB_PREFETCH (memif_get_buffer (mif, ring, (head + 3) % mask),
-                         CLIB_CACHE_LINE_BYTES, STORE);
-          CLIB_PREFETCH (&ring->desc[(head + 4) % mask],
-                         CLIB_CACHE_LINE_BYTES, STORE);
-          CLIB_PREFETCH (&ring->desc[(head + 5) % mask],
-                         CLIB_CACHE_LINE_BYTES, STORE);
+          last_region_shm = mif->regions[d0->region].shm;
+          last_region = d0->region;
         }
+      mb0 = last_region_shm + d0->offset;
 
-      memif_prefetch_buffer_and_data (vm, buffers[2]);
-      memif_prefetch_buffer_and_data (vm, buffers[3]);
+      dst_off = 0;
 
-      vlib_buffer_t *b0 = vlib_get_buffer (vm, buffers[0]);
-      vlib_buffer_t *b1 = vlib_get_buffer (vm, buffers[1]);
+      /* slave is the producer, so it should be able to reset buffer length */
+      dst_left = (type == MEMIF_RING_S2M) ? mif->run.buffer_size : d0->length;
 
-      void *mb0 = memif_get_buffer (mif, ring, head);
-      clib_memcpy (mb0, vlib_buffer_get_current (b0), CLIB_CACHE_LINE_BYTES);
-      ring->desc[head].length = b0->current_length;
-      head = (head + 1) & mask;
+      if (PREDICT_TRUE (n_left >= 4))
+        vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);
+      bi0 = buffers[0];
 
-      void *mb1 = memif_get_buffer (mif, ring, head);
-      clib_memcpy (mb1, vlib_buffer_get_current (b1), CLIB_CACHE_LINE_BYTES);
-      ring->desc[head].length = b1->current_length;
-      head = (head + 1) & mask;
+    next_in_chain:
 
-      if (b0->current_length > CLIB_CACHE_LINE_BYTES)
+      b0 = vlib_get_buffer (vm, bi0);
+      src_off = b0->current_data;
+      src_left = b0->current_length;
+
+      while (src_left)
         {
-          clib_memcpy (mb0 + CLIB_CACHE_LINE_BYTES,
-                       vlib_buffer_get_current (b0) + CLIB_CACHE_LINE_BYTES,
-                       b0->current_length - CLIB_CACHE_LINE_BYTES);
+          if (PREDICT_FALSE (dst_left == 0))
+            {
+              if (free_slots)
+                {
+                  slot++;
+                  free_slots--;
+                  d0->flags = MEMIF_DESC_FLAG_NEXT;
+                  d0 = &ring->desc[slot & mask];
+                  dst_off = 0;
+                  dst_left =
+                    (type == MEMIF_RING_S2M) ? mif->run.buffer_size :
+                    d0->length;
+
+                  if (PREDICT_FALSE (last_region != d0->region))
+                    {
+                      last_region_shm = mif->regions[d0->region].shm;
+                      last_region = d0->region;
+                    }
+                  mb0 = last_region_shm + d0->offset;
+                }
+              else
+                {
+                  /* we need to rollback vectors before bailing out */
+                  _vec_len (ptd->buffers) = saved_ptd_buffers_len;
+                  _vec_len (ptd->copy_ops) = saved_ptd_copy_ops_len;
+                  vlib_error_count (vm, node->node_index,
+                                    MEMIF_TX_ERROR_ROLLBACK, 1);
+                  slot = saved_slot;
+                  goto no_free_slots;
+                }
+            }
+          bytes_to_copy = clib_min (src_left, dst_left);
+          memif_add_copy_op (ptd, mb0 + dst_off, bytes_to_copy, src_off,
+                             vec_len (ptd->buffers));
+          vec_add1_aligned (ptd->buffers, bi0, CLIB_CACHE_LINE_BYTES);
+          src_off += bytes_to_copy;
+          dst_off += bytes_to_copy;
+          src_left -= bytes_to_copy;
+          dst_left -= bytes_to_copy;
         }
-      if (b1->current_length > CLIB_CACHE_LINE_BYTES)
+
+      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT))
         {
-          clib_memcpy (mb1 + CLIB_CACHE_LINE_BYTES,
-                       vlib_buffer_get_current (b1) + CLIB_CACHE_LINE_BYTES,
-                       b1->current_length - CLIB_CACHE_LINE_BYTES);
+          bi0 = b0->next_buffer;
+          goto next_in_chain;
         }
+      d0->length = dst_off;
+      d0->flags = 0;
+
+      free_slots -= 1;
+      slot += 1;
+
+      buffers++;
+      n_left--;
+    }
+no_free_slots:
+
+  /* copy data */
+  n_copy_op = vec_len (ptd->copy_ops);
+  co = ptd->copy_ops;
+  while (n_copy_op >= 8)
+    {
+      CLIB_PREFETCH (co[4].data, CLIB_CACHE_LINE_BYTES, LOAD);
+      CLIB_PREFETCH (co[5].data, CLIB_CACHE_LINE_BYTES, LOAD);
+      CLIB_PREFETCH (co[6].data, CLIB_CACHE_LINE_BYTES, LOAD);
+      CLIB_PREFETCH (co[7].data, CLIB_CACHE_LINE_BYTES, LOAD);
+
+      b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
+      b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]);
+      b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]);
+      b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]);
+
+      clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+                        co[0].data_len);
+      clib_memcpy_fast (co[1].data, b1->data + co[1].buffer_offset,
+                        co[1].data_len);
+      clib_memcpy_fast (co[2].data, b2->data + co[2].buffer_offset,
+                        co[2].data_len);
+      clib_memcpy_fast (co[3].data, b3->data + co[3].buffer_offset,
+                        co[3].data_len);
+
+      co += 4;
+      n_copy_op -= 4;
+    }
+  while (n_copy_op)
+    {
+      b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
+      clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+                        co[0].data_len);
+      co += 1;
+      n_copy_op -= 1;
+    }
+
+  vec_reset_length (ptd->copy_ops);
+  vec_reset_length (ptd->buffers);
+
+  CLIB_MEMORY_STORE_BARRIER ();
+  if (type == MEMIF_RING_S2M)
+    ring->head = slot;
+  else
+    ring->tail = slot;
+
+  if (n_left && n_retries--)
+    goto retry;
+
+  clib_spinlock_unlock_if_init (&mif->lockp);
+
+  if (n_left)
+    {
+      vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS,
+                        n_left);
+    }
 
-      buffers += 2;
-      n_left -= 2;
-      free_slots -= 2;
+  if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1)
+    {
+      u64 b = 1;
+      CLIB_UNUSED (int r) = write (mq->int_fd, &b, sizeof (b));
+      mq->int_count++;
     }
 
+  vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
+
+  return frame->n_vectors;
+}
+
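The head, tail and last_tail values above are free-running 16-bit counters that are only masked down to a ring index (slot & mask) when a descriptor is actually touched; the free-slot math relies on unsigned subtraction staying correct across wraparound. A small self-contained illustration, with made-up values rather than VPP code:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint16_t ring_size = 1 << 10;     /* 1024 slots */
  uint16_t mask = ring_size - 1;
  uint16_t tail = 65534;            /* producer counter, about to wrap */
  uint16_t last_tail = 65530;       /* last value we accounted for */

  /* four slots were consumed since last_tail ... */
  assert ((uint16_t) (tail - last_tail) == 4);

  tail += 10; /* wraps: 65534 + 10 == 8 (mod 2^16) */

  /* ... and the count stays correct across the wrap */
  assert ((uint16_t) (tail - last_tail) == 14);

  /* only the masked low bits ever index the ring */
  assert ((tail & mask) == 8);
  return 0;
}

This is why the rewritten path never needs the old tail > head special case: the modular difference is the element count by construction.
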
+static_always_inline uword
+memif_interface_tx_zc_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+                              vlib_frame_t * frame, memif_if_t * mif,
+                              memif_queue_t * mq,
+                              memif_per_thread_data_t * ptd)
+{
+  memif_ring_t *ring = mq->ring;
+  u32 *buffers = vlib_frame_vector_args (frame);
+  u32 n_left = frame->n_vectors;
+  u16 slot, free_slots, n_free;
+  u16 ring_size = 1 << mq->log2_ring_size;
+  u16 mask = ring_size - 1;
+  int n_retries = 5;
+  vlib_buffer_t *b0;
+
+retry:
+  n_free = ring->tail - mq->last_tail;
+  if (n_free >= 16)
+    {
+      vlib_buffer_free_from_ring_no_next (vm, mq->buffers,
+                                          mq->last_tail & mask,
+                                          ring_size, n_free);
+      mq->last_tail += n_free;
+    }
+
+  slot = ring->head;
+  free_slots = ring_size - ring->head + mq->last_tail;
+
   while (n_left && free_slots)
     {
-      vlib_buffer_t *b0 = vlib_get_buffer (vm, buffers[0]);
-      void *mb0 = memif_get_buffer (mif, ring, head);
-      clib_memcpy (mb0, vlib_buffer_get_current (b0), CLIB_CACHE_LINE_BYTES);
+      u16 s0;
+      u16 slots_in_packet = 1;
+      memif_desc_t *d0;
+      u32 bi0;
+
+      CLIB_PREFETCH (&ring->desc[(slot + 8) & mask], CLIB_CACHE_LINE_BYTES,
+                     STORE);
+
+      if (PREDICT_TRUE (n_left >= 4))
+        vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);
+
+      bi0 = buffers[0];
 
-      if (b0->current_length > CLIB_CACHE_LINE_BYTES)
+    next_in_chain:
+      s0 = slot & mask;
+      d0 = &ring->desc[s0];
+      mq->buffers[s0] = bi0;
+      b0 = vlib_get_buffer (vm, bi0);
+
+      d0->region = b0->buffer_pool_index + 1;
+      d0->offset = (void *) b0->data + b0->current_data -
+        mif->regions[d0->region].shm;
+      d0->length = b0->current_length;
+
+      free_slots--;
+      slot++;
+
+      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT))
         {
-          clib_memcpy (mb0 + CLIB_CACHE_LINE_BYTES,
-                       vlib_buffer_get_current (b0) + CLIB_CACHE_LINE_BYTES,
-                       b0->current_length - CLIB_CACHE_LINE_BYTES);
+          if (PREDICT_FALSE (free_slots == 0))
+            {
+              /* revert to last fully processed packet */
+              free_slots += slots_in_packet;
+              slot -= slots_in_packet;
+              goto no_free_slots;
+            }
+
+          d0->flags = MEMIF_DESC_FLAG_NEXT;
+          bi0 = b0->next_buffer;
+
+          /* next */
+          slots_in_packet++;
+          goto next_in_chain;
         }
-      ring->desc[head].length = b0->current_length;
-      head = (head + 1) & mask;
+
+      d0->flags = 0;
 
+      /* next from */
       buffers++;
       n_left--;
-      free_slots--;
     }
+no_free_slots:
 
   CLIB_MEMORY_STORE_BARRIER ();
-  ring->head = head;
+  ring->head = slot;
+
+  if (n_left && n_retries--)
+    goto retry;
 
   clib_spinlock_unlock_if_init (&mif->lockp);
 
@@ -202,28 +390,46 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
       vlib_buffer_free (vm, buffers, n_left);
     }
 
-  vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
-
-  if (mif->interrupt_line.fd > 0)
+  if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1)
     {
-      u8 b = rid;
-      CLIB_UNUSED (int r) = write (mif->interrupt_line.fd, &b, sizeof (b));
+      u64 b = 1;
+      CLIB_UNUSED (int r) = write (mq->int_fd, &b, sizeof (b));
+      mq->int_count++;
     }
 
   return frame->n_vectors;
 }
 
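In the zero-copy path above, no packet data moves at all: the slave publishes descriptors whose (region, offset) pair points straight into VPP buffer memory that has been exported to the peer as a shared region (hence region = buffer_pool_index + 1, leaving region 0 for the rings themselves), and the buffer index is parked in mq->buffers[s0] until the peer's tail counter shows it was consumed. A hypothetical sketch of just the descriptor math, where region_base and pkt stand in for mif->regions[].shm and b0->data + b0->current_data:

#include <assert.h>
#include <stdint.h>

typedef struct
{
  uint16_t region;  /* shared-memory region index */
  uint32_t offset;  /* byte offset of the payload within that region */
  uint16_t length;  /* payload length */
} desc_t;

int
main (void)
{
  static uint8_t region_base[4096]; /* pretend exported buffer region */
  uint8_t *pkt = region_base + 256; /* payload living inside the region */

  desc_t d = {
    .region = 1, /* buffer_pool_index + 1, as in the diff */
    .offset = (uint32_t) (pkt - region_base),
    .length = 64,
  };

  /* the peer recovers the payload address from (region, offset) alone */
  assert (region_base + d.offset == pkt);
  assert (d.region == 1 && d.length == 64);
  return 0;
}

The trade-off is that the buffer stays pinned until the consumer advances its tail, which is why the function reclaims completed buffers in batches of 16 at the retry label.
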
-static uword
-memif_interface_tx (vlib_main_t * vm,
-                    vlib_node_runtime_t * node, vlib_frame_t * frame)
+VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
+                                              vlib_node_runtime_t * node,
+                                              vlib_frame_t * frame)
 {
   memif_main_t *nm = &memif_main;
   vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
   memif_if_t *mif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
+  memif_queue_t *mq;
+  u32 thread_index = vm->thread_index;
+  memif_per_thread_data_t *ptd = vec_elt_at_index (memif_main.per_thread_data,
+                                                   thread_index);
+  u8 tx_queues = vec_len (mif->tx_queues);
 
-  if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
-    return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_S2M);
+  if (tx_queues < vec_len (vlib_mains))
+    {
+      ASSERT (tx_queues > 0);
+      mq = vec_elt_at_index (mif->tx_queues, thread_index % tx_queues);
+      clib_spinlock_lock_if_init (&mif->lockp);
+    }
+  else
+    mq = vec_elt_at_index (mif->tx_queues, thread_index);
+
+  if (mif->flags & MEMIF_IF_FLAG_ZERO_COPY)
+    return memif_interface_tx_zc_inline (vm, node, frame, mif, mq, ptd);
+  else if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
+    return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_S2M,
+                                      mq, ptd);
   else
-    return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_M2S);
+    return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_M2S,
+                                      mq, ptd);
 }
 
 static void
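The dispatch function above picks one TX queue per thread: with at least as many queues as threads each thread owns its queue and runs lock-free, otherwise threads hash onto the available queues with thread_index % tx_queues and serialize on the interface spinlock. A plain-C stand-in for that policy, where pick_queue and needs_lock are illustrative names rather than VPP symbols:

#include <stdio.h>

static unsigned
pick_queue (unsigned thread_index, unsigned n_queues, unsigned n_threads,
            int *needs_lock)
{
  if (n_queues < n_threads)
    {
      *needs_lock = 1; /* queue may be shared between threads */
      return thread_index % n_queues;
    }
  *needs_lock = 0;     /* exclusive queue, no lock required */
  return thread_index;
}

int
main (void)
{
  int lock;
  /* 4 threads, 2 queues: threads 0,2 share queue 0; 1,3 share queue 1 */
  for (unsigned t = 0; t < 4; t++)
    printf ("thread %u -> queue %u (lock=%d)\n", t,
            pick_queue (t, 2, 4, &lock), lock);
  return 0;
}
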
@@ -251,35 +457,35 @@ memif_clear_hw_interface_counters (u32 instance)
   /* Nothing for now */
 }
 
+static clib_error_t *
+memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
+                                vnet_hw_interface_rx_mode mode)
+{
+  memif_main_t *mm = &memif_main;
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+  memif_if_t *mif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
+  memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid);
+
+  if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+    mq->ring->flags |= MEMIF_RING_FLAG_MASK_INT;
+  else
+    mq->ring->flags &= ~MEMIF_RING_FLAG_MASK_INT;
+
+  return 0;
+}
+
 static clib_error_t *
 memif_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
 {
-  memif_main_t *apm = &memif_main;
-  vlib_main_t *vm = vlib_get_main ();
-  memif_msg_t msg = { 0 };
+  memif_main_t *mm = &memif_main;
   vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
-  memif_if_t *mif = pool_elt_at_index (apm->interfaces, hw->dev_instance);
+  memif_if_t *mif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
   static clib_error_t *error = 0;
 
   if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
     mif->flags |= MEMIF_IF_FLAG_ADMIN_UP;
   else
-    {
-      mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP;
-      if (!(mif->flags & MEMIF_IF_FLAG_DELETING)
-          && mif->connection.index != ~0)
-        {
-          msg.version = MEMIF_VERSION;
-          msg.type = MEMIF_MSG_TYPE_DISCONNECT;
-          if (send (mif->connection.fd, &msg, sizeof (msg), 0) < 0)
-            {
-              clib_unix_warning ("Failed to send disconnect request");
-              error = clib_error_return_unix (0, "send fd %d",
-                                              mif->connection.fd);
-              memif_disconnect (vm, mif);
-            }
-        }
-    }
+    mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP;
 
   return error;
 }
@@ -296,7 +502,6 @@ memif_subif_add_del_function (vnet_main_t * vnm,
 /* *INDENT-OFF* */
 VNET_DEVICE_CLASS (memif_device_class) = {
   .name = "memif",
-  .tx_function = memif_interface_tx,
   .format_device_name = format_memif_device_name,
   .format_device = format_memif_device,
   .format_tx_trace = format_memif_tx_trace,
@@ -306,10 +511,9 @@ VNET_DEVICE_CLASS (memif_device_class) = {
   .clear_counters = memif_clear_hw_interface_counters,
   .admin_up_down_function = memif_interface_admin_up_down,
   .subif_add_del_function = memif_subif_add_del_function,
+  .rx_mode_change_function = memif_interface_rx_mode_change,
 };
 
-VLIB_DEVICE_TX_FUNCTION_MULTIARCH(memif_device_class,
-                                  memif_interface_tx)
 /* *INDENT-ON* */
 
 /*