#include <unistd.h>

#include <vlib/vlib.h>
#include <vnet/vnet.h>

#include <memif/memif.h>
#include <memif/private.h>
-#define foreach_memif_tx_func_error \
-_(NO_FREE_SLOTS, "no free tx slots") \
-_(ROLLBACK, "no enough space in tx buffers")
+#define foreach_memif_tx_func_error \
+ _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots") \
+ _ (ROLLBACK, rollback, ERROR, "not enough space in tx buffers")
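+/* each entry is _(SYMBOL, name, SEVERITY, description); the macro is
+   expanded below to build both the error enum and the counter table */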
typedef enum
{
-#define _(f,s) MEMIF_TX_ERROR_##f,
+#define _(f, n, s, d) MEMIF_TX_ERROR_##f,
foreach_memif_tx_func_error
#undef _
MEMIF_TX_N_ERROR,
} memif_tx_func_error_t;
-static __clib_unused char *memif_tx_func_error_strings[] = {
-#define _(n,s) s,
+static vlib_error_desc_t memif_tx_func_error_counters[] = {
+#define _(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
foreach_memif_tx_func_error
#undef _
};
-#ifndef CLIB_MULTIARCH_VARIANT
+#ifndef CLIB_MARCH_VARIANT
u8 *
format_memif_device_name (u8 * s, va_list * args)
{
  return s;
}
#endif
-static __clib_unused u8 *
+static u8 *
format_memif_device (u8 * s, va_list * args)
{
u32 dev_instance = va_arg (*args, u32);
return s;
}
-static __clib_unused u8 *
+static u8 *
format_memif_tx_trace (u8 * s, va_list * args)
{
s = format (s, "Unimplemented...");
  return s;
}
static_always_inline uword
-memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, memif_if_t * mif,
- memif_ring_type_t type)
+memif_interface_tx_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 *buffers, memif_if_t *mif,
+ memif_ring_type_t type, memif_queue_t *mq,
+ memif_per_thread_data_t *ptd, u32 n_left)
{
- u8 qid;
memif_ring_t *ring;
- u32 *buffers = vlib_frame_args (frame);
- u32 n_left = frame->n_vectors;
u32 n_copy_op;
u16 ring_size, mask, slot, free_slots;
- u32 thread_index = vlib_get_thread_index ();
- memif_per_thread_data_t *ptd = vec_elt_at_index (memif_main.per_thread_data,
- thread_index);
- u8 tx_queues = vec_len (mif->tx_queues);
- memif_queue_t *mq;
int n_retries = 5;
vlib_buffer_t *b0, *b1, *b2, *b3;
memif_copy_op_t *co;
memif_region_index_t last_region = ~0;
void *last_region_shm = 0;
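+  /* last_region/last_region_shm cache the most recently resolved
+     region mapping, so consecutive descriptors in the same region
+     skip the region lookup */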
+ u16 head, tail;
- if (tx_queues < vec_len (vlib_mains))
- {
- ASSERT (tx_queues > 0);
- qid = thread_index % tx_queues;
- clib_spinlock_lock_if_init (&mif->lockp);
- }
- else
- qid = thread_index;
-
- mq = vec_elt_at_index (mif->tx_queues, qid);
ring = mq->ring;
ring_size = 1 << mq->log2_ring_size;
mask = ring_size - 1;
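+  /* ring size is a power of two, so slot arithmetic relies on u16
+     wraparound plus masking instead of a modulo */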
retry:
- free_slots = ring->tail - mq->last_tail;
- mq->last_tail += free_slots;
- slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
-
if (type == MEMIF_RING_S2M)
- free_slots = ring_size - ring->head + mq->last_tail;
+ {
+ slot = head = ring->head;
+ tail = __atomic_load_n (&ring->tail, __ATOMIC_ACQUIRE);
+ mq->last_tail += tail - mq->last_tail;
+ free_slots = ring_size - head + mq->last_tail;
+ }
else
- free_slots = ring->head - ring->tail;
+ {
+ slot = tail = ring->tail;
+ head = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE);
+ mq->last_tail += tail - mq->last_tail;
+ free_slots = head - tail;
+ }
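+  /* in both cases the peer-owned index is loaded with acquire
+     semantics, pairing with the peer's release store, and free_slots
+     is the number of descriptors we may fill without overtaking it */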
while (n_left && free_slots)
{
u32 saved_ptd_buffers_len = _vec_len (ptd->buffers);
u16 saved_slot = slot;
- CLIB_PREFETCH (&ring->desc[(slot + 8) & mask], CLIB_CACHE_LINE_BYTES,
- LOAD);
+ clib_prefetch_load (&ring->desc[(slot + 8) & mask]);
d0 = &ring->desc[slot & mask];
if (PREDICT_FALSE (last_region != d0->region))
{
slot++;
free_slots--;
+ d0->length = dst_off;
d0->flags = MEMIF_DESC_FLAG_NEXT;
d0 = &ring->desc[slot & mask];
dst_off = 0;
else
{
/* we need to rollback vectors before bailing out */
- _vec_len (ptd->buffers) = saved_ptd_buffers_len;
- _vec_len (ptd->copy_ops) = saved_ptd_copy_ops_len;
+ vec_set_len (ptd->buffers, saved_ptd_buffers_len);
+ vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len);
vlib_error_count (vm, node->node_index,
MEMIF_TX_ERROR_ROLLBACK, 1);
slot = saved_slot;
co = ptd->copy_ops;
while (n_copy_op >= 8)
{
- CLIB_PREFETCH (co[4].data, CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (co[5].data, CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (co[6].data, CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (co[7].data, CLIB_CACHE_LINE_BYTES, LOAD);
+ clib_prefetch_load (co[4].data);
+ clib_prefetch_load (co[5].data);
+ clib_prefetch_load (co[6].data);
+ clib_prefetch_load (co[7].data);
b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]);
b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]);
b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]);
- clib_memcpy (co[0].data, b0->data + co[0].buffer_offset,
- co[0].data_len);
- clib_memcpy (co[1].data, b1->data + co[1].buffer_offset,
- co[1].data_len);
- clib_memcpy (co[2].data, b2->data + co[2].buffer_offset,
- co[2].data_len);
- clib_memcpy (co[3].data, b3->data + co[3].buffer_offset,
- co[3].data_len);
+ clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+ co[0].data_len);
+ clib_memcpy_fast (co[1].data, b1->data + co[1].buffer_offset,
+ co[1].data_len);
+ clib_memcpy_fast (co[2].data, b2->data + co[2].buffer_offset,
+ co[2].data_len);
+ clib_memcpy_fast (co[3].data, b3->data + co[3].buffer_offset,
+ co[3].data_len);
co += 4;
n_copy_op -= 4;
while (n_copy_op)
{
b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
- clib_memcpy (co[0].data, b0->data + co[0].buffer_offset,
- co[0].data_len);
+ clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+ co[0].data_len);
co += 1;
n_copy_op -= 1;
}
vec_reset_length (ptd->copy_ops);
vec_reset_length (ptd->buffers);
- CLIB_MEMORY_STORE_BARRIER ();
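+  /* the release store below replaces the explicit barrier: descriptor
+     writes are published before the peer can observe the new index */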
if (type == MEMIF_RING_S2M)
- ring->head = slot;
+ __atomic_store_n (&ring->head, slot, __ATOMIC_RELEASE);
else
- ring->tail = slot;
+ __atomic_store_n (&ring->tail, slot, __ATOMIC_RELEASE);
if (n_left && n_retries--)
goto retry;
- clib_spinlock_unlock_if_init (&mif->lockp);
+ return n_left;
+}
- if (n_left)
+static_always_inline uword
+memif_interface_tx_zc_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 *buffers, memif_if_t *mif, memif_queue_t *mq,
+ memif_per_thread_data_t *ptd, u32 n_left)
+{
+ memif_ring_t *ring = mq->ring;
+ u16 slot, free_slots, n_free;
+ u16 ring_size = 1 << mq->log2_ring_size;
+ u16 mask = ring_size - 1;
+ int n_retries = 5;
+ vlib_buffer_t *b0;
+ u16 head, tail;
+
+retry:
+ tail = __atomic_load_n (&ring->tail, __ATOMIC_ACQUIRE);
+ slot = head = ring->head;
+
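+  /* zero-copy: buffers handed to the peer stay allocated until it
+     advances tail, so reclaim completed ones in batches */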
+ n_free = tail - mq->last_tail;
+ if (n_free >= 16)
{
- vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS,
- n_left);
+ vlib_buffer_free_from_ring_no_next (vm, mq->buffers,
+ mq->last_tail & mask,
+ ring_size, n_free);
+ mq->last_tail += n_free;
}
- if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1)
+ free_slots = ring_size - head + mq->last_tail;
+
+ while (n_left && free_slots)
{
- u64 b = 1;
- CLIB_UNUSED (int r) = write (mq->int_fd, &b, sizeof (b));
- mq->int_count++;
+ u16 s0;
+ u16 slots_in_packet = 1;
+ memif_desc_t *d0;
+ u32 bi0;
+
+ clib_prefetch_store (&ring->desc[(slot + 8) & mask]);
+
+ if (PREDICT_TRUE (n_left >= 4))
+ vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);
+
+ bi0 = buffers[0];
+
+ next_in_chain:
+ s0 = slot & mask;
+ d0 = &ring->desc[s0];
+ mq->buffers[s0] = bi0;
+ b0 = vlib_get_buffer (vm, bi0);
+
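+      /* region 0 holds the rings; in zero-copy mode each vlib buffer
+         pool is exposed as its own region, hence the +1 */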
+ d0->region = b0->buffer_pool_index + 1;
+ d0->offset = (void *) b0->data + b0->current_data -
+ mif->regions[d0->region].shm;
+ d0->length = b0->current_length;
+
+ free_slots--;
+ slot++;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ if (PREDICT_FALSE (free_slots == 0))
+ {
+ /* revert to last fully processed packet */
+ free_slots += slots_in_packet;
+ slot -= slots_in_packet;
+ goto no_free_slots;
+ }
+
+ d0->flags = MEMIF_DESC_FLAG_NEXT;
+ bi0 = b0->next_buffer;
+
+ /* next */
+ slots_in_packet++;
+ goto next_in_chain;
+ }
+
+ d0->flags = 0;
+
+ /* next from */
+ buffers++;
+ n_left--;
}
+no_free_slots:
- vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
+ __atomic_store_n (&ring->head, slot, __ATOMIC_RELEASE);
- return frame->n_vectors;
+ if (n_left && n_retries--)
+ goto retry;
+
+ return n_left;
}
-uword
-CLIB_MULTIARCH_FN (memif_interface_tx) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
memif_main_t *nm = &memif_main;
vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
memif_if_t *mif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
+ memif_queue_t *mq;
+ u32 qid = tf->queue_id;
+ u32 *from, thread_index = vm->thread_index;
+ memif_per_thread_data_t *ptd = vec_elt_at_index (memif_main.per_thread_data,
+ thread_index);
+ uword n_left;
- if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
- return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_S2M);
+ ASSERT (vec_len (mif->tx_queues) > qid);
+ mq = vec_elt_at_index (mif->tx_queues, qid);
+
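+  /* the infra marks a queue shared when it is used by more than one
+     worker thread; serialize enqueue with the per-queue lock */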
+ if (tf->shared_queue)
+ clib_spinlock_lock (&mq->lockp);
+
+ from = vlib_frame_vector_args (frame);
+ n_left = frame->n_vectors;
+ if (mif->flags & MEMIF_IF_FLAG_ZERO_COPY)
+ n_left =
+ memif_interface_tx_zc_inline (vm, node, from, mif, mq, ptd, n_left);
+ else if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
+ n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_S2M,
+ mq, ptd, n_left);
else
- return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_M2S);
+ n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_M2S,
+ mq, ptd, n_left);
+
+ if (tf->shared_queue)
+ clib_spinlock_unlock (&mq->lockp);
+
+ if (n_left)
+ vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS,
+ n_left);
+
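+  /* peer left interrupts unmasked, so kick its event fd to signal
+     that new descriptors are ready */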
+ if ((mq->ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1)
+ {
+ u64 b = 1;
+ int __clib_unused r = write (mq->int_fd, &b, sizeof (b));
+ mq->int_count++;
+ }
+
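+  /* copy mode: every input buffer was copied into the ring, free them
+     all; zero-copy: free only the buffers we failed to enqueue */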
+ if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0)
+ vlib_buffer_free (vm, from, frame->n_vectors);
+ else if (n_left)
+ vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);
+
+ return frame->n_vectors - n_left;
}
-static __clib_unused void
+static void
memif_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
u32 node_index)
{
vlib_node_add_next (vlib_get_main (), memif_input_node.index, node_index);
}
-static __clib_unused void
+static void
memif_clear_hw_interface_counters (u32 instance)
{
/* Nothing for now */
}
-static __clib_unused clib_error_t *
+static clib_error_t *
memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
- vnet_hw_interface_rx_mode mode)
+ vnet_hw_if_rx_mode mode)
{
memif_main_t *mm = &memif_main;
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
memif_if_t *mif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid);
- if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ if (mode == VNET_HW_IF_RX_MODE_POLLING)
mq->ring->flags |= MEMIF_RING_FLAG_MASK_INT;
else
mq->ring->flags &= ~MEMIF_RING_FLAG_MASK_INT;
return 0;
}
-static __clib_unused clib_error_t *
-memif_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
-{
- memif_main_t *mm = &memif_main;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- memif_if_t *mif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
- static clib_error_t *error = 0;
-
- if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
- mif->flags |= MEMIF_IF_FLAG_ADMIN_UP;
- else
- mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP;
-
- return error;
-}
-
-static __clib_unused clib_error_t *
+static clib_error_t *
memif_subif_add_del_function (vnet_main_t * vnm,
u32 hw_if_index,
struct vnet_sw_interface_t *st, int is_add)
{
  return 0;
}
-#ifndef CLIB_MULTIARCH_VARIANT
/* *INDENT-OFF* */
VNET_DEVICE_CLASS (memif_device_class) = {
.name = "memif",
- .tx_function = memif_interface_tx,
.format_device_name = format_memif_device_name,
.format_device = format_memif_device,
.format_tx_trace = format_memif_tx_trace,
.tx_function_n_errors = MEMIF_TX_N_ERROR,
- .tx_function_error_strings = memif_tx_func_error_strings,
+ .tx_function_error_counters = memif_tx_func_error_counters,
.rx_redirect_to_node = memif_set_interface_next_node,
.clear_counters = memif_clear_hw_interface_counters,
.admin_up_down_function = memif_interface_admin_up_down,
.rx_mode_change_function = memif_interface_rx_mode_change,
};
-#if __x86_64__
-vlib_node_function_t __clib_weak memif_interface_tx_avx512;
-vlib_node_function_t __clib_weak memif_interface_tx_avx2;
-static void __clib_constructor
-dpdk_interface_tx_multiarch_select (void)
-{
- if (memif_interface_tx_avx512 && clib_cpu_supports_avx512f ())
- memif_device_class.tx_function = memif_interface_tx_avx512;
- else if (memif_interface_tx_avx2 && clib_cpu_supports_avx2 ())
- memif_device_class.tx_function = memif_interface_tx_avx2;
-}
-#endif
-#endif
-
/* *INDENT-ON* */
/*