/*
 *------------------------------------------------------------------
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <sys/ioctl.h>

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vnet/ethernet/ethernet.h>

#include <memif/memif.h>
#include <memif/private.h>

#define foreach_memif_tx_func_error                                           \
  _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots")                 \
  _ (ROLLBACK, rollback, ERROR, "not enough space in tx buffers")
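
/* The list above is an X-macro: it is expanded twice below, once to build
 * the MEMIF_TX_ERROR_* enum values and once to build the vlib error counter
 * descriptors registered with the device class at the bottom of this file. */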
typedef enum
{
#define _(f, n, s, d) MEMIF_TX_ERROR_##f,
  foreach_memif_tx_func_error
#undef _
    MEMIF_TX_N_ERROR,
} memif_tx_func_error_t;

static vlib_error_desc_t memif_tx_func_error_counters[] = {
#define _(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
  foreach_memif_tx_func_error
#undef _
};

#ifndef CLIB_MARCH_VARIANT
u8 *
format_memif_device_name (u8 * s, va_list * args)
{
  u32 dev_instance = va_arg (*args, u32);
  memif_main_t *mm = &memif_main;
  memif_if_t *mif = pool_elt_at_index (mm->interfaces, dev_instance);
  memif_socket_file_t *msf;

  /* interface name is memif<socket-id>/<interface-id> */
  msf = pool_elt_at_index (mm->socket_files, mif->socket_file_index);
  s = format (s, "memif%lu/%lu", msf->socket_id, mif->id);
  return s;
}
#endif

static u8 *
format_memif_device (u8 * s, va_list * args)
{
  u32 dev_instance = va_arg (*args, u32);
  int verbose = va_arg (*args, int);
  u32 indent = format_get_indent (s);

  s = format (s, "MEMIF interface");
  if (verbose)
    s = format (s, "\n%U instance %u", format_white_space, indent + 2,
                dev_instance);
  return s;
}

static u8 *
format_memif_tx_trace (u8 * s, va_list * args)
{
  s = format (s, "Unimplemented...");
  return s;
}

static_always_inline void
memif_add_copy_op (memif_per_thread_data_t * ptd, void *data, u32 len,
                   u16 buffer_offset, u16 buffer_vec_index)
{
  memif_copy_op_t *co;
  vec_add2_aligned (ptd->copy_ops, co, 1, CLIB_CACHE_LINE_BYTES);
  co->data = data;
  co->data_len = len;
  co->buffer_offset = buffer_offset;
  co->buffer_vec_index = buffer_vec_index;
}
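
/* The copy-based tx path below runs in two passes: it first walks the ring
 * descriptors and queues (destination pointer, length, source buffer offset)
 * tuples via memif_add_copy_op(), then executes all queued copies in a
 * separate, unrolled memcpy loop.  Keeping descriptor bookkeeping and the
 * actual copying apart keeps both loops tight. */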
static_always_inline uword
memif_interface_tx_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
                           u32 *buffers, memif_if_t *mif,
                           memif_ring_type_t type, memif_queue_t *mq,
                           memif_per_thread_data_t *ptd, u32 n_left)
{
  memif_ring_t *ring = mq->ring;
  u32 n_copy_op;
  u16 ring_size, mask, slot, free_slots;
  int n_retries = 5;
  vlib_buffer_t *b0, *b1, *b2, *b3;
  memif_copy_op_t *co;
  memif_region_index_t last_region = ~0;
  void *last_region_shm = 0;
  u16 head, tail;

  ring_size = 1 << mq->log2_ring_size;
  mask = ring_size - 1;

retry:

  if (type == MEMIF_RING_S2M)
    {
      slot = head = ring->head;
      tail = __atomic_load_n (&ring->tail, __ATOMIC_ACQUIRE);
      mq->last_tail += tail - mq->last_tail;
      free_slots = ring_size - head + mq->last_tail;
    }
  else
    {
      slot = tail = ring->tail;
      head = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE);
      mq->last_tail += tail - mq->last_tail;
      free_slots = head - tail;
    }
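
  /* free_slots now holds how many descriptors this side may produce: on an
   * S2M ring that is everything the master has already consumed (tracked via
   * last_tail), while on an M2S ring it is the descriptors the slave has
   * announced between tail and head.  Each packet below may use several. */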
  while (n_left && free_slots)
    {
      memif_desc_t *d0;
      void *mb0;
      i32 src_off;
      u32 bi0, dst_off, src_left, dst_left, bytes_to_copy;
      u32 saved_ptd_copy_ops_len = _vec_len (ptd->copy_ops);
      u32 saved_ptd_buffers_len = _vec_len (ptd->buffers);
      u16 saved_slot = slot;

      clib_prefetch_load (&ring->desc[(slot + 8) & mask]);

      d0 = &ring->desc[slot & mask];
      if (PREDICT_FALSE (last_region != d0->region))
        {
          last_region_shm = mif->regions[d0->region].shm;
          last_region = d0->region;
        }
      mb0 = last_region_shm + d0->offset;
      dst_off = 0;

      /* on an S2M ring this side is the producer (slave), so it may use the
       * full buffer size; on M2S the length set by the peer must be kept */
      dst_left = (type == MEMIF_RING_S2M) ? mif->run.buffer_size : d0->length;

      if (PREDICT_TRUE (n_left >= 4))
        vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);

      bi0 = buffers[0];

    next_in_chain:
      b0 = vlib_get_buffer (vm, bi0);
      src_off = b0->current_data;
      src_left = b0->current_length;

      while (src_left)
        {
          if (PREDICT_FALSE (dst_left == 0))
            {
              if (free_slots)
                {
                  /* current descriptor is full, chain a new one */
                  slot++;
                  free_slots--;
                  d0->length = dst_off;
                  d0->flags = MEMIF_DESC_FLAG_NEXT;
                  d0 = &ring->desc[slot & mask];
                  dst_off = 0;
                  dst_left =
                    (type == MEMIF_RING_S2M) ? mif->run.buffer_size : d0->length;
                  if (PREDICT_FALSE (last_region != d0->region))
                    {
                      last_region_shm = mif->regions[d0->region].shm;
                      last_region = d0->region;
                    }
                  mb0 = last_region_shm + d0->offset;
                }
              else
                {
                  /* we need to rollback vectors before bailing out */
                  vec_set_len (ptd->buffers, saved_ptd_buffers_len);
                  vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len);
                  vlib_error_count (vm, node->node_index,
                                    MEMIF_TX_ERROR_ROLLBACK, 1);
                  slot = saved_slot;
                  goto no_free_slots;
                }
            }
          bytes_to_copy = clib_min (src_left, dst_left);
          memif_add_copy_op (ptd, mb0 + dst_off, bytes_to_copy, src_off,
                             vec_len (ptd->buffers));
          vec_add1_aligned (ptd->buffers, bi0, CLIB_CACHE_LINE_BYTES);
          src_off += bytes_to_copy;
          dst_off += bytes_to_copy;
          src_left -= bytes_to_copy;
          dst_left -= bytes_to_copy;
        }

      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT))
        {
          /* chained vlib buffer, continue filling the same descriptor chain */
          bi0 = b0->next_buffer;
          goto next_in_chain;
        }

      d0->length = dst_off;
      d0->flags = 0;

      /* next packet */
      free_slots--;
      slot++;

      buffers++;
      n_left--;
    }

no_free_slots:
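
  /* All descriptors for this burst are now reserved; execute the queued copy
   * operations in a second pass, unrolled four at a time. */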
  n_copy_op = vec_len (ptd->copy_ops);
  co = ptd->copy_ops;
  while (n_copy_op >= 8)
    {
      clib_prefetch_load (co[4].data);
      clib_prefetch_load (co[5].data);
      clib_prefetch_load (co[6].data);
      clib_prefetch_load (co[7].data);

      b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
      b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]);
      b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]);
      b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]);

      clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
                        co[0].data_len);
      clib_memcpy_fast (co[1].data, b1->data + co[1].buffer_offset,
                        co[1].data_len);
      clib_memcpy_fast (co[2].data, b2->data + co[2].buffer_offset,
                        co[2].data_len);
      clib_memcpy_fast (co[3].data, b3->data + co[3].buffer_offset,
                        co[3].data_len);

      co += 4;
      n_copy_op -= 4;
    }
  while (n_copy_op)
    {
      b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
      clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
                        co[0].data_len);
      co += 1;
      n_copy_op -= 1;
    }

  vec_reset_length (ptd->copy_ops);
  vec_reset_length (ptd->buffers);
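
  /* Publish the new producer index with release semantics so the peer never
   * observes the updated head/tail before the descriptor contents above. */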
  if (type == MEMIF_RING_S2M)
    __atomic_store_n (&ring->head, slot, __ATOMIC_RELEASE);
  else
    __atomic_store_n (&ring->tail, slot, __ATOMIC_RELEASE);

  if (n_left && n_retries--)
    goto retry;

  return n_left;
}

static_always_inline uword
memif_interface_tx_zc_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
                              u32 *buffers, memif_if_t *mif, memif_queue_t *mq,
                              memif_per_thread_data_t *ptd, u32 n_left)
{
  memif_ring_t *ring = mq->ring;
  u16 slot, free_slots, n_free;
  u16 ring_size = 1 << mq->log2_ring_size;
  u16 mask = ring_size - 1;
  int n_retries = 5;
  vlib_buffer_t *b0;
  u16 head, tail;
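
  /* Zero-copy mode: instead of copying packet data into the shared memory
   * regions, the descriptors are pointed directly at vlib buffer memory (the
   * buffer pools are exported to the peer as extra regions), so a buffer can
   * only be freed once the master has consumed its descriptor. */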

retry:
  tail = __atomic_load_n (&ring->tail, __ATOMIC_ACQUIRE);
  slot = head = ring->head;

  /* descriptors the master has already consumed (tail moved past them) can
   * have their vlib buffers returned to the pool now */
  n_free = tail - mq->last_tail;
  if (n_free)
    {
      vlib_buffer_free_from_ring_no_next (vm, mq->buffers,
                                          mq->last_tail & mask,
                                          ring_size, n_free);
      mq->last_tail += n_free;
    }

  free_slots = ring_size - head + mq->last_tail;

  while (n_left && free_slots)
    {
      memif_desc_t *d0;
      u16 slots_in_packet = 1;
      u32 bi0;
      u16 s0;

      clib_prefetch_store (&ring->desc[(slot + 8) & mask]);

      if (PREDICT_TRUE (n_left >= 4))
        vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);

      bi0 = buffers[0];

    next_in_chain:
      s0 = slot & mask;
      d0 = &ring->desc[s0];
      mq->buffers[s0] = bi0;
      b0 = vlib_get_buffer (vm, bi0);

      /* region 0 holds the descriptor rings; each vlib buffer pool is
       * exported to the peer as region 1..N */
      d0->region = b0->buffer_pool_index + 1;
      d0->offset = (void *) b0->data + b0->current_data -
                   mif->regions[d0->region].shm;
      d0->length = b0->current_length;
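
      /* Chained vlib buffers are mapped onto chained ring descriptors below;
       * if the ring runs out of slots mid-chain, the partially built packet
       * is reverted so the ring only ever carries complete packets. */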
      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT))
        {
          if (PREDICT_FALSE (free_slots == 0))
            {
              /* revert to last fully processed packet */
              free_slots += slots_in_packet;
              slot -= slots_in_packet;
              goto no_free_slots;
            }

          d0->flags = MEMIF_DESC_FLAG_NEXT;
          bi0 = b0->next_buffer;

          /* next descriptor of the same packet */
          slots_in_packet++;
          free_slots--;
          slot++;
          goto next_in_chain;
        }

      d0->flags = 0;

      /* next packet */
      buffers++;
      n_left--;
      free_slots--;
      slot++;
    }

no_free_slots:
  __atomic_store_n (&ring->head, slot, __ATOMIC_RELEASE);

  if (n_left && n_retries--)
    goto retry;

  return n_left;
}

CLIB_MARCH_FN (memif_tx_dma_completion_cb, void, vlib_main_t *vm,
               vlib_dma_batch_t *b)
{
  memif_main_t *mm = &memif_main;
  memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16);
  memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, b->cookie & 0xffff);
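
  /* The batch cookie was set at submit time to
   * ((interface index << 16) | tx queue index), so the two lookups above
   * recover the interface and queue without any extra state. */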
  memif_dma_info_t *dma_info = mq->dma_info + mq->dma_info_head;
  memif_per_thread_data_t *ptd = &dma_info->data;

  /* DMA copies are done, the source vlib buffers can be released */
  vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers));

  dma_info->finished = 1;
  vec_reset_length (ptd->buffers);
  vec_reset_length (ptd->copy_ops);

  /* only now is it safe to let the peer see the new tail */
  __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE);

  mq->dma_info_head++;
  if (mq->dma_info_head == mq->dma_info_size)
    mq->dma_info_head = 0;
  mq->dma_info_full = 0;
}

#ifndef CLIB_MARCH_VARIANT
void
memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b)
{
  return CLIB_MARCH_FN_SELECT (memif_tx_dma_completion_cb) (vm, b);
}
#endif

static_always_inline uword
memif_interface_tx_dma_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
                               u32 *buffers, memif_if_t *mif,
                               memif_ring_type_t type, memif_queue_t *mq,
                               u32 n_left)
{
  memif_ring_t *ring;
  u32 n_copy_op;
  u16 ring_size, mask, slot, free_slots;
  int n_retries = 5, fallback = 0;
  vlib_buffer_t *b0, *b1, *b2, *b3;
  memif_copy_op_t *co;
  memif_region_index_t last_region = ~0;
  void *last_region_shm = 0;
  u16 head, tail;
  memif_dma_info_t *dma_info;
  memif_per_thread_data_t *ptd;
  memif_main_t *mm = &memif_main;
  u16 mif_id = mif - mm->interfaces;

  ring = mq->ring;
  ring_size = 1 << mq->log2_ring_size;
  mask = ring_size - 1;

  dma_info = mq->dma_info + mq->dma_info_tail;
  ptd = &dma_info->data;
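
  /* dma_info is a small circular buffer of in-flight DMA batches: this
   * function fills the entry at dma_info_tail and the completion callback
   * retires entries from dma_info_head.  When it is full, the code below
   * falls back to synchronous CPU copies. */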
  /* do software fallback if dma info ring is full */
  u16 dma_mask = mq->dma_info_size - 1;
  if ((((mq->dma_info_tail + 1) & dma_mask) == mq->dma_info_head) ||
      ((mq->dma_info_head == dma_mask) && (mq->dma_info_tail == 0)))
    {
      if (!mq->dma_info_full)
        mq->dma_info_full = 1;
      fallback = 1;
    }

  vlib_dma_batch_t *b = NULL;
  if (PREDICT_TRUE (!fallback))
    b = vlib_dma_batch_new (vm, mif->dma_tx_config);

retry:
  slot = tail = mq->dma_tail;
  head = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE);
  mq->last_tail += tail - mq->last_tail;
  free_slots = head - mq->dma_tail;
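
  /* The DMA path keeps its own producer index (mq->dma_tail): ring->tail is
   * only advanced from the completion callback once the engine has actually
   * finished copying, so the peer never sees half-copied descriptors. */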
  while (n_left && free_slots)
    {
      memif_desc_t *d0;
      void *mb0;
      i32 src_off;
      u32 bi0, dst_off, src_left, dst_left, bytes_to_copy;
      u32 saved_ptd_copy_ops_len = _vec_len (ptd->copy_ops);
      u32 saved_ptd_buffers_len = _vec_len (ptd->buffers);
      u16 saved_slot = slot;

      clib_prefetch_load (&ring->desc[(slot + 8) & mask]);

      d0 = &ring->desc[slot & mask];
      if (PREDICT_FALSE (last_region != d0->region))
        {
          last_region_shm = mif->regions[d0->region].shm;
          last_region = d0->region;
        }
      mb0 = last_region_shm + d0->offset;
      dst_off = 0;

      /* this path runs on the master side only, so the buffer length set by
       * the slave in the descriptor must be honoured */
      dst_left = d0->length;

      if (PREDICT_TRUE (n_left >= 4))
        vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);

      bi0 = buffers[0];

    next_in_chain:
      b0 = vlib_get_buffer (vm, bi0);
      src_off = b0->current_data;
      src_left = b0->current_length;

      while (src_left)
        {
          if (PREDICT_FALSE (dst_left == 0))
            {
              if (free_slots)
                {
                  /* current descriptor is full, chain a new one */
                  slot++;
                  free_slots--;
                  d0->length = dst_off;
                  d0->flags = MEMIF_DESC_FLAG_NEXT;
                  d0 = &ring->desc[slot & mask];
                  dst_off = 0;
                  dst_left = (type == MEMIF_RING_S2M) ? mif->run.buffer_size :
                                                        d0->length;
                  if (PREDICT_FALSE (last_region != d0->region))
                    {
                      last_region_shm = mif->regions[d0->region].shm;
                      last_region = d0->region;
                    }
                  mb0 = last_region_shm + d0->offset;
                }
              else
                {
                  /* we need to rollback vectors before bailing out */
                  vec_set_len (ptd->buffers, saved_ptd_buffers_len);
                  vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len);
                  vlib_error_count (vm, node->node_index,
                                    MEMIF_TX_ERROR_ROLLBACK, 1);
                  slot = saved_slot;
                  goto no_free_slots;
                }
            }
          bytes_to_copy = clib_min (src_left, dst_left);
          memif_add_copy_op (ptd, mb0 + dst_off, bytes_to_copy, src_off,
                             vec_len (ptd->buffers));
          src_off += bytes_to_copy;
          dst_off += bytes_to_copy;
          src_left -= bytes_to_copy;
          dst_left -= bytes_to_copy;
        }

      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT))
        {
          /* chained vlib buffer, continue filling the same descriptor chain */
          bi0 = b0->next_buffer;
          goto next_in_chain;
        }

      /* only the first buffer index of the packet is recorded;
       * vlib_buffer_free () in the completion path releases the whole chain */
      vec_add1_aligned (ptd->buffers, buffers[0], CLIB_CACHE_LINE_BYTES);
      d0->length = dst_off;
      d0->flags = 0;

      /* next packet */
      free_slots--;
      slot++;

      buffers++;
      n_left--;
    }

no_free_slots:

  n_copy_op = vec_len (ptd->copy_ops);
  co = ptd->copy_ops;
  while (n_copy_op >= 8)
    {
      clib_prefetch_load (co[4].data);
      clib_prefetch_load (co[5].data);
      clib_prefetch_load (co[6].data);
      clib_prefetch_load (co[7].data);

      b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
      b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]);
      b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]);
      b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]);

      if (PREDICT_TRUE (!fallback))
        {
          /* queue the copies on the DMA engine */
          vlib_dma_batch_add (vm, b, co[0].data,
                              b0->data + co[0].buffer_offset, co[0].data_len);
          vlib_dma_batch_add (vm, b, co[1].data,
                              b1->data + co[1].buffer_offset, co[1].data_len);
          vlib_dma_batch_add (vm, b, co[2].data,
                              b2->data + co[2].buffer_offset, co[2].data_len);
          vlib_dma_batch_add (vm, b, co[3].data,
                              b3->data + co[3].buffer_offset, co[3].data_len);
        }
      else
        {
          /* dma_info ring is full, copy synchronously on the CPU */
          clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
                            co[0].data_len);
          clib_memcpy_fast (co[1].data, b1->data + co[1].buffer_offset,
                            co[1].data_len);
          clib_memcpy_fast (co[2].data, b2->data + co[2].buffer_offset,
                            co[2].data_len);
          clib_memcpy_fast (co[3].data, b3->data + co[3].buffer_offset,
                            co[3].data_len);
        }

      co += 4;
      n_copy_op -= 4;
    }
  while (n_copy_op)
    {
      b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
      if (PREDICT_TRUE (!fallback))
        vlib_dma_batch_add (vm, b, co[0].data, b0->data + co[0].buffer_offset,
                            co[0].data_len);
      else
        clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
                          co[0].data_len);
      co += 1;
      n_copy_op -= 1;
    }

  /* save dma info before retry */
  dma_info->dma_tail = slot;
  mq->dma_tail = slot;

  vec_reset_length (ptd->copy_ops);

  if (n_left && n_retries--)
    goto retry;

  if (PREDICT_TRUE (!fallback))
    {
      vlib_dma_batch_set_cookie (vm, b,
                                 ((u64) mif_id << 16) | (mq - mif->tx_queues));
      vlib_dma_batch_submit (vm, b);
      dma_info->finished = 0;

      /* move on to the next dma_info entry */
      mq->dma_info_tail++;
      if (mq->dma_info_tail == mq->dma_info_size)
        mq->dma_info_tail = 0;
    }
  else if (fallback && dma_info->finished)
    {
      /* if dma has been completed, update ring immediately */
      vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers));
      vec_reset_length (ptd->buffers);
      __atomic_store_n (&mq->ring->tail, slot, __ATOMIC_RELEASE);
    }

  return n_left;
}

VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
                                               vlib_node_runtime_t * node,
                                               vlib_frame_t * frame)
{
  memif_main_t *nm = &memif_main;
  vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
  memif_if_t *mif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
  vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
  memif_queue_t *mq;
  u32 n_left;
  u32 qid = tf->queue_id;
  u32 *from, thread_index = vm->thread_index;
  memif_per_thread_data_t *ptd = vec_elt_at_index (memif_main.per_thread_data,
                                                   thread_index);

  ASSERT (vec_len (mif->tx_queues) > qid);
  mq = vec_elt_at_index (mif->tx_queues, qid);

  if (tf->shared_queue)
    clib_spinlock_lock (&mq->lockp);

  from = vlib_frame_vector_args (frame);
  n_left = frame->n_vectors;
  if (mif->flags & MEMIF_IF_FLAG_ZERO_COPY)
    n_left =
      memif_interface_tx_zc_inline (vm, node, from, mif, mq, ptd, n_left);
  else if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
    n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_S2M,
                                        mq, ptd, n_left);
  else
    {
      if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0))
        n_left = memif_interface_tx_dma_inline (vm, node, from, mif,
                                                MEMIF_RING_M2S, mq, n_left);
      else
        n_left = memif_interface_tx_inline (vm, node, from, mif,
                                            MEMIF_RING_M2S, mq, ptd, n_left);
    }

  if (tf->shared_queue)
    clib_spinlock_unlock (&mq->lockp);

  if (n_left)
    vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS,
                      n_left);

  /* unless the peer has masked interrupts on this ring, poke its eventfd to
   * signal that new descriptors are available */
  if ((mq->ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1)
    {
      u64 b = 1;
      int __clib_unused r = write (mq->int_fd, &b, sizeof (b));
    }
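
  /* Buffer ownership differs per mode: in the DMA and zero-copy paths the
   * enqueued buffers are freed later (on DMA completion or once the peer
   * consumes them), so only packets that never made it onto the ring are
   * freed here; the plain copy path can free the whole frame right away. */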
  if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0))
    {
      if (n_left)
        vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);
    }
  else if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0)
    vlib_buffer_free (vm, from, frame->n_vectors);
  else
    vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);

  return frame->n_vectors - n_left;
}

static void
memif_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
                               u32 node_index)
{
  memif_main_t *apm = &memif_main;
  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
  memif_if_t *mif = pool_elt_at_index (apm->interfaces, hw->dev_instance);

  /* Shut off redirection */
  if (node_index == ~0)
    {
      mif->per_interface_next_index = node_index;
      return;
    }

  mif->per_interface_next_index =
    vlib_node_add_next (vlib_get_main (), memif_input_node.index, node_index);
}

static void
memif_clear_hw_interface_counters (u32 instance)
{
  /* Nothing for now */
}

static clib_error_t *
memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
                                vnet_hw_if_rx_mode mode)
{
  memif_main_t *mm = &memif_main;
  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
  memif_if_t *mif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
  memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid);
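
  /* In polling mode, set MASK_INT on the rx ring so the peer stops writing
   * the interrupt eventfd for every burst; any other mode clears it again. */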
  if (mode == VNET_HW_IF_RX_MODE_POLLING)
    mq->ring->flags |= MEMIF_RING_FLAG_MASK_INT;
  else
    mq->ring->flags &= ~MEMIF_RING_FLAG_MASK_INT;

  return 0;
}

VNET_DEVICE_CLASS (memif_device_class) = {
  .name = "memif",
  .format_device_name = format_memif_device_name,
  .format_device = format_memif_device,
  .format_tx_trace = format_memif_tx_trace,
  .tx_function_n_errors = MEMIF_TX_N_ERROR,
  .tx_function_error_counters = memif_tx_func_error_counters,
  .rx_redirect_to_node = memif_set_interface_next_node,
  .clear_counters = memif_clear_hw_interface_counters,
  .admin_up_down_function = memif_interface_admin_up_down,
  .rx_mode_change_function = memif_interface_rx_mode_change,
};

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */