2 *------------------------------------------------------------------
3 * Copyright (c) 2016 Cisco and/or its affiliates.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *------------------------------------------------------------------
18 #include <sys/types.h>
22 #include <vlib/vlib.h>
23 #include <vlib/unix/unix.h>
24 #include <vnet/vnet.h>
25 #include <vnet/ethernet/ethernet.h>
26 #include <vnet/gso/gro_func.h>
27 #include <vnet/gso/hdr_offset_parser.h>
28 #include <vnet/ip/ip4_packet.h>
29 #include <vnet/ip/ip6_packet.h>
30 #include <vnet/tcp/tcp_packet.h>
31 #include <vnet/udp/udp_packet.h>
32 #include <vnet/devices/virtio/virtio.h>
/* Max number of indirect descriptors carried in one vlib_buffer_t
 * (2048-byte buffer / 16-byte descriptor = 128, one reserved). */
34 #define VIRTIO_TX_MAX_CHAIN_LEN 127
/* X-macro table of TX error counters: symbolic name + human string.
 * Expanded twice below — once for the enum, once for the strings. */
36 #define foreach_virtio_tx_func_error \
37 _(NO_FREE_SLOTS, "no free tx slots") \
38 _(TRUNC_PACKET, "packet > buffer size -- truncated in tx ring") \
39 _(PENDING_MSGS, "pending msgs in tx ring") \
40 _(INDIRECT_DESC_ALLOC_FAILED, "indirect descriptor allocation failed - packet drop") \
41 _(OUT_OF_ORDER, "out-of-order buffers in used ring") \
42 _(GSO_PACKET_DROP, "gso disabled on itf -- gso packet drop") \
43 _(CSUM_OFFLOAD_PACKET_DROP, "checksum offload disabled on itf -- csum offload packet drop")
/* Enum of VIRTIO_TX_ERROR_* codes generated from the table above.
 * NOTE(review): the opening "typedef enum {" line is elided in this listing. */
47 #define _(f,s) VIRTIO_TX_ERROR_##f,
48 foreach_virtio_tx_func_error
51 } virtio_tx_func_error_t;
/* Parallel array of error strings, indexed by virtio_tx_func_error_t.
 * NOTE(review): the "#define _(f,s) s," expansion line is elided here. */
53 static char *virtio_tx_func_error_strings[] = {
55 foreach_virtio_tx_func_error
/* format() callback: render a virtio interface description for "show"
 * output. Consumes (dev_instance, verbose) from the va_list.
 * NOTE(review): return-type line, braces and most of the body are elided
 * in this listing; code kept verbatim. */
60 format_virtio_device (u8 * s, va_list * args)
62 u32 dev_instance = va_arg (*args, u32);
63 int verbose = va_arg (*args, int);
64 u32 indent = format_get_indent (s);
66 s = format (s, "VIRTIO interface");
69 s = format (s, "\n%U instance %u", format_white_space, indent + 2,
/* Parsed header offsets captured at trace time — member of the TX trace
 * record. NOTE(review): the rest of the virtio_tx_trace_t struct
 * definition is elided in this listing. */
79 generic_header_offset_t gho;
/* format() callback for a TX trace record: buffer metadata, parsed
 * header offsets, then the ethernet header snapshot from pre_data. */
84 format_virtio_tx_trace (u8 * s, va_list * va)
86 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
87 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
88 virtio_tx_trace_t *t = va_arg (*va, virtio_tx_trace_t *);
89 u32 indent = format_get_indent (s);
91 s = format (s, "%Ubuffer 0x%x: %U\n", format_white_space, indent,
92 t->buffer_index, format_vnet_buffer_no_chain, &t->buffer);
94 format (s, "%U%U\n", format_white_space, indent,
95 format_generic_header_offset, &t->gho);
97 format (s, "%U%U", format_white_space, indent,
98 format_ethernet_header_with_length, t->buffer.pre_data,
99 sizeof (t->buffer.pre_data));
/* Record a TX trace entry for buffer b0. For TUN interfaces (no L2
 * header) the IP version is sniffed from the first nibble of the
 * payload; otherwise the IS_IP4/IS_IP6 buffer flags are used.
 * NOTE(review): the switch cases and if/else structure around the two
 * parser calls are partially elided in this listing. */
104 virtio_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b0,
107 virtio_tx_trace_t *t;
108 t = vlib_add_trace (vm, node, b0, sizeof (t[0]));
109 t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
110 t->buffer_index = bi;
113 int is_ip4 = 0, is_ip6 = 0;
/* First nibble of the payload selects IPv4 (0x40) vs IPv6 (0x60). */
115 switch (((u8 *) vlib_buffer_get_current (b0))[0] & 0xf0)
/* TUN path: no L2 header, is_l2 == 0. */
126 vnet_generic_header_offset_parser (b0, &t->gho, 0, is_ip4, is_ip6);
/* TAP/PCI path: L2 present, IP version taken from buffer flags. */
129 vnet_generic_header_offset_parser (b0, &t->gho, 1,
131 VNET_BUFFER_F_IS_IP4,
132 b0->flags & VNET_BUFFER_F_IS_IP6);
/* Snapshot buffer metadata (minus pre_data) and the first payload
 * bytes so the trace formatter can print them later. */
134 clib_memcpy_fast (&t->buffer, b0, sizeof (*b0) - sizeof (b0->pre_data));
135 clib_memcpy_fast (t->buffer.pre_data, vlib_buffer_get_current (b0),
136 sizeof (t->buffer.pre_data));
/* Drop n buffers: bump the node error counter, increment the
 * interface drop counter, then free the buffers back to the pool. */
140 virtio_interface_drop_inline (vlib_main_t *vm, virtio_if_t *vif,
141 uword node_index, u32 *buffers, u16 n,
142 virtio_tx_func_error_t error)
144 vlib_error_count (vm, node_index, error, n);
145 vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters +
146 VNET_INTERFACE_COUNTER_DROP,
147 vm->thread_index, vif->sw_if_index, n);
148 vlib_buffer_free (vm, buffers, n);
/* Mark n_buffers ring slots starting at 'start' as free (~0),
 * handling wrap-around at ring_size. NOTE(review): the else branch
 * separating the non-wrapping case (one memset) from the wrapping case
 * (two memsets) is elided in this listing. */
152 virtio_memset_ring_u32 (u32 *ring, u32 start, u32 ring_size, u32 n_buffers)
154 ASSERT (n_buffers <= ring_size);
156 if (PREDICT_TRUE (start + n_buffers <= ring_size))
158 clib_memset_u32 (ring + start, ~0, n_buffers);
/* Wrapped: clear tail of the ring, then the remainder from slot 0. */
162 clib_memset_u32 (ring + start, ~0, ring_size - start);
163 clib_memset_u32 (ring, ~0, n_buffers - (ring_size - start));
/* Reclaim TX buffers the device has consumed (split virtqueue).
 * Walks used->ring from last_used_idx; in-order completions are freed
 * in bulk via vlib_buffer_free_from_ring, out-of-order ones one at a
 * time. Updates desc_in_use / last_used_idx at the end.
 * NOTE(review): loop braces and several interior lines (mask setup,
 * next/slot bookkeeping, if/else boundaries) are elided in this
 * listing; code kept verbatim. */
168 virtio_free_used_device_desc_split (vlib_main_t *vm, virtio_vring_t *vring,
171 u16 used = vring->desc_in_use;
172 u16 sz = vring->size;
174 u16 last = vring->last_used_idx;
/* Device publishes completions by advancing used->idx. */
175 u16 n_left = vring->used->idx - last;
176 u16 out_of_order_count = 0;
183 vring_used_elem_t *e = &vring->used->ring[last & mask];
185 slot = n_buffers = e->id;
/* Fast path: consecutive in-order completions — accumulate the run. */
187 while (e->id == (n_buffers & mask))
192 vring_desc_t *d = &vring->desc[e->id];
/* Skip over chained descriptors belonging to the same packet. */
194 while (d->flags & VRING_DESC_F_NEXT)
198 d = &vring->desc[next];
202 e = &vring->used->ring[last & mask];
/* Free the whole contiguous run [slot, n_buffers) at once. */
204 vlib_buffer_free_from_ring (vm, vring->buffers, slot,
205 sz, (n_buffers - slot));
206 virtio_memset_ring_u32 (vring->buffers, slot, sz, (n_buffers - slot));
207 used -= (n_buffers - slot);
/* Slow path: out-of-order completion — free a single buffer. */
211 vlib_buffer_free (vm, &vring->buffers[e->id], 1);
212 vring->buffers[e->id] = ~0;
216 out_of_order_count++;
217 vring->flags |= VRING_TX_OUT_OF_ORDER;
222 * Some vhost-backends give buffers back in out-of-order fashion in used ring.
223 * It impacts the overall virtio-performance.
225 if (out_of_order_count)
226 vlib_error_count (vm, node_index, VIRTIO_TX_ERROR_OUT_OF_ORDER,
229 vring->desc_in_use = used;
230 vring->last_used_idx = last;
/* Reclaim TX buffers the device has consumed (packed virtqueue).
 * A descriptor is "used" when its AVAIL (bit 7) and USED (bit 15)
 * flag bits both match the current used_wrap_counter. Frees the
 * contiguous run of completed buffers and advances last_used_idx.
 * NOTE(review): interior lines (flags load, per-descriptor advance,
 * wrap handling body) are elided in this listing. */
234 virtio_free_used_device_desc_packed (vlib_main_t *vm, virtio_vring_t *vring,
237 vring_packed_desc_t *d;
238 u16 sz = vring->size;
239 u16 last = vring->last_used_idx;
240 u16 n_buffers = 0, start;
/* Nothing outstanding — nothing to reclaim. */
243 if (vring->desc_in_use == 0)
246 d = &vring->packed_desc[last];
/* Both flag bits must agree with used_wrap_counter (shifted to bit
 * positions 7 and 15) for the descriptor to count as used. */
250 while ((flags & VRING_DESC_F_AVAIL) == (vring->used_wrap_counter << 7) &&
251 (flags & VRING_DESC_F_USED) == (vring->used_wrap_counter << 15))
/* Passed the end of the ring: flip the wrap counter. */
259 vring->used_wrap_counter ^= 1;
261 d = &vring->packed_desc[last];
/* Bulk-free the completed run and mark the slots free (~0). */
267 vlib_buffer_free_from_ring (vm, vring->buffers, start, sz, n_buffers);
268 virtio_memset_ring_u32 (vring->buffers, start, sz, n_buffers);
269 vring->desc_in_use -= n_buffers;
270 vring->last_used_idx = last;
/* Dispatch buffer reclamation to the packed- or split-ring variant
 * based on the 'packed' template argument. */
275 virtio_free_used_device_desc (vlib_main_t *vm, virtio_vring_t *vring,
276 uword node_index, int packed)
279 virtio_free_used_device_desc_packed (vm, vring, node_index);
281 virtio_free_used_device_desc_split (vm, vring, node_index);
/* Populate the virtio net header for checksum offload: set
 * NEEDS_CSUM, csum_start = L4 offset, csum_offset = position of the
 * TCP/UDP checksum field. IPv4 header checksum is computed here in
 * software since virtio has no IP checksum offload.
 * NOTE(review): braces and some declaration lines (e.g. the ip4
 * pointer declaration) are elided in this listing. */
286 set_checksum_offsets (vlib_buffer_t *b, virtio_net_hdr_v1_t *hdr,
289 vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
291 if (b->flags & VNET_BUFFER_F_IS_IP4)
294 generic_header_offset_t gho = { 0 };
295 vnet_generic_header_offset_parser (b, &gho, is_l2, 1 /* ip4 */ ,
297 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
298 hdr->csum_start = gho.l4_hdr_offset; // 0x22;
299 if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
301 hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
303 else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
305 hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
309 * virtio devices do not support IP4 checksum offload. So driver takes care
310 * of it while doing tx.
313 (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
314 if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
315 ip4->checksum = ip4_header_checksum (ip4);
317 else if (b->flags & VNET_BUFFER_F_IS_IP6)
319 generic_header_offset_t gho = { 0 };
320 vnet_generic_header_offset_parser (b, &gho, is_l2, 0 /* ip4 */ ,
322 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
323 hdr->csum_start = gho.l4_hdr_offset; // 0x36;
324 if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
326 hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
328 else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
330 hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
/* Populate the virtio net header for GSO: gso_type (TCPV4/TCPV6),
 * gso_size from buffer metadata, hdr_len, plus the same checksum
 * fields as set_checksum_offsets. IPv4 header checksum is again done
 * in software. NOTE(review): braces and the ip4 pointer declaration
 * line are elided in this listing. */
336 set_gso_offsets (vlib_buffer_t *b, virtio_net_hdr_v1_t *hdr, const int is_l2)
338 vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
340 if (b->flags & VNET_BUFFER_F_IS_IP4)
343 generic_header_offset_t gho = { 0 };
344 vnet_generic_header_offset_parser (b, &gho, is_l2, 1 /* ip4 */ ,
346 hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
347 hdr->gso_size = vnet_buffer2 (b)->gso_size;
348 hdr->hdr_len = gho.hdr_sz;
349 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
350 hdr->csum_start = gho.l4_hdr_offset; // 0x22;
351 hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
353 (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
355 * virtio devices do not support IP4 checksum offload. So driver takes care
356 * of it while doing tx.
358 if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
359 ip4->checksum = ip4_header_checksum (ip4);
361 else if (b->flags & VNET_BUFFER_F_IS_IP6)
363 generic_header_offset_t gho = { 0 };
364 vnet_generic_header_offset_parser (b, &gho, is_l2, 0 /* ip4 */ ,
366 hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
367 hdr->gso_size = vnet_buffer2 (b)->gso_size;
368 hdr->hdr_len = gho.hdr_sz;
369 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
370 hdr->csum_start = gho.l4_hdr_offset; // 0x36;
371 hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
/* Enqueue one packet (buffer chain 'bi') into the split TX ring at
 * descriptor 'next'. Handles four layouts: single buffer, indirect
 * descriptors (PCI and TAP/TUN variants), and direct chained
 * descriptors. Applies GSO / checksum-offload virtio headers first;
 * sets drop_inline and falls through to a drop when the feature is
 * disabled on the interface or indirect allocation fails.
 * NOTE(review): many interior lines (braces, count/next updates,
 * return statements, avail->idx bump) are elided in this listing;
 * code kept verbatim. Presumably returns the number of descriptors
 * consumed (0 on drop) — confirm against callers. */
376 add_buffer_to_slot (vlib_main_t *vm, vlib_node_runtime_t *node,
377 virtio_if_t *vif, virtio_vring_t *vring, u32 bi,
378 u16 free_desc_count, u16 avail, u16 next, u16 mask,
379 int hdr_sz, int do_gso, int csum_offload, int is_pci,
380 int is_tun, int is_indirect, int is_any_layout)
385 d = &vring->desc[next];
386 vlib_buffer_t *b = vlib_get_buffer (vm, bi);
/* virtio net header lives immediately before the packet data. */
387 virtio_net_hdr_v1_t *hdr = vlib_buffer_get_current (b) - hdr_sz;
388 u32 drop_inline = ~0;
390 clib_memset_u8 (hdr, 0, hdr_sz);
392 if (b->flags & VNET_BUFFER_F_GSO)
395 set_gso_offsets (b, hdr, is_l2);
/* GSO packet but GSO not enabled on this interface — drop. */
398 drop_inline = VIRTIO_TX_ERROR_GSO_PACKET_DROP;
402 else if (b->flags & VNET_BUFFER_F_OFFLOAD)
405 set_checksum_offsets (b, hdr, is_l2);
/* Offload requested but csum offload disabled — drop. */
408 drop_inline = VIRTIO_TX_ERROR_CSUM_OFFLOAD_PACKET_DROP;
413 if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
415 virtio_tx_trace (vm, node, b, bi, is_tun);
/* Case 1: single (unchained) buffer — one descriptor covers
 * header + data. PCI needs a physical address, TAP/TUN virtual. */
418 if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0))
420 d->addr = ((is_pci) ? vlib_buffer_get_current_pa (vm, b) :
421 pointer_to_uword (vlib_buffer_get_current (b))) - hdr_sz;
422 d->len = b->current_length + hdr_sz;
/* Case 2: chained buffer with INDIRECT_DESC negotiated. */
425 else if (is_indirect)
428 * We are using single vlib_buffer_t for indirect descriptor(s)
429 * chain. Single descriptor is 16 bytes and vlib_buffer_t
430 * has 2048 bytes space. So maximum long chain can have 128
431 * (=2048/16) indirect descriptors.
432 * It can easily support 65535 bytes of Jumbo frames with
433 * each data buffer size of 512 bytes minimum.
435 u32 indirect_buffer = 0;
436 if (PREDICT_FALSE (vlib_buffer_alloc (vm, &indirect_buffer, 1) == 0))
438 drop_inline = VIRTIO_TX_ERROR_INDIRECT_DESC_ALLOC_FAILED;
/* Chain the indirect-table buffer in front of the packet so both
 * are freed together on completion. */
442 vlib_buffer_t *indirect_desc = vlib_get_buffer (vm, indirect_buffer);
443 indirect_desc->current_data = 0;
444 indirect_desc->flags |= VLIB_BUFFER_NEXT_PRESENT;
445 indirect_desc->next_buffer = bi;
446 bi = indirect_buffer;
449 (vring_desc_t *) vlib_buffer_get_current (indirect_desc);
/* PCI: indirect table and data addressed physically. */
453 d->addr = vlib_physmem_get_pa (vm, id);
454 id->addr = vlib_buffer_get_current_pa (vm, b) - hdr_sz;
457 * If VIRTIO_F_ANY_LAYOUT is not negotiated, then virtio_net_hdr
458 * should be presented in separate descriptor and data will start
459 * from next descriptor.
/* ANY_LAYOUT: header and first data share one descriptor. */
462 id->len = b->current_length + hdr_sz;
/* Otherwise the header gets its own descriptor, chained to data. */
466 id->flags = VRING_DESC_F_NEXT;
470 id->addr = vlib_buffer_get_current_pa (vm, b);
471 id->len = b->current_length;
/* Walk the buffer chain, one indirect descriptor per segment. */
473 while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
475 id->flags = VRING_DESC_F_NEXT;
479 b = vlib_get_buffer (vm, b->next_buffer);
480 id->addr = vlib_buffer_get_current_pa (vm, b);
481 id->len = b->current_length;
/* Table full — truncate and count a TRUNC_PACKET error if more
 * segments remain. */
482 if (PREDICT_FALSE (count == VIRTIO_TX_MAX_CHAIN_LEN))
484 if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
485 vlib_error_count (vm, node->node_index,
486 VIRTIO_TX_ERROR_TRUNC_PACKET, 1);
491 else /* VIRTIO_IF_TYPE_[TAP | TUN] */
/* TAP/TUN: virtual addresses are fine for the kernel backend. */
493 d->addr = pointer_to_uword (id);
494 /* first buffer in chain */
495 id->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz;
496 id->len = b->current_length + hdr_sz;
498 while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
500 id->flags = VRING_DESC_F_NEXT;
504 b = vlib_get_buffer (vm, b->next_buffer);
505 id->addr = pointer_to_uword (vlib_buffer_get_current (b));
506 id->len = b->current_length;
507 if (PREDICT_FALSE (count == VIRTIO_TX_MAX_CHAIN_LEN))
509 if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
510 vlib_error_count (vm, node->node_index,
511 VIRTIO_TX_ERROR_TRUNC_PACKET, 1);
/* Main descriptor points at the indirect table. */
518 d->len = count * sizeof (vring_desc_t);
519 d->flags = VRING_DESC_F_INDIRECT;
/* Case 3: chained buffer without indirect descriptors — consume one
 * ring descriptor per segment, chained with VRING_DESC_F_NEXT. */
524 vlib_buffer_t *b_temp = b;
525 u16 n_buffers_in_chain = 1;
528 * Check the length of the chain for the required number of
529 * descriptors. Return from here, retry to get more descriptors,
530 * if chain length is greater than available descriptors.
532 while (b_temp->flags & VLIB_BUFFER_NEXT_PRESENT)
534 n_buffers_in_chain++;
535 b_temp = vlib_get_buffer (vm, b_temp->next_buffer);
/* Not enough free descriptors: report required count so the caller
 * can reclaim and retry. */
538 if (n_buffers_in_chain > free_desc_count)
539 return n_buffers_in_chain;
541 d->addr = vlib_buffer_get_current_pa (vm, b) - hdr_sz;
542 d->len = b->current_length + hdr_sz;
544 while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
546 d->flags = VRING_DESC_F_NEXT;
547 vring->buffers[count] = bi;
/* Segments are freed individually later, so clear chain flags. */
549 ~(VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID);
553 count = (count + 1) & mask;
555 d = &vring->desc[count];
556 b = vlib_get_buffer (vm, bi);
557 d->addr = vlib_buffer_get_current_pa (vm, b);
558 d->len = b->current_length;
561 vring->buffers[count] = bi;
562 vring->avail->ring[avail & mask] = next;
/* Common tail: record buffer index and publish slot in avail ring. */
570 vring->buffers[next] = bi;
571 vring->avail->ring[avail & mask] = next;
575 if (drop_inline != ~0)
576 virtio_interface_drop_inline (vm, vif, node->node_index, &bi, 1,
/* Packed-ring counterpart of add_buffer_to_slot: enqueue one packet
 * at descriptor 'next', setting the AVAIL/USED flag bits according to
 * avail_wrap_counter. Same GSO / csum / indirect handling as the
 * split-ring path; there is no direct-chained case here.
 * NOTE(review): interior lines (braces, count updates, final flags
 * store, return) are elided in this listing; code kept verbatim. */
583 add_buffer_to_slot_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
584 virtio_if_t *vif, virtio_vring_t *vring, u32 bi,
585 u16 next, int hdr_sz, int do_gso, int csum_offload,
586 int is_pci, int is_tun, int is_indirect,
589 u16 n_added = 0, flags = 0;
591 vring_packed_desc_t *d = &vring->packed_desc[next];
592 vlib_buffer_t *b = vlib_get_buffer (vm, bi);
/* virtio net header lives immediately before the packet data. */
593 virtio_net_hdr_v1_t *hdr = vlib_buffer_get_current (b) - hdr_sz;
594 u32 drop_inline = ~0;
596 clib_memset (hdr, 0, hdr_sz);
598 if (b->flags & VNET_BUFFER_F_GSO)
601 set_gso_offsets (b, hdr, is_l2);
/* GSO packet but GSO disabled on this interface — drop. */
604 drop_inline = VIRTIO_TX_ERROR_GSO_PACKET_DROP;
608 else if (b->flags & VNET_BUFFER_F_OFFLOAD)
611 set_checksum_offsets (b, hdr, is_l2);
/* Offload requested but csum offload disabled — drop. */
614 drop_inline = VIRTIO_TX_ERROR_CSUM_OFFLOAD_PACKET_DROP;
618 if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
620 virtio_tx_trace (vm, node, b, bi, is_tun);
/* Single (unchained) buffer: one descriptor, header + data. */
623 if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0))
626 ((is_pci) ? vlib_buffer_get_current_pa (vm,
628 pointer_to_uword (vlib_buffer_get_current (b))) - hdr_sz;
629 d->len = b->current_length + hdr_sz;
631 else if (is_indirect)
634 * We are using single vlib_buffer_t for indirect descriptor(s)
635 * chain. Single descriptor is 16 bytes and vlib_buffer_t
636 * has 2048 bytes space. So maximum long chain can have 128
637 * (=2048/16) indirect descriptors.
638 * It can easily support 65535 bytes of Jumbo frames with
639 * each data buffer size of 512 bytes minimum.
641 u32 indirect_buffer = 0;
642 if (PREDICT_FALSE (vlib_buffer_alloc (vm, &indirect_buffer, 1) == 0))
644 drop_inline = VIRTIO_TX_ERROR_INDIRECT_DESC_ALLOC_FAILED;
/* Chain the indirect-table buffer in front of the packet so both
 * are freed together on completion. */
648 vlib_buffer_t *indirect_desc = vlib_get_buffer (vm, indirect_buffer);
649 indirect_desc->current_data = 0;
650 indirect_desc->flags |= VLIB_BUFFER_NEXT_PRESENT;
651 indirect_desc->next_buffer = bi;
652 bi = indirect_buffer;
654 vring_packed_desc_t *id =
655 (vring_packed_desc_t *) vlib_buffer_get_current (indirect_desc);
659 d->addr = vlib_physmem_get_pa (vm, id);
660 id->addr = vlib_buffer_get_current_pa (vm, b) - hdr_sz;
663 * If VIRTIO_F_ANY_LAYOUT is not negotiated, then virtio_net_hdr
664 * should be presented in separate descriptor and data will start
665 * from next descriptor.
/* ANY_LAYOUT: header and first data share one descriptor. */
668 id->len = b->current_length + hdr_sz;
676 id->addr = vlib_buffer_get_current_pa (vm, b);
677 id->len = b->current_length;
/* One indirect descriptor per chained segment, up to the cap. */
679 while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
685 b = vlib_get_buffer (vm, b->next_buffer);
686 id->addr = vlib_buffer_get_current_pa (vm, b);
687 id->len = b->current_length;
688 if (PREDICT_FALSE (count == VIRTIO_TX_MAX_CHAIN_LEN))
690 if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
691 vlib_error_count (vm, node->node_index,
692 VIRTIO_TX_ERROR_TRUNC_PACKET, 1);
699 d->len = count * sizeof (vring_packed_desc_t);
700 flags = VRING_DESC_F_INDIRECT;
/* Packed ring availability: set AVAIL / clear USED when the wrap
 * counter is 1, inverse when 0 (virtio 1.1 packed ring protocol). */
706 if (vring->avail_wrap_counter)
708 flags |= VRING_DESC_F_AVAIL;
709 flags &= ~VRING_DESC_F_USED;
713 flags &= ~VRING_DESC_F_AVAIL;
714 flags |= VRING_DESC_F_USED;
719 vring->buffers[next] = bi;
723 if (drop_inline != ~0)
724 virtio_interface_drop_inline (vm, vif, node->node_index, &bi, 1,
/* TX batch loop for packed virtqueues: drains any previously buffered
 * packets first (packet_buffering), then the current frame, as long
 * as descriptors remain. Publishes desc_next/desc_in_use behind a
 * store barrier and kicks the device unless events are disabled.
 * NOTE(review): loop braces, next/used increments and the return
 * value line are elided in this listing. */
731 virtio_interface_tx_packed_gso_inline (vlib_main_t *vm,
732 vlib_node_runtime_t *node,
733 virtio_if_t *vif, virtio_if_type_t type,
734 virtio_vring_t *vring, u32 *buffers,
735 u16 n_left, const int do_gso,
736 const int csum_offload)
738 int is_pci = (type == VIRTIO_IF_TYPE_PCI);
739 int is_tun = (type == VIRTIO_IF_TYPE_TUN);
/* Negotiated features select indirect descriptors / any-layout. */
741 ((vif->features & VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC)) != 0);
743 ((vif->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) != 0);
744 const int hdr_sz = vif->virtio_net_hdr_sz;
745 u16 sz = vring->size;
746 u16 used, next, n_buffers = 0, n_buffers_left = 0;
747 u16 n_vectors = n_left;
750 used = vring->desc_in_use;
751 next = vring->desc_next;
/* Phase 1: retransmit packets parked by packet buffering. */
753 if (vif->packet_buffering)
755 n_buffers = n_buffers_left = virtio_vring_n_buffers (vring->buffering);
757 while (n_buffers_left && used < sz)
761 u32 bi = virtio_vring_buffering_read_from_front (vring->buffering);
764 n_added = add_buffer_to_slot_packed (
765 vm, node, vif, vring, bi, next, hdr_sz, do_gso, csum_offload,
766 is_pci, is_tun, is_indirect, is_any_layout);
768 if (PREDICT_FALSE (n_added == 0))
/* Ring index wrapped — toggle avail wrap counter. */
776 vring->avail_wrap_counter ^= 1;
/* Phase 2: packets from the current frame. */
781 while (n_left && used < sz)
785 n_added = add_buffer_to_slot_packed (
786 vm, node, vif, vring, buffers[0], next, hdr_sz, do_gso, csum_offload,
787 is_pci, is_tun, is_indirect, is_any_layout);
790 if (PREDICT_FALSE (n_added == 0))
798 vring->avail_wrap_counter ^= 1;
/* Only publish + kick if something was actually enqueued. */
802 if (n_left != n_vectors || n_buffers != n_buffers_left)
804 CLIB_MEMORY_STORE_BARRIER ();
805 vring->desc_next = next;
806 vring->desc_in_use = used;
807 CLIB_MEMORY_BARRIER ();
808 if (vring->device_event->flags != VRING_EVENT_F_DISABLE)
809 virtio_kick (vm, vring, vif);
/* Scan the buffers[] shadow ring for free slots (marked ~0) when the
 * ring is in out-of-order state; returns (via out-params) the first
 * free index and how many contiguous free slots follow it.
 * NOTE(review): braces, loop exit conditions and the second loop's
 * header are elided in this listing; code kept verbatim. */
816 virtio_find_free_desc (virtio_vring_t *vring, u16 size, u16 mask, u16 req,
817 u16 next, u32 *first_free_desc_index,
818 u16 *free_desc_count)
821 /* next is used as hint: from where to start looking */
822 for (u16 i = 0; i < size; i++, next++)
824 if (vring->buffers[next & mask] == ~0)
826 if (*first_free_desc_index == ~0)
828 *first_free_desc_index = (next & mask);
830 (*free_desc_count)++;
/* Count only the run contiguous with the first free slot. */
837 if (start + *free_desc_count == i)
839 (*free_desc_count)++;
/* TX batch loop for split virtqueues: computes free descriptor count
 * (scanning for holes when the ring is out-of-order), drains buffered
 * packets then the frame, publishes avail->idx with a seq-cst store
 * and kicks the device unless NO_NOTIFY is set.
 * NOTE(review): loop braces, avail/used increments and the return
 * line are elided in this listing. */
852 virtio_interface_tx_split_gso_inline (vlib_main_t *vm,
853 vlib_node_runtime_t *node,
854 virtio_if_t *vif, virtio_if_type_t type,
855 virtio_vring_t *vring, u32 *buffers,
856 u16 n_left, int do_gso, int csum_offload)
858 u16 used, next, avail, n_buffers = 0, n_buffers_left = 0;
859 int is_pci = (type == VIRTIO_IF_TYPE_PCI);
860 int is_tun = (type == VIRTIO_IF_TYPE_TUN);
/* Negotiated features select indirect descriptors / any-layout. */
862 ((vif->features & VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC)) != 0);
864 ((vif->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) != 0);
865 u16 sz = vring->size;
866 int hdr_sz = vif->virtio_net_hdr_sz;
868 u16 n_vectors = n_left;
870 used = vring->desc_in_use;
871 next = vring->desc_next;
872 avail = vring->avail->idx;
874 u16 free_desc_count = 0;
/* Out-of-order completions leave holes; find a usable free run. */
876 if (PREDICT_FALSE (vring->flags & VRING_TX_OUT_OF_ORDER))
878 u32 first_free_desc_index = ~0;
880 virtio_find_free_desc (vring, sz, mask, n_left, next,
881 &first_free_desc_index, &free_desc_count);
884 next = first_free_desc_index;
/* In-order ring: free space is simply size minus in-use. */
887 free_desc_count = sz - used;
/* Phase 1: retransmit packets parked by packet buffering. */
889 if (vif->packet_buffering)
891 n_buffers = n_buffers_left = virtio_vring_n_buffers (vring->buffering);
893 while (n_buffers_left && free_desc_count)
897 u32 bi = virtio_vring_buffering_read_from_front (vring->buffering);
901 n_added = add_buffer_to_slot (vm, node, vif, vring, bi,
902 free_desc_count, avail, next, mask,
903 hdr_sz, do_gso, csum_offload, is_pci,
904 is_tun, is_indirect, is_any_layout);
905 if (PREDICT_FALSE (n_added == 0))
/* n_added > free_desc_count means "need more descriptors": stop. */
910 else if (PREDICT_FALSE (n_added > free_desc_count))
914 next = (next + n_added) & mask;
917 free_desc_count -= n_added;
/* Phase 2: packets from the current frame. */
921 while (n_left && free_desc_count)
926 add_buffer_to_slot (vm, node, vif, vring, buffers[0], free_desc_count,
927 avail, next, mask, hdr_sz, do_gso, csum_offload,
928 is_pci, is_tun, is_indirect, is_any_layout);
930 if (PREDICT_FALSE (n_added == 0))
936 else if (PREDICT_FALSE (n_added > free_desc_count))
940 next = (next + n_added) & mask;
944 free_desc_count -= n_added;
/* Only publish + kick if something was actually enqueued. */
947 if (n_left != n_vectors || n_buffers != n_buffers_left)
949 clib_atomic_store_seq_cst (&vring->avail->idx, avail);
950 vring->desc_next = next;
951 vring->desc_in_use = used;
952 if ((clib_atomic_load_seq_cst (&vring->used->flags) &
953 VRING_USED_F_NO_NOTIFY) == 0)
954 virtio_kick (vm, vring, vif);
/* Dispatch the TX batch to the packed- or split-ring implementation
 * based on the 'packed' argument, forwarding gso/csum capabilities. */
961 virtio_interface_tx_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
962 virtio_if_t *vif, virtio_if_type_t type,
963 virtio_vring_t *vring, u32 *buffers,
964 u16 n_left, int packed, int do_gso,
968 return virtio_interface_tx_packed_gso_inline (vm, node, vif, type, vring,
970 do_gso, csum_offload);
972 return virtio_interface_tx_split_gso_inline (vm, node, vif, type, vring,
974 do_gso, csum_offload);
/* Select the TX specialization from the interface's hardware caps:
 * GSO+csum, csum-only, or neither. The constant arguments let the
 * compiler generate three specialized code paths.
 * NOTE(review): the gso argument lines (1/0) between the call lines
 * are elided in this listing. */
978 virtio_interface_tx_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
979 virtio_if_t *vif, virtio_vring_t *vring,
980 virtio_if_type_t type, u32 *buffers, u16 n_left,
983 vnet_main_t *vnm = vnet_get_main ();
984 vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
986 if (hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO)
987 return virtio_interface_tx_gso_inline (vm, node, vif, type, vring,
988 buffers, n_left, packed,
990 1 /* checksum offload */ );
991 else if (hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM)
992 return virtio_interface_tx_gso_inline (vm, node, vif, type, vring,
993 buffers, n_left, packed,
995 1 /* checksum offload */ );
997 return virtio_interface_tx_gso_inline (vm, node, vif, type, vring,
998 buffers, n_left, packed,
1000 0 /* no checksum offload */ );
/* Device-class TX entry point. Picks a TX queue by thread index,
 * optionally coalesces packets (GRO), reclaims completed descriptors,
 * transmits by interface type with a bounded retry, then parks or
 * drops whatever could not be sent. Returns packets consumed.
 * NOTE(review): the retry loop braces, gso-enabled arguments and
 * buffering-store argument lines are elided in this listing. */
1003 VNET_DEVICE_CLASS_TX_FN (virtio_device_class) (vlib_main_t * vm,
1004 vlib_node_runtime_t * node,
1005 vlib_frame_t * frame)
1007 virtio_main_t *nm = &virtio_main;
1008 vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
1009 virtio_if_t *vif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
/* Simple thread -> txq mapping; lock below covers sharing. */
1010 u16 qid = vm->thread_index % vif->num_txqs;
1011 virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, qid);
1012 u16 n_left = frame->n_vectors;
1013 u32 *buffers = vlib_frame_vector_args (frame);
1014 u32 to[GRO_TO_VECTOR_SIZE (n_left)];
1015 int packed = vif->is_packed;
1016 u16 n_vectors = frame->n_vectors;
1018 clib_spinlock_lock_if_init (&vring->lockp);
/* GRO coalescing may shrink the vector; 'to' holds the output. */
1020 if (vif->packet_coalesce)
1022 n_vectors = n_left =
1023 vnet_gro_inline (vm, vring->flow_table, buffers, n_left, to);
1027 u16 retry_count = 2;
1030 /* free consumed buffers */
1031 virtio_free_used_device_desc (vm, vring, node->node_index, packed);
/* &buffers[n_vectors - n_left] skips packets already sent on a
 * previous retry iteration. */
1033 if (vif->type == VIRTIO_IF_TYPE_TAP)
1034 n_left = virtio_interface_tx_inline (vm, node, vif, vring,
1036 &buffers[n_vectors - n_left],
1038 else if (vif->type == VIRTIO_IF_TYPE_PCI)
1039 n_left = virtio_interface_tx_inline (vm, node, vif, vring,
1041 &buffers[n_vectors - n_left],
1043 else if (vif->type == VIRTIO_IF_TYPE_TUN)
1044 n_left = virtio_interface_tx_inline (vm, node, vif, vring,
1046 &buffers[n_vectors - n_left],
/* Retry (reclaim + resend) a bounded number of times. */
1051 if (n_left && retry_count--)
/* Park leftovers in the buffering FIFO if enabled... */
1054 if (vif->packet_buffering && n_left)
1056 u16 n_buffered = virtio_vring_buffering_store_packets (vring->buffering,
1061 n_left -= n_buffered;
/* ...anything still left is dropped as NO_FREE_SLOTS. */
1064 virtio_interface_drop_inline (vm, vif, node->node_index,
1065 &buffers[n_vectors - n_left], n_left,
1066 VIRTIO_TX_ERROR_NO_FREE_SLOTS);
1068 clib_spinlock_unlock_if_init (&vring->lockp);
1070 return frame->n_vectors - n_left;
/* rx_redirect_to_node callback: point the interface's RX path at an
 * arbitrary graph node, or restore the default when node_index == ~0.
 * NOTE(review): braces and the vlib_node_add_next() trailing argument
 * line are elided in this listing. */
1074 virtio_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
1077 virtio_main_t *apm = &virtio_main;
1078 vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
1079 virtio_if_t *vif = pool_elt_at_index (apm->interfaces, hw->dev_instance);
1081 /* Shut off redirection */
1082 if (node_index == ~0)
1084 vif->per_interface_next_index = node_index;
/* Otherwise register the target as a next node of virtio-input. */
1088 vif->per_interface_next_index =
1089 vlib_node_add_next (vlib_get_main (), virtio_input_node.index,
/* clear_counters callback — intentionally a no-op for virtio. */
1094 virtio_clear_hw_interface_counters (u32 instance)
1096 /* Nothing for now */
/* Enable RX interrupts: clear the event-suppression flag (packed
 * ring, driver_event) or the avail-ring NO_INTERRUPT flag (split).
 * NOTE(review): the if/else selecting between the two lines is elided
 * in this listing. */
1100 virtio_set_rx_interrupt (virtio_if_t *vif, virtio_vring_t *vring)
1103 vring->driver_event->flags &= ~VRING_EVENT_F_DISABLE;
1105 vring->avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
/* Disable RX interrupts for polling mode — mirror image of
 * virtio_set_rx_interrupt: set the suppression flags instead.
 * NOTE(review): the if/else selecting between the two lines is elided
 * in this listing. */
1109 virtio_set_rx_polling (virtio_if_t *vif, virtio_vring_t *vring)
1112 vring->driver_event->flags |= VRING_EVENT_F_DISABLE;
1114 vring->avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
/* rx_mode_change callback: switch a queue between polling and
 * interrupt mode. Maintains a global count of interrupt-mode queues
 * used to start/stop the virtio-send-interrupt timer process when
 * coalescing or buffering is active. Errors out if the PCI device
 * cannot do interrupt mode. */
1117 static clib_error_t *
1118 virtio_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
1119 vnet_hw_if_rx_mode mode)
1121 vlib_main_t *vm = vnm->vlib_main;
1122 virtio_main_t *mm = &virtio_main;
1123 vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
1124 virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
1125 virtio_vring_t *rx_vring = vec_elt_at_index (vif->rxq_vrings, qid);
/* PCI device without interrupt support: force polling, report error. */
1127 if (vif->type == VIRTIO_IF_TYPE_PCI && !(vif->support_int_mode))
1129 virtio_set_rx_polling (vif, rx_vring);
1130 return clib_error_return (0, "interrupt mode is not supported");
1133 if (mode == VNET_HW_IF_RX_MODE_POLLING)
/* Leaving interrupt mode: stop the timer process when the last
 * interrupt-mode queue goes away. */
1135 if (vif->packet_coalesce || vif->packet_buffering)
1137 if (mm->interrupt_queues_count > 0)
1138 mm->interrupt_queues_count--;
1139 if (mm->interrupt_queues_count == 0)
1140 vlib_process_signal_event (vm,
1141 virtio_send_interrupt_node.index,
1142 VIRTIO_EVENT_STOP_TIMER, 0);
1144 virtio_set_rx_polling (vif, rx_vring);
/* Entering interrupt mode: start the timer on the first queue. */
1148 if (vif->packet_coalesce || vif->packet_buffering)
1150 mm->interrupt_queues_count++;
1151 if (mm->interrupt_queues_count == 1)
1152 vlib_process_signal_event (vm,
1153 virtio_send_interrupt_node.index,
1154 VIRTIO_EVENT_START_TIMER, 0);
1156 virtio_set_rx_interrupt (vif, rx_vring);
1159 rx_vring->mode = mode;
/* admin_up_down callback: mirror the admin flag into vif->flags and
 * drive the hardware link state (link follows admin for virtio). */
1164 static clib_error_t *
1165 virtio_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
1167 virtio_main_t *mm = &virtio_main;
1168 vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
1169 virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
1171 if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
1173 vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
1174 vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
1175 VNET_HW_INTERFACE_FLAG_LINK_UP);
1179 vif->flags &= ~VIRTIO_IF_FLAG_ADMIN_UP;
1180 vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
/* subif_add_del callback — sub-interfaces are not supported;
 * intentionally a no-op. */
1185 static clib_error_t *
1186 virtio_subif_add_del_function (vnet_main_t * vnm,
1188 struct vnet_sw_interface_t *st, int is_add)
1190 /* Nothing for now */
/* Device-class registration: wires the format/TX/error/callback
 * functions defined above into the vnet device framework.
 * NOTE(review): the .name line and closing "};" are elided in this
 * listing. */
1195 VNET_DEVICE_CLASS (virtio_device_class) = {
1197 .format_device_name = format_virtio_device_name,
1198 .format_device = format_virtio_device,
1199 .format_tx_trace = format_virtio_tx_trace,
1200 .tx_function_n_errors = VIRTIO_TX_N_ERROR,
1201 .tx_function_error_strings = virtio_tx_func_error_strings,
1202 .rx_redirect_to_node = virtio_set_interface_next_node,
1203 .clear_counters = virtio_clear_hw_interface_counters,
1204 .admin_up_down_function = virtio_interface_admin_up_down,
1205 .subif_add_del_function = virtio_subif_add_del_function,
1206 .rx_mode_change_function = virtio_interface_rx_mode_change,
1212 * fd.io coding-style-patch-verification: ON
1215 * eval: (c-set-style "gnu")