From b977d3f7c28caee8359098be569b55e770c5c6ec Mon Sep 17 00:00:00 2001 From: Mohsin Kazmi Date: Mon, 16 Nov 2020 16:49:30 +0100 Subject: [PATCH] virtio: virtio: implement packed queues from virtio 1.1 Type: feature Change-Id: I12703371541298efa029903d6762b1cd1f7322ca Signed-off-by: Mohsin Kazmi --- src/vnet/devices/virtio/FEATURE.yaml | 1 + src/vnet/devices/virtio/cli.c | 6 +- src/vnet/devices/virtio/device.c | 343 +++++++++++++++++++++++++-- src/vnet/devices/virtio/node.c | 216 +++++++++++++++-- src/vnet/devices/virtio/pci.c | 354 +++++++++++++++++++++++++++- src/vnet/devices/virtio/virtio.c | 184 ++++++++++----- src/vnet/devices/virtio/virtio.h | 43 +++- src/vnet/devices/virtio/virtio_pci_modern.c | 25 +- 8 files changed, 1037 insertions(+), 135 deletions(-) diff --git a/src/vnet/devices/virtio/FEATURE.yaml b/src/vnet/devices/virtio/FEATURE.yaml index ac7133bae7f..7b2fb59e1ad 100644 --- a/src/vnet/devices/virtio/FEATURE.yaml +++ b/src/vnet/devices/virtio/FEATURE.yaml @@ -7,6 +7,7 @@ features: - Device mode to emulate vhost-user interface presented to VPP from the guest VM. - Support virtio 1.0 in virtio + - Support virtio 1.1 packed ring in virtio [experimental] - Support multi-queue, GSO, checksum offload, indirect descriptor, jumbo frame, and packed ring. 
- Support virtio 1.1 packed ring in vhost diff --git a/src/vnet/devices/virtio/cli.c b/src/vnet/devices/virtio/cli.c index 8a9c97be8cb..05f9eab18cf 100644 --- a/src/vnet/devices/virtio/cli.c +++ b/src/vnet/devices/virtio/cli.c @@ -49,10 +49,12 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input, args.checksum_offload_enabled = 1; else if (unformat (line_input, "buffering")) { - args.virtio_flags = VIRTIO_FLAG_BUFFERING; + args.virtio_flags |= VIRTIO_FLAG_BUFFERING; if (unformat (line_input, "size %u", &buffering_size)) args.buffering_size = buffering_size; } + else if (unformat (line_input, "packed")) + args.virtio_flags |= VIRTIO_FLAG_PACKED; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); @@ -69,7 +71,7 @@ VLIB_CLI_COMMAND (virtio_pci_create_command, static) = { .path = "create interface virtio", .short_help = "create interface virtio " "[feature-mask ] [gso-enabled] [csum-enabled] " - "[buffering [size ]]", + "[buffering [size ]] [packed]", .function = virtio_pci_create_command_fn, }; /* *INDENT-ON* */ diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c index add82b77634..7e57952905b 100644 --- a/src/vnet/devices/virtio/device.c +++ b/src/vnet/devices/virtio/device.c @@ -161,8 +161,8 @@ virtio_memset_ring_u32 (u32 * ring, u32 start, u32 ring_size, u32 n_buffers) } static_always_inline void -virtio_free_used_device_desc (vlib_main_t * vm, virtio_vring_t * vring, - uword node_index) +virtio_free_used_device_desc_split (vlib_main_t * vm, virtio_vring_t * vring, + uword node_index) { u16 used = vring->desc_in_use; u16 sz = vring->size; @@ -227,7 +227,60 @@ virtio_free_used_device_desc (vlib_main_t * vm, virtio_vring_t * vring, } static_always_inline void -set_checksum_offsets (vlib_buffer_t * b, virtio_net_hdr_v1_t * hdr, int is_l2) +virtio_free_used_device_desc_packed (vlib_main_t * vm, virtio_vring_t * vring, + uword node_index) +{ + vring_packed_desc_t *d; + u16 sz = 
vring->size; + u16 last = vring->last_used_idx; + u16 n_buffers = 0, start; + u16 flags; + + if (vring->desc_in_use == 0) + return; + + d = &vring->packed_desc[last]; + flags = d->flags; + start = d->id; + + while ((flags & VRING_DESC_F_AVAIL) == (vring->used_wrap_counter << 7) && + (flags & VRING_DESC_F_USED) == (vring->used_wrap_counter << 15)) + { + last++; + n_buffers++; + + if (last >= sz) + { + last = 0; + vring->used_wrap_counter ^= 1; + } + d = &vring->packed_desc[last]; + flags = d->flags; + } + + if (n_buffers) + { + vlib_buffer_free_from_ring (vm, vring->buffers, start, sz, n_buffers); + virtio_memset_ring_u32 (vring->buffers, start, sz, n_buffers); + vring->desc_in_use -= n_buffers; + vring->last_used_idx = last; + } +} + +static_always_inline void +virtio_free_used_device_desc (vlib_main_t * vm, virtio_vring_t * vring, + uword node_index, int packed) +{ + if (packed) + virtio_free_used_device_desc_packed (vm, vring, node_index); + else + virtio_free_used_device_desc_split (vm, vring, node_index); + +} + +static_always_inline void +set_checksum_offsets (vlib_buffer_t * b, virtio_net_hdr_v1_t * hdr, + const int is_l2) { if (b->flags & VNET_BUFFER_F_IS_IP4) { @@ -274,7 +327,8 @@ set_checksum_offsets (vlib_buffer_t * b, virtio_net_hdr_v1_t * hdr, int is_l2) } static_always_inline void -set_gso_offsets (vlib_buffer_t * b, virtio_net_hdr_v1_t * hdr, int is_l2) +set_gso_offsets (vlib_buffer_t * b, virtio_net_hdr_v1_t * hdr, + const int is_l2) { if (b->flags & VNET_BUFFER_F_IS_IP4) { @@ -504,6 +558,238 @@ done: return n_added; } +static_always_inline u16 +add_buffer_to_slot_packed (vlib_main_t * vm, vlib_node_runtime_t * node, + virtio_vring_t * vring, u32 bi, u16 next, + int hdr_sz, int do_gso, int csum_offload, + int is_pci, int is_tun, int is_indirect, + int is_any_layout) +{ + u16 n_added = 0, flags = 0; + int is_l2 = !is_tun; + vring_packed_desc_t *d = &vring->packed_desc[next]; + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + virtio_net_hdr_v1_t *hdr = 
vlib_buffer_get_current (b) - hdr_sz; + u32 drop_inline = ~0; + + clib_memset (hdr, 0, hdr_sz); + + if (b->flags & VNET_BUFFER_F_GSO) + { + if (do_gso) + set_gso_offsets (b, hdr, is_l2); + else + { + drop_inline = VIRTIO_TX_ERROR_GSO_PACKET_DROP; + goto done; + } + } + else if (b->flags & (VNET_BUFFER_F_OFFLOAD_TCP_CKSUM | + VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) + { + if (csum_offload) + set_checksum_offsets (b, hdr, is_l2); + else + { + drop_inline = VIRTIO_TX_ERROR_CSUM_OFFLOAD_PACKET_DROP; + goto done; + } + } + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) + { + virtio_tx_trace (vm, node, b, bi, is_tun); + } + + if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)) + { + d->addr = + ((is_pci) ? vlib_buffer_get_current_pa (vm, + b) : + pointer_to_uword (vlib_buffer_get_current (b))) - hdr_sz; + d->len = b->current_length + hdr_sz; + } + else if (is_indirect) + { + /* + * We are using single vlib_buffer_t for indirect descriptor(s) + * chain. Single descriptor is 16 bytes and vlib_buffer_t + * has 2048 bytes space. So maximum long chain can have 128 + * (=2048/16) indirect descriptors. + * It can easily support 65535 bytes of Jumbo frames with + * each data buffer size of 512 bytes minimum. 
+ */ + u32 indirect_buffer = 0; + if (PREDICT_FALSE (vlib_buffer_alloc (vm, &indirect_buffer, 1) == 0)) + { + drop_inline = VIRTIO_TX_ERROR_INDIRECT_DESC_ALLOC_FAILED; + goto done; + } + + vlib_buffer_t *indirect_desc = vlib_get_buffer (vm, indirect_buffer); + indirect_desc->current_data = 0; + indirect_desc->flags |= VLIB_BUFFER_NEXT_PRESENT; + indirect_desc->next_buffer = bi; + bi = indirect_buffer; + + vring_packed_desc_t *id = + (vring_packed_desc_t *) vlib_buffer_get_current (indirect_desc); + u32 count = 1; + if (is_pci) + { + d->addr = vlib_physmem_get_pa (vm, id); + id->addr = vlib_buffer_get_current_pa (vm, b) - hdr_sz; + + /* + * If VIRTIO_F_ANY_LAYOUT is not negotiated, then virtio_net_hdr + * should be presented in separate descriptor and data will start + * from next descriptor. + */ + if (is_any_layout) + id->len = b->current_length + hdr_sz; + else + { + id->len = hdr_sz; + id->flags = 0; + id->id = 0; + count++; + id++; + id->addr = vlib_buffer_get_current_pa (vm, b); + id->len = b->current_length; + } + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + id->flags = 0; + id->id = 0; + count++; + id++; + b = vlib_get_buffer (vm, b->next_buffer); + id->addr = vlib_buffer_get_current_pa (vm, b); + id->len = b->current_length; + } + } + id->flags = 0; + id->id = 0; + d->len = count * sizeof (vring_packed_desc_t); + flags = VRING_DESC_F_INDIRECT; + } + else + { + ASSERT (0); + } + if (vring->avail_wrap_counter) + { + flags |= VRING_DESC_F_AVAIL; + flags &= ~VRING_DESC_F_USED; + } + else + { + flags &= ~VRING_DESC_F_AVAIL; + flags |= VRING_DESC_F_USED; + } + + d->id = next; + d->flags = flags; + vring->buffers[next] = bi; + n_added++; + +done: + if (drop_inline != ~0) + virtio_interface_drop_inline (vm, node->node_index, &bi, 1, drop_inline); + + return n_added; +} + +static_always_inline uword +virtio_interface_tx_packed_gso_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + virtio_if_t * vif, + virtio_if_type_t type, + virtio_vring_t * vring, + 
u32 * buffers, u16 n_left, + const int do_gso, + const int csum_offload) +{ + int is_pci = (type == VIRTIO_IF_TYPE_PCI); + int is_tun = (type == VIRTIO_IF_TYPE_TUN); + int is_indirect = + ((vif->features & VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC)) != 0); + int is_any_layout = + ((vif->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) != 0); + const int hdr_sz = vif->virtio_net_hdr_sz; + u16 sz = vring->size; + u16 used, next, n_buffers = 0, n_buffers_left = 0; + u16 n_vectors = n_left; + + + used = vring->desc_in_use; + next = vring->desc_next; + + if (vif->packet_buffering) + { + n_buffers = n_buffers_left = virtio_vring_n_buffers (vring->buffering); + + while (n_buffers_left && used < sz) + { + u16 n_added = 0; + + u32 bi = virtio_vring_buffering_read_from_front (vring->buffering); + if (bi == ~0) + break; + n_added = add_buffer_to_slot_packed (vm, node, + vring, bi, next, + hdr_sz, do_gso, csum_offload, + is_pci, is_tun, is_indirect, + is_any_layout); + n_buffers_left--; + if (PREDICT_FALSE (n_added == 0)) + continue; + + used++; + next++; + if (next >= sz) + { + next = 0; + vring->avail_wrap_counter ^= 1; + } + } + } + + while (n_left && used < sz) + { + u16 n_added = 0; + + n_added = add_buffer_to_slot_packed (vm, node, + vring, buffers[0], next, + hdr_sz, do_gso, csum_offload, + is_pci, is_tun, is_indirect, + is_any_layout); + buffers++; + n_left--; + if (PREDICT_FALSE (n_added == 0)) + continue; + + used++; + next++; + if (next >= sz) + { + next = 0; + vring->avail_wrap_counter ^= 1; + } + } + + if (n_left != n_vectors || n_buffers != n_buffers_left) + { + CLIB_MEMORY_STORE_BARRIER (); + vring->desc_next = next; + vring->desc_in_use = used; + CLIB_MEMORY_BARRIER (); + if (vring->device_event->flags != VRING_EVENT_F_DISABLE) + virtio_kick (vm, vring, vif); + } + + return n_left; +} + static_always_inline void virtio_find_free_desc (virtio_vring_t * vring, u16 size, u16 mask, u16 req, u16 next, u32 * first_free_desc_index, @@ -541,11 +827,13 @@ 
virtio_find_free_desc (virtio_vring_t * vring, u16 size, u16 mask, } static_always_inline u16 -virtio_interface_tx_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - virtio_if_t * vif, - virtio_if_type_t type, virtio_vring_t * vring, - u32 * buffers, u16 n_left, int do_gso, - int csum_offload) +virtio_interface_tx_split_gso_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + virtio_if_t * vif, + virtio_if_type_t type, + virtio_vring_t * vring, u32 * buffers, + u16 n_left, int do_gso, + int csum_offload) { u16 used, next, avail, n_buffers = 0, n_buffers_left = 0; int is_pci = (type == VIRTIO_IF_TYPE_PCI); @@ -649,27 +937,47 @@ virtio_interface_tx_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node, return n_left; } +static_always_inline u16 +virtio_interface_tx_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + virtio_if_t * vif, + virtio_if_type_t type, virtio_vring_t * vring, + u32 * buffers, u16 n_left, int packed, + int do_gso, int csum_offload) +{ + if (packed) /* dispatch on ring layout; offload flags come from the caller */ + return virtio_interface_tx_packed_gso_inline (vm, node, vif, type, vring, + buffers, n_left, + do_gso, + csum_offload); + else + return virtio_interface_tx_split_gso_inline (vm, node, vif, type, vring, + buffers, n_left, + do_gso, + csum_offload); +} + static_always_inline u16 virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, virtio_if_t * vif, virtio_vring_t * vring, virtio_if_type_t type, - u32 * buffers, u16 n_left) + u32 * buffers, u16 n_left, int packed) { vnet_main_t *vnm = vnet_get_main (); vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index); if (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) return virtio_interface_tx_gso_inline (vm, node, vif, type, vring, - buffers, n_left, 1 /* do_gso */ , + buffers, n_left, packed, + 1 /* do_gso */ , 1 /* checksum offload */ ); else if (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD) return virtio_interface_tx_gso_inline 
(vm, node, vif, type, vring, - buffers, n_left, + buffers, n_left, packed, 0 /* no do_gso */ , 1 /* checksum offload */ ); else return virtio_interface_tx_gso_inline (vm, node, vif, type, vring, - buffers, n_left, + buffers, n_left, packed, 0 /* no do_gso */ , 0 /* no checksum offload */ ); } @@ -686,6 +994,7 @@ VNET_DEVICE_CLASS_TX_FN (virtio_device_class) (vlib_main_t * vm, u16 n_left = frame->n_vectors; u32 *buffers = vlib_frame_vector_args (frame); u32 to[GRO_TO_VECTOR_SIZE (n_left)]; + int packed = vif->is_packed; clib_spinlock_lock_if_init (&vring->lockp); @@ -703,23 +1012,23 @@ VNET_DEVICE_CLASS_TX_FN (virtio_device_class) (vlib_main_t * vm, retry: /* free consumed buffers */ - virtio_free_used_device_desc (vm, vring, node->node_index); + virtio_free_used_device_desc (vm, vring, node->node_index, packed); if (vif->type == VIRTIO_IF_TYPE_TAP) n_left = virtio_interface_tx_inline (vm, node, vif, vring, VIRTIO_IF_TYPE_TAP, &buffers[frame->n_vectors - n_left], - n_left); + n_left, packed); else if (vif->type == VIRTIO_IF_TYPE_PCI) n_left = virtio_interface_tx_inline (vm, node, vif, vring, VIRTIO_IF_TYPE_PCI, &buffers[frame->n_vectors - n_left], - n_left); + n_left, packed); else if (vif->type == VIRTIO_IF_TYPE_TUN) n_left = virtio_interface_tx_inline (vm, node, vif, vring, VIRTIO_IF_TYPE_TUN, &buffers[frame->n_vectors - n_left], - n_left); + n_left, packed); else ASSERT (0); diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c index 91e788e4d28..a45b52c2eab 100644 --- a/src/vnet/devices/virtio/node.c +++ b/src/vnet/devices/virtio/node.c @@ -81,9 +81,9 @@ format_virtio_input_trace (u8 * s, va_list * args) } static_always_inline void -virtio_refill_vring (vlib_main_t * vm, virtio_if_t * vif, - virtio_if_type_t type, virtio_vring_t * vring, - const int hdr_sz, u32 node_index) +virtio_refill_vring_split (vlib_main_t * vm, virtio_if_t * vif, + virtio_if_type_t type, virtio_vring_t * vring, + const int hdr_sz, u32 node_index) { u16 used, next, 
avail, n_slots, n_refill; u16 sz = vring->size; @@ -148,6 +148,88 @@ more: goto more; } +static_always_inline void +virtio_refill_vring_packed (vlib_main_t * vm, virtio_if_t * vif, + virtio_if_type_t type, virtio_vring_t * vring, + const int hdr_sz, u32 node_index) +{ + u16 used, next, n_slots, n_refill, flags = 0, first_desc_flags; + u16 sz = vring->size; + +more: + used = vring->desc_in_use; + + if (sz == used) + return; + + /* deliver free buffers in chunks of 64 */ + n_refill = clib_min (sz - used, 64); + + next = vring->desc_next; + first_desc_flags = vring->packed_desc[next].flags; + n_slots = + vlib_buffer_alloc_to_ring_from_pool (vm, vring->buffers, next, + sz, n_refill, + vring->buffer_pool_index); + + if (PREDICT_FALSE (n_slots != n_refill)) + { + vlib_error_count (vm, node_index, + VIRTIO_INPUT_ERROR_BUFFER_ALLOC, n_refill - n_slots); + if (n_slots == 0) + return; + } + + while (n_slots) + { + vring_packed_desc_t *d = &vring->packed_desc[next]; + vlib_buffer_t *b = vlib_get_buffer (vm, vring->buffers[next]); + /* + * current_data may not be initialized with 0 and may contain + * previous offset. Here we want to make sure, it should be 0 + * initialized. + */ + b->current_data = -hdr_sz; + memset (vlib_buffer_get_current (b), 0, hdr_sz); + d->addr = + ((type == VIRTIO_IF_TYPE_PCI) ? 
vlib_buffer_get_current_pa (vm, + b) : + pointer_to_uword (vlib_buffer_get_current (b))); + d->len = vlib_buffer_get_default_data_size (vm) + hdr_sz; + + if (vring->avail_wrap_counter) + flags = (VRING_DESC_F_AVAIL | VRING_DESC_F_WRITE); + else + flags = (VRING_DESC_F_USED | VRING_DESC_F_WRITE); + + d->id = next; + if (vring->desc_next == next) + first_desc_flags = flags; + else + d->flags = flags; + + next++; + if (next >= sz) + { + next = 0; + vring->avail_wrap_counter ^= 1; + } + n_slots--; + used++; + } + CLIB_MEMORY_STORE_BARRIER (); + vring->packed_desc[vring->desc_next].flags = first_desc_flags; + vring->desc_next = next; + vring->desc_in_use = used; + CLIB_MEMORY_BARRIER (); + if (vring->device_event->flags != VRING_EVENT_F_DISABLE) + { + virtio_kick (vm, vring, vif); + } + + goto more; +} + static_always_inline void virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr, u8 * l4_proto, u8 * l4_hdr_sz, virtio_if_type_t type) @@ -258,11 +340,51 @@ fill_gso_buffer_flags (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr, } } +static_always_inline u16 +virtio_n_left_to_process (virtio_vring_t * vring, const int packed) +{ + if (packed) + return vring->desc_in_use; + else + return vring->used->idx - vring->last_used_idx; +} + +static_always_inline u16 +virtio_get_slot_id (virtio_vring_t * vring, const int packed, u16 last, + u16 mask) +{ + if (packed) + return vring->packed_desc[last].id; + else + return vring->used->ring[last & mask].id; +} + +static_always_inline u16 +virtio_get_len (virtio_vring_t * vring, const int packed, const int hdr_sz, + u16 last, u16 mask) +{ + if (packed) + return vring->packed_desc[last].len - hdr_sz; + else + return vring->used->ring[last & mask].len - hdr_sz; +} + +#define increment_last(last, packed, vring) \ + do { \ + last++; \ + if (packed && last >= vring->size) \ + { \ + last = 0; \ + vring->used_wrap_counter ^= 1; \ + } \ + } while (0) + static_always_inline uword virtio_device_input_gso_inline (vlib_main_t * vm, 
vlib_node_runtime_t * node, vlib_frame_t * frame, virtio_if_t * vif, virtio_vring_t * vring, virtio_if_type_t type, - int gso_enabled, int checksum_offload_enabled) + int gso_enabled, int checksum_offload_enabled, + int packed) { vnet_main_t *vnm = vnet_get_main (); u32 thread_index = vm->thread_index; @@ -274,7 +396,7 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u32 n_rx_bytes = 0; u16 mask = vring->size - 1; u16 last = vring->last_used_idx; - u16 n_left = vring->used->idx - last; + u16 n_left = virtio_n_left_to_process (vring, packed); vlib_buffer_t bt; if (n_left == 0) @@ -303,12 +425,24 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (n_left && n_left_to_next) { + if (packed) + { + vring_packed_desc_t *d = &vring->packed_desc[last]; + u16 flags = d->flags; + if ((flags & VRING_DESC_F_AVAIL) != + (vring->used_wrap_counter << 7) + || (flags & VRING_DESC_F_USED) != + (vring->used_wrap_counter << 15)) + { + n_left = 0; + break; + } + } u8 l4_proto = 0, l4_hdr_sz = 0; u16 num_buffers = 1; - vring_used_elem_t *e = &vring->used->ring[last & mask]; virtio_net_hdr_v1_t *hdr; - u16 slot = e->id; - u16 len = e->len - hdr_sz; + u16 slot = virtio_get_slot_id (vring, packed, last, mask); + u16 len = virtio_get_len (vring, packed, hdr_sz, last, mask); u32 bi0 = vring->buffers[slot]; vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); hdr = vlib_buffer_get_current (b0); @@ -336,20 +470,22 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node, b0->total_length_not_including_first_buffer = 0; while (num_buffers > 1) { - last++; - e = &vring->used->ring[last & mask]; - u32 cbi = vring->buffers[e->id]; + increment_last (last, packed, vring); + u16 cslot = virtio_get_slot_id (vring, packed, last, mask); + u16 clen = /* hdr size is 0 after the 1st buffer of a chain */ + virtio_get_len (vring, packed, 0, last, mask); + u32 cbi = vring->buffers[cslot]; cb = vlib_get_buffer (vm, cbi); /* current buffer */ - cb->current_length = e->len; + 
cb->current_length = clen; /* previous buffer */ pb->next_buffer = cbi; pb->flags |= VLIB_BUFFER_NEXT_PRESENT; /* first buffer */ - b0->total_length_not_including_first_buffer += e->len; + b0->total_length_not_including_first_buffer += clen; pb = cb; vring->desc_in_use--; @@ -406,7 +542,7 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node, to_next += 1; n_left_to_next--; n_left--; - last++; + increment_last (last, packed, vring); /* only tun interfaces may have different next index */ if (type == VIRTIO_IF_TYPE_TUN) @@ -451,21 +587,49 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, clib_spinlock_unlock_if_init (&txq_vring->lockp); } - if ((vring->used->flags & VRING_USED_F_NO_NOTIFY) == 0 && - vring->last_kick_avail_idx != vring->avail->idx) - virtio_kick (vm, vring, vif); + if (vif->is_packed) + { + if (vring->device_event->flags != VRING_EVENT_F_DISABLE) + virtio_kick (vm, vring, vif); + + if (vif->gso_enabled) + rv = + virtio_device_input_gso_inline (vm, node, frame, vif, vring, type, + 1, 1, 1); + else if (vif->csum_offload_enabled) + rv = + virtio_device_input_gso_inline (vm, node, frame, vif, vring, type, + 0, 1, 1); + else + rv = + virtio_device_input_gso_inline (vm, node, frame, vif, vring, type, + 0, 0, 1); - if (vif->gso_enabled) - rv = virtio_device_input_gso_inline (vm, node, frame, vif, vring, type, - 1, 1); - else if (vif->csum_offload_enabled) - rv = virtio_device_input_gso_inline (vm, node, frame, vif, vring, type, - 0, 1); + virtio_refill_vring_packed (vm, vif, type, vring, hdr_sz, + node->node_index); + } else - rv = virtio_device_input_gso_inline (vm, node, frame, vif, vring, type, - 0, 0); + { + if ((vring->used->flags & VRING_USED_F_NO_NOTIFY) == 0 && + vring->last_kick_avail_idx != vring->avail->idx) + virtio_kick (vm, vring, vif); + + if (vif->gso_enabled) + rv = + virtio_device_input_gso_inline (vm, node, frame, vif, vring, type, + 1, 1, 0); + else if (vif->csum_offload_enabled) + rv 
= + virtio_device_input_gso_inline (vm, node, frame, vif, vring, type, + 0, 1, 0); + else + rv = + virtio_device_input_gso_inline (vm, node, frame, vif, vring, type, + 0, 0, 0); - virtio_refill_vring (vm, vif, type, vring, hdr_sz, node->node_index); + virtio_refill_vring_split (vm, vif, type, vring, hdr_sz, + node->node_index); + } return rv; } diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c index 177f764c08a..785ca4839c5 100644 --- a/src/vnet/devices/virtio/pci.c +++ b/src/vnet/devices/virtio/pci.c @@ -195,8 +195,148 @@ device_status (vlib_main_t * vm, virtio_if_t * vif) } static int -virtio_pci_send_ctrl_msg (vlib_main_t * vm, virtio_if_t * vif, - virtio_ctrl_msg_t * data, u32 len) +virtio_pci_send_ctrl_msg_packed (vlib_main_t * vm, virtio_if_t * vif, + virtio_ctrl_msg_t * data, u32 len) +{ + virtio_vring_t *vring = vif->cxq_vring; + virtio_net_ctrl_ack_t status = VIRTIO_NET_ERR; + virtio_ctrl_msg_t result; + u32 buffer_index; + vlib_buffer_t *b; + u16 used, next; + u16 sz = vring->size; + u16 flags = 0, first_desc_flags = 0; + + used = vring->desc_in_use; + next = vring->desc_next; + vring_packed_desc_t *d = &vring->packed_desc[next]; + + if (vlib_buffer_alloc (vm, &buffer_index, 1)) + b = vlib_get_buffer (vm, buffer_index); + else + return VIRTIO_NET_ERR; + /* + * current_data may not be initialized with 0 and may contain + * previous offset. 
+ */ + b->current_data = 0; + clib_memcpy (vlib_buffer_get_current (b), data, sizeof (virtio_ctrl_msg_t)); + + first_desc_flags = VRING_DESC_F_NEXT; + if (vring->avail_wrap_counter) + { + first_desc_flags |= VRING_DESC_F_AVAIL; + first_desc_flags &= ~VRING_DESC_F_USED; + } + else + { + first_desc_flags &= ~VRING_DESC_F_AVAIL; + first_desc_flags |= VRING_DESC_F_USED; + } + d->addr = vlib_buffer_get_current_pa (vm, b); + d->len = sizeof (virtio_net_ctrl_hdr_t); + d->id = next; + + next++; + if (next >= sz) + { + next = 0; + vring->avail_wrap_counter ^= 1; + } + used++; + + d = &vring->packed_desc[next]; + flags = VRING_DESC_F_NEXT; + if (vring->avail_wrap_counter) + { + flags |= VRING_DESC_F_AVAIL; + flags &= ~VRING_DESC_F_USED; + } + else + { + flags &= ~VRING_DESC_F_AVAIL; + flags |= VRING_DESC_F_USED; + } + d->addr = vlib_buffer_get_current_pa (vm, b) + + STRUCT_OFFSET_OF (virtio_ctrl_msg_t, data); + d->len = len; + d->id = next; + d->flags = flags; + + next++; + if (next >= sz) + { + next = 0; + vring->avail_wrap_counter ^= 1; + } + used++; + + d = &vring->packed_desc[next]; + flags = VRING_DESC_F_WRITE; + if (vring->avail_wrap_counter) + { + flags |= VRING_DESC_F_AVAIL; + flags &= ~VRING_DESC_F_USED; + } + else + { + flags &= ~VRING_DESC_F_AVAIL; + flags |= VRING_DESC_F_USED; + } + d->addr = vlib_buffer_get_current_pa (vm, b) + + STRUCT_OFFSET_OF (virtio_ctrl_msg_t, status); + d->len = sizeof (data->status); + d->id = next; + d->flags = flags; + + next++; + if (next >= sz) + { + next = 0; + vring->avail_wrap_counter ^= 1; + } + used++; + + CLIB_MEMORY_STORE_BARRIER (); + vring->packed_desc[vring->desc_next].flags = first_desc_flags; + vring->desc_next = next; + vring->desc_in_use = used; + CLIB_MEMORY_BARRIER (); + if (vring->device_event->flags != VRING_EVENT_F_DISABLE) + { + virtio_kick (vm, vring, vif); + } + + u16 last = vring->last_used_idx; + d = &vring->packed_desc[last]; + do + { + flags = d->flags; + } + while ((flags & VRING_DESC_F_AVAIL) != 
(vring->used_wrap_counter << 7) + || (flags & VRING_DESC_F_USED) != (vring->used_wrap_counter << 15)); + + last += 3; + if (last >= vring->size) + { + last = last - vring->size; + vring->used_wrap_counter ^= 1; + } + vring->desc_in_use -= 3; + vring->last_used_idx = last; + + CLIB_MEMORY_BARRIER (); + clib_memcpy (&result, vlib_buffer_get_current (b), + sizeof (virtio_ctrl_msg_t)); + virtio_log_debug (vif, "ctrl-queue: status %u", result.status); + status = result.status; + vlib_buffer_free (vm, &buffer_index, 1); + return status; +} + +static int +virtio_pci_send_ctrl_msg_split (vlib_main_t * vm, virtio_if_t * vif, + virtio_ctrl_msg_t * data, u32 len) { virtio_vring_t *vring = vif->cxq_vring; virtio_net_ctrl_ack_t status = VIRTIO_NET_ERR; @@ -289,6 +429,16 @@ virtio_pci_send_ctrl_msg (vlib_main_t * vm, virtio_if_t * vif, return status; } +static int +virtio_pci_send_ctrl_msg (vlib_main_t * vm, virtio_if_t * vif, + virtio_ctrl_msg_t * data, u32 len) +{ + if (vif->is_packed) + return virtio_pci_send_ctrl_msg_packed (vm, vif, data, len); + else + return virtio_pci_send_ctrl_msg_split (vm, vif, data, len); +} + static int virtio_pci_disable_offload (vlib_main_t * vm, virtio_if_t * vif) { @@ -440,8 +590,73 @@ virtio_pci_queue_size_valid (u16 qsz) } clib_error_t * -virtio_pci_control_vring_init (vlib_main_t * vm, virtio_if_t * vif, - u16 queue_num) +virtio_pci_control_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_num) +{ + clib_error_t *error = 0; + u16 queue_size = 0; + virtio_vring_t *vring; + u32 i = 0; + void *ptr = NULL; + + queue_size = vif->virtio_pci_func->get_queue_size (vm, vif, queue_num); + + if (queue_size > 32768) + return clib_error_return (0, "ring size must be 32768 or lower"); + + if (queue_size == 0) + queue_size = 256; + + vec_validate_aligned (vif->cxq_vring, 0, CLIB_CACHE_LINE_BYTES); + vring = vec_elt_at_index (vif->cxq_vring, 0); + + i = + (((queue_size * sizeof (vring_packed_desc_t)) + + sizeof (vring_desc_event_t) + 
VIRTIO_PCI_VRING_ALIGN - + 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1)) + sizeof (vring_desc_event_t); + + ptr = + vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN, + vif->numa_node); + if (!ptr) + return vlib_physmem_last_error (vm); + clib_memset (ptr, 0, i); + + vring->packed_desc = ptr; + + vring->driver_event = ptr + (queue_size * sizeof (vring_packed_desc_t)); + vring->driver_event->off_wrap = 0; + vring->driver_event->flags = VRING_EVENT_F_DISABLE; + + vring->device_event = + ptr + + (((queue_size * sizeof (vring_packed_desc_t)) + + sizeof (vring_desc_event_t) + VIRTIO_PCI_VRING_ALIGN - + 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1)); + vring->device_event->off_wrap = 0; + vring->device_event->flags = 0; + + vring->queue_id = queue_num; + vring->size = queue_size; + vring->avail_wrap_counter = 1; + vring->used_wrap_counter = 1; + + ASSERT (vring->buffers == 0); + + virtio_log_debug (vif, "control-queue: number %u, size %u", queue_num, + queue_size); + vif->virtio_pci_func->setup_queue (vm, vif, queue_num, (void *) vring); + vring->queue_notify_offset = + vif->notify_off_multiplier * + vif->virtio_pci_func->get_queue_notify_off (vm, vif, queue_num); + virtio_log_debug (vif, "queue-notify-offset: number %u, offset %u", + queue_num, vring->queue_notify_offset); + return error; +} + +clib_error_t * +virtio_pci_control_vring_split_init (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_num) { clib_error_t *error = 0; u16 queue_size = 0; @@ -496,7 +711,18 @@ virtio_pci_control_vring_init (vlib_main_t * vm, virtio_if_t * vif, } clib_error_t * -virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 queue_num) +virtio_pci_control_vring_init (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_num) +{ + if (vif->is_packed) + return virtio_pci_control_vring_packed_init (vm, vif, queue_num); + else + return virtio_pci_control_vring_split_init (vm, vif, queue_num); +} + +clib_error_t * +virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_num) 
{ vlib_thread_main_t *vtm = vlib_get_thread_main (); clib_error_t *error = 0; @@ -574,6 +800,106 @@ virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 queue_num) return error; } +clib_error_t * +virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_num) +{ + vlib_thread_main_t *vtm = vlib_get_thread_main (); + clib_error_t *error = 0; + u16 queue_size = 0; + virtio_vring_t *vring; + u32 i = 0; + void *ptr = NULL; + + queue_size = vif->virtio_pci_func->get_queue_size (vm, vif, queue_num); + + if (queue_size > 32768) + return clib_error_return (0, "ring size must be 32768 or lower"); + + if (queue_size == 0) + queue_size = 256; + + if (queue_num % 2) + { + vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num), + CLIB_CACHE_LINE_BYTES); + vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num)); + if (vif->max_queue_pairs < vtm->n_vlib_mains) + clib_spinlock_init (&vring->lockp); + } + else + { + vec_validate_aligned (vif->rxq_vrings, RX_QUEUE_ACCESS (queue_num), + CLIB_CACHE_LINE_BYTES); + vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (queue_num)); + } + + i = + (((queue_size * sizeof (vring_packed_desc_t)) + + sizeof (vring_desc_event_t) + VIRTIO_PCI_VRING_ALIGN - + 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1)) + sizeof (vring_desc_event_t); + + ptr = + vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN, + vif->numa_node); + if (!ptr) + return vlib_physmem_last_error (vm); + + clib_memset (ptr, 0, i); + vring->packed_desc = ptr; + + vring->driver_event = ptr + (queue_size * sizeof (vring_packed_desc_t)); + vring->driver_event->off_wrap = 0; + vring->driver_event->flags = VRING_EVENT_F_DISABLE; + + vring->device_event = + ptr + + (((queue_size * sizeof (vring_packed_desc_t)) + + sizeof (vring_desc_event_t) + VIRTIO_PCI_VRING_ALIGN - + 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1)); + vring->device_event->off_wrap = 0; + vring->device_event->flags = 0; + + vring->queue_id = queue_num; + + 
vring->avail_wrap_counter = 1; + vring->used_wrap_counter = 1; + + ASSERT (vring->buffers == 0); + vec_validate_aligned (vring->buffers, queue_size, CLIB_CACHE_LINE_BYTES); + if (queue_num % 2) + { + virtio_log_debug (vif, "tx-queue: number %u, size %u", queue_num, + queue_size); + clib_memset_u32 (vring->buffers, ~0, queue_size); + } + else + { + virtio_log_debug (vif, "rx-queue: number %u, size %u", queue_num, + queue_size); + } + vring->size = queue_size; + if (vif->virtio_pci_func->setup_queue (vm, vif, queue_num, (void *) vring)) + return clib_error_return (0, "error in queue address setup"); + + vring->queue_notify_offset = + vif->notify_off_multiplier * + vif->virtio_pci_func->get_queue_notify_off (vm, vif, queue_num); + virtio_log_debug (vif, "queue-notify-offset: number %u, offset %u", + queue_num, vring->queue_notify_offset); + + return error; +} + +clib_error_t * +virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 queue_num) +{ + if (vif->is_packed) + return virtio_pci_vring_packed_init (vm, vif, queue_num); + else + return virtio_pci_vring_split_init (vm, vif, queue_num); +} + static void virtio_negotiate_features (vlib_main_t * vm, virtio_if_t * vif, u64 req_features) @@ -605,6 +931,13 @@ virtio_negotiate_features (vlib_main_t * vm, virtio_if_t * vif, if (vif->is_modern) supported_features |= VIRTIO_FEATURE (VIRTIO_F_VERSION_1); + if (vif->is_packed) + { + supported_features |= + (VIRTIO_FEATURE (VIRTIO_F_RING_PACKED) | + VIRTIO_FEATURE (VIRTIO_F_IN_ORDER)); + } + if (req_features == 0) { req_features = supported_features; @@ -621,6 +954,9 @@ virtio_negotiate_features (vlib_main_t * vm, virtio_if_t * vif, vif->features &= ~VIRTIO_FEATURE (VIRTIO_NET_F_MTU); } + if ((vif->features & (VIRTIO_FEATURE (VIRTIO_F_RING_PACKED))) == 0) + vif->is_packed = 0; + vif->virtio_pci_func->set_driver_features (vm, vif, vif->features); vif->features = vif->virtio_pci_func->get_driver_features (vm, vif); } @@ -1034,6 +1370,8 @@ virtio_pci_create_if 
(vlib_main_t * vm, virtio_pci_create_if_args_t * args) vif->dev_instance = vif - vim->interfaces; vif->per_interface_next_index = ~0; vif->pci_addr.as_u32 = args->addr; + if (args->virtio_flags & VIRTIO_FLAG_PACKED) + vif->is_packed = 1; if ((error = vlib_pci_device_open (vm, (vlib_pci_addr_t *) & vif->pci_addr, @@ -1260,7 +1598,7 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif) virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, i); if (vring->used) { - virtio_free_rx_buffers (vm, vring); + virtio_free_buffers (vm, vring); } vec_free (vring->buffers); vlib_physmem_free (vm, vring->desc); @@ -1271,9 +1609,11 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif) virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, i); if (vring->used) { - virtio_free_used_desc (vm, vring); + virtio_free_buffers (vm, vring); } vec_free (vring->buffers); + gro_flow_table_free (vring->flow_table); + virtio_vring_buffering_free (vm, vring->buffering); clib_spinlock_free (&vring->lockp); vlib_physmem_free (vm, vring->desc); } diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c index 925ad092b91..99f581a1f1b 100644 --- a/src/vnet/devices/virtio/virtio.c +++ b/src/vnet/devices/virtio/virtio.c @@ -134,7 +134,7 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz) } inline void -virtio_free_rx_buffers (vlib_main_t * vm, virtio_vring_t * vring) +virtio_free_buffers (vlib_main_t * vm, virtio_vring_t * vring) { u16 used = vring->desc_in_use; u16 last = vring->last_used_idx; @@ -159,7 +159,7 @@ virtio_vring_free_rx (vlib_main_t * vm, virtio_if_t * vif, u32 idx) close (vring->call_fd); if (vring->used) { - virtio_free_rx_buffers (vm, vring); + virtio_free_buffers (vm, vring); clib_mem_free (vring->used); } if (vring->desc) @@ -170,32 +170,6 @@ virtio_vring_free_rx (vlib_main_t * vm, virtio_if_t * vif, u32 idx) return 0; } -inline void -virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring) -{ - u16 used 
= vring->desc_in_use; - u16 sz = vring->size; - u16 mask = sz - 1; - u16 last = vring->last_used_idx; - u16 n_left = vring->used->idx - last; - - if (n_left == 0) - return; - - while (n_left) - { - vring_used_elem_t *e = &vring->used->ring[last & mask]; - u16 slot = e->id; - - vlib_buffer_free (vm, &vring->buffers[slot], 1); - used--; - last++; - n_left--; - } - vring->desc_in_use = used; - vring->last_used_idx = last; -} - clib_error_t * virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif, u32 idx) { @@ -207,7 +181,7 @@ virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif, u32 idx) close (vring->call_fd); if (vring->used) { - virtio_free_used_desc (vm, vring); + virtio_free_buffers (vm, vring); clib_mem_free (vring->used); } if (vring->desc) @@ -408,10 +382,24 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type) " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d", vring->size, vring->last_used_idx, vring->desc_next, vring->desc_in_use); - vlib_cli_output (vm, - " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d", - vring->avail->flags, vring->avail->idx, - vring->used->flags, vring->used->idx); + if (vif->is_packed) + { + vlib_cli_output (vm, + " driver_event.flags 0x%x driver_event.off_wrap %d device_event.flags 0x%x device_event.off_wrap %d", + vring->driver_event->flags, + vring->driver_event->off_wrap, + vring->device_event->flags, + vring->device_event->off_wrap); + vlib_cli_output (vm, + " avail wrap counter %d, used wrap counter %d", + vring->avail_wrap_counter, + vring->used_wrap_counter); + } + else + vlib_cli_output (vm, + " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d", + vring->avail->flags, vring->avail->idx, + vring->used->flags, vring->used->idx); if (type & (VIRTIO_IF_TYPE_TAP | VIRTIO_IF_TYPE_TUN)) { vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd, @@ -421,17 +409,29 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type) { vlib_cli_output (vm, 
"\n descriptor table:\n"); vlib_cli_output (vm, - " id addr len flags next user_addr\n"); + " id addr len flags next/id user_addr\n"); vlib_cli_output (vm, - " ===== ================== ===== ====== ===== ==================\n"); + " ===== ================== ===== ====== ======= ==================\n"); for (j = 0; j < vring->size; j++) { - vring_desc_t *desc = &vring->desc[j]; - vlib_cli_output (vm, - " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", - j, desc->addr, - desc->len, - desc->flags, desc->next, desc->addr); + if (vif->is_packed) + { + vring_packed_desc_t *desc = &vring->packed_desc[j]; + vlib_cli_output (vm, + " %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n", + j, desc->addr, + desc->len, + desc->flags, desc->id, desc->addr); + } + else + { + vring_desc_t *desc = &vring->desc[j]; + vlib_cli_output (vm, + " %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n", + j, desc->addr, + desc->len, + desc->flags, desc->next, desc->addr); + } } } } @@ -443,10 +443,24 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type) " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d", vring->size, vring->last_used_idx, vring->desc_next, vring->desc_in_use); - vlib_cli_output (vm, - " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d", - vring->avail->flags, vring->avail->idx, - vring->used->flags, vring->used->idx); + if (vif->is_packed) + { + vlib_cli_output (vm, + " driver_event.flags 0x%x driver_event.off_wrap %d device_event.flags 0x%x device_event.off_wrap %d", + vring->driver_event->flags, + vring->driver_event->off_wrap, + vring->device_event->flags, + vring->device_event->off_wrap); + vlib_cli_output (vm, + " avail wrap counter %d, used wrap counter %d", + vring->avail_wrap_counter, + vring->used_wrap_counter); + } + else + vlib_cli_output (vm, + " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d", + vring->avail->flags, vring->avail->idx, + vring->used->flags, vring->used->idx); if (type & (VIRTIO_IF_TYPE_TAP | VIRTIO_IF_TYPE_TUN)) { 
vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd, @@ -466,17 +480,29 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type) { vlib_cli_output (vm, "\n descriptor table:\n"); vlib_cli_output (vm, - " id addr len flags next user_addr\n"); + " id addr len flags next/id user_addr\n"); vlib_cli_output (vm, - " ===== ================== ===== ====== ===== ==================\n"); + " ===== ================== ===== ====== ======== ==================\n"); for (j = 0; j < vring->size; j++) { - vring_desc_t *desc = &vring->desc[j]; - vlib_cli_output (vm, - " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", - j, desc->addr, - desc->len, - desc->flags, desc->next, desc->addr); + if (vif->is_packed) + { + vring_packed_desc_t *desc = &vring->packed_desc[j]; + vlib_cli_output (vm, + " %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n", + j, desc->addr, + desc->len, + desc->flags, desc->id, desc->addr); + } + else + { + vring_desc_t *desc = &vring->desc[j]; + vlib_cli_output (vm, + " %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n", + j, desc->addr, + desc->len, + desc->flags, desc->next, desc->addr); + } } } } @@ -489,10 +515,26 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type) " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d", vring->size, vring->last_used_idx, vring->desc_next, vring->desc_in_use); - vlib_cli_output (vm, - " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d", - vring->avail->flags, vring->avail->idx, - vring->used->flags, vring->used->idx); + if (vif->is_packed) + { + vlib_cli_output (vm, + " driver_event.flags 0x%x driver_event.off_wrap %d device_event.flags 0x%x device_event.off_wrap %d", + vring->driver_event->flags, + vring->driver_event->off_wrap, + vring->device_event->flags, + vring->device_event->off_wrap); + vlib_cli_output (vm, + " avail wrap counter %d, used wrap counter %d", + vring->avail_wrap_counter, + vring->used_wrap_counter); + } + else + { + vlib_cli_output (vm, + " avail.flags 
0x%x avail.idx %d used.flags 0x%x used.idx %d", + vring->avail->flags, vring->avail->idx, + vring->used->flags, vring->used->idx); + } if (type & (VIRTIO_IF_TYPE_TAP | VIRTIO_IF_TYPE_TUN)) { vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd, @@ -502,17 +544,29 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type) { vlib_cli_output (vm, "\n descriptor table:\n"); vlib_cli_output (vm, - " id addr len flags next user_addr\n"); + " id addr len flags next/id user_addr\n"); vlib_cli_output (vm, - " ===== ================== ===== ====== ===== ==================\n"); + " ===== ================== ===== ====== ======== ==================\n"); for (j = 0; j < vring->size; j++) { - vring_desc_t *desc = &vring->desc[j]; - vlib_cli_output (vm, - " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", - j, desc->addr, - desc->len, - desc->flags, desc->next, desc->addr); + if (vif->is_packed) + { + vring_packed_desc_t *desc = &vring->packed_desc[j]; + vlib_cli_output (vm, + " %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n", + j, desc->addr, + desc->len, + desc->flags, desc->id, desc->addr); + } + else + { + vring_desc_t *desc = &vring->desc[j]; + vlib_cli_output (vm, + " %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n", + j, desc->addr, + desc->len, + desc->flags, desc->next, desc->addr); + } } } } diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h index fda72365db9..035dc9ca40d 100644 --- a/src/vnet/devices/virtio/virtio.h +++ b/src/vnet/devices/virtio/virtio.h @@ -64,29 +64,46 @@ typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); clib_spinlock_t lockp; - vring_desc_t *desc; - vring_used_t *used; - vring_avail_t *avail; + union + { + struct + { + vring_desc_t *desc; + vring_used_t *used; + vring_avail_t *avail; + }; + struct + { + vring_packed_desc_t *packed_desc; + vring_desc_event_t *driver_event; + vring_desc_event_t *device_event; + }; + }; + u32 *buffers; + u16 size; + u16 queue_id; u16 desc_in_use; u16 desc_next; + u16 
last_used_idx; + u16 last_kick_avail_idx; union { struct { int kick_fd; int call_fd; + u32 call_file_index; + }; + struct + { + u16 avail_wrap_counter; + u16 used_wrap_counter; + u16 queue_notify_offset; }; - u16 queue_notify_offset; }; - u8 buffer_pool_index; - u16 size; - u16 queue_id; #define VRING_TX_OUT_OF_ORDER 1 u16 flags; - u32 *buffers; - u16 last_used_idx; - u16 last_kick_avail_idx; - u32 call_file_index; + u8 buffer_pool_index; vnet_hw_if_rx_mode mode; virtio_vring_buffering_t *buffering; gro_flow_table_t *flow_table; @@ -190,6 +207,7 @@ typedef struct }; }; const virtio_pci_func_t *virtio_pci_func; + int is_packed; } virtio_if_t; typedef struct @@ -214,8 +232,7 @@ clib_error_t *virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif, u32 idx); void virtio_vring_set_numa_node (vlib_main_t * vm, virtio_if_t * vif, u32 idx); -extern void virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring); -extern void virtio_free_rx_buffers (vlib_main_t * vm, virtio_vring_t * vring); +extern void virtio_free_buffers (vlib_main_t * vm, virtio_vring_t * vring); extern void virtio_set_net_hdr_size (virtio_if_t * vif); extern void virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type); diff --git a/src/vnet/devices/virtio/virtio_pci_modern.c b/src/vnet/devices/virtio/virtio_pci_modern.c index 1934f98003d..8e090ffed3a 100644 --- a/src/vnet/devices/virtio/virtio_pci_modern.c +++ b/src/vnet/devices/virtio/virtio_pci_modern.c @@ -268,24 +268,39 @@ static u8 virtio_pci_modern_setup_queue (vlib_main_t * vm, virtio_if_t * vif, u16 queue_id, void *p) { - vring_t vr; + u64 desc, avail, used; u16 queue_size = 0; virtio_pci_modern_set_queue_select (vif, queue_id); queue_size = virtio_pci_modern_get_queue_size (vm, vif, queue_id); - vring_init (&vr, queue_size, p, VIRTIO_PCI_VRING_ALIGN); - u64 desc = vlib_physmem_get_pa (vm, vr.desc); + if (vif->is_packed) + { + virtio_vring_t *vring = (virtio_vring_t *) p; + + desc = vlib_physmem_get_pa (vm, 
vring->packed_desc); + avail = vlib_physmem_get_pa (vm, vring->driver_event); + used = vlib_physmem_get_pa (vm, vring->device_event); + } + else + { + vring_t vr; + + vring_init (&vr, queue_size, p, VIRTIO_PCI_VRING_ALIGN); + + desc = vlib_physmem_get_pa (vm, vr.desc); + avail = vlib_physmem_get_pa (vm, vr.avail); + used = vlib_physmem_get_pa (vm, vr.used); + } + virtio_pci_modern_set_queue_desc (vif, desc); if (desc != virtio_pci_modern_get_queue_desc (vif)) return 1; - u64 avail = vlib_physmem_get_pa (vm, vr.avail); virtio_pci_modern_set_queue_driver (vif, avail); if (avail != virtio_pci_modern_get_queue_driver (vif)) return 1; - u64 used = vlib_physmem_get_pa (vm, vr.used); virtio_pci_modern_set_queue_device (vif, used); if (used != virtio_pci_modern_get_queue_device (vif)) return 1; -- 2.16.6