X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fdevices%2Fvirtio%2Fpci.c;h=65dab0ae769e0ce451ced9a0ed3c9ace47e27e08;hb=7f6d145accc6e63b150ab4efc282f19cbe996b57;hp=531a7fb26f638b7b906bb58475c75ab2e3f9c2cb;hpb=33cc5cfd04093d2da781cd3591d0da905873f865;p=vpp.git diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c index 531a7fb26f6..65dab0ae769 100644 --- a/src/vnet/devices/virtio/pci.c +++ b/src/vnet/devices/virtio/pci.c @@ -15,19 +15,11 @@ #include #include -#include -#include -#include -#include -#if defined(__x86_64__) -#include -#endif #include #include #include #include -#include #include #include #include @@ -44,9 +36,7 @@ #define PCI_MSIX_ENABLE 0x8000 -static u32 msix_enabled = 0; - -#define PCI_CONFIG_SIZE ((msix_enabled == VIRTIO_MSIX_ENABLED) ? \ +#define PCI_CONFIG_SIZE(vif) ((vif->msix_enabled == VIRTIO_MSIX_ENABLED) ? \ 24 : 20) static pci_device_id_t virtio_pci_device_ids[] = { @@ -71,17 +61,17 @@ virtio_pci_legacy_read_config (vlib_main_t * vm, virtio_if_t * vif, void *dst, if (len >= 4) { size = 4; - vlib_pci_read_io_u32 (vm, h, PCI_CONFIG_SIZE + addr, dst); + vlib_pci_read_io_u32 (vm, h, PCI_CONFIG_SIZE (vif) + addr, dst); } else if (len >= 2) { size = 2; - vlib_pci_read_io_u16 (vm, h, PCI_CONFIG_SIZE + addr, dst); + vlib_pci_read_io_u16 (vm, h, PCI_CONFIG_SIZE (vif) + addr, dst); } else { size = 1; - vlib_pci_read_io_u8 (vm, h, PCI_CONFIG_SIZE + addr, dst); + vlib_pci_read_io_u8 (vm, h, PCI_CONFIG_SIZE (vif) + addr, dst); } dst = (u8 *) dst + size; addr += size; @@ -101,17 +91,17 @@ virtio_pci_legacy_write_config (vlib_main_t * vm, virtio_if_t * vif, if (len >= 4) { size = 4; - vlib_pci_write_io_u32 (vm, h, PCI_CONFIG_SIZE + addr, src); + vlib_pci_write_io_u32 (vm, h, PCI_CONFIG_SIZE (vif) + addr, src); } else if (len >= 2) { size = 2; - vlib_pci_write_io_u16 (vm, h, PCI_CONFIG_SIZE + addr, src); + vlib_pci_write_io_u16 (vm, h, PCI_CONFIG_SIZE (vif) + addr, src); } else { size = 1; - vlib_pci_write_io_u8 (vm, h, PCI_CONFIG_SIZE + addr, src); + vlib_pci_write_io_u8 (vm, h, PCI_CONFIG_SIZE (vif) + addr, src); } src = (u8 *) src + size; addr += size; @@ -120,7 +110,7 @@ virtio_pci_legacy_write_config (vlib_main_t * vm, virtio_if_t * vif, } static u64 -virtio_pci_legacy_get_features (vlib_main_t * vm, virtio_if_t * vif) +virtio_pci_legacy_get_host_features (vlib_main_t * vm, virtio_if_t * vif) { u32 features; vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_HOST_FEATURES, @@ -129,8 +119,18 @@ virtio_pci_legacy_get_features (vlib_main_t * vm, virtio_if_t * vif) } static u32 -virtio_pci_legacy_set_features (vlib_main_t * vm, virtio_if_t * vif, - u64 features) +virtio_pci_legacy_get_guest_features (vlib_main_t * vm, virtio_if_t * vif) +{ + u32 feature = 0; + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_GUEST_FEATURES, + &feature); + vif->features = feature; + return feature; +} + +static u32 +virtio_pci_legacy_set_guest_features (vlib_main_t * vm, virtio_if_t * vif, + u64 features) { if ((features >> 32) != 0) { @@ -187,16 +187,21 @@ virtio_pci_legacy_get_queue_num (vlib_main_t * vm, virtio_if_t * vif, return queue_num; } - -static void +static int virtio_pci_legacy_setup_queue (vlib_main_t * vm, virtio_if_t * vif, u16 queue_id, void *p) { u64 addr = vlib_physmem_get_pa (vm, p) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; + u32 addr2 = 0; vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, &queue_id); vlib_pci_write_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN, (u32 *) & addr); + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN, + &addr2); + if ((u32) addr == addr2) + return 0; + return 1; } static void @@ -259,12 +264,15 @@ virtio_pci_get_max_virtqueue_pairs (vlib_main_t * vm, virtio_if_t * vif) if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MQ)) { virtio_pci_legacy_read_config (vm, vif, &config.max_virtqueue_pairs, - sizeof (config.max_virtqueue_pairs), 8); + sizeof (config.max_virtqueue_pairs), + STRUCT_OFFSET_OF (virtio_net_config_t, + max_virtqueue_pairs)); max_queue_pairs = config.max_virtqueue_pairs; } + virtio_log_debug (vif, "max queue pair is %x", max_queue_pairs); if (max_queue_pairs < 1 || max_queue_pairs > 0x8000) - clib_error_return (error, "max queue pair is %x", max_queue_pairs); + return clib_error_return (error, "max queue pair is %x", max_queue_pairs); vif->max_queue_pairs = max_queue_pairs; return error; @@ -280,7 +288,7 @@ virtio_pci_set_mac (vlib_main_t * vm, virtio_if_t * vif) static u32 virtio_pci_get_mac (vlib_main_t * vm, virtio_if_t * vif) { - if (vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_MAC)) + if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MAC)) { virtio_pci_legacy_read_config (vm, vif, vif->mac_addr, sizeof (vif->mac_addr), 0); @@ -296,9 +304,10 @@ virtio_pci_is_link_up (vlib_main_t * vm, virtio_if_t * vif) * Minimal driver: assumes link is up */ u16 status = 1; - if (vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_STATUS)) + if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_STATUS)) virtio_pci_legacy_read_config (vm, vif, &status, sizeof (status), /* mac */ - 6); + STRUCT_OFFSET_OF (virtio_net_config_t, + status)); return status; } @@ -415,6 +424,19 @@ debug_device_config_space (vlib_main_t * vm, virtio_if_t * vif) vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_ISR, &data_u8); vlib_cli_output (vm, "isr 0x%x", data_u8); + if (vif->msix_enabled == VIRTIO_MSIX_ENABLED) + { + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_CONFIG_VECTOR, + &data_u16); + vlib_cli_output (vm, "config vector 0x%x", data_u16); + u16 queue_id = 0; + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, + &queue_id); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_QUEUE_VECTOR, + &data_u16); + vlib_cli_output (vm, "queue vector for queue (0) 0x%x", data_u16); + } + u8 mac[6]; virtio_pci_legacy_read_config (vm, vif, mac, sizeof (mac), 0); vlib_cli_output (vm, "mac %U", format_ethernet_address, mac); @@ -428,7 +450,7 @@ debug_device_config_space (vlib_main_t * vm, virtio_if_t * vif) 10); vlib_cli_output (vm, "mtu 0x%x", data_u16); - u32 i = PCI_CONFIG_SIZE + 12, a = 4; + u32 i = PCI_CONFIG_SIZE (vif) + 12, a = 4; i += a; i &= ~a; for (; i < 64; i += 4) @@ -439,6 +461,249 @@ debug_device_config_space (vlib_main_t * vm, virtio_if_t * vif) } } +struct virtio_ctrl_msg +{ + struct virtio_net_ctrl_hdr ctrl; + virtio_net_ctrl_ack status; + u8 data[1024]; +}; + +static int +virtio_pci_send_ctrl_msg (vlib_main_t * vm, virtio_if_t * vif, + struct virtio_ctrl_msg *data, u32 len) +{ + virtio_vring_t *vring = vif->cxq_vring; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + struct virtio_ctrl_msg result; + u32 buffer_index; + vlib_buffer_t *b; + u16 used, next, avail; + u16 sz = vring->size; + u16 mask = sz - 1; + + used = vring->desc_in_use; + next = vring->desc_next; + avail = vring->avail->idx; + struct vring_desc *d = &vring->desc[next]; + + if (vlib_buffer_alloc (vm, &buffer_index, 1)) + b = vlib_get_buffer (vm, buffer_index); + else + return VIRTIO_NET_ERR; + /* + * current_data may not be initialized with 0 and may contain + * previous offset. + */ + b->current_data = 0; + clib_memcpy (vlib_buffer_get_current (b), data, + sizeof (struct virtio_ctrl_msg)); + d->flags = VRING_DESC_F_NEXT; + d->addr = vlib_buffer_get_current_pa (vm, b); + d->len = sizeof (struct virtio_net_ctrl_hdr); + vring->avail->ring[avail & mask] = next; + avail++; + next = (next + 1) & mask; + d->next = next; + used++; + + d = &vring->desc[next]; + d->flags = VRING_DESC_F_NEXT; + d->addr = vlib_buffer_get_current_pa (vm, b) + + STRUCT_OFFSET_OF (struct virtio_ctrl_msg, data); + d->len = len; + next = (next + 1) & mask; + d->next = next; + used++; + + d = &vring->desc[next]; + d->flags = VRING_DESC_F_WRITE; + d->addr = vlib_buffer_get_current_pa (vm, b) + + STRUCT_OFFSET_OF (struct virtio_ctrl_msg, status); + d->len = sizeof (data->status); + next = (next + 1) & mask; + used++; + + CLIB_MEMORY_STORE_BARRIER (); + vring->avail->idx = avail; + vring->desc_next = next; + vring->desc_in_use = used; + + if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0) + { + virtio_kick (vm, vring, vif); + } + + u16 last = vring->last_used_idx, n_left = 0; + n_left = vring->used->idx - last; + + while (n_left) + { + struct vring_used_elem *e = &vring->used->ring[last & mask]; + u16 slot = e->id; + + d = &vring->desc[slot]; + while (d->flags & VRING_DESC_F_NEXT) + { + used--; + slot = d->next; + d = &vring->desc[slot]; + } + used--; + last++; + n_left--; + } + vring->desc_in_use = used; + vring->last_used_idx = last; + + CLIB_MEMORY_BARRIER (); + clib_memcpy (&result, vlib_buffer_get_current (b), + sizeof (struct virtio_ctrl_msg)); + virtio_log_debug (vif, "ctrl-queue: status %u", result.status); + status = result.status; + vlib_buffer_free (vm, &buffer_index, 1); + return status; +} + +static int +virtio_pci_disable_offload (vlib_main_t * vm, virtio_if_t * vif) +{ + struct virtio_ctrl_msg offload_hdr; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + + offload_hdr.ctrl.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; + offload_hdr.ctrl.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; + offload_hdr.status = VIRTIO_NET_ERR; + u64 offloads = 0ULL; + clib_memcpy (offload_hdr.data, &offloads, sizeof (offloads)); + + status = + virtio_pci_send_ctrl_msg (vm, vif, &offload_hdr, sizeof (offloads)); + virtio_log_debug (vif, "disable offloads"); + vif->remote_features = virtio_pci_legacy_get_host_features (vm, vif); + virtio_pci_legacy_get_guest_features (vm, vif); + return status; +} + +static int +virtio_pci_enable_checksum_offload (vlib_main_t * vm, virtio_if_t * vif) +{ + struct virtio_ctrl_msg csum_offload_hdr; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + + csum_offload_hdr.ctrl.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; + csum_offload_hdr.ctrl.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; + csum_offload_hdr.status = VIRTIO_NET_ERR; + u64 offloads = 0ULL; + offloads |= VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM); + clib_memcpy (csum_offload_hdr.data, &offloads, sizeof (offloads)); + + status = + virtio_pci_send_ctrl_msg (vm, vif, &csum_offload_hdr, sizeof (offloads)); + virtio_log_debug (vif, "enable checksum offload"); + vif->remote_features = virtio_pci_legacy_get_host_features (vm, vif); + virtio_pci_legacy_get_guest_features (vm, vif); + return status; +} + +static int +virtio_pci_enable_gso (vlib_main_t * vm, virtio_if_t * vif) +{ + struct virtio_ctrl_msg gso_hdr; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + + gso_hdr.ctrl.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; + gso_hdr.ctrl.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; + gso_hdr.status = VIRTIO_NET_ERR; + u64 offloads = VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM) + | VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4) + | VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6); + clib_memcpy (gso_hdr.data, &offloads, sizeof (offloads)); + + status = virtio_pci_send_ctrl_msg (vm, vif, &gso_hdr, sizeof (offloads)); + virtio_log_debug (vif, "enable gso"); + vif->remote_features = virtio_pci_legacy_get_host_features (vm, vif); + virtio_pci_legacy_get_guest_features (vm, vif); + return status; +} + +static int +virtio_pci_offloads (vlib_main_t * vm, virtio_if_t * vif, int gso_enabled, + int csum_offload_enabled) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index); + + if ((vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) && + (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))) + { + if (gso_enabled + && (vif->features & (VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO4) | + VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO6)))) + { + if (virtio_pci_enable_gso (vm, vif)) + { + virtio_log_warning (vif, "gso is not enabled"); + } + else + { + vif->gso_enabled = 1; + vif->csum_offload_enabled = 0; + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO | + VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; + } + } + else if (csum_offload_enabled + && (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))) + { + if (virtio_pci_enable_checksum_offload (vm, vif)) + { + virtio_log_warning (vif, "checksum offload is not enabled"); + } + else + { + vif->csum_offload_enabled = 1; + vif->gso_enabled = 0; + hw->flags &= ~VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO; + hw->flags |= + VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; + } + } + else + { + if (virtio_pci_disable_offload (vm, vif)) + { + virtio_log_warning (vif, "offloads are not disabled"); + } + else + { + vif->csum_offload_enabled = 0; + vif->gso_enabled = 0; + hw->flags &= ~(VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO | + VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD); + } + } + } + + return 0; +} + +static int +virtio_pci_enable_multiqueue (vlib_main_t * vm, virtio_if_t * vif, + u16 num_queues) +{ + struct virtio_ctrl_msg mq_hdr; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + + mq_hdr.ctrl.class = VIRTIO_NET_CTRL_MQ; + mq_hdr.ctrl.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; + mq_hdr.status = VIRTIO_NET_ERR; + clib_memcpy (mq_hdr.data, &num_queues, sizeof (num_queues)); + + status = virtio_pci_send_ctrl_msg (vm, vif, &mq_hdr, sizeof (num_queues)); + virtio_log_debug (vif, "multi-queue enable %u queues", num_queues); + return status; +} + static u8 virtio_pci_queue_size_valid (u16 qsz) { @@ -450,16 +715,17 @@ virtio_pci_queue_size_valid (u16 qsz) } clib_error_t * -virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx) +virtio_pci_control_vring_init (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_num) { clib_error_t *error = 0; u16 queue_size = 0; virtio_vring_t *vring; struct vring vr; u32 i = 0; - void *ptr; + void *ptr = NULL; - queue_size = virtio_pci_legacy_get_queue_num (vm, vif, idx); + queue_size = virtio_pci_legacy_get_queue_num (vm, vif, queue_num); if (!virtio_pci_queue_size_valid (queue_size)) clib_warning ("queue size is not valid"); @@ -472,45 +738,103 @@ virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx) if (queue_size == 0) queue_size = 256; - vec_validate_aligned (vif->vrings, idx, CLIB_CACHE_LINE_BYTES); - vring = vec_elt_at_index (vif->vrings, idx); + vec_validate_aligned (vif->cxq_vring, 0, CLIB_CACHE_LINE_BYTES); + vring = vec_elt_at_index (vif->cxq_vring, 0); + i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN); + i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN); + ptr = + vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN, + vif->numa_node); + if (!ptr) + return vlib_physmem_last_error (vm); + clib_memset (ptr, 0, i); + vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN); + vring->desc = vr.desc; + vring->avail = vr.avail; + vring->used = vr.used; + vring->queue_id = queue_num; + vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT; + + ASSERT (vring->buffers == 0); + vring->size = queue_size; + virtio_log_debug (vif, "control-queue: number %u, size %u", queue_num, + queue_size); + virtio_pci_legacy_setup_queue (vm, vif, queue_num, ptr); + vring->kick_fd = -1; + + return error; +} + +clib_error_t * +virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 queue_num) +{ + clib_error_t *error = 0; + u16 queue_size = 0; + virtio_vring_t *vring; + struct vring vr; + u32 i = 0; + void *ptr = NULL; + + queue_size = virtio_pci_legacy_get_queue_num (vm, vif, queue_num); + if (!virtio_pci_queue_size_valid (queue_size)) + clib_warning ("queue size is not valid"); + + if (!is_pow2 (queue_size)) + return clib_error_return (0, "ring size must be power of 2"); + + if (queue_size > 32768) + return clib_error_return (0, "ring size must be 32768 or lower"); + + if (queue_size == 0) + queue_size = 256; + + if (queue_num % 2) + { + vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num), + CLIB_CACHE_LINE_BYTES); + vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num)); + clib_spinlock_init (&vring->lockp); + } + else + { + vec_validate_aligned (vif->rxq_vrings, RX_QUEUE_ACCESS (queue_num), + CLIB_CACHE_LINE_BYTES); + vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (queue_num)); + } i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN); i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN); - ptr = vlib_physmem_alloc_aligned (vm, i, VIRTIO_PCI_VRING_ALIGN); - memset (ptr, 0, i); + ptr = + vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN, + vif->numa_node); + if (!ptr) + return vlib_physmem_last_error (vm); + clib_memset (ptr, 0, i); vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN); vring->desc = vr.desc; vring->avail = vr.avail; vring->used = vr.used; - vring->queue_id = idx; + vring->queue_id = queue_num; vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT; ASSERT (vring->buffers == 0); vec_validate_aligned (vring->buffers, queue_size, CLIB_CACHE_LINE_BYTES); - ASSERT (vring->indirect_buffers == 0); - vec_validate_aligned (vring->indirect_buffers, queue_size, - CLIB_CACHE_LINE_BYTES); - if (idx % 2) + if (queue_num % 2) { - u32 n_alloc = 0; - do - { - if (n_alloc < queue_size) - n_alloc = - vlib_buffer_alloc (vm, vring->indirect_buffers + n_alloc, - queue_size - n_alloc); - } - while (n_alloc != queue_size); - vif->tx_ring_sz = queue_size; + virtio_log_debug (vif, "tx-queue: number %u, size %u", queue_num, + queue_size); + clib_memset_u32 (vring->buffers, ~0, queue_size); } else - vif->rx_ring_sz = queue_size; + { + virtio_log_debug (vif, "rx-queue: number %u, size %u", queue_num, + queue_size); + } vring->size = queue_size; + if (virtio_pci_legacy_setup_queue (vm, vif, queue_num, ptr)) + return clib_error_return (0, "error in queue address setup"); - virtio_pci_legacy_setup_queue (vm, vif, idx, ptr); vring->kick_fd = -1; - return error; } @@ -522,10 +846,23 @@ virtio_negotiate_features (vlib_main_t * vm, virtio_if_t * vif, * if features are not requested * default: all supported features */ - u64 supported_features = VIRTIO_FEATURE (VIRTIO_NET_F_MTU) + u64 supported_features = VIRTIO_FEATURE (VIRTIO_NET_F_CSUM) + | VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM) + | VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) + | VIRTIO_FEATURE (VIRTIO_NET_F_MTU) | VIRTIO_FEATURE (VIRTIO_NET_F_MAC) + | VIRTIO_FEATURE (VIRTIO_NET_F_GSO) + | VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4) + | VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6) + | VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO) + | VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO4) + | VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO6) + | VIRTIO_FEATURE (VIRTIO_NET_F_HOST_UFO) | VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) | VIRTIO_FEATURE (VIRTIO_NET_F_STATUS) + | VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ) + | VIRTIO_FEATURE (VIRTIO_NET_F_MQ) + | VIRTIO_FEATURE (VIRTIO_F_NOTIFY_ON_EMPTY) | VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT) | VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC); @@ -536,23 +873,25 @@ virtio_negotiate_features (vlib_main_t * vm, virtio_if_t * vif, vif->features = req_features & vif->remote_features & supported_features; - if (vif-> - remote_features & vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MTU)) + if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MTU)) { virtio_net_config_t config; virtio_pci_legacy_read_config (vm, vif, &config.mtu, - sizeof (config.mtu), 10); + sizeof (config.mtu), + STRUCT_OFFSET_OF (virtio_net_config_t, + mtu)); if (config.mtu < 64) vif->features &= ~VIRTIO_FEATURE (VIRTIO_NET_F_MTU); } - vif->features = virtio_pci_legacy_set_features (vm, vif, vif->features); + vif->features = + virtio_pci_legacy_set_guest_features (vm, vif, vif->features); } void virtio_pci_read_device_feature (vlib_main_t * vm, virtio_if_t * vif) { - vif->remote_features = virtio_pci_legacy_get_features (vm, vif); + vif->remote_features = virtio_pci_legacy_get_host_features (vm, vif); } int @@ -592,25 +931,31 @@ clib_error_t * virtio_pci_read_caps (vlib_main_t * vm, virtio_if_t * vif) { clib_error_t *error = 0; - virtio_main_t *vim = &virtio_main; struct virtio_pci_cap cap; - u8 pos, common_cfg = 0, notify_base = 0, dev_cfg = 0, isr = 0; + u8 pos, common_cfg = 0, notify_base = 0, dev_cfg = 0, isr = 0, pci_cfg = 0; vlib_pci_dev_handle_t h = vif->pci_dev_handle; if ((error = vlib_pci_read_config_u8 (vm, h, PCI_CAPABILITY_LIST, &pos))) - clib_error_return (error, "error in reading capabilty list position"); - + { + virtio_log_error (vif, "error in reading capabilty list position"); + clib_error_return (error, "error in reading capabilty list position"); + } while (pos) { if ((error = vlib_pci_read_write_config (vm, h, VLIB_READ, pos, &cap, sizeof (cap)))) - clib_error_return (error, "error in reading the capability at [%2x]", - pos); + { + virtio_log_error (vif, "%s [%2x]", + "error in reading the capability at", pos); + clib_error_return (error, + "error in reading the capability at [%2x]", pos); + } if (cap.cap_vndr == PCI_CAP_ID_MSIX) { - u16 flags; + u16 flags, table_size, table_size_mask = 0x07FF; + if ((error = vlib_pci_read_write_config (vm, h, VLIB_READ, pos + 2, &flags, sizeof (flags)))) @@ -618,19 +963,32 @@ virtio_pci_read_caps (vlib_main_t * vm, virtio_if_t * vif) "error in reading the capability at [%2x]", pos + 2); + table_size = flags & table_size_mask; + virtio_log_debug (vif, "flags:0x%x %s 0x%x", flags, + "msix interrupt vector table-size", table_size); + if (flags & PCI_MSIX_ENABLE) - msix_enabled = VIRTIO_MSIX_ENABLED; + { + virtio_log_debug (vif, "msix interrupt enabled"); + vif->msix_enabled = VIRTIO_MSIX_ENABLED; + } else - msix_enabled = VIRTIO_MSIX_DISABLED; + { + virtio_log_debug (vif, "msix interrupt disabled"); + vif->msix_enabled = VIRTIO_MSIX_DISABLED; + } } if (cap.cap_vndr != PCI_CAP_ID_VNDR) { - virtio_log_debug (vim, vif, "[%2x] %s %2x ", pos, + virtio_log_debug (vif, "[%2x] %s %2x ", pos, "skipping non VNDR cap id:", cap.cap_vndr); goto next; } + virtio_log_debug (vif, + "[%4x] cfg type: %u, bar: %u, offset: %04x, len: %u", + pos, cap.cfg_type, cap.bar, cap.offset, cap.length); switch (cap.cfg_type) { case VIRTIO_PCI_CAP_COMMON_CFG: @@ -645,6 +1003,10 @@ virtio_pci_read_caps (vlib_main_t * vm, virtio_if_t * vif) case VIRTIO_PCI_CAP_ISR_CFG: isr = 1; break; + case VIRTIO_PCI_CAP_PCI_CFG: + if (cap.bar == 0) + pci_cfg = 1; + break; } next: pos = cap.cap_next; @@ -652,11 +1014,14 @@ virtio_pci_read_caps (vlib_main_t * vm, virtio_if_t * vif) if (common_cfg == 0 || notify_base == 0 || dev_cfg == 0 || isr == 0) { - virtio_log_debug (vim, vif, "legacy virtio pci device found"); + virtio_log_debug (vif, "legacy virtio pci device found"); return error; } - virtio_log_debug (vim, vif, "modern virtio pci device found"); + if (!pci_cfg) + clib_error_return (error, "modern virtio pci device found"); + + virtio_log_debug (vif, "transitional virtio pci device found"); return error; } @@ -665,13 +1030,17 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif, virtio_pci_create_if_args_t * args) { clib_error_t *error = 0; + vlib_thread_main_t *vtm = vlib_get_thread_main (); u8 status = 0; - virtio_pci_read_caps (vm, vif); + if ((error = virtio_pci_read_caps (vm, vif))) + clib_error_return (error, "Device is not supported"); if (virtio_pci_reset_device (vm, vif) < 0) - clib_error_return (error, "Failed to reset the device"); - + { + virtio_log_error (vif, "Failed to reset the device"); + clib_error_return (error, "Failed to reset the device"); + } /* * read device features and negotiate (user) requested features */ @@ -684,10 +1053,16 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif, virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_FEATURES_OK); status = virtio_pci_legacy_get_status (vm, vif); if (!(status & VIRTIO_CONFIG_STATUS_FEATURES_OK)) - clib_error_return (error, "Device doesn't support requested features"); - + { + virtio_log_error (vif, + "error encountered: Device doesn't support requested features"); + clib_error_return (error, "Device doesn't support requested features"); + } vif->status = status; + /* + * get or set the mac address + */ if (virtio_pci_get_mac (vm, vif)) { f64 now = vlib_time_now (vm); @@ -703,23 +1078,87 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif, virtio_set_net_hdr_size (vif); + /* + * Initialize the virtqueues + */ if ((error = virtio_pci_get_max_virtqueue_pairs (vm, vif))) - goto error; + goto err; - if ((error = virtio_pci_vring_init (vm, vif, 0))) - goto error; + for (int i = 0; i < vif->max_queue_pairs; i++) + { + if ((error = virtio_pci_vring_init (vm, vif, RX_QUEUE (i)))) + { + virtio_log_warning (vif, "%s (%u) %s", "error in rxq-queue", + RX_QUEUE (i), "initialization"); + } + else + { + vif->num_rxqs++; + } - if ((error = virtio_pci_vring_init (vm, vif, 1))) - goto error; + if (i >= vtm->n_vlib_mains) + { + /* + * There is 1:1 mapping between tx queue and vpp worker thread. + * tx queue 0 is bind with thread index 0, tx queue 1 on thread + * index 1 and so on. + * Multiple worker threads can poll same tx queue when number of + * workers are more than tx queues. In this case, 1:N mapping + * between tx queue and vpp worker thread. + */ + virtio_log_debug (vif, "%s %u, %s", "tx-queue: number", + TX_QUEUE (i), + "no VPP worker thread is available"); + continue; + } - if (msix_enabled == VIRTIO_MSIX_ENABLED) + if ((error = virtio_pci_vring_init (vm, vif, TX_QUEUE (i)))) + { + virtio_log_warning (vif, "%s (%u) %s", "error in txq-queue", + TX_QUEUE (i), "initialization"); + } + else + { + vif->num_txqs++; + } + } + + if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) { - virtio_pci_legacy_set_config_irq (vm, vif, VIRTIO_MSI_NO_VECTOR); - virtio_pci_legacy_set_queue_irq (vm, vif, VIRTIO_MSI_NO_VECTOR, 0); + if ((error = + virtio_pci_control_vring_init (vm, vif, vif->max_queue_pairs * 2))) + { + virtio_log_warning (vif, "%s (%u) %s", "error in control-queue", + vif->max_queue_pairs * 2, "initialization"); + if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MQ)) + vif->features &= ~VIRTIO_FEATURE (VIRTIO_NET_F_MQ); + } } + else + { + virtio_log_debug (vif, "control queue is not available"); + vif->cxq_vring = NULL; + } + + /* + * set the msix interrupts + */ + if (vif->msix_enabled == VIRTIO_MSIX_ENABLED) + { + if (virtio_pci_legacy_set_config_irq (vm, vif, 1) == + VIRTIO_MSI_NO_VECTOR) + virtio_log_warning (vif, "config vector 1 is not set"); + if (virtio_pci_legacy_set_queue_irq (vm, vif, 0, 0) == + VIRTIO_MSI_NO_VECTOR) + virtio_log_warning (vif, "queue vector 0 is not set"); + } + + /* + * set the driver status OK + */ virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_DRIVER_OK); vif->status = virtio_pci_legacy_get_status (vm, vif); -error: +err: return error; } @@ -732,25 +1171,6 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) vlib_pci_dev_handle_t h; clib_error_t *error = 0; - if (args->rxq_size == 0) - args->rxq_size = VIRTIO_NUM_RX_DESC; - if (args->txq_size == 0) - args->txq_size = VIRTIO_NUM_TX_DESC; - - if (!virtio_pci_queue_size_valid (args->rxq_size) || - !virtio_pci_queue_size_valid (args->txq_size)) - { - args->rv = VNET_API_ERROR_INVALID_VALUE; - args->error = - clib_error_return (error, - "queue size must be <= 4096, >= 64, " - "and multiples of 64"); - vlib_log (VLIB_LOG_LEVEL_ERR, vim->log_default, "%U: %s", - format_vlib_pci_addr, &args->addr, - "queue size must be <= 4096, >= 64, and multiples of 64"); - return; - } - /* *INDENT-OFF* */ pool_foreach (vif, vim->interfaces, ({ if (vif->pci_addr.as_u32 == args->addr) @@ -771,18 +1191,10 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) vif->per_interface_next_index = ~0; vif->pci_addr.as_u32 = args->addr; - if ((vif->fd = open ("/dev/vhost-net", O_RDWR | O_NONBLOCK)) < 0) - { - args->rv = VNET_API_ERROR_SYSCALL_ERROR_1; - args->error = clib_error_return_unix (0, "open '/dev/vhost-net'"); - goto error; - } - if ((error = vlib_pci_device_open (vm, (vlib_pci_addr_t *) & vif->pci_addr, virtio_pci_device_ids, &h))) { - pool_put (vim->interfaces, vif); args->rv = VNET_API_ERROR_INVALID_INTERFACE; args->error = clib_error_return (error, "pci-addr %U", format_vlib_pci_addr, @@ -790,67 +1202,88 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) vlib_log (VLIB_LOG_LEVEL_ERR, vim->log_default, "%U: %s", format_vlib_pci_addr, &vif->pci_addr, "error encountered on pci device open"); + pool_put (vim->interfaces, vif); return; } vif->pci_dev_handle = h; vlib_pci_set_private_data (vm, h, vif->dev_instance); + vif->numa_node = vlib_pci_get_numa_node (vm, h); + vif->type = VIRTIO_IF_TYPE_PCI; if ((error = vlib_pci_bus_master_enable (vm, h))) { - virtio_log_error (vim, vif, - "error encountered on pci bus master enable"); + virtio_log_error (vif, "error encountered on pci bus master enable"); goto error; } if ((error = vlib_pci_io_region (vm, h, 0))) { - virtio_log_error (vim, vif, "error encountered on pci io region"); + virtio_log_error (vif, "error encountered on pci io region"); goto error; } - if ((error = virtio_pci_device_init (vm, vif, args))) - { - virtio_log_error (vim, vif, "error encountered on device init"); - goto error; - } - - if (msix_enabled == VIRTIO_MSIX_ENABLED) + if (vlib_pci_get_num_msix_interrupts (vm, h) > 1) { if ((error = vlib_pci_register_msix_handler (vm, h, 0, 1, &virtio_pci_irq_0_handler))) { - virtio_log_error (vim, vif, + virtio_log_error (vif, "error encountered on pci register msix handler 0"); goto error; } if ((error = vlib_pci_register_msix_handler (vm, h, 1, 1, &virtio_pci_irq_1_handler))) { - virtio_log_error (vim, vif, + virtio_log_error (vif, "error encountered on pci register msix handler 1"); goto error; } if ((error = vlib_pci_enable_msix_irq (vm, h, 0, 2))) { - virtio_log_error (vim, vif, - "error encountered on pci enable msix irq"); + virtio_log_error (vif, "error encountered on pci enable msix irq"); goto error; } + vif->support_int_mode = 1; + virtio_log_debug (vif, "device supports msix interrupts"); + } + else if (vlib_pci_get_num_msix_interrupts (vm, h) == 1) + { + /* + * if msix table-size is 1, fall back to intX. + */ + if ((error = + vlib_pci_register_intx_handler (vm, h, &virtio_pci_irq_handler))) + { + virtio_log_error (vif, + "error encountered on pci register interrupt handler"); + goto error; + } + vif->support_int_mode = 1; + virtio_log_debug (vif, "pci register interrupt handler"); } else { - vlib_pci_register_intx_handler (vm, h, &virtio_pci_irq_handler); + /* + * WARN: intX is showing some weird behaviour. + * Please don't use interrupt mode with UIO driver. + */ + vif->support_int_mode = 0; + virtio_log_debug (vif, "driver is configured in poll mode only"); } if ((error = vlib_pci_intr_enable (vm, h))) { - virtio_log_error (vim, vif, - "error encountered on pci interrupt enable"); + virtio_log_error (vif, "error encountered on pci interrupt enable"); + goto error; + } + + if ((error = virtio_pci_device_init (vm, vif, args))) + { + virtio_log_error (vif, "error encountered on device init"); goto error; } - vif->type = VIRTIO_IF_TYPE_PCI; /* create interface */ error = ethernet_register_interface (vnm, virtio_device_class.index, vif->dev_instance, vif->mac_addr, @@ -859,7 +1292,7 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) if (error) { - virtio_log_error (vim, vif, + virtio_log_error (vif, "error encountered on ethernet register interface"); goto error; } @@ -872,8 +1305,15 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; vnet_hw_interface_set_input_node (vnm, vif->hw_if_index, virtio_input_node.index); - vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, 0, ~0); - + u32 i = 0; + vec_foreach_index (i, vif->rxq_vrings) + { + vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, i, ~0); + virtio_vring_set_numa_node (vm, vif, RX_QUEUE (i)); + /* Set default rx mode to POLLING */ + vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, i, + VNET_HW_INTERFACE_RX_MODE_POLLING); + } if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP) { vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; @@ -882,6 +1322,16 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) } else vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); + + virtio_pci_offloads (vm, vif, args->gso_enabled, + args->checksum_offload_enabled); + + if ((vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) && + (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MQ))) + { + if (virtio_pci_enable_multiqueue (vm, vif, vif->max_queue_pairs)) + virtio_log_warning (vif, "multiqueue is not set"); + } return; error: @@ -902,47 +1352,74 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif) vlib_pci_intr_disable (vm, vif->pci_dev_handle); - virtio_pci_legacy_del_queue (vm, vif, 0); - virtio_pci_legacy_del_queue (vm, vif, 1); + for (i = 0; i < vif->max_queue_pairs; i++) + { + virtio_pci_legacy_del_queue (vm, vif, RX_QUEUE (i)); + virtio_pci_legacy_del_queue (vm, vif, TX_QUEUE (i)); + } + + if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) + virtio_pci_legacy_del_queue (vm, vif, vif->max_queue_pairs * 2); virtio_pci_legacy_reset (vm, vif); if (vif->hw_if_index) { vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); - vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, 0); + vec_foreach_index (i, vif->rxq_vrings) + { + vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, i); + } ethernet_delete_interface (vnm, vif->hw_if_index); } vlib_pci_device_close (vm, vif->pci_dev_handle); - vec_foreach_index (i, vif->vrings) + vec_foreach_index (i, vif->rxq_vrings) { - virtio_vring_t *vring = vec_elt_at_index (vif->vrings, i); + virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, i); if (vring->kick_fd != -1) close (vring->kick_fd); if (vring->used) { - if ((i & 1) == 1) - virtio_free_used_desc (vm, vring); - else - virtio_free_rx_buffers (vm, vring); + virtio_free_rx_buffers (vm, vring); } - if (vring->queue_id % 2) + vec_free (vring->buffers); + vlib_physmem_free (vm, vring->desc); + } + + vec_foreach_index (i, vif->txq_vrings) + { + virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, i); + if (vring->kick_fd != -1) + close (vring->kick_fd); + if (vring->used) { - vlib_buffer_free_no_next (vm, vring->indirect_buffers, vring->size); + virtio_free_used_desc (vm, vring); } vec_free (vring->buffers); - vec_free (vring->indirect_buffers); + clib_spinlock_free (&vring->lockp); vlib_physmem_free (vm, vring->desc); } - vec_free (vif->vrings); + if (vif->cxq_vring != NULL) + { + u16 last = vif->cxq_vring->last_used_idx; + u16 n_left = vif->cxq_vring->used->idx - last; + while (n_left) + { + last++; + n_left--; + } + + vif->cxq_vring->last_used_idx = last; + vlib_physmem_free (vm, vif->cxq_vring->desc); + } + + vec_free (vif->rxq_vrings); + vec_free (vif->txq_vrings); + vec_free (vif->cxq_vring); - if (vif->fd != -1) - close (vif->fd); - if (vif->tap_fd != -1) - vif->tap_fd = -1; clib_error_free (vif->error); memset (vif, 0, sizeof (*vif)); pool_put (vim->interfaces, vif); @@ -950,6 +1427,25 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif) return 0; } +int +virtio_pci_enable_disable_offloads (vlib_main_t * vm, virtio_if_t * vif, + int gso_enabled, + int checksum_offload_enabled, + int offloads_disabled) +{ + if (vif->type != VIRTIO_IF_TYPE_PCI) + return VNET_API_ERROR_INVALID_INTERFACE; + + if (gso_enabled) + virtio_pci_offloads (vm, vif, 1, 0); + else if (checksum_offload_enabled) + virtio_pci_offloads (vm, vif, 0, 1); + else if (offloads_disabled) + virtio_pci_offloads (vm, vif, 0, 0); + + return 0; +} + /* * fd.io coding-style-patch-verification: ON *