X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=lib%2Flibrte_vhost%2Fvirtio_net.c;h=0024f729ebfa5a16caecb54c04cd78bd83f3a2fa;hb=39157ec04095ab012d11db23c462844634bfbb8f;hp=595f67c4d9d1b5a9ff612581a25a0cafc322aa61;hpb=6b3e017e5d25f15da73f7700f7f2ac553ef1a2e9;p=deb_dpdk.git

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 595f67c4..0024f729 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -44,16 +44,26 @@
 #include <rte_udp.h>
 #include <rte_sctp.h>
 #include <rte_arp.h>
+#include <rte_spinlock.h>
 
 #include "vhost.h"
 
 #define MAX_PKT_BURST 32
 #define VHOST_LOG_PAGE	4096
 
+/*
+ * Atomically set a bit in memory.
+ */
+static inline void __attribute__((always_inline))
+vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
+{
+	__sync_fetch_and_or_8(addr, (1U << nr));
+}
+
 static inline void __attribute__((always_inline))
 vhost_log_page(uint8_t *log_base, uint64_t page)
 {
-	log_base[page / 8] |= 1 << (page % 8);
+	vhost_set_bit(page % 8, &log_base[page / 8]);
 }
 
 static inline void __attribute__((always_inline))
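The vhost_set_bit() change above replaces a plain read-modify-write on the dirty log with an atomic OR. The log can be written concurrently by several enqueue/dequeue threads, and with a non-atomic `|=` two threads touching bits in the same byte may each load the old value and silently drop the other's bit. A minimal sketch of the distinction, standalone and not part of the patch (the helper names are invented here, and the generic __sync_fetch_and_or builtin stands in for the size-suffixed variant the patch uses):

	#include <stdint.h>

	/* Racy: a load, an OR, then a store. A concurrent logger hitting the
	 * same byte between the load and the store has its bit overwritten. */
	static void set_bit_racy(unsigned int nr, volatile uint8_t *addr)
	{
		*addr |= (uint8_t)(1U << nr);
	}

	/* Atomic: the read-modify-write is one indivisible operation (a locked
	 * OR on x86), so concurrent updates to the same byte cannot be lost. */
	static void set_bit_atomic(unsigned int nr, volatile uint8_t *addr)
	{
		__sync_fetch_and_or(addr, (uint8_t)(1U << nr));
	}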
@@ -144,11 +154,16 @@ update_shadow_used_ring(struct vhost_virtqueue *vq,
 static void
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
-	if (m_buf->ol_flags & PKT_TX_L4_MASK) {
+	uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK;
+
+	if (m_buf->ol_flags & PKT_TX_TCP_SEG)
+		csum_l4 |= PKT_TX_TCP_CKSUM;
+
+	if (csum_l4) {
 		net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 		net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;
 
-		switch (m_buf->ol_flags & PKT_TX_L4_MASK) {
+		switch (csum_l4) {
 		case PKT_TX_TCP_CKSUM:
 			net_hdr->csum_offset = (offsetof(struct tcp_hdr,
 						cksum));
@@ -164,6 +179,15 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 		}
 	}
 
+	/* IP cksum verification cannot be bypassed, then calculate here */
+	if (m_buf->ol_flags & PKT_TX_IP_CKSUM) {
+		struct ipv4_hdr *ipv4_hdr;
+
+		ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct ipv4_hdr *,
+						   m_buf->l2_len);
+		ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
+	}
+
 	if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
 		if (m_buf->ol_flags & PKT_TX_IPV4)
 			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
@@ -195,6 +219,8 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
 	struct vring_desc *desc;
 	uint64_t desc_addr;
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
+	/* A counter to avoid desc dead loop chain */
+	uint16_t nr_desc = 1;
 
 	desc = &descs[desc_idx];
 	desc_addr = gpa_to_vva(dev, desc->addr);
@@ -233,7 +259,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
 			/* Room in vring buffer is not enough */
 			return -1;
 		}
-		if (unlikely(desc->next >= size))
+		if (unlikely(desc->next >= size || ++nr_desc > size))
 			return -1;
 
 		desc = &descs[desc->next];
@@ -288,8 +314,11 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	}
 
 	vq = dev->virtqueue[queue_id];
+
+	rte_spinlock_lock(&vq->access_lock);
+
 	if (unlikely(vq->enabled == 0))
-		return 0;
+		goto out_access_unlock;
 
 	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
 	start_idx = vq->last_used_idx;
@@ -297,7 +326,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	count = RTE_MIN(count, free_entries);
 	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
 	if (count == 0)
-		return 0;
+		goto out_access_unlock;
 
 	LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
 		dev->vid, start_idx, start_idx + count);
@@ -363,6 +392,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
 			&& (vq->callfd >= 0))
 		eventfd_write(vq->callfd, (eventfd_t)1);
+
+out_access_unlock:
+	rte_spinlock_unlock(&vq->access_lock);
+
 	return count;
 }
 
@@ -557,12 +590,15 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 	}
 
 	vq = dev->virtqueue[queue_id];
+
+	rte_spinlock_lock(&vq->access_lock);
+
 	if (unlikely(vq->enabled == 0))
-		return 0;
+		goto out_access_unlock;
 
 	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
 	if (count == 0)
-		return 0;
+		goto out_access_unlock;
 
 	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
 
@@ -606,6 +642,9 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 			eventfd_write(vq->callfd, (eventfd_t)1);
 	}
 
+out_access_unlock:
+	rte_spinlock_unlock(&vq->access_lock);
+
 	return pkt_idx;
 }
 
@@ -628,9 +667,11 @@ static inline bool
 virtio_net_with_host_offload(struct virtio_net *dev)
 {
 	if (dev->features &
-		(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_ECN |
-		 VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
-		 VIRTIO_NET_F_HOST_UFO))
+		((1ULL << VIRTIO_NET_F_CSUM) |
+		 (1ULL << VIRTIO_NET_F_HOST_ECN) |
+		 (1ULL << VIRTIO_NET_F_HOST_TSO4) |
+		 (1ULL << VIRTIO_NET_F_HOST_TSO6) |
+		 (1ULL << VIRTIO_NET_F_HOST_UFO)))
 		return true;
 
 	return false;
@@ -677,6 +718,7 @@ parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
 	default:
 		m->l3_len = 0;
 		*l4_proto = 0;
+		*l4_hdr = NULL;
 		break;
 	}
 }
@@ -713,7 +755,7 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
 		}
 	}
 
-	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+	if (l4_hdr && hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 		case VIRTIO_NET_HDR_GSO_TCPV4:
 		case VIRTIO_NET_HDR_GSO_TCPV6:
@@ -847,7 +889,8 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
 					desc->addr + desc_offset, cpy_len)))) {
 			cur->data_len = cpy_len;
 			cur->data_off = 0;
-			cur->buf_addr = (void *)(uintptr_t)desc_addr;
+			cur->buf_addr = (void *)(uintptr_t)(desc_addr
+					+ desc_offset);
 			cur->buf_physaddr = hpa;
 
 			/*
@@ -902,6 +945,8 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
 					"allocate memory for mbuf.\n");
 				return -1;
 			}
+			if (unlikely(dev->dequeue_zero_copy))
+				rte_mbuf_refcnt_update(cur, 1);
 
 			prev->next = cur;
 			prev->data_len = mbuf_offset;
@@ -997,6 +1042,22 @@ mbuf_is_consumed(struct rte_mbuf *m)
 	return true;
 }
 
+static inline void __attribute__((always_inline))
+restore_mbuf(struct rte_mbuf *m)
+{
+	uint32_t mbuf_size, priv_size;
+
+	while (m) {
+		priv_size = rte_pktmbuf_priv_size(m->pool);
+		mbuf_size = sizeof(struct rte_mbuf) + priv_size;
+		/* start of buffer is after mbuf structure and priv data */
+
+		m->buf_addr = (char *)m + mbuf_size;
+		m->buf_physaddr = rte_mempool_virt2phy(NULL, m) + mbuf_size;
+		m = m->next;
+	}
+}
+
 uint16_t
 rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
@@ -1021,9 +1082,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	}
 
 	vq = dev->virtqueue[queue_id];
-	if (unlikely(vq->enabled == 0))
+
+	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
 		return 0;
 
+	if (unlikely(vq->enabled == 0))
+		goto out_access_unlock;
+
 	if (unlikely(dev->dequeue_zero_copy)) {
 		struct zcopy_mbuf *zmbuf, *next;
 		int nr_updated = 0;
@@ -1039,6 +1104,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 				nr_updated += 1;
 
 				TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
+				restore_mbuf(zmbuf->mbuf);
 				rte_pktmbuf_free(zmbuf->mbuf);
 				put_zmbuf(zmbuf);
 				vq->nr_zmbuf -= 1;
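restore_mbuf(), added above and called right before rte_pktmbuf_free() in the zero-copy path, undoes what copy_desc_to_mbuf() did earlier: in dequeue zero-copy mode the mbuf's buf_addr/buf_physaddr are pointed into guest memory (desc_addr + desc_offset) instead of copying the payload, so returning such an mbuf to its mempool unrestored would leave a pool element whose data room still points at guest memory. A rough sketch of the round trip, with the function name and the guest_vva/guest_hpa parameters invented for illustration:

	#include <rte_mbuf.h>
	#include <rte_mempool.h>

	/* Hypothetical illustration of the dequeue zero-copy mbuf lifecycle. */
	static void
	zero_copy_round_trip(struct rte_mbuf *m, void *guest_vva,
			phys_addr_t guest_hpa)
	{
		uint32_t hdr_len = sizeof(struct rte_mbuf) +
				rte_pktmbuf_priv_size(m->pool);

		/* Dequeue: point the mbuf at guest memory instead of copying. */
		m->buf_addr = guest_vva;
		m->buf_physaddr = guest_hpa;
		m->data_off = 0;

		/* ... application consumes the packet, guest recycles the desc ... */

		/* Restore: an mbuf's own data room starts right behind the
		 * rte_mbuf header and the private area; see restore_mbuf(). */
		m->buf_addr = (char *)m + hdr_len;
		m->buf_physaddr = rte_mempool_virt2phy(NULL, m) + hdr_len;
		rte_pktmbuf_free(m);
	}

The rte_mbuf_refcnt_update(cur, 1) added in copy_desc_to_mbuf() is the other half of the same scheme: the extra reference keeps the zero-copy mbuf alive after the application frees it, until the zmbuf loop above sees the guest has consumed the descriptor and drops the last reference.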
@@ -1053,14 +1119,26 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	 * array, to looks like that guest actually send such packet.
 	 *
 	 * Check user_send_rarp() for more information.
+	 *
+	 * broadcast_rarp shares a cacheline in the virtio_net structure
+	 * with some fields that are accessed during enqueue and
+	 * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
+	 * result in false sharing between enqueue and dequeue.
+	 *
+	 * Prevent unnecessary false sharing by reading broadcast_rarp first
+	 * and only performing cmpset if the read indicates it is likely to
+	 * be set.
 	 */
-	if (unlikely(rte_atomic16_cmpset((volatile uint16_t *)
-			&dev->broadcast_rarp.cnt, 1, 0))) {
+
+	if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
+			rte_atomic16_cmpset((volatile uint16_t *)
+				&dev->broadcast_rarp.cnt, 1, 0))) {
+
 		rarp_mbuf = rte_pktmbuf_alloc(mbuf_pool);
 		if (rarp_mbuf == NULL) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
-			return 0;
+			goto out_access_unlock;
 		}
 
 		if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
@@ -1074,7 +1152,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	free_entries = *((volatile uint16_t *)&vq->avail->idx) -
 			vq->last_avail_idx;
 	if (free_entries == 0)
-		goto out;
+		goto out_access_unlock;
 
 	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 
@@ -1167,7 +1245,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		update_used_idx(dev, vq, i);
 	}
 
-out:
+out_access_unlock:
+	rte_spinlock_unlock(&vq->access_lock);
+
 	if (unlikely(rarp_mbuf != NULL)) {
 		/*
 		 * Inject it to the head of "pkts" array, so that switch's mac
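Two locking details in this final part of the patch deserve a note. The enqueue paths take vq->access_lock unconditionally, while rte_vhost_dequeue_burst() uses rte_spinlock_trylock() and simply returns 0 when it loses the race, so a control-path operation holding the lock only delays, never blocks, the polling thread. And the broadcast_rarp test is deliberately split: rte_atomic16_cmpset() compiles to a cmpxchg, which takes the cacheline for writing even when the compare fails, so issuing it on every dequeue would cause false sharing with the enqueue-side fields in the same cacheline (per the comment added above). A condensed sketch of that read-before-cmpset pattern, with an invented helper name:

	#include <rte_atomic.h>

	/* Hypothetical helper: clear a rarely-set flag without generating
	 * write traffic on its cacheline in the common (unset) case. */
	static inline int
	test_and_clear16(rte_atomic16_t *flag)
	{
		/* Plain load: the line stays shared while the flag is 0. */
		if (rte_atomic16_read(flag) == 0)
			return 0;

		/* Only now pay for the atomic read-modify-write. */
		return rte_atomic16_cmpset((volatile uint16_t *)&flag->cnt, 1, 0);
	}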