drivers/net/virtio/virtio_rxtx.c (deb_dpdk.git, new upstream version 18.08)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34
35 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
36 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
37 #else
38 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
39 #endif
40
41 int
42 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
43 {
44         struct virtnet_rx *rxvq = rxq;
45         struct virtqueue *vq = rxvq->vq;
46
47         return VIRTQUEUE_NUSED(vq) >= offset;
48 }
49
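/*
 * Return 'num' descriptors to the free pool in IN_ORDER mode: descriptors
 * are consumed sequentially, so it is enough to credit vq_free_cnt and move
 * the tail index to the last freed slot.
 */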
50 void
51 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
52 {
53         vq->vq_free_cnt += num;
54         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
55 }
56
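/*
 * Return the descriptor chain starting at 'desc_idx' to the free list: walk
 * the VRING_DESC_F_NEXT links to find the last descriptor, credit
 * vq_free_cnt, and splice the freed chain after the current tail (or make it
 * the new head if the free list was empty).
 */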
57 void
58 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
59 {
60         struct vring_desc *dp, *dp_tail;
61         struct vq_desc_extra *dxp;
62         uint16_t desc_idx_last = desc_idx;
63
64         dp  = &vq->vq_ring.desc[desc_idx];
65         dxp = &vq->vq_descx[desc_idx];
66         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
67         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
68                 while (dp->flags & VRING_DESC_F_NEXT) {
69                         desc_idx_last = dp->next;
70                         dp = &vq->vq_ring.desc[dp->next];
71                 }
72         }
73         dxp->ndescs = 0;
74
75         /*
76          * We must append the newly freed chain to the end of the existing
77          * free chain, if any. If the virtqueue was completely used, the
78          * free list is empty and the tail index is VQ_RING_DESC_CHAIN_END.
79          */
80         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
81                 vq->vq_desc_head_idx = desc_idx;
82         } else {
83                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
84                 dp_tail->next = desc_idx;
85         }
86
87         vq->vq_desc_tail_idx = desc_idx_last;
88         dp->next = VQ_RING_DESC_CHAIN_END;
89 }
90
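/*
 * Dequeue up to 'num' received buffers from the used ring; the caller has
 * already checked that 'num' used entries are available. Returns the mbufs
 * in 'rx_pkts', their lengths as reported by the device in 'len', and the
 * number actually dequeued.
 */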
91 static uint16_t
92 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
93                            uint32_t *len, uint16_t num)
94 {
95         struct vring_used_elem *uep;
96         struct rte_mbuf *cookie;
97         uint16_t used_idx, desc_idx;
98         uint16_t i;
99
100         /* Caller ensures at least 'num' used entries are available */
101         for (i = 0; i < num ; i++) {
102                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
103                 uep = &vq->vq_ring.used->ring[used_idx];
104                 desc_idx = (uint16_t) uep->id;
105                 len[i] = uep->len;
106                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
107
108                 if (unlikely(cookie == NULL)) {
109                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
110                                 vq->vq_used_cons_idx);
111                         break;
112                 }
113
114                 rte_prefetch0(cookie);
115                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
116                 rx_pkts[i]  = cookie;
117                 vq->vq_used_cons_idx++;
118                 vq_ring_free_chain(vq, desc_idx);
119                 vq->vq_descx[desc_idx].cookie = NULL;
120         }
121
122         return i;
123 }
124
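/*
 * IN_ORDER variant of the RX dequeue: the descriptor index always equals the
 * used-ring index, so the consumed descriptors are returned in one batch via
 * vq_ring_free_inorder() instead of walking chains.
 */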
125 static uint16_t
126 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
127                         struct rte_mbuf **rx_pkts,
128                         uint32_t *len,
129                         uint16_t num)
130 {
131         struct vring_used_elem *uep;
132         struct rte_mbuf *cookie;
133         uint16_t used_idx = 0;
134         uint16_t i;
135
136         if (unlikely(num == 0))
137                 return 0;
138
139         for (i = 0; i < num; i++) {
140                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
141                 /* Desc idx same as used idx */
142                 uep = &vq->vq_ring.used->ring[used_idx];
143                 len[i] = uep->len;
144                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
145
146                 if (unlikely(cookie == NULL)) {
147                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
148                                 vq->vq_used_cons_idx);
149                         break;
150                 }
151
152                 rte_prefetch0(cookie);
153                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
154                 rx_pkts[i]  = cookie;
155                 vq->vq_used_cons_idx++;
156                 vq->vq_descx[used_idx].cookie = NULL;
157         }
158
159         vq_ring_free_inorder(vq, used_idx, i);
160         return i;
161 }
162
163 #ifndef DEFAULT_TX_FREE_THRESH
164 #define DEFAULT_TX_FREE_THRESH 32
165 #endif
166
167 /* Cleanup from completed transmits. */
168 static void
169 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
170 {
171         uint16_t i, used_idx, desc_idx;
172         for (i = 0; i < num; i++) {
173                 struct vring_used_elem *uep;
174                 struct vq_desc_extra *dxp;
175
176                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
177                 uep = &vq->vq_ring.used->ring[used_idx];
178
179                 desc_idx = (uint16_t) uep->id;
180                 dxp = &vq->vq_descx[desc_idx];
181                 vq->vq_used_cons_idx++;
182                 vq_ring_free_chain(vq, desc_idx);
183
184                 if (dxp->cookie != NULL) {
185                         rte_pktmbuf_free(dxp->cookie);
186                         dxp->cookie = NULL;
187                 }
188         }
189 }
190
191 /* Cleanup from completed inorder transmits. */
192 static void
193 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
194 {
195         uint16_t i, used_idx, desc_idx = 0, last_idx;
196         int16_t free_cnt = 0;
197         struct vq_desc_extra *dxp = NULL;
198
199         if (unlikely(num == 0))
200                 return;
201
202         for (i = 0; i < num; i++) {
203                 struct vring_used_elem *uep;
204
205                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
206                 uep = &vq->vq_ring.used->ring[used_idx];
207                 desc_idx = (uint16_t)uep->id;
208
209                 dxp = &vq->vq_descx[desc_idx];
210                 vq->vq_used_cons_idx++;
211
212                 if (dxp->cookie != NULL) {
213                         rte_pktmbuf_free(dxp->cookie);
214                         dxp->cookie = NULL;
215                 }
216         }
217
218         last_idx = desc_idx + dxp->ndescs - 1;
219         free_cnt = last_idx - vq->vq_desc_tail_idx;
220         if (free_cnt <= 0)
221                 free_cnt += vq->vq_nentries;
222
223         vq_ring_free_inorder(vq, last_idx, free_cnt);
224 }
225
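/*
 * Refill the RX ring with 'num' mbufs in IN_ORDER mode. Each descriptor
 * covers the virtio-net header, placed in the mbuf headroom just before the
 * packet data, followed by the rest of the buffer, and is marked
 * device-writable.
 */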
226 static inline int
227 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
228                         struct rte_mbuf **cookies,
229                         uint16_t num)
230 {
231         struct vq_desc_extra *dxp;
232         struct virtio_hw *hw = vq->hw;
233         struct vring_desc *start_dp;
234         uint16_t head_idx, idx, i = 0;
235
236         if (unlikely(vq->vq_free_cnt == 0))
237                 return -ENOSPC;
238         if (unlikely(vq->vq_free_cnt < num))
239                 return -EMSGSIZE;
240
241         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
242         start_dp = vq->vq_ring.desc;
243
244         while (i < num) {
245                 idx = head_idx & (vq->vq_nentries - 1);
246                 dxp = &vq->vq_descx[idx];
247                 dxp->cookie = (void *)cookies[i];
248                 dxp->ndescs = 1;
249
250                 start_dp[idx].addr =
251                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
252                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
253                 start_dp[idx].len =
254                                 cookies[i]->buf_len -
255                                 RTE_PKTMBUF_HEADROOM +
256                                 hw->vtnet_hdr_size;
257                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
258
259                 vq_update_avail_ring(vq, idx);
260                 head_idx++;
261                 i++;
262         }
263
264         vq->vq_desc_head_idx += num;
265         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
266         return 0;
267 }
268
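/*
 * Refill the RX ring with a single mbuf through the regular free-list path;
 * the buffer layout is the same as in the in-order refill above.
 */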
269 static inline int
270 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
271 {
272         struct vq_desc_extra *dxp;
273         struct virtio_hw *hw = vq->hw;
274         struct vring_desc *start_dp;
275         uint16_t needed = 1;
276         uint16_t head_idx, idx;
277
278         if (unlikely(vq->vq_free_cnt == 0))
279                 return -ENOSPC;
280         if (unlikely(vq->vq_free_cnt < needed))
281                 return -EMSGSIZE;
282
283         head_idx = vq->vq_desc_head_idx;
284         if (unlikely(head_idx >= vq->vq_nentries))
285                 return -EFAULT;
286
287         idx = head_idx;
288         dxp = &vq->vq_descx[idx];
289         dxp->cookie = (void *)cookie;
290         dxp->ndescs = needed;
291
292         start_dp = vq->vq_ring.desc;
293         start_dp[idx].addr =
294                 VIRTIO_MBUF_ADDR(cookie, vq) +
295                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
296         start_dp[idx].len =
297                 cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
298         start_dp[idx].flags =  VRING_DESC_F_WRITE;
299         idx = start_dp[idx].next;
300         vq->vq_desc_head_idx = idx;
301         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
302                 vq->vq_desc_tail_idx = idx;
303         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
304         vq_update_avail_ring(vq, head_idx);
305
306         return 0;
307 }
308
309 /* When doing TSO, the IP length is not included in the pseudo header
310  * checksum of the packet given to the PMD, but for virtio it is
311  * expected.
312  */
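/*
 * In other words, the pseudo-header checksum provided by the stack is
 * extended with the L4 length: new = fold(old + ip_paylen), where fold()
 * adds any carry back into the low 16 bits. A hypothetical example:
 * old = 0xfff0 and ip_paylen = 0x0020 sum to 0x10010, which folds to
 * 0x0011 (byte order is ignored for the illustration).
 */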
313 static void
314 virtio_tso_fix_cksum(struct rte_mbuf *m)
315 {
316         /* common case: header is not fragmented */
317         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
318                         m->l4_len)) {
319                 struct ipv4_hdr *iph;
320                 struct ipv6_hdr *ip6h;
321                 struct tcp_hdr *th;
322                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
323                 uint32_t tmp;
324
325                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
326                 th = RTE_PTR_ADD(iph, m->l3_len);
327                 if ((iph->version_ihl >> 4) == 4) {
328                         iph->hdr_checksum = 0;
329                         iph->hdr_checksum = rte_ipv4_cksum(iph);
330                         ip_len = iph->total_length;
331                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
332                                 m->l3_len);
333                 } else {
334                         ip6h = (struct ipv6_hdr *)iph;
335                         ip_paylen = ip6h->payload_len;
336                 }
337
338                 /* the stack's phdr checksum excludes ip_paylen; add it in, as virtio expects */
339                 prev_cksum = th->cksum;
340                 tmp = prev_cksum;
341                 tmp += ip_paylen;
342                 tmp = (tmp & 0xffff) + (tmp >> 16);
343                 new_cksum = tmp;
344
345                 /* replace it in the packet */
346                 th->cksum = new_cksum;
347         }
348 }
349
350
351 /* avoid the write when it is not needed, to lessen cache pressure */
352 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
353         if ((var) != (val))                     \
354                 (var) = (val);                  \
355 } while (0)
356
357 static inline void
358 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
359                         struct rte_mbuf *cookie,
360                         bool offload)
361 {
362         if (offload) {
363                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
364                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
365
366                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
367                 case PKT_TX_UDP_CKSUM:
368                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
369                         hdr->csum_offset = offsetof(struct udp_hdr,
370                                 dgram_cksum);
371                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
372                         break;
373
374                 case PKT_TX_TCP_CKSUM:
375                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
376                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
377                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
378                         break;
379
380                 default:
381                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
382                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
383                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
384                         break;
385                 }
386
387                 /* TCP Segmentation Offload */
388                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
389                         virtio_tso_fix_cksum(cookie);
390                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
391                                 VIRTIO_NET_HDR_GSO_TCPV6 :
392                                 VIRTIO_NET_HDR_GSO_TCPV4;
393                         hdr->gso_size = cookie->tso_segsz;
394                         hdr->hdr_len =
395                                 cookie->l2_len +
396                                 cookie->l3_len +
397                                 cookie->l4_len;
398                 } else {
399                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
400                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
401                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
402                 }
403         }
404 }
405
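/*
 * IN_ORDER TX fast path: each packet consumes exactly one descriptor. The
 * virtio-net header is pushed into the mbuf headroom (callers only take this
 * path for single-segment mbufs with enough headroom) and pkt_len is adjusted
 * back so that stats do not count the header.
 */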
406 static inline void
407 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
408                         struct rte_mbuf **cookies,
409                         uint16_t num)
410 {
411         struct vq_desc_extra *dxp;
412         struct virtqueue *vq = txvq->vq;
413         struct vring_desc *start_dp;
414         struct virtio_net_hdr *hdr;
415         uint16_t idx;
416         uint16_t head_size = vq->hw->vtnet_hdr_size;
417         uint16_t i = 0;
418
419         idx = vq->vq_desc_head_idx;
420         start_dp = vq->vq_ring.desc;
421
422         while (i < num) {
423                 idx = idx & (vq->vq_nentries - 1);
424                 dxp = &vq->vq_descx[idx];
425                 dxp->cookie = (void *)cookies[i];
426                 dxp->ndescs = 1;
427
428                 hdr = (struct virtio_net_hdr *)
429                         rte_pktmbuf_prepend(cookies[i], head_size);
430                 cookies[i]->pkt_len -= head_size;
431
432                 /* if offload disabled, it is not zeroed below, do it now */
433                 if (!vq->hw->has_tx_offload) {
434                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
435                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
436                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
437                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
438                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
439                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
440                 }
441
442                 virtqueue_xmit_offload(hdr, cookies[i],
443                                 vq->hw->has_tx_offload);
444
445                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
446                 start_dp[idx].len   = cookies[i]->data_len;
447                 start_dp[idx].flags = 0;
448
449                 vq_update_avail_ring(vq, idx);
450
451                 idx++;
452                 i++;
453         }
454
455         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
456         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
457 }
458
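/*
 * Generic TX enqueue. The virtio-net header is placed in one of three ways:
 *  - can_push: pushed into the mbuf headroom, so header and data share the
 *    first descriptor (callers allow this only with ANY_LAYOUT or VERSION_1);
 *  - use_indirect: a single ring slot points to an indirect descriptor table
 *    in the reserved region whose first entry is preset to the header;
 *  - otherwise: the first ring slot points to the header kept in the reserved
 *    region and the data descriptors are chained after it.
 */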
459 static inline void
460 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
461                         uint16_t needed, int use_indirect, int can_push,
462                         int in_order)
463 {
464         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
465         struct vq_desc_extra *dxp;
466         struct virtqueue *vq = txvq->vq;
467         struct vring_desc *start_dp;
468         uint16_t seg_num = cookie->nb_segs;
469         uint16_t head_idx, idx;
470         uint16_t head_size = vq->hw->vtnet_hdr_size;
471         struct virtio_net_hdr *hdr;
472
473         head_idx = vq->vq_desc_head_idx;
474         idx = head_idx;
475         dxp = &vq->vq_descx[idx];
476         dxp->cookie = (void *)cookie;
477         dxp->ndescs = needed;
478
479         start_dp = vq->vq_ring.desc;
480
481         if (can_push) {
482                 /* prepend cannot fail, checked by caller */
483                 hdr = (struct virtio_net_hdr *)
484                         rte_pktmbuf_prepend(cookie, head_size);
485                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length, which
486                  * is not wanted here; the subtraction below restores the pkt size.
487                  */
488                 cookie->pkt_len -= head_size;
489
490                 /* if offload disabled, it is not zeroed below, do it now */
491                 if (!vq->hw->has_tx_offload) {
492                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
493                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
494                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
495                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
496                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
497                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
498                 }
499         } else if (use_indirect) {
500                 /* setup tx ring slot to point to indirect
501                  * descriptor list stored in reserved region.
502                  *
503                  * the first slot in indirect ring is already preset
504                  * to point to the header in reserved region
505                  */
506                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
507                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
508                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
509                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
510                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
511
512                 /* loop below will fill in rest of the indirect elements */
513                 start_dp = txr[idx].tx_indir;
514                 idx = 1;
515         } else {
516                 /* setup first tx ring slot to point to header
517                  * stored in reserved region.
518                  */
519                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
520                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
521                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
522                 start_dp[idx].flags = VRING_DESC_F_NEXT;
523                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
524
525                 idx = start_dp[idx].next;
526         }
527
528         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
529
530         do {
531                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
532                 start_dp[idx].len   = cookie->data_len;
533                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
534                 idx = start_dp[idx].next;
535         } while ((cookie = cookie->next) != NULL);
536
537         if (use_indirect)
538                 idx = vq->vq_ring.desc[head_idx].next;
539
540         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
541
542         vq->vq_desc_head_idx = idx;
543         vq_update_avail_ring(vq, head_idx);
544
545         if (!in_order) {
546                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
547                         vq->vq_desc_tail_idx = idx;
548         }
549 }
550
551 void
552 virtio_dev_cq_start(struct rte_eth_dev *dev)
553 {
554         struct virtio_hw *hw = dev->data->dev_private;
555
556         if (hw->cvq && hw->cvq->vq) {
557                 rte_spinlock_init(&hw->cvq->lock);
558                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
559         }
560 }
561
562 int
563 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
564                         uint16_t queue_idx,
565                         uint16_t nb_desc,
566                         unsigned int socket_id __rte_unused,
567                         const struct rte_eth_rxconf *rx_conf __rte_unused,
568                         struct rte_mempool *mp)
569 {
570         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
571         struct virtio_hw *hw = dev->data->dev_private;
572         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
573         struct virtnet_rx *rxvq;
574
575         PMD_INIT_FUNC_TRACE();
576
577         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
578                 nb_desc = vq->vq_nentries;
579         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
580
581         rxvq = &vq->rxq;
582         rxvq->queue_id = queue_idx;
583         rxvq->mpool = mp;
584         if (rxvq->mpool == NULL) {
585                 rte_exit(EXIT_FAILURE,
586                         "Cannot allocate mbufs for rx virtqueue");
587         }
588
589         dev->data->rx_queues[queue_idx] = rxvq;
590
591         return 0;
592 }
593
594 int
595 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
596 {
597         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
598         struct virtio_hw *hw = dev->data->dev_private;
599         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
600         struct virtnet_rx *rxvq = &vq->rxq;
601         struct rte_mbuf *m;
602         uint16_t desc_idx;
603         int error, nbufs, i;
604
605         PMD_INIT_FUNC_TRACE();
606
607         /* Allocate blank mbufs for each rx descriptor */
608         nbufs = 0;
609
610         if (hw->use_simple_rx) {
611                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
612                      desc_idx++) {
613                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
614                         vq->vq_ring.desc[desc_idx].flags =
615                                 VRING_DESC_F_WRITE;
616                 }
617
618                 virtio_rxq_vec_setup(rxvq);
619         }
620
621         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
622         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
623              desc_idx++) {
624                 vq->sw_ring[vq->vq_nentries + desc_idx] =
625                         &rxvq->fake_mbuf;
626         }
627
628         if (hw->use_simple_rx) {
629                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
630                         virtio_rxq_rearm_vec(rxvq);
631                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
632                 }
633         } else if (hw->use_inorder_rx) {
634                 if (!virtqueue_full(vq)) {
635                         uint16_t free_cnt = vq->vq_free_cnt;
636                         struct rte_mbuf *pkts[free_cnt];
637
638                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
639                                 free_cnt)) {
640                                 error = virtqueue_enqueue_refill_inorder(vq,
641                                                 pkts,
642                                                 free_cnt);
643                                 if (unlikely(error)) {
644                                         for (i = 0; i < free_cnt; i++)
645                                                 rte_pktmbuf_free(pkts[i]);
646                                 }
647                         }
648
649                         nbufs += free_cnt;
650                         vq_update_avail_idx(vq);
651                 }
652         } else {
653                 while (!virtqueue_full(vq)) {
654                         m = rte_mbuf_raw_alloc(rxvq->mpool);
655                         if (m == NULL)
656                                 break;
657
658                         /* Enqueue allocated buffers */
659                         error = virtqueue_enqueue_recv_refill(vq, m);
660                         if (error) {
661                                 rte_pktmbuf_free(m);
662                                 break;
663                         }
664                         nbufs++;
665                 }
666
667                 vq_update_avail_idx(vq);
668         }
669
670         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
671
672         VIRTQUEUE_DUMP(vq);
673
674         return 0;
675 }
676
677 /*
678  * struct rte_eth_dev *dev: Used to update dev
679  * uint16_t nb_desc: Defaults to values read from config space
680  * unsigned int socket_id: Used to allocate memzone
681  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
682  * uint16_t queue_idx: Just used as an index in dev txq list
683  */
684 int
685 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
686                         uint16_t queue_idx,
687                         uint16_t nb_desc,
688                         unsigned int socket_id __rte_unused,
689                         const struct rte_eth_txconf *tx_conf)
690 {
691         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
692         struct virtio_hw *hw = dev->data->dev_private;
693         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
694         struct virtnet_tx *txvq;
695         uint16_t tx_free_thresh;
696
697         PMD_INIT_FUNC_TRACE();
698
699         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
700                 nb_desc = vq->vq_nentries;
701         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
702
703         txvq = &vq->txq;
704         txvq->queue_id = queue_idx;
705
706         tx_free_thresh = tx_conf->tx_free_thresh;
707         if (tx_free_thresh == 0)
708                 tx_free_thresh =
709                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
710
711         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
712                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
713                         "number of TX entries minus 3 (%u)."
714                         " (tx_free_thresh=%u port=%u queue=%u)\n",
715                         vq->vq_nentries - 3,
716                         tx_free_thresh, dev->data->port_id, queue_idx);
717                 return -EINVAL;
718         }
719
720         vq->vq_free_thresh = tx_free_thresh;
721
722         dev->data->tx_queues[queue_idx] = txvq;
723         return 0;
724 }
725
726 int
727 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
728                                 uint16_t queue_idx)
729 {
730         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
731         struct virtio_hw *hw = dev->data->dev_private;
732         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
733
734         PMD_INIT_FUNC_TRACE();
735
736         if (hw->use_inorder_tx)
737                 vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
738
739         VIRTQUEUE_DUMP(vq);
740
741         return 0;
742 }
743
744 static void
745 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
746 {
747         int error;
748         /*
749          * Requeue the discarded mbuf. This should always be
750          * successful since it was just dequeued.
751          */
752         error = virtqueue_enqueue_recv_refill(vq, m);
753
754         if (unlikely(error)) {
755                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
756                 rte_pktmbuf_free(m);
757         }
758 }
759
760 static void
761 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
762 {
763         int error;
764
765         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
766         if (unlikely(error)) {
767                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
768                 rte_pktmbuf_free(m);
769         }
770 }
771
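/*
 * Per-packet size histogram: bin 0 is <64 bytes, bin 1 exactly 64 bytes,
 * bins 2-5 cover 65-1023 bytes in power-of-two ranges derived from the
 * highest set bit, bin 6 is 1024-1518 and bin 7 is 1519 bytes and above.
 * For example, a 200-byte packet (highest bit 7) lands in bin 32 - 24 - 5 = 3.
 */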
772 static void
773 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
774 {
775         uint32_t s = mbuf->pkt_len;
776         struct ether_addr *ea;
777
778         if (s == 64) {
779                 stats->size_bins[1]++;
780         } else if (s > 64 && s < 1024) {
781                 uint32_t bin;
782
783                 /* count leading zeros to offset into the correct power-of-two bin */
784                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
785                 stats->size_bins[bin]++;
786         } else {
787                 if (s < 64)
788                         stats->size_bins[0]++;
789                 else if (s < 1519)
790                         stats->size_bins[6]++;
791                 else if (s >= 1519)
792                         stats->size_bins[7]++;
793         }
794
795         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
796         if (is_multicast_ether_addr(ea)) {
797                 if (is_broadcast_ether_addr(ea))
798                         stats->broadcast++;
799                 else
800                         stats->multicast++;
801         }
802 }
803
804 static inline void
805 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
806 {
807         VIRTIO_DUMP_PACKET(m, m->data_len);
808
809         rxvq->stats.bytes += m->pkt_len;
810         virtio_update_packet_stats(&rxvq->stats, m);
811 }
812
813 /* Optionally fill offload information in structure */
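/*
 * The virtio-net header is translated into mbuf offload flags: NEEDS_CSUM
 * either marks the L4 checksum as not yet computed (PKT_RX_L4_CKSUM_NONE)
 * or completes it in software for unrecognized protocols, DATA_VALID maps
 * to PKT_RX_L4_CKSUM_GOOD, and a GSO type fills tso_segsz and sets the LRO
 * flag. A negative return tells the caller to drop the buffer.
 */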
814 static int
815 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
816 {
817         struct rte_net_hdr_lens hdr_lens;
818         uint32_t hdrlen, ptype;
819         int l4_supported = 0;
820
821         /* nothing to do */
822         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
823                 return 0;
824
825         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
826
827         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
828         m->packet_type = ptype;
829         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
830             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
831             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
832                 l4_supported = 1;
833
834         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
835                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
836                 if (hdr->csum_start <= hdrlen && l4_supported) {
837                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
838                 } else {
839                         /* Unknown proto or tunnel, do sw cksum. We can assume
840                          * the cksum field is in the first segment since the
841                          * buffers we provided to the host are large enough.
842                          * In case of SCTP, this will be wrong since it's a CRC
843                          * but there's nothing we can do.
844                          */
845                         uint16_t csum = 0, off;
846
847                         rte_raw_cksum_mbuf(m, hdr->csum_start,
848                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
849                                 &csum);
850                         if (likely(csum != 0xffff))
851                                 csum = ~csum;
852                         off = hdr->csum_offset + hdr->csum_start;
853                         if (rte_pktmbuf_data_len(m) >= off + 1)
854                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
855                                         off) = csum;
856                 }
857         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
858                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
859         }
860
861         /* GSO request, save required information in mbuf */
862         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
863                 /* Check unsupported modes */
864                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
865                     (hdr->gso_size == 0)) {
866                         return -EINVAL;
867                 }
868
869                 /* Update MSS length in the mbuf */
870                 m->tso_segsz = hdr->gso_size;
871                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
872                         case VIRTIO_NET_HDR_GSO_TCPV4:
873                         case VIRTIO_NET_HDR_GSO_TCPV6:
874                                 m->ol_flags |= PKT_RX_LRO | \
875                                         PKT_RX_L4_CKSUM_NONE;
876                                 break;
877                         default:
878                                 return -EINVAL;
879                 }
880         }
881
882         return 0;
883 }
884
885 #define VIRTIO_MBUF_BURST_SZ 64
886 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
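/*
 * DESC_PER_CACHELINE is the number of 16-byte vring descriptors that fit in
 * one CPU cache line; with a typical 64-byte cache line it evaluates to 4.
 * virtio_recv_pkts() below trims its burst so that a batch of used entries
 * ends on such a boundary.
 */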
887 uint16_t
888 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
889 {
890         struct virtnet_rx *rxvq = rx_queue;
891         struct virtqueue *vq = rxvq->vq;
892         struct virtio_hw *hw = vq->hw;
893         struct rte_mbuf *rxm, *new_mbuf;
894         uint16_t nb_used, num, nb_rx;
895         uint32_t len[VIRTIO_MBUF_BURST_SZ];
896         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
897         int error;
898         uint32_t i, nb_enqueued;
899         uint32_t hdr_size;
900         struct virtio_net_hdr *hdr;
901
902         nb_rx = 0;
903         if (unlikely(hw->started == 0))
904                 return nb_rx;
905
906         nb_used = VIRTQUEUE_NUSED(vq);
907
908         virtio_rmb();
909
910         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
911         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
912                 num = VIRTIO_MBUF_BURST_SZ;
913         if (likely(num > DESC_PER_CACHELINE))
914                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
915
916         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
917         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
918
919         nb_enqueued = 0;
920         hdr_size = hw->vtnet_hdr_size;
921
922         for (i = 0; i < num ; i++) {
923                 rxm = rcv_pkts[i];
924
925                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
926
927                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
928                         PMD_RX_LOG(ERR, "Packet drop");
929                         nb_enqueued++;
930                         virtio_discard_rxbuf(vq, rxm);
931                         rxvq->stats.errors++;
932                         continue;
933                 }
934
935                 rxm->port = rxvq->port_id;
936                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
937                 rxm->ol_flags = 0;
938                 rxm->vlan_tci = 0;
939
940                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
941                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
942
943                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
944                         RTE_PKTMBUF_HEADROOM - hdr_size);
945
946                 if (hw->vlan_strip)
947                         rte_vlan_strip(rxm);
948
949                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
950                         virtio_discard_rxbuf(vq, rxm);
951                         rxvq->stats.errors++;
952                         continue;
953                 }
954
955                 virtio_rx_stats_updated(rxvq, rxm);
956
957                 rx_pkts[nb_rx++] = rxm;
958         }
959
960         rxvq->stats.packets += nb_rx;
961
962         /* Allocate new mbuf for the used descriptor */
963         while (likely(!virtqueue_full(vq))) {
964                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
965                 if (unlikely(new_mbuf == NULL)) {
966                         struct rte_eth_dev *dev
967                                 = &rte_eth_devices[rxvq->port_id];
968                         dev->data->rx_mbuf_alloc_failed++;
969                         break;
970                 }
971                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
972                 if (unlikely(error)) {
973                         rte_pktmbuf_free(new_mbuf);
974                         break;
975                 }
976                 nb_enqueued++;
977         }
978
979         if (likely(nb_enqueued)) {
980                 vq_update_avail_idx(vq);
981
982                 if (unlikely(virtqueue_kick_prepare(vq))) {
983                         virtqueue_notify(vq);
984                         PMD_RX_LOG(DEBUG, "Notified");
985                 }
986         }
987
988         return nb_rx;
989 }
990
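/*
 * Mergeable-buffer RX, IN_ORDER variant: num_buffers in the first buffer's
 * header says how many descriptors the device used for the packet; the extra
 * buffers are chained onto the first mbuf as additional segments, fetching
 * more from the used ring if the current burst runs out.
 */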
991 uint16_t
992 virtio_recv_mergeable_pkts_inorder(void *rx_queue,
993                         struct rte_mbuf **rx_pkts,
994                         uint16_t nb_pkts)
995 {
996         struct virtnet_rx *rxvq = rx_queue;
997         struct virtqueue *vq = rxvq->vq;
998         struct virtio_hw *hw = vq->hw;
999         struct rte_mbuf *rxm;
1000         struct rte_mbuf *prev;
1001         uint16_t nb_used, num, nb_rx;
1002         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1003         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1004         int error;
1005         uint32_t nb_enqueued;
1006         uint32_t seg_num;
1007         uint32_t seg_res;
1008         uint32_t hdr_size;
1009         int32_t i;
1010
1011         nb_rx = 0;
1012         if (unlikely(hw->started == 0))
1013                 return nb_rx;
1014
1015         nb_used = VIRTQUEUE_NUSED(vq);
1016         nb_used = RTE_MIN(nb_used, nb_pkts);
1017         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1018
1019         virtio_rmb();
1020
1021         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1022
1023         nb_enqueued = 0;
1024         seg_num = 1;
1025         seg_res = 0;
1026         hdr_size = hw->vtnet_hdr_size;
1027
1028         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1029
1030         for (i = 0; i < num; i++) {
1031                 struct virtio_net_hdr_mrg_rxbuf *header;
1032
1033                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1034                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1035
1036                 rxm = rcv_pkts[i];
1037
1038                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1039                         PMD_RX_LOG(ERR, "Packet drop");
1040                         nb_enqueued++;
1041                         virtio_discard_rxbuf_inorder(vq, rxm);
1042                         rxvq->stats.errors++;
1043                         continue;
1044                 }
1045
1046                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1047                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1048                          - hdr_size);
1049                 seg_num = header->num_buffers;
1050
1051                 if (seg_num == 0)
1052                         seg_num = 1;
1053
1054                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1055                 rxm->nb_segs = seg_num;
1056                 rxm->ol_flags = 0;
1057                 rxm->vlan_tci = 0;
1058                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1059                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1060
1061                 rxm->port = rxvq->port_id;
1062
1063                 rx_pkts[nb_rx] = rxm;
1064                 prev = rxm;
1065
1066                 if (vq->hw->has_rx_offload &&
1067                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1068                         virtio_discard_rxbuf_inorder(vq, rxm);
1069                         rxvq->stats.errors++;
1070                         continue;
1071                 }
1072
1073                 if (hw->vlan_strip)
1074                         rte_vlan_strip(rx_pkts[nb_rx]);
1075
1076                 seg_res = seg_num - 1;
1077
1078                 /* Merge remaining segments */
1079                 while (seg_res != 0 && i < (num - 1)) {
1080                         i++;
1081
1082                         rxm = rcv_pkts[i];
1083                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1084                         rxm->pkt_len = (uint32_t)(len[i]);
1085                         rxm->data_len = (uint16_t)(len[i]);
1086
1087                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1088                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1089
1090                         if (prev)
1091                                 prev->next = rxm;
1092
1093                         prev = rxm;
1094                         seg_res -= 1;
1095                 }
1096
1097                 if (!seg_res) {
1098                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1099                         nb_rx++;
1100                 }
1101         }
1102
1103         /* The last packet may still need its remaining segments merged */
1104         while (seg_res != 0) {
1105                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1106                                         VIRTIO_MBUF_BURST_SZ);
1107
1108                 prev = rcv_pkts[nb_rx];
1109                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1110                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1111                                                            rcv_cnt);
1112                         uint16_t extra_idx = 0;
1113
1114                         rcv_cnt = num;
1115                         while (extra_idx < rcv_cnt) {
1116                                 rxm = rcv_pkts[extra_idx];
1117                                 rxm->data_off =
1118                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1119                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1120                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1121                                 prev->next = rxm;
1122                                 prev = rxm;
1123                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1124                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1125                                 extra_idx += 1;
1126                         }
1127                         seg_res -= rcv_cnt;
1128
1129                         if (!seg_res) {
1130                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1131                                 nb_rx++;
1132                         }
1133                 } else {
1134                         PMD_RX_LOG(ERR,
1135                                         "Not enough segments for the packet.");
1136                         virtio_discard_rxbuf_inorder(vq, prev);
1137                         rxvq->stats.errors++;
1138                         break;
1139                 }
1140         }
1141
1142         rxvq->stats.packets += nb_rx;
1143
1144         /* Allocate new mbuf for the used descriptor */
1145
1146         if (likely(!virtqueue_full(vq))) {
1147                 /* free_cnt may include mrg descs */
1148                 uint16_t free_cnt = vq->vq_free_cnt;
1149                 struct rte_mbuf *new_pkts[free_cnt];
1150
1151                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1152                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1153                                         free_cnt);
1154                         if (unlikely(error)) {
1155                                 for (i = 0; i < free_cnt; i++)
1156                                         rte_pktmbuf_free(new_pkts[i]);
1157                         }
1158                         nb_enqueued += free_cnt;
1159                 } else {
1160                         struct rte_eth_dev *dev =
1161                                 &rte_eth_devices[rxvq->port_id];
1162                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1163                 }
1164         }
1165
1166         if (likely(nb_enqueued)) {
1167                 vq_update_avail_idx(vq);
1168
1169                 if (unlikely(virtqueue_kick_prepare(vq))) {
1170                         virtqueue_notify(vq);
1171                         PMD_RX_LOG(DEBUG, "Notified");
1172                 }
1173         }
1174
1175         return nb_rx;
1176 }
1177
1178 uint16_t
1179 virtio_recv_mergeable_pkts(void *rx_queue,
1180                         struct rte_mbuf **rx_pkts,
1181                         uint16_t nb_pkts)
1182 {
1183         struct virtnet_rx *rxvq = rx_queue;
1184         struct virtqueue *vq = rxvq->vq;
1185         struct virtio_hw *hw = vq->hw;
1186         struct rte_mbuf *rxm, *new_mbuf;
1187         uint16_t nb_used, num, nb_rx;
1188         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1189         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1190         struct rte_mbuf *prev;
1191         int error;
1192         uint32_t i, nb_enqueued;
1193         uint32_t seg_num;
1194         uint16_t extra_idx;
1195         uint32_t seg_res;
1196         uint32_t hdr_size;
1197
1198         nb_rx = 0;
1199         if (unlikely(hw->started == 0))
1200                 return nb_rx;
1201
1202         nb_used = VIRTQUEUE_NUSED(vq);
1203
1204         virtio_rmb();
1205
1206         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1207
1208         i = 0;
1209         nb_enqueued = 0;
1210         seg_num = 0;
1211         extra_idx = 0;
1212         seg_res = 0;
1213         hdr_size = hw->vtnet_hdr_size;
1214
1215         while (i < nb_used) {
1216                 struct virtio_net_hdr_mrg_rxbuf *header;
1217
1218                 if (nb_rx == nb_pkts)
1219                         break;
1220
1221                 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
1222                 if (num != 1)
1223                         continue;
1224
1225                 i++;
1226
1227                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1228                 PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
1229
1230                 rxm = rcv_pkts[0];
1231
1232                 if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
1233                         PMD_RX_LOG(ERR, "Packet drop");
1234                         nb_enqueued++;
1235                         virtio_discard_rxbuf(vq, rxm);
1236                         rxvq->stats.errors++;
1237                         continue;
1238                 }
1239
1240                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
1241                         RTE_PKTMBUF_HEADROOM - hdr_size);
1242                 seg_num = header->num_buffers;
1243
1244                 if (seg_num == 0)
1245                         seg_num = 1;
1246
1247                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1248                 rxm->nb_segs = seg_num;
1249                 rxm->ol_flags = 0;
1250                 rxm->vlan_tci = 0;
1251                 rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
1252                 rxm->data_len = (uint16_t)(len[0] - hdr_size);
1253
1254                 rxm->port = rxvq->port_id;
1255                 rx_pkts[nb_rx] = rxm;
1256                 prev = rxm;
1257
1258                 if (hw->has_rx_offload &&
1259                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1260                         virtio_discard_rxbuf(vq, rxm);
1261                         rxvq->stats.errors++;
1262                         continue;
1263                 }
1264
1265                 seg_res = seg_num - 1;
1266
1267                 while (seg_res != 0) {
1268                         /*
1269                          * Get extra segments for current uncompleted packet.
1270                          */
1271                         uint16_t  rcv_cnt =
1272                                 RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
1273                         if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1274                                 uint32_t rx_num =
1275                                         virtqueue_dequeue_burst_rx(vq,
1276                                         rcv_pkts, len, rcv_cnt);
1277                                 i += rx_num;
1278                                 rcv_cnt = rx_num;
1279                         } else {
1280                                 PMD_RX_LOG(ERR,
1281                                            "Not enough segments for the packet.");
1282                                 nb_enqueued++;
1283                                 virtio_discard_rxbuf(vq, rxm);
1284                                 rxvq->stats.errors++;
1285                                 break;
1286                         }
1287
1288                         extra_idx = 0;
1289
1290                         while (extra_idx < rcv_cnt) {
1291                                 rxm = rcv_pkts[extra_idx];
1292
1293                                 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1294                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1295                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1296
1297                                 if (prev)
1298                                         prev->next = rxm;
1299
1300                                 prev = rxm;
1301                                 rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
1302                                 extra_idx++;
1303                         }
1304                         seg_res -= rcv_cnt;
1305                 }
1306
1307                 if (hw->vlan_strip)
1308                         rte_vlan_strip(rx_pkts[nb_rx]);
1309
1310                 VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
1311                         rx_pkts[nb_rx]->data_len);
1312
1313                 rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
1314                 virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
1315                 nb_rx++;
1316         }
1317
1318         rxvq->stats.packets += nb_rx;
1319
1320         /* Allocate new mbuf for the used descriptor */
1321         while (likely(!virtqueue_full(vq))) {
1322                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1323                 if (unlikely(new_mbuf == NULL)) {
1324                         struct rte_eth_dev *dev
1325                                 = &rte_eth_devices[rxvq->port_id];
1326                         dev->data->rx_mbuf_alloc_failed++;
1327                         break;
1328                 }
1329                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1330                 if (unlikely(error)) {
1331                         rte_pktmbuf_free(new_mbuf);
1332                         break;
1333                 }
1334                 nb_enqueued++;
1335         }
1336
1337         if (likely(nb_enqueued)) {
1338                 vq_update_avail_idx(vq);
1339
1340                 if (unlikely(virtqueue_kick_prepare(vq))) {
1341                         virtqueue_notify(vq);
1342                         PMD_RX_LOG(DEBUG, "Notified");
1343                 }
1344         }
1345
1346         return nb_rx;
1347 }
1348
1349 uint16_t
1350 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1351 {
1352         struct virtnet_tx *txvq = tx_queue;
1353         struct virtqueue *vq = txvq->vq;
1354         struct virtio_hw *hw = vq->hw;
1355         uint16_t hdr_size = hw->vtnet_hdr_size;
1356         uint16_t nb_used, nb_tx = 0;
1357         int error;
1358
1359         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1360                 return nb_tx;
1361
1362         if (unlikely(nb_pkts < 1))
1363                 return nb_pkts;
1364
1365         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1366         nb_used = VIRTQUEUE_NUSED(vq);
1367
1368         virtio_rmb();
1369         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1370                 virtio_xmit_cleanup(vq, nb_used);
1371
1372         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1373                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1374                 int can_push = 0, use_indirect = 0, slots, need;
1375
1376                 /* Do VLAN tag insertion */
1377                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1378                         error = rte_vlan_insert(&txm);
1379                         if (unlikely(error)) {
1380                                 rte_pktmbuf_free(txm);
1381                                 continue;
1382                         }
1383                 }
1384
1385                 /* optimize ring usage */
1386                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1387                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1388                     rte_mbuf_refcnt_read(txm) == 1 &&
1389                     RTE_MBUF_DIRECT(txm) &&
1390                     txm->nb_segs == 1 &&
1391                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1392                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1393                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1394                         can_push = 1;
1395                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
1396                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
1397                         use_indirect = 1;
1398
1399                 /* How many main ring entries are needed for this Tx?
1400                  * any_layout => number of segments
1401                  * indirect   => 1
1402                  * default    => number of segments + 1
1403                  */
1404                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
1405                 need = slots - vq->vq_free_cnt;
1406
1407                 /* A positive value means used descriptors must be reclaimed first */
1408                 if (unlikely(need > 0)) {
1409                         nb_used = VIRTQUEUE_NUSED(vq);
1410                         virtio_rmb();
1411                         need = RTE_MIN(need, (int)nb_used);
1412
1413                         virtio_xmit_cleanup(vq, need);
1414                         need = slots - vq->vq_free_cnt;
1415                         if (unlikely(need > 0)) {
1416                                 PMD_TX_LOG(ERR,
1417                                            "No free tx descriptors to transmit");
1418                                 break;
1419                         }
1420                 }
1421
1422                 /* Enqueue Packet buffers */
1423                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
1424                         can_push, 0);
1425
1426                 txvq->stats.bytes += txm->pkt_len;
1427                 virtio_update_packet_stats(&txvq->stats, txm);
1428         }
1429
1430         txvq->stats.packets += nb_tx;
1431
1432         if (likely(nb_tx)) {
1433                 vq_update_avail_idx(vq);
1434
1435                 if (unlikely(virtqueue_kick_prepare(vq))) {
1436                         virtqueue_notify(vq);
1437                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1438                 }
1439         }
1440
1441         return nb_tx;
1442 }
1443
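/*
 * IN_ORDER TX: packets that can carry the virtio-net header in their own
 * headroom (direct, single-segment, sufficiently aligned, with enough
 * headroom) are batched and submitted one descriptor each through
 * virtqueue_enqueue_xmit_inorder(); any other packet flushes the batch and
 * goes through the chained path with in_order set.
 */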
1444 uint16_t
1445 virtio_xmit_pkts_inorder(void *tx_queue,
1446                         struct rte_mbuf **tx_pkts,
1447                         uint16_t nb_pkts)
1448 {
1449         struct virtnet_tx *txvq = tx_queue;
1450         struct virtqueue *vq = txvq->vq;
1451         struct virtio_hw *hw = vq->hw;
1452         uint16_t hdr_size = hw->vtnet_hdr_size;
1453         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
1454         struct rte_mbuf *inorder_pkts[nb_pkts];
1455         int error;
1456
1457         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1458                 return nb_tx;
1459
1460         if (unlikely(nb_pkts < 1))
1461                 return nb_pkts;
1462
1463         VIRTQUEUE_DUMP(vq);
1464         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1465         nb_used = VIRTQUEUE_NUSED(vq);
1466
1467         virtio_rmb();
1468         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1469                 virtio_xmit_cleanup_inorder(vq, nb_used);
1470
1471         if (unlikely(!vq->vq_free_cnt))
1472                 virtio_xmit_cleanup_inorder(vq, nb_used);
1473
1474         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
1475
1476         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
1477                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1478                 int slots, need;
1479
1480                 /* Do VLAN tag insertion */
1481                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1482                         error = rte_vlan_insert(&txm);
1483                         if (unlikely(error)) {
1484                                 rte_pktmbuf_free(txm);
1485                                 continue;
1486                         }
1487                 }
1488
1489                 /* optimize ring usage */
1490                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1491                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1492                      rte_mbuf_refcnt_read(txm) == 1 &&
1493                      RTE_MBUF_DIRECT(txm) &&
1494                      txm->nb_segs == 1 &&
1495                      rte_pktmbuf_headroom(txm) >= hdr_size &&
1496                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1497                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
1498                         inorder_pkts[nb_inorder_pkts] = txm;
1499                         nb_inorder_pkts++;
1500
1501                         txvq->stats.bytes += txm->pkt_len;
1502                         virtio_update_packet_stats(&txvq->stats, txm);
1503                         continue;
1504                 }
1505
1506                 if (nb_inorder_pkts) {
1507                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1508                                                         nb_inorder_pkts);
1509                         nb_inorder_pkts = 0;
1510                 }
1511
1512                 slots = txm->nb_segs + 1;
1513                 need = slots - vq->vq_free_cnt;
1514                 if (unlikely(need > 0)) {
1515                         nb_used = VIRTQUEUE_NUSED(vq);
1516                         virtio_rmb();
1517                         need = RTE_MIN(need, (int)nb_used);
1518
1519                         virtio_xmit_cleanup_inorder(vq, need);
1520
1521                         need = slots - vq->vq_free_cnt;
1522
1523                         if (unlikely(need > 0)) {
1524                                 PMD_TX_LOG(ERR,
1525                                         "No free tx descriptors to transmit");
1526                                 break;
1527                         }
1528                 }
1529                 /* Enqueue Packet buffers */
1530                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
1531
1532                 txvq->stats.bytes += txm->pkt_len;
1533                 virtio_update_packet_stats(&txvq->stats, txm);
1534         }
1535
1536         /* Transmit all inorder packets */
1537         if (nb_inorder_pkts)
1538                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1539                                                 nb_inorder_pkts);
1540
1541         txvq->stats.packets += nb_tx;
1542
1543         if (likely(nb_tx)) {
1544                 vq_update_avail_idx(vq);
1545
1546                 if (unlikely(virtqueue_kick_prepare(vq))) {
1547                         virtqueue_notify(vq);
1548                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1549                 }
1550         }
1551
1552         VIRTQUEUE_DUMP(vq);
1553
1554         return nb_tx;
1555 }