New upstream version 18.11.2
[deb_dpdk.git] / drivers / net / virtio / virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34
35 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
36 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
37 #else
38 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
39 #endif
40
41 int
42 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
43 {
44         struct virtnet_rx *rxvq = rxq;
45         struct virtqueue *vq = rxvq->vq;
46
47         return VIRTQUEUE_NUSED(vq) >= offset;
48 }
49
50 void
51 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
52 {
53         vq->vq_free_cnt += num;
54         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
55 }
56
57 void
58 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
59 {
60         struct vring_desc *dp, *dp_tail;
61         struct vq_desc_extra *dxp;
62         uint16_t desc_idx_last = desc_idx;
63
64         dp  = &vq->vq_ring.desc[desc_idx];
65         dxp = &vq->vq_descx[desc_idx];
66         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
67         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
68                 while (dp->flags & VRING_DESC_F_NEXT) {
69                         desc_idx_last = dp->next;
70                         dp = &vq->vq_ring.desc[dp->next];
71                 }
72         }
73         dxp->ndescs = 0;
74
75         /*
76          * We must append the existing free chain, if any, to the end of
77          * the newly freed chain. If the virtqueue was completely used,
78          * then the head would be VQ_RING_DESC_CHAIN_END.
79          */
80         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
81                 vq->vq_desc_head_idx = desc_idx;
82         } else {
83                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
84                 dp_tail->next = desc_idx;
85         }
86
87         vq->vq_desc_tail_idx = desc_idx_last;
88         dp->next = VQ_RING_DESC_CHAIN_END;
89 }
90
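/*
 * Dequeue up to num completed Rx buffers from the used ring: return the
 * mbufs and their lengths and free the corresponding descriptor chains.
 */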
91 static uint16_t
92 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
93                            uint32_t *len, uint16_t num)
94 {
95         struct vring_used_elem *uep;
96         struct rte_mbuf *cookie;
97         uint16_t used_idx, desc_idx;
98         uint16_t i;
99
100         /*  Caller does the check */
101         for (i = 0; i < num ; i++) {
102                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
103                 uep = &vq->vq_ring.used->ring[used_idx];
104                 desc_idx = (uint16_t) uep->id;
105                 len[i] = uep->len;
106                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
107
108                 if (unlikely(cookie == NULL)) {
109                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
110                                 vq->vq_used_cons_idx);
111                         break;
112                 }
113
114                 rte_prefetch0(cookie);
115                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
116                 rx_pkts[i]  = cookie;
117                 vq->vq_used_cons_idx++;
118                 vq_ring_free_chain(vq, desc_idx);
119                 vq->vq_descx[desc_idx].cookie = NULL;
120         }
121
122         return i;
123 }
124
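/*
 * In-order variant of the Rx dequeue: the descriptor index equals the used
 * ring index, so all reclaimed descriptors are freed in one batch below.
 */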
125 static uint16_t
126 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
127                         struct rte_mbuf **rx_pkts,
128                         uint32_t *len,
129                         uint16_t num)
130 {
131         struct vring_used_elem *uep;
132         struct rte_mbuf *cookie;
133         uint16_t used_idx = 0;
134         uint16_t i;
135
136         if (unlikely(num == 0))
137                 return 0;
138
139         for (i = 0; i < num; i++) {
140                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
141                 /* Desc idx same as used idx */
142                 uep = &vq->vq_ring.used->ring[used_idx];
143                 len[i] = uep->len;
144                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
145
146                 if (unlikely(cookie == NULL)) {
147                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
148                                 vq->vq_used_cons_idx);
149                         break;
150                 }
151
152                 rte_prefetch0(cookie);
153                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
154                 rx_pkts[i]  = cookie;
155                 vq->vq_used_cons_idx++;
156                 vq->vq_descx[used_idx].cookie = NULL;
157         }
158
159         vq_ring_free_inorder(vq, used_idx, i);
160         return i;
161 }
162
163 #ifndef DEFAULT_TX_FREE_THRESH
164 #define DEFAULT_TX_FREE_THRESH 32
165 #endif
166
167 /* Cleanup from completed transmits. */
168 static void
169 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
170 {
171         uint16_t i, used_idx, desc_idx;
172         for (i = 0; i < num; i++) {
173                 struct vring_used_elem *uep;
174                 struct vq_desc_extra *dxp;
175
176                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
177                 uep = &vq->vq_ring.used->ring[used_idx];
178
179                 desc_idx = (uint16_t) uep->id;
180                 dxp = &vq->vq_descx[desc_idx];
181                 vq->vq_used_cons_idx++;
182                 vq_ring_free_chain(vq, desc_idx);
183
184                 if (dxp->cookie != NULL) {
185                         rte_pktmbuf_free(dxp->cookie);
186                         dxp->cookie = NULL;
187                 }
188         }
189 }
190
191 /* Cleanup from completed inorder transmits. */
192 static void
193 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
194 {
195         uint16_t i, idx = vq->vq_used_cons_idx;
196         int16_t free_cnt = 0;
197         struct vq_desc_extra *dxp = NULL;
198
199         if (unlikely(num == 0))
200                 return;
201
202         for (i = 0; i < num; i++) {
203                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
204                 free_cnt += dxp->ndescs;
205                 if (dxp->cookie != NULL) {
206                         rte_pktmbuf_free(dxp->cookie);
207                         dxp->cookie = NULL;
208                 }
209         }
210
211         vq->vq_free_cnt += free_cnt;
212         vq->vq_used_cons_idx = idx;
213 }
214
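/*
 * Refill the Rx ring in order: one device-writable descriptor per mbuf, with
 * the buffer placed so the virtio-net header lands in the mbuf headroom.
 */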
215 static inline int
216 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
217                         struct rte_mbuf **cookies,
218                         uint16_t num)
219 {
220         struct vq_desc_extra *dxp;
221         struct virtio_hw *hw = vq->hw;
222         struct vring_desc *start_dp;
223         uint16_t head_idx, idx, i = 0;
224
225         if (unlikely(vq->vq_free_cnt == 0))
226                 return -ENOSPC;
227         if (unlikely(vq->vq_free_cnt < num))
228                 return -EMSGSIZE;
229
230         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
231         start_dp = vq->vq_ring.desc;
232
233         while (i < num) {
234                 idx = head_idx & (vq->vq_nentries - 1);
235                 dxp = &vq->vq_descx[idx];
236                 dxp->cookie = (void *)cookies[i];
237                 dxp->ndescs = 1;
238
239                 start_dp[idx].addr =
240                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
241                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
242                 start_dp[idx].len =
243                                 cookies[i]->buf_len -
244                                 RTE_PKTMBUF_HEADROOM +
245                                 hw->vtnet_hdr_size;
246                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
247
248                 vq_update_avail_ring(vq, idx);
249                 head_idx++;
250                 i++;
251         }
252
253         vq->vq_desc_head_idx += num;
254         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
255         return 0;
256 }
257
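/* Add a single mbuf to the Rx ring as one device-writable descriptor. */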
258 static inline int
259 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
260 {
261         struct vq_desc_extra *dxp;
262         struct virtio_hw *hw = vq->hw;
263         struct vring_desc *start_dp;
264         uint16_t needed = 1;
265         uint16_t head_idx, idx;
266
267         if (unlikely(vq->vq_free_cnt == 0))
268                 return -ENOSPC;
269         if (unlikely(vq->vq_free_cnt < needed))
270                 return -EMSGSIZE;
271
272         head_idx = vq->vq_desc_head_idx;
273         if (unlikely(head_idx >= vq->vq_nentries))
274                 return -EFAULT;
275
276         idx = head_idx;
277         dxp = &vq->vq_descx[idx];
278         dxp->cookie = (void *)cookie;
279         dxp->ndescs = needed;
280
281         start_dp = vq->vq_ring.desc;
282         start_dp[idx].addr =
283                 VIRTIO_MBUF_ADDR(cookie, vq) +
284                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
285         start_dp[idx].len =
286                 cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
287         start_dp[idx].flags =  VRING_DESC_F_WRITE;
288         idx = start_dp[idx].next;
289         vq->vq_desc_head_idx = idx;
290         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
291                 vq->vq_desc_tail_idx = idx;
292         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
293         vq_update_avail_ring(vq, head_idx);
294
295         return 0;
296 }
297
298 /* When doing TSO, the IP length is not included in the pseudo header
299  * checksum of the packet given to the PMD, but for virtio it is
300  * expected.
301  */
302 static void
303 virtio_tso_fix_cksum(struct rte_mbuf *m)
304 {
305         /* common case: header is not fragmented */
306         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
307                         m->l4_len)) {
308                 struct ipv4_hdr *iph;
309                 struct ipv6_hdr *ip6h;
310                 struct tcp_hdr *th;
311                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
312                 uint32_t tmp;
313
314                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
315                 th = RTE_PTR_ADD(iph, m->l3_len);
316                 if ((iph->version_ihl >> 4) == 4) {
317                         iph->hdr_checksum = 0;
318                         iph->hdr_checksum = rte_ipv4_cksum(iph);
319                         ip_len = iph->total_length;
320                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
321                                 m->l3_len);
322                 } else {
323                         ip6h = (struct ipv6_hdr *)iph;
324                         ip_paylen = ip6h->payload_len;
325                 }
326
327                 /* calculate the new phdr checksum not including ip_paylen */
328                 prev_cksum = th->cksum;
329                 tmp = prev_cksum;
330                 tmp += ip_paylen;
331                 tmp = (tmp & 0xffff) + (tmp >> 16);
332                 new_cksum = tmp;
333
334                 /* replace it in the packet */
335                 th->cksum = new_cksum;
336         }
337 }
338
339
340 /* avoid a write operation when it is not needed, to lessen cache issues */
341 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
342         if ((var) != (val))                     \
343                 (var) = (val);                  \
344 } while (0)
345
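/*
 * Translate the mbuf's Tx offload requests (L4 checksum, TSO) into the
 * virtio-net header fields read by the host.
 */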
346 static inline void
347 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
348                         struct rte_mbuf *cookie,
349                         bool offload)
350 {
351         if (offload) {
352                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
353                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
354
355                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
356                 case PKT_TX_UDP_CKSUM:
357                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
358                         hdr->csum_offset = offsetof(struct udp_hdr,
359                                 dgram_cksum);
360                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
361                         break;
362
363                 case PKT_TX_TCP_CKSUM:
364                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
365                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
366                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
367                         break;
368
369                 default:
370                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
371                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
372                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
373                         break;
374                 }
375
376                 /* TCP Segmentation Offload */
377                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
378                         virtio_tso_fix_cksum(cookie);
379                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
380                                 VIRTIO_NET_HDR_GSO_TCPV6 :
381                                 VIRTIO_NET_HDR_GSO_TCPV4;
382                         hdr->gso_size = cookie->tso_segsz;
383                         hdr->hdr_len =
384                                 cookie->l2_len +
385                                 cookie->l3_len +
386                                 cookie->l4_len;
387                 } else {
388                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
389                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
390                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
391                 }
392         }
393 }
394
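/*
 * Enqueue num single-segment packets for transmit in order, one descriptor
 * per packet, with the virtio-net header prepended into each mbuf headroom.
 */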
395 static inline void
396 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
397                         struct rte_mbuf **cookies,
398                         uint16_t num)
399 {
400         struct vq_desc_extra *dxp;
401         struct virtqueue *vq = txvq->vq;
402         struct vring_desc *start_dp;
403         struct virtio_net_hdr *hdr;
404         uint16_t idx;
405         uint16_t head_size = vq->hw->vtnet_hdr_size;
406         uint16_t i = 0;
407
408         idx = vq->vq_desc_head_idx;
409         start_dp = vq->vq_ring.desc;
410
411         while (i < num) {
412                 idx = idx & (vq->vq_nentries - 1);
413                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
414                 dxp->cookie = (void *)cookies[i];
415                 dxp->ndescs = 1;
416
417                 hdr = (struct virtio_net_hdr *)
418                         rte_pktmbuf_prepend(cookies[i], head_size);
419                 cookies[i]->pkt_len -= head_size;
420
421                 /* if offload is disabled, the header is not zeroed below, so do it now */
422                 if (!vq->hw->has_tx_offload) {
423                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
424                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
425                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
426                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
427                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
428                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
429                 }
430
431                 virtqueue_xmit_offload(hdr, cookies[i],
432                                 vq->hw->has_tx_offload);
433
434                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
435                 start_dp[idx].len   = cookies[i]->data_len;
436                 start_dp[idx].flags = 0;
437
438                 vq_update_avail_ring(vq, idx);
439
440                 idx++;
441                 i++;
442         }
443
444         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
445         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
446 }
447
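/*
 * Enqueue one packet for transmit. The virtio-net header is either pushed
 * into the mbuf headroom (can_push), stored in the reserved region and
 * referenced through an indirect descriptor table (use_indirect), or given
 * its own descriptor ahead of the data.
 */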
448 static inline void
449 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
450                         uint16_t needed, int use_indirect, int can_push,
451                         int in_order)
452 {
453         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
454         struct vq_desc_extra *dxp;
455         struct virtqueue *vq = txvq->vq;
456         struct vring_desc *start_dp;
457         uint16_t seg_num = cookie->nb_segs;
458         uint16_t head_idx, idx;
459         uint16_t head_size = vq->hw->vtnet_hdr_size;
460         struct virtio_net_hdr *hdr;
461
462         head_idx = vq->vq_desc_head_idx;
463         idx = head_idx;
464         if (in_order)
465                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
466         else
467                 dxp = &vq->vq_descx[idx];
468         dxp->cookie = (void *)cookie;
469         dxp->ndescs = needed;
470
471         start_dp = vq->vq_ring.desc;
472
473         if (can_push) {
474                 /* prepend cannot fail, checked by caller */
475                 hdr = (struct virtio_net_hdr *)
476                         rte_pktmbuf_prepend(cookie, head_size);
477                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length, which
478                  * is not wanted here; the subtraction below restores the pkt size.
479                  */
480                 cookie->pkt_len -= head_size;
481
482                 /* if offload is disabled, the header is not zeroed below, so do it now */
483                 if (!vq->hw->has_tx_offload) {
484                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
485                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
486                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
487                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
488                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
489                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
490                 }
491         } else if (use_indirect) {
492                 /* setup tx ring slot to point to indirect
493                  * descriptor list stored in reserved region.
494                  *
495                  * the first slot in indirect ring is already preset
496                  * to point to the header in reserved region
497                  */
498                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
499                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
500                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
501                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
502                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
503
504                 /* loop below will fill in the rest of the indirect elements */
505                 start_dp = txr[idx].tx_indir;
506                 idx = 1;
507         } else {
508                 /* setup first tx ring slot to point to header
509                  * stored in reserved region.
510                  */
511                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
512                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
513                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
514                 start_dp[idx].flags = VRING_DESC_F_NEXT;
515                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
516
517                 idx = start_dp[idx].next;
518         }
519
520         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
521
522         do {
523                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
524                 start_dp[idx].len   = cookie->data_len;
525                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
526                 idx = start_dp[idx].next;
527         } while ((cookie = cookie->next) != NULL);
528
529         if (use_indirect)
530                 idx = vq->vq_ring.desc[head_idx].next;
531
532         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
533
534         vq->vq_desc_head_idx = idx;
535         vq_update_avail_ring(vq, head_idx);
536
537         if (!in_order) {
538                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
539                         vq->vq_desc_tail_idx = idx;
540         }
541 }
542
543 void
544 virtio_dev_cq_start(struct rte_eth_dev *dev)
545 {
546         struct virtio_hw *hw = dev->data->dev_private;
547
548         if (hw->cvq && hw->cvq->vq) {
549                 rte_spinlock_init(&hw->cvq->lock);
550                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
551         }
552 }
553
554 int
555 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
556                         uint16_t queue_idx,
557                         uint16_t nb_desc,
558                         unsigned int socket_id __rte_unused,
559                         const struct rte_eth_rxconf *rx_conf __rte_unused,
560                         struct rte_mempool *mp)
561 {
562         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
563         struct virtio_hw *hw = dev->data->dev_private;
564         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
565         struct virtnet_rx *rxvq;
566
567         PMD_INIT_FUNC_TRACE();
568
569         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
570                 nb_desc = vq->vq_nentries;
571         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
572
573         rxvq = &vq->rxq;
574         rxvq->queue_id = queue_idx;
575         rxvq->mpool = mp;
576         if (rxvq->mpool == NULL) {
577                 rte_exit(EXIT_FAILURE,
578                         "Cannot allocate mbufs for rx virtqueue");
579         }
580
581         dev->data->rx_queues[queue_idx] = rxvq;
582
583         return 0;
584 }
585
586 int
587 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
588 {
589         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
590         struct virtio_hw *hw = dev->data->dev_private;
591         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
592         struct virtnet_rx *rxvq = &vq->rxq;
593         struct rte_mbuf *m;
594         uint16_t desc_idx;
595         int error, nbufs, i;
596
597         PMD_INIT_FUNC_TRACE();
598
599         /* Allocate blank mbufs for each rx descriptor */
600         nbufs = 0;
601
602         if (hw->use_simple_rx) {
603                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
604                      desc_idx++) {
605                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
606                         vq->vq_ring.desc[desc_idx].flags =
607                                 VRING_DESC_F_WRITE;
608                 }
609
610                 virtio_rxq_vec_setup(rxvq);
611         }
612
613         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
614         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
615              desc_idx++) {
616                 vq->sw_ring[vq->vq_nentries + desc_idx] =
617                         &rxvq->fake_mbuf;
618         }
619
620         if (hw->use_simple_rx) {
621                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
622                         virtio_rxq_rearm_vec(rxvq);
623                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
624                 }
625         } else if (hw->use_inorder_rx) {
626                 if ((!virtqueue_full(vq))) {
627                         uint16_t free_cnt = vq->vq_free_cnt;
628                         struct rte_mbuf *pkts[free_cnt];
629
630                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
631                                 free_cnt)) {
632                                 error = virtqueue_enqueue_refill_inorder(vq,
633                                                 pkts,
634                                                 free_cnt);
635                                 if (unlikely(error)) {
636                                         for (i = 0; i < free_cnt; i++)
637                                                 rte_pktmbuf_free(pkts[i]);
638                                 }
639                         }
640
641                         nbufs += free_cnt;
642                         vq_update_avail_idx(vq);
643                 }
644         } else {
645                 while (!virtqueue_full(vq)) {
646                         m = rte_mbuf_raw_alloc(rxvq->mpool);
647                         if (m == NULL)
648                                 break;
649
650                         /* Enqueue allocated buffers */
651                         error = virtqueue_enqueue_recv_refill(vq, m);
652                         if (error) {
653                                 rte_pktmbuf_free(m);
654                                 break;
655                         }
656                         nbufs++;
657                 }
658
659                 vq_update_avail_idx(vq);
660         }
661
662         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
663
664         VIRTQUEUE_DUMP(vq);
665
666         return 0;
667 }
668
669 /*
670  * struct rte_eth_dev *dev: device whose Tx queue is being set up
671  * uint16_t nb_desc: requested ring size; 0 means use the config space value
672  * unsigned int socket_id: socket used to allocate the memzone
673  * const struct rte_eth_txconf *tx_conf: used to set up the Tx engine
674  * uint16_t queue_idx: index into the device's Tx queue list
675  */
676 int
677 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
678                         uint16_t queue_idx,
679                         uint16_t nb_desc,
680                         unsigned int socket_id __rte_unused,
681                         const struct rte_eth_txconf *tx_conf)
682 {
683         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
684         struct virtio_hw *hw = dev->data->dev_private;
685         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
686         struct virtnet_tx *txvq;
687         uint16_t tx_free_thresh;
688
689         PMD_INIT_FUNC_TRACE();
690
691         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
692                 nb_desc = vq->vq_nentries;
693         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
694
695         txvq = &vq->txq;
696         txvq->queue_id = queue_idx;
697
698         tx_free_thresh = tx_conf->tx_free_thresh;
699         if (tx_free_thresh == 0)
700                 tx_free_thresh =
701                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
702
703         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
704                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
705                         "number of TX entries minus 3 (%u)."
706                         " (tx_free_thresh=%u port=%u queue=%u)\n",
707                         vq->vq_nentries - 3,
708                         tx_free_thresh, dev->data->port_id, queue_idx);
709                 return -EINVAL;
710         }
711
712         vq->vq_free_thresh = tx_free_thresh;
713
714         dev->data->tx_queues[queue_idx] = txvq;
715         return 0;
716 }
717
718 int
719 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
720                                 uint16_t queue_idx)
721 {
722         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
723         struct virtio_hw *hw = dev->data->dev_private;
724         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
725
726         PMD_INIT_FUNC_TRACE();
727
728         if (hw->use_inorder_tx)
729                 vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
730
731         VIRTQUEUE_DUMP(vq);
732
733         return 0;
734 }
735
736 static void
737 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
738 {
739         int error;
740         /*
741          * Requeue the discarded mbuf. This should always be
742          * successful since it was just dequeued.
743          */
744         error = virtqueue_enqueue_recv_refill(vq, m);
745
746         if (unlikely(error)) {
747                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
748                 rte_pktmbuf_free(m);
749         }
750 }
751
752 static void
753 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
754 {
755         int error;
756
757         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
758         if (unlikely(error)) {
759                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
760                 rte_pktmbuf_free(m);
761         }
762 }
763
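/* Update the packet size histogram and multicast/broadcast counters. */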
764 static void
765 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
766 {
767         uint32_t s = mbuf->pkt_len;
768         struct ether_addr *ea;
769
770         if (s == 64) {
771                 stats->size_bins[1]++;
772         } else if (s > 64 && s < 1024) {
773                 uint32_t bin;
774
775                 /* count zeros, and offset into correct bin */
776                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
777                 stats->size_bins[bin]++;
778         } else {
779                 if (s < 64)
780                         stats->size_bins[0]++;
781                 else if (s < 1519)
782                         stats->size_bins[6]++;
783                 else
784                         stats->size_bins[7]++;
785         }
786
787         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
788         if (is_multicast_ether_addr(ea)) {
789                 if (is_broadcast_ether_addr(ea))
790                         stats->broadcast++;
791                 else
792                         stats->multicast++;
793         }
794 }
795
796 static inline void
797 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
798 {
799         VIRTIO_DUMP_PACKET(m, m->data_len);
800
801         rxvq->stats.bytes += m->pkt_len;
802         virtio_update_packet_stats(&rxvq->stats, m);
803 }
804
805 /* Optionally fill Rx offload information in the mbuf from the virtio-net header */
806 static int
807 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
808 {
809         struct rte_net_hdr_lens hdr_lens;
810         uint32_t hdrlen, ptype;
811         int l4_supported = 0;
812
813         /* nothing to do */
814         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
815                 return 0;
816
817         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
818
819         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
820         m->packet_type = ptype;
821         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
822             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
823             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
824                 l4_supported = 1;
825
826         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
827                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
828                 if (hdr->csum_start <= hdrlen && l4_supported) {
829                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
830                 } else {
831                         /* Unknown proto or tunnel, do sw cksum. We can assume
832                          * the cksum field is in the first segment since the
833                          * buffers we provided to the host are large enough.
834                          * In case of SCTP, this will be wrong since it's a CRC
835                          * but there's nothing we can do.
836                          */
837                         uint16_t csum = 0, off;
838
839                         rte_raw_cksum_mbuf(m, hdr->csum_start,
840                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
841                                 &csum);
842                         if (likely(csum != 0xffff))
843                                 csum = ~csum;
844                         off = hdr->csum_offset + hdr->csum_start;
845                         if (rte_pktmbuf_data_len(m) >= off + 1)
846                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
847                                         off) = csum;
848                 }
849         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
850                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
851         }
852
853         /* GSO request, save required information in mbuf */
854         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
855                 /* Check unsupported modes */
856                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
857                     (hdr->gso_size == 0)) {
858                         return -EINVAL;
859                 }
860
861                 /* Update MSS length in mbuf */
862                 m->tso_segsz = hdr->gso_size;
863                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
864                         case VIRTIO_NET_HDR_GSO_TCPV4:
865                         case VIRTIO_NET_HDR_GSO_TCPV6:
866                                 m->ol_flags |= PKT_RX_LRO | \
867                                         PKT_RX_L4_CKSUM_NONE;
868                                 break;
869                         default:
870                                 return -EINVAL;
871                 }
872         }
873
874         return 0;
875 }
876
877 #define VIRTIO_MBUF_BURST_SZ 64
878 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
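/* Receive burst for the non-mergeable path: one descriptor per packet. */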
879 uint16_t
880 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
881 {
882         struct virtnet_rx *rxvq = rx_queue;
883         struct virtqueue *vq = rxvq->vq;
884         struct virtio_hw *hw = vq->hw;
885         struct rte_mbuf *rxm, *new_mbuf;
886         uint16_t nb_used, num, nb_rx;
887         uint32_t len[VIRTIO_MBUF_BURST_SZ];
888         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
889         int error;
890         uint32_t i, nb_enqueued;
891         uint32_t hdr_size;
892         struct virtio_net_hdr *hdr;
893
894         nb_rx = 0;
895         if (unlikely(hw->started == 0))
896                 return nb_rx;
897
898         nb_used = VIRTQUEUE_NUSED(vq);
899
900         virtio_rmb();
901
902         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
903         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
904                 num = VIRTIO_MBUF_BURST_SZ;
905         if (likely(num > DESC_PER_CACHELINE))
906                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
907
908         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
909         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
910
911         nb_enqueued = 0;
912         hdr_size = hw->vtnet_hdr_size;
913
914         for (i = 0; i < num ; i++) {
915                 rxm = rcv_pkts[i];
916
917                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
918
919                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
920                         PMD_RX_LOG(ERR, "Packet drop");
921                         nb_enqueued++;
922                         virtio_discard_rxbuf(vq, rxm);
923                         rxvq->stats.errors++;
924                         continue;
925                 }
926
927                 rxm->port = rxvq->port_id;
928                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
929                 rxm->ol_flags = 0;
930                 rxm->vlan_tci = 0;
931
932                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
933                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
934
935                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
936                         RTE_PKTMBUF_HEADROOM - hdr_size);
937
938                 if (hw->vlan_strip)
939                         rte_vlan_strip(rxm);
940
941                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
942                         virtio_discard_rxbuf(vq, rxm);
943                         rxvq->stats.errors++;
944                         continue;
945                 }
946
947                 virtio_rx_stats_updated(rxvq, rxm);
948
949                 rx_pkts[nb_rx++] = rxm;
950         }
951
952         rxvq->stats.packets += nb_rx;
953
954         /* Allocate new mbuf for the used descriptor */
955         while (likely(!virtqueue_full(vq))) {
956                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
957                 if (unlikely(new_mbuf == NULL)) {
958                         struct rte_eth_dev *dev
959                                 = &rte_eth_devices[rxvq->port_id];
960                         dev->data->rx_mbuf_alloc_failed++;
961                         break;
962                 }
963                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
964                 if (unlikely(error)) {
965                         rte_pktmbuf_free(new_mbuf);
966                         break;
967                 }
968                 nb_enqueued++;
969         }
970
971         if (likely(nb_enqueued)) {
972                 vq_update_avail_idx(vq);
973
974                 if (unlikely(virtqueue_kick_prepare(vq))) {
975                         virtqueue_notify(vq);
976                         PMD_RX_LOG(DEBUG, "Notified");
977                 }
978         }
979
980         return nb_rx;
981 }
982
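/*
 * Receive burst for mergeable Rx buffers on the in-order path: the header's
 * num_buffers field gives the number of consecutive descriptors that make up
 * one packet.
 */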
983 uint16_t
984 virtio_recv_mergeable_pkts_inorder(void *rx_queue,
985                         struct rte_mbuf **rx_pkts,
986                         uint16_t nb_pkts)
987 {
988         struct virtnet_rx *rxvq = rx_queue;
989         struct virtqueue *vq = rxvq->vq;
990         struct virtio_hw *hw = vq->hw;
991         struct rte_mbuf *rxm;
992         struct rte_mbuf *prev;
993         uint16_t nb_used, num, nb_rx;
994         uint32_t len[VIRTIO_MBUF_BURST_SZ];
995         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
996         int error;
997         uint32_t nb_enqueued;
998         uint32_t seg_num;
999         uint32_t seg_res;
1000         uint32_t hdr_size;
1001         int32_t i;
1002
1003         nb_rx = 0;
1004         if (unlikely(hw->started == 0))
1005                 return nb_rx;
1006
1007         nb_used = VIRTQUEUE_NUSED(vq);
1008         nb_used = RTE_MIN(nb_used, nb_pkts);
1009         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1010
1011         virtio_rmb();
1012
1013         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1014
1015         nb_enqueued = 0;
1016         seg_num = 1;
1017         seg_res = 0;
1018         hdr_size = hw->vtnet_hdr_size;
1019
1020         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1021
1022         for (i = 0; i < num; i++) {
1023                 struct virtio_net_hdr_mrg_rxbuf *header;
1024
1025                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1026                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1027
1028                 rxm = rcv_pkts[i];
1029
1030                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1031                         PMD_RX_LOG(ERR, "Packet drop");
1032                         nb_enqueued++;
1033                         virtio_discard_rxbuf_inorder(vq, rxm);
1034                         rxvq->stats.errors++;
1035                         continue;
1036                 }
1037
1038                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1039                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1040                          - hdr_size);
1041                 seg_num = header->num_buffers;
1042
1043                 if (seg_num == 0)
1044                         seg_num = 1;
1045
1046                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1047                 rxm->nb_segs = seg_num;
1048                 rxm->ol_flags = 0;
1049                 rxm->vlan_tci = 0;
1050                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1051                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1052
1053                 rxm->port = rxvq->port_id;
1054
1055                 rx_pkts[nb_rx] = rxm;
1056                 prev = rxm;
1057
1058                 if (vq->hw->has_rx_offload &&
1059                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1060                         virtio_discard_rxbuf_inorder(vq, rxm);
1061                         rxvq->stats.errors++;
1062                         continue;
1063                 }
1064
1065                 if (hw->vlan_strip)
1066                         rte_vlan_strip(rx_pkts[nb_rx]);
1067
1068                 seg_res = seg_num - 1;
1069
1070                 /* Merge remaining segments */
1071                 while (seg_res != 0 && i < (num - 1)) {
1072                         i++;
1073
1074                         rxm = rcv_pkts[i];
1075                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1076                         rxm->pkt_len = (uint32_t)(len[i]);
1077                         rxm->data_len = (uint16_t)(len[i]);
1078
1079                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1080                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1081
1082                         if (prev)
1083                                 prev->next = rxm;
1084
1085                         prev = rxm;
1086                         seg_res -= 1;
1087                 }
1088
1089                 if (!seg_res) {
1090                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1091                         nb_rx++;
1092                 }
1093         }
1094
1095         /* The last packet may still need its remaining segments merged */
1096         while (seg_res != 0) {
1097                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1098                                         VIRTIO_MBUF_BURST_SZ);
1099
1100                 prev = rcv_pkts[nb_rx];
1101                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1102                         virtio_rmb();
1103                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1104                                                            rcv_cnt);
1105                         uint16_t extra_idx = 0;
1106
1107                         rcv_cnt = num;
1108                         while (extra_idx < rcv_cnt) {
1109                                 rxm = rcv_pkts[extra_idx];
1110                                 rxm->data_off =
1111                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1112                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1113                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1114                                 prev->next = rxm;
1115                                 prev = rxm;
1116                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1117                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1118                                 extra_idx += 1;
1119                         }
1120                         seg_res -= rcv_cnt;
1121
1122                         if (!seg_res) {
1123                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1124                                 nb_rx++;
1125                         }
1126                 } else {
1127                         PMD_RX_LOG(ERR,
1128                                         "Not enough segments for packet.");
1129                         virtio_discard_rxbuf_inorder(vq, prev);
1130                         rxvq->stats.errors++;
1131                         break;
1132                 }
1133         }
1134
1135         rxvq->stats.packets += nb_rx;
1136
1137         /* Allocate new mbuf for the used descriptor */
1138
1139         if (likely(!virtqueue_full(vq))) {
1140                 /* free_cnt may include mrg descs */
1141                 uint16_t free_cnt = vq->vq_free_cnt;
1142                 struct rte_mbuf *new_pkts[free_cnt];
1143
1144                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1145                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1146                                         free_cnt);
1147                         if (unlikely(error)) {
1148                                 for (i = 0; i < free_cnt; i++)
1149                                         rte_pktmbuf_free(new_pkts[i]);
1150                         }
1151                         nb_enqueued += free_cnt;
1152                 } else {
1153                         struct rte_eth_dev *dev =
1154                                 &rte_eth_devices[rxvq->port_id];
1155                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1156                 }
1157         }
1158
1159         if (likely(nb_enqueued)) {
1160                 vq_update_avail_idx(vq);
1161
1162                 if (unlikely(virtqueue_kick_prepare(vq))) {
1163                         virtqueue_notify(vq);
1164                         PMD_RX_LOG(DEBUG, "Notified");
1165                 }
1166         }
1167
1168         return nb_rx;
1169 }
1170
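/*
 * Receive burst for mergeable Rx buffers: extra segments of a packet are
 * dequeued from additional used entries and chained onto the first mbuf.
 */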
1171 uint16_t
1172 virtio_recv_mergeable_pkts(void *rx_queue,
1173                         struct rte_mbuf **rx_pkts,
1174                         uint16_t nb_pkts)
1175 {
1176         struct virtnet_rx *rxvq = rx_queue;
1177         struct virtqueue *vq = rxvq->vq;
1178         struct virtio_hw *hw = vq->hw;
1179         struct rte_mbuf *rxm, *new_mbuf;
1180         uint16_t nb_used, num, nb_rx;
1181         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1182         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1183         struct rte_mbuf *prev;
1184         int error;
1185         uint32_t i, nb_enqueued;
1186         uint32_t seg_num;
1187         uint16_t extra_idx;
1188         uint32_t seg_res;
1189         uint32_t hdr_size;
1190
1191         nb_rx = 0;
1192         if (unlikely(hw->started == 0))
1193                 return nb_rx;
1194
1195         nb_used = VIRTQUEUE_NUSED(vq);
1196
1197         virtio_rmb();
1198
1199         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1200
1201         i = 0;
1202         nb_enqueued = 0;
1203         seg_num = 0;
1204         extra_idx = 0;
1205         seg_res = 0;
1206         hdr_size = hw->vtnet_hdr_size;
1207
1208         while (i < nb_used) {
1209                 struct virtio_net_hdr_mrg_rxbuf *header;
1210
1211                 if (nb_rx == nb_pkts)
1212                         break;
1213
1214                 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
1215                 if (num != 1)
1216                         continue;
1217
1218                 i++;
1219
1220                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1221                 PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
1222
1223                 rxm = rcv_pkts[0];
1224
1225                 if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
1226                         PMD_RX_LOG(ERR, "Packet drop");
1227                         nb_enqueued++;
1228                         virtio_discard_rxbuf(vq, rxm);
1229                         rxvq->stats.errors++;
1230                         continue;
1231                 }
1232
1233                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
1234                         RTE_PKTMBUF_HEADROOM - hdr_size);
1235                 seg_num = header->num_buffers;
1236
1237                 if (seg_num == 0)
1238                         seg_num = 1;
1239
1240                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1241                 rxm->nb_segs = seg_num;
1242                 rxm->ol_flags = 0;
1243                 rxm->vlan_tci = 0;
1244                 rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
1245                 rxm->data_len = (uint16_t)(len[0] - hdr_size);
1246
1247                 rxm->port = rxvq->port_id;
1248                 rx_pkts[nb_rx] = rxm;
1249                 prev = rxm;
1250
1251                 if (hw->has_rx_offload &&
1252                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1253                         virtio_discard_rxbuf(vq, rxm);
1254                         rxvq->stats.errors++;
1255                         continue;
1256                 }
1257
1258                 seg_res = seg_num - 1;
1259
1260                 while (seg_res != 0) {
1261                         /*
1262                          * Get extra segments for the current incomplete packet.
1263                          */
1264                         uint16_t  rcv_cnt =
1265                                 RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
1266                         if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1267                                 virtio_rmb();
1268                                 uint32_t rx_num =
1269                                         virtqueue_dequeue_burst_rx(vq,
1270                                         rcv_pkts, len, rcv_cnt);
1271                                 i += rx_num;
1272                                 rcv_cnt = rx_num;
1273                         } else {
1274                                 PMD_RX_LOG(ERR,
1275                                            "Not enough segments for packet.");
1276                                 nb_enqueued++;
1277                                 virtio_discard_rxbuf(vq, rxm);
1278                                 rxvq->stats.errors++;
1279                                 break;
1280                         }
1281
1282                         extra_idx = 0;
1283
1284                         while (extra_idx < rcv_cnt) {
1285                                 rxm = rcv_pkts[extra_idx];
1286
1287                                 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1288                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1289                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1290
1291                                 if (prev)
1292                                         prev->next = rxm;
1293
1294                                 prev = rxm;
1295                                 rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
1296                                 extra_idx++;
1297                         }
1298                         seg_res -= rcv_cnt;
1299                 }
1300
1301                 if (hw->vlan_strip)
1302                         rte_vlan_strip(rx_pkts[nb_rx]);
1303
1304                 VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
1305                         rx_pkts[nb_rx]->data_len);
1306
1307                 rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
1308                 virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
1309                 nb_rx++;
1310         }
1311
1312         rxvq->stats.packets += nb_rx;
1313
1314         /* Allocate new mbuf for the used descriptor */
1315         while (likely(!virtqueue_full(vq))) {
1316                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1317                 if (unlikely(new_mbuf == NULL)) {
1318                         struct rte_eth_dev *dev
1319                                 = &rte_eth_devices[rxvq->port_id];
1320                         dev->data->rx_mbuf_alloc_failed++;
1321                         break;
1322                 }
1323                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1324                 if (unlikely(error)) {
1325                         rte_pktmbuf_free(new_mbuf);
1326                         break;
1327                 }
1328                 nb_enqueued++;
1329         }
1330
1331         if (likely(nb_enqueued)) {
1332                 vq_update_avail_idx(vq);
1333
1334                 if (unlikely(virtqueue_kick_prepare(vq))) {
1335                         virtqueue_notify(vq);
1336                         PMD_RX_LOG(DEBUG, "Notified");
1337                 }
1338         }
1339
1340         return nb_rx;
1341 }
1342
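/*
 * Default transmit burst: packets may use chained or indirect descriptors,
 * and completed descriptors are reclaimed with virtio_xmit_cleanup().
 */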
1343 uint16_t
1344 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1345 {
1346         struct virtnet_tx *txvq = tx_queue;
1347         struct virtqueue *vq = txvq->vq;
1348         struct virtio_hw *hw = vq->hw;
1349         uint16_t hdr_size = hw->vtnet_hdr_size;
1350         uint16_t nb_used, nb_tx = 0;
1351         int error;
1352
1353         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1354                 return nb_tx;
1355
1356         if (unlikely(nb_pkts < 1))
1357                 return nb_pkts;
1358
1359         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1360         nb_used = VIRTQUEUE_NUSED(vq);
1361
1362         virtio_rmb();
1363         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1364                 virtio_xmit_cleanup(vq, nb_used);
1365
1366         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1367                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1368                 int can_push = 0, use_indirect = 0, slots, need;
1369
1370                 /* Do VLAN tag insertion */
1371                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1372                         error = rte_vlan_insert(&txm);
1373                         if (unlikely(error)) {
1374                                 rte_pktmbuf_free(txm);
1375                                 continue;
1376                         }
1377                         /* vlan_insert may add a header mbuf */
1378                         tx_pkts[nb_tx] = txm;
1379                 }
1380
1381                 /* optimize ring usage */
1382                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1383                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1384                     rte_mbuf_refcnt_read(txm) == 1 &&
1385                     RTE_MBUF_DIRECT(txm) &&
1386                     txm->nb_segs == 1 &&
1387                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1388                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1389                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1390                         can_push = 1;
1391                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
1392                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
1393                         use_indirect = 1;
1394
1395                 /* How many main ring entries are needed for this Tx?
1396                  * any_layout => number of segments
1397                  * indirect   => 1
1398                  * default    => number of segments + 1
1399                  */
1400                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
1401                 need = slots - vq->vq_free_cnt;
1402
1403                 /* A positive value indicates we need to free vring descriptors */
1404                 if (unlikely(need > 0)) {
1405                         nb_used = VIRTQUEUE_NUSED(vq);
1406                         virtio_rmb();
1407                         need = RTE_MIN(need, (int)nb_used);
1408
1409                         virtio_xmit_cleanup(vq, need);
1410                         need = slots - vq->vq_free_cnt;
1411                         if (unlikely(need > 0)) {
1412                                 PMD_TX_LOG(ERR,
1413                                            "No free tx descriptors to transmit");
1414                                 break;
1415                         }
1416                 }
1417
1418                 /* Enqueue Packet buffers */
1419                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
1420                         can_push, 0);
1421
1422                 txvq->stats.bytes += txm->pkt_len;
1423                 virtio_update_packet_stats(&txvq->stats, txm);
1424         }
1425
1426         txvq->stats.packets += nb_tx;
1427
1428         if (likely(nb_tx)) {
1429                 vq_update_avail_idx(vq);
1430
1431                 if (unlikely(virtqueue_kick_prepare(vq))) {
1432                         virtqueue_notify(vq);
1433                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1434                 }
1435         }
1436
1437         return nb_tx;
1438 }
1439
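/*
 * Transmit burst for the in-order path: packets that can carry the header in
 * their headroom are batched through virtqueue_enqueue_xmit_inorder(); the
 * rest go through the generic enqueue.
 */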
1440 uint16_t
1441 virtio_xmit_pkts_inorder(void *tx_queue,
1442                         struct rte_mbuf **tx_pkts,
1443                         uint16_t nb_pkts)
1444 {
1445         struct virtnet_tx *txvq = tx_queue;
1446         struct virtqueue *vq = txvq->vq;
1447         struct virtio_hw *hw = vq->hw;
1448         uint16_t hdr_size = hw->vtnet_hdr_size;
1449         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
1450         struct rte_mbuf *inorder_pkts[nb_pkts];
1451         int error;
1452
1453         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1454                 return nb_tx;
1455
1456         if (unlikely(nb_pkts < 1))
1457                 return nb_pkts;
1458
1459         VIRTQUEUE_DUMP(vq);
1460         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1461         nb_used = VIRTQUEUE_NUSED(vq);
1462
1463         virtio_rmb();
1464         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1465                 virtio_xmit_cleanup_inorder(vq, nb_used);
1466
1467         if (unlikely(!vq->vq_free_cnt))
1468                 virtio_xmit_cleanup_inorder(vq, nb_used);
1469
1470         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
1471
1472         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
1473                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1474                 int slots, need;
1475
1476                 /* Do VLAN tag insertion */
1477                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1478                         error = rte_vlan_insert(&txm);
1479                         if (unlikely(error)) {
1480                                 rte_pktmbuf_free(txm);
1481                                 continue;
1482                         }
1483                         /* vlan_insert may add a header mbuf */
1484                         tx_pkts[nb_tx] = txm;
1485                 }
1486
1487                 /* optimize ring usage */
1488                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1489                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1490                      rte_mbuf_refcnt_read(txm) == 1 &&
1491                      RTE_MBUF_DIRECT(txm) &&
1492                      txm->nb_segs == 1 &&
1493                      rte_pktmbuf_headroom(txm) >= hdr_size &&
1494                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1495                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
1496                         inorder_pkts[nb_inorder_pkts] = txm;
1497                         nb_inorder_pkts++;
1498
1499                         txvq->stats.bytes += txm->pkt_len;
1500                         virtio_update_packet_stats(&txvq->stats, txm);
1501                         continue;
1502                 }
1503
1504                 if (nb_inorder_pkts) {
1505                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1506                                                         nb_inorder_pkts);
1507                         nb_inorder_pkts = 0;
1508                 }
1509
1510                 slots = txm->nb_segs + 1;
1511                 need = slots - vq->vq_free_cnt;
1512                 if (unlikely(need > 0)) {
1513                         nb_used = VIRTQUEUE_NUSED(vq);
1514                         virtio_rmb();
1515                         need = RTE_MIN(need, (int)nb_used);
1516
1517                         virtio_xmit_cleanup_inorder(vq, need);
1518
1519                         need = slots - vq->vq_free_cnt;
1520
1521                         if (unlikely(need > 0)) {
1522                                 PMD_TX_LOG(ERR,
1523                                         "No free tx descriptors to transmit");
1524                                 break;
1525                         }
1526                 }
1527                 /* Enqueue Packet buffers */
1528                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
1529
1530                 txvq->stats.bytes += txm->pkt_len;
1531                 virtio_update_packet_stats(&txvq->stats, txm);
1532         }
1533
1534         /* Transmit all inorder packets */
1535         if (nb_inorder_pkts)
1536                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1537                                                 nb_inorder_pkts);
1538
1539         txvq->stats.packets += nb_tx;
1540
1541         if (likely(nb_tx)) {
1542                 vq_update_avail_idx(vq);
1543
1544                 if (unlikely(virtqueue_kick_prepare(vq))) {
1545                         virtqueue_notify(vq);
1546                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1547                 }
1548         }
1549
1550         VIRTQUEUE_DUMP(vq);
1551
1552         return nb_tx;
1553 }