New upstream version 17.11-rc3
[deb_dpdk.git] / drivers / net / avp / avp_ethdev.c
1 /*
2  *   BSD LICENSE
3  *
4  * Copyright (c) 2013-2017, Wind River Systems, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above copyright notice,
10  * this list of conditions and the following disclaimer.
11  *
12  * 2) Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * 3) Neither the name of Wind River Systems nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <stdint.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <unistd.h>
38
39 #include <rte_ethdev.h>
40 #include <rte_ethdev_pci.h>
41 #include <rte_memcpy.h>
42 #include <rte_string_fns.h>
43 #include <rte_malloc.h>
44 #include <rte_atomic.h>
45 #include <rte_branch_prediction.h>
46 #include <rte_pci.h>
47 #include <rte_bus_pci.h>
48 #include <rte_ether.h>
49 #include <rte_common.h>
50 #include <rte_cycles.h>
51 #include <rte_spinlock.h>
52 #include <rte_byteorder.h>
53 #include <rte_dev.h>
54 #include <rte_memory.h>
55 #include <rte_eal.h>
56 #include <rte_io.h>
57
58 #include "rte_avp_common.h"
59 #include "rte_avp_fifo.h"
60
61 #include "avp_logs.h"
62
63
/*
 * Forward declarations.
 *
 * avp_dev_create() is needed early because the attach path re-runs device
 * creation; the remaining prototypes are referenced by the dev_ops table
 * below or installed as burst callbacks.
 */
static int avp_dev_create(struct rte_pci_device *pci_dev,
                          struct rte_eth_dev *eth_dev);

/* device level operations */
static int avp_dev_configure(struct rte_eth_dev *dev);
static int avp_dev_start(struct rte_eth_dev *dev);
static void avp_dev_stop(struct rte_eth_dev *dev);
static void avp_dev_close(struct rte_eth_dev *dev);
static void avp_dev_info_get(struct rte_eth_dev *dev,
                             struct rte_eth_dev_info *dev_info);
static int avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);

/* queue setup */
static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
                                  uint16_t rx_queue_id,
                                  uint16_t nb_rx_desc,
                                  unsigned int socket_id,
                                  const struct rte_eth_rxconf *rx_conf,
                                  struct rte_mempool *pool);

static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
                                  uint16_t tx_queue_id,
                                  uint16_t nb_tx_desc,
                                  unsigned int socket_id,
                                  const struct rte_eth_txconf *tx_conf);

/* receive and transmit burst functions */
static uint16_t avp_recv_scattered_pkts(void *rx_queue,
                                        struct rte_mbuf **rx_pkts,
                                        uint16_t nb_pkts);

static uint16_t avp_recv_pkts(void *rx_queue,
                              struct rte_mbuf **rx_pkts,
                              uint16_t nb_pkts);

static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
                                        struct rte_mbuf **tx_pkts,
                                        uint16_t nb_pkts);

static uint16_t avp_xmit_pkts(void *tx_queue,
                              struct rte_mbuf **tx_pkts,
                              uint16_t nb_pkts);

/* queue teardown */
static void avp_dev_rx_queue_release(void *rxq);
static void avp_dev_tx_queue_release(void *txq);

/* statistics */
static int avp_dev_stats_get(struct rte_eth_dev *dev,
                              struct rte_eth_stats *stats);
static void avp_dev_stats_reset(struct rte_eth_dev *dev);
113
114
/*
 * Driver limits.
 * NOTE(review): the consumers of these four values are outside this part of
 * the file; confirm their exact use before changing them.
 */
#define AVP_MAX_RX_BURST 64
#define AVP_MAX_TX_BURST 64
#define AVP_MAX_MAC_ADDRS 1
#define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN


/*
 * Defines the number of microseconds to wait before checking the response
 * queue for completion.
 */
#define AVP_REQUEST_DELAY_USECS (5000)

/*
 * Defines the number of times to check the response queue for completion
 * before declaring a timeout.  Together with AVP_REQUEST_DELAY_USECS this
 * bounds a single request to roughly 100 * 5000us = 500ms of waiting.
 */
#define AVP_MAX_REQUEST_RETRY (100)

/* Defines the current PCI driver version number */
#define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
135
136 /*
137  * The set of PCI devices this driver supports
138  */
139 static const struct rte_pci_id pci_id_avp_map[] = {
140         { .vendor_id = RTE_AVP_PCI_VENDOR_ID,
141           .device_id = RTE_AVP_PCI_DEVICE_ID,
142           .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
143           .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
144           .class_id = RTE_CLASS_ANY_ID,
145         },
146
147         { .vendor_id = 0, /* sentinel */
148         },
149 };
150
151 /*
152  * dev_ops for avp, bare necessities for basic operation
153  */
154 static const struct eth_dev_ops avp_eth_dev_ops = {
155         .dev_configure       = avp_dev_configure,
156         .dev_start           = avp_dev_start,
157         .dev_stop            = avp_dev_stop,
158         .dev_close           = avp_dev_close,
159         .dev_infos_get       = avp_dev_info_get,
160         .vlan_offload_set    = avp_vlan_offload_set,
161         .stats_get           = avp_dev_stats_get,
162         .stats_reset         = avp_dev_stats_reset,
163         .link_update         = avp_dev_link_update,
164         .promiscuous_enable  = avp_dev_promiscuous_enable,
165         .promiscuous_disable = avp_dev_promiscuous_disable,
166         .rx_queue_setup      = avp_dev_rx_queue_setup,
167         .rx_queue_release    = avp_dev_rx_queue_release,
168         .tx_queue_setup      = avp_dev_tx_queue_setup,
169         .tx_queue_release    = avp_dev_tx_queue_release,
170 };
171
/**@{ AVP device flags (bit values stored in avp_dev.flags) */
#define AVP_F_PROMISC (1 << 1)
#define AVP_F_CONFIGURED (1 << 2)
#define AVP_F_LINKUP (1 << 3)
#define AVP_F_DETACHED (1 << 4)
/**@} */

/* Ethernet device validation marker */
#define AVP_ETHDEV_MAGIC 0x92972862
181
/*
 * Defines the AVP device attributes which are attached to an RTE ethernet
 * device.  The structure is embedded in struct avp_adapter, which is used as
 * the ethernet device private data.
 */
struct avp_dev {
        uint32_t magic; /**< Memory validation marker */
        uint64_t device_id; /**< Unique system identifier */
        struct ether_addr ethaddr; /**< Host specified MAC address */
        struct rte_eth_dev_data *dev_data;
        /**< Back pointer to ethernet device data */
        volatile uint32_t flags; /**< Device operational flags (AVP_F_*) */
        uint16_t port_id; /**< Ethernet port identifier */
        struct rte_mempool *pool; /**< pkt mbuf mempool */
        unsigned int guest_mbuf_size; /**< local pool mbuf size */
        unsigned int host_mbuf_size; /**< host mbuf size */
        unsigned int max_rx_pkt_len; /**< maximum receive unit */
        uint32_t host_features; /**< Supported feature bitmap */
        uint32_t features; /**< Enabled feature bitmap */
        unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
        unsigned int max_tx_queues; /**< Maximum number of transmit queues */
        unsigned int num_rx_queues; /**< Negotiated number of receive queues */
        unsigned int max_rx_queues; /**< Maximum number of receive queues */

        struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
        struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
        struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
        /**< Allocated mbufs queue */
        struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
        /**< To be freed mbufs queue */

        /* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */
        rte_spinlock_t lock;

        /* For request & response */
        struct rte_avp_fifo *req_q; /**< Request queue */
        struct rte_avp_fifo *resp_q; /**< Response queue */
        void *host_sync_addr; /**< (host) Req/Resp Mem address */
        void *sync_addr; /**< Req/Resp Mem address */
        void *host_mbuf_addr; /**< (host) MBUF pool start address */
        void *mbuf_addr; /**< MBUF pool start address */
} __rte_cache_aligned;
223
/*
 * RTE ethernet private data.  The AVP device state is the only member and is
 * reached through AVP_DEV_PRIVATE_TO_HW().
 */
struct avp_adapter {
        struct avp_dev avp;
} __rte_cache_aligned;
228
229
/* 32-bit MMIO register write (relaxed variant of rte_write32) */
#define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))

/* 32-bit MMIO register read (relaxed variant of rte_read32) */
#define AVP_READ32(_addr) rte_read32_relaxed((_addr))

/*
 * Macro to cast the ethernet device private data to an AVP object; yields a
 * pointer to the avp_dev embedded in the avp_adapter.
 */
#define AVP_DEV_PRIVATE_TO_HW(adapter) \
        (&((struct avp_adapter *)adapter)->avp)
239
/*
 * Defines the structure of an AVP device queue for the purpose of handling
 * the receive and transmit burst callback functions.  A device queue may
 * service a range of AVP FIFOs, described by queue_base..queue_limit.
 */
struct avp_queue {
        struct rte_eth_dev_data *dev_data;
        /**< Backpointer to ethernet device data */
        struct avp_dev *avp; /**< Backpointer to AVP device */
        uint16_t queue_id;
        /**< Queue identifier used for indexing current queue */
        uint16_t queue_base;
        /**< Base queue identifier for queue servicing */
        uint16_t queue_limit;
        /**< Maximum queue identifier for queue servicing */

        /* per-queue counters */
        uint64_t packets;
        uint64_t bytes;
        uint64_t errors;
};
259
260 /* send a request and wait for a response
261  *
262  * @warning must be called while holding the avp->lock spinlock.
263  */
264 static int
265 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
266 {
267         unsigned int retry = AVP_MAX_REQUEST_RETRY;
268         void *resp_addr = NULL;
269         unsigned int count;
270         int ret;
271
272         PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
273
274         request->result = -ENOTSUP;
275
276         /* Discard any stale responses before starting a new request */
277         while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
278                 PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
279
280         rte_memcpy(avp->sync_addr, request, sizeof(*request));
281         count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
282         if (count < 1) {
283                 PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
284                             request->req_id);
285                 ret = -EBUSY;
286                 goto done;
287         }
288
289         while (retry--) {
290                 /* wait for a response */
291                 usleep(AVP_REQUEST_DELAY_USECS);
292
293                 count = avp_fifo_count(avp->resp_q);
294                 if (count >= 1) {
295                         /* response received */
296                         break;
297                 }
298
299                 if ((count < 1) && (retry == 0)) {
300                         PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
301                                     request->req_id);
302                         ret = -ETIME;
303                         goto done;
304                 }
305         }
306
307         /* retrieve the response */
308         count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
309         if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
310                 PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
311                             count, resp_addr, avp->host_sync_addr);
312                 ret = -ENODATA;
313                 goto done;
314         }
315
316         /* copy to user buffer */
317         rte_memcpy(request, avp->sync_addr, sizeof(*request));
318         ret = 0;
319
320         PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
321                     request->result, request->req_id);
322
323 done:
324         return ret;
325 }
326
327 static int
328 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
329 {
330         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
331         struct rte_avp_request request;
332         int ret;
333
334         /* setup a link state change request */
335         memset(&request, 0, sizeof(request));
336         request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
337         request.if_up = state;
338
339         ret = avp_dev_process_request(avp, &request);
340
341         return ret == 0 ? request.result : ret;
342 }
343
344 static int
345 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
346                         struct rte_avp_device_config *config)
347 {
348         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
349         struct rte_avp_request request;
350         int ret;
351
352         /* setup a configure request */
353         memset(&request, 0, sizeof(request));
354         request.req_id = RTE_AVP_REQ_CFG_DEVICE;
355         memcpy(&request.config, config, sizeof(request.config));
356
357         ret = avp_dev_process_request(avp, &request);
358
359         return ret == 0 ? request.result : ret;
360 }
361
362 static int
363 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
364 {
365         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
366         struct rte_avp_request request;
367         int ret;
368
369         /* setup a shutdown request */
370         memset(&request, 0, sizeof(request));
371         request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
372
373         ret = avp_dev_process_request(avp, &request);
374
375         return ret == 0 ? request.result : ret;
376 }
377
378 /* translate from host mbuf virtual address to guest virtual address */
379 static inline void *
380 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
381 {
382         return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
383                                        (uintptr_t)avp->host_mbuf_addr),
384                            (uintptr_t)avp->mbuf_addr);
385 }
386
387 /* translate from host physical address to guest virtual address */
388 static void *
389 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
390                           rte_iova_t host_phys_addr)
391 {
392         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
393         struct rte_mem_resource *resource;
394         struct rte_avp_memmap_info *info;
395         struct rte_avp_memmap *map;
396         off_t offset;
397         void *addr;
398         unsigned int i;
399
400         addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
401         resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
402         info = (struct rte_avp_memmap_info *)resource->addr;
403
404         offset = 0;
405         for (i = 0; i < info->nb_maps; i++) {
406                 /* search all segments looking for a matching address */
407                 map = &info->maps[i];
408
409                 if ((host_phys_addr >= map->phys_addr) &&
410                         (host_phys_addr < (map->phys_addr + map->length))) {
411                         /* address is within this segment */
412                         offset += (host_phys_addr - map->phys_addr);
413                         addr = RTE_PTR_ADD(addr, offset);
414
415                         PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
416                                     host_phys_addr, addr);
417
418                         return addr;
419                 }
420                 offset += map->length;
421         }
422
423         return NULL;
424 }
425
426 /* verify that the incoming device version is compatible with our version */
427 static int
428 avp_dev_version_check(uint32_t version)
429 {
430         uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
431         uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
432
433         if (device <= driver) {
434                 /* the host driver version is less than or equal to ours */
435                 return 0;
436         }
437
438         return 1;
439 }
440
441 /* verify that memory regions have expected version and validation markers */
442 static int
443 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
444 {
445         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
446         struct rte_avp_memmap_info *memmap;
447         struct rte_avp_device_info *info;
448         struct rte_mem_resource *resource;
449         unsigned int i;
450
451         /* Dump resource info for debug */
452         for (i = 0; i < PCI_MAX_RESOURCE; i++) {
453                 resource = &pci_dev->mem_resource[i];
454                 if ((resource->phys_addr == 0) || (resource->len == 0))
455                         continue;
456
457                 PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
458                             i, resource->phys_addr,
459                             resource->len, resource->addr);
460
461                 switch (i) {
462                 case RTE_AVP_PCI_MEMMAP_BAR:
463                         memmap = (struct rte_avp_memmap_info *)resource->addr;
464                         if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
465                             (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
466                                 PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
467                                             memmap->magic, memmap->version);
468                                 return -EINVAL;
469                         }
470                         break;
471
472                 case RTE_AVP_PCI_DEVICE_BAR:
473                         info = (struct rte_avp_device_info *)resource->addr;
474                         if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
475                             avp_dev_version_check(info->version)) {
476                                 PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
477                                             info->magic, info->version,
478                                             AVP_DPDK_DRIVER_VERSION);
479                                 return -EINVAL;
480                         }
481                         break;
482
483                 case RTE_AVP_PCI_MEMORY_BAR:
484                 case RTE_AVP_PCI_MMIO_BAR:
485                         if (resource->addr == NULL) {
486                                 PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
487                                             i);
488                                 return -EINVAL;
489                         }
490                         break;
491
492                 case RTE_AVP_PCI_MSIX_BAR:
493                 default:
494                         /* no validation required */
495                         break;
496                 }
497         }
498
499         return 0;
500 }
501
502 static int
503 avp_dev_detach(struct rte_eth_dev *eth_dev)
504 {
505         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
506         int ret;
507
508         PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
509                     eth_dev->data->port_id, avp->device_id);
510
511         rte_spinlock_lock(&avp->lock);
512
513         if (avp->flags & AVP_F_DETACHED) {
514                 PMD_DRV_LOG(NOTICE, "port %u already detached\n",
515                             eth_dev->data->port_id);
516                 ret = 0;
517                 goto unlock;
518         }
519
520         /* shutdown the device first so the host stops sending us packets. */
521         ret = avp_dev_ctrl_shutdown(eth_dev);
522         if (ret < 0) {
523                 PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
524                             ret);
525                 avp->flags &= ~AVP_F_DETACHED;
526                 goto unlock;
527         }
528
529         avp->flags |= AVP_F_DETACHED;
530         rte_wmb();
531
532         /* wait for queues to acknowledge the presence of the detach flag */
533         rte_delay_ms(1);
534
535         ret = 0;
536
537 unlock:
538         rte_spinlock_unlock(&avp->lock);
539         return ret;
540 }
541
542 static void
543 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
544 {
545         struct avp_dev *avp =
546                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
547         struct avp_queue *rxq;
548         uint16_t queue_count;
549         uint16_t remainder;
550
551         rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
552
553         /*
554          * Must map all AVP fifos as evenly as possible between the configured
555          * device queues.  Each device queue will service a subset of the AVP
556          * fifos. If there is an odd number of device queues the first set of
557          * device queues will get the extra AVP fifos.
558          */
559         queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
560         remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
561         if (rx_queue_id < remainder) {
562                 /* these queues must service one extra FIFO */
563                 rxq->queue_base = rx_queue_id * (queue_count + 1);
564                 rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
565         } else {
566                 /* these queues service the regular number of FIFO */
567                 rxq->queue_base = ((remainder * (queue_count + 1)) +
568                                    ((rx_queue_id - remainder) * queue_count));
569                 rxq->queue_limit = rxq->queue_base + queue_count - 1;
570         }
571
572         PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
573                     rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
574
575         rxq->queue_id = rxq->queue_base;
576 }
577
578 static void
579 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
580 {
581         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
582         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
583         struct rte_avp_device_info *host_info;
584         void *addr;
585
586         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
587         host_info = (struct rte_avp_device_info *)addr;
588
589         /*
590          * the transmit direction is not negotiated beyond respecting the max
591          * number of queues because the host can handle arbitrary guest tx
592          * queues (host rx queues).
593          */
594         avp->num_tx_queues = eth_dev->data->nb_tx_queues;
595
596         /*
597          * the receive direction is more restrictive.  The host requires a
598          * minimum number of guest rx queues (host tx queues) therefore
599          * negotiate a value that is at least as large as the host minimum
600          * requirement.  If the host and guest values are not identical then a
601          * mapping will be established in the receive_queue_setup function.
602          */
603         avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
604                                      eth_dev->data->nb_rx_queues);
605
606         PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
607                     avp->num_tx_queues, avp->num_rx_queues);
608 }
609
610 static int
611 avp_dev_attach(struct rte_eth_dev *eth_dev)
612 {
613         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
614         struct rte_avp_device_config config;
615         unsigned int i;
616         int ret;
617
618         PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
619                     eth_dev->data->port_id, avp->device_id);
620
621         rte_spinlock_lock(&avp->lock);
622
623         if (!(avp->flags & AVP_F_DETACHED)) {
624                 PMD_DRV_LOG(NOTICE, "port %u already attached\n",
625                             eth_dev->data->port_id);
626                 ret = 0;
627                 goto unlock;
628         }
629
630         /*
631          * make sure that the detached flag is set prior to reconfiguring the
632          * queues.
633          */
634         avp->flags |= AVP_F_DETACHED;
635         rte_wmb();
636
637         /*
638          * re-run the device create utility which will parse the new host info
639          * and setup the AVP device queue pointers.
640          */
641         ret = avp_dev_create(RTE_ETH_DEV_TO_PCI(eth_dev), eth_dev);
642         if (ret < 0) {
643                 PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
644                             ret);
645                 goto unlock;
646         }
647
648         if (avp->flags & AVP_F_CONFIGURED) {
649                 /*
650                  * Update the receive queue mapping to handle cases where the
651                  * source and destination hosts have different queue
652                  * requirements.  As long as the DETACHED flag is asserted the
653                  * queue table should not be referenced so it should be safe to
654                  * update it.
655                  */
656                 _avp_set_queue_counts(eth_dev);
657                 for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
658                         _avp_set_rx_queue_mappings(eth_dev, i);
659
660                 /*
661                  * Update the host with our config details so that it knows the
662                  * device is active.
663                  */
664                 memset(&config, 0, sizeof(config));
665                 config.device_id = avp->device_id;
666                 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
667                 config.driver_version = AVP_DPDK_DRIVER_VERSION;
668                 config.features = avp->features;
669                 config.num_tx_queues = avp->num_tx_queues;
670                 config.num_rx_queues = avp->num_rx_queues;
671                 config.if_up = !!(avp->flags & AVP_F_LINKUP);
672
673                 ret = avp_dev_ctrl_set_config(eth_dev, &config);
674                 if (ret < 0) {
675                         PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
676                                     ret);
677                         goto unlock;
678                 }
679         }
680
681         rte_wmb();
682         avp->flags &= ~AVP_F_DETACHED;
683
684         ret = 0;
685
686 unlock:
687         rte_spinlock_unlock(&avp->lock);
688         return ret;
689 }
690
691 static void
692 avp_dev_interrupt_handler(void *data)
693 {
694         struct rte_eth_dev *eth_dev = data;
695         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
696         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
697         uint32_t status, value;
698         int ret;
699
700         if (registers == NULL)
701                 rte_panic("no mapped MMIO register space\n");
702
703         /* read the interrupt status register
704          * note: this register clears on read so all raised interrupts must be
705          *    handled or remembered for later processing
706          */
707         status = AVP_READ32(
708                 RTE_PTR_ADD(registers,
709                             RTE_AVP_INTERRUPT_STATUS_OFFSET));
710
711         if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
712                 /* handle interrupt based on current status */
713                 value = AVP_READ32(
714                         RTE_PTR_ADD(registers,
715                                     RTE_AVP_MIGRATION_STATUS_OFFSET));
716                 switch (value) {
717                 case RTE_AVP_MIGRATION_DETACHED:
718                         ret = avp_dev_detach(eth_dev);
719                         break;
720                 case RTE_AVP_MIGRATION_ATTACHED:
721                         ret = avp_dev_attach(eth_dev);
722                         break;
723                 default:
724                         PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
725                                     value);
726                         ret = -EINVAL;
727                 }
728
729                 /* acknowledge the request by writing out our current status */
730                 value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
731                 AVP_WRITE32(value,
732                             RTE_PTR_ADD(registers,
733                                         RTE_AVP_MIGRATION_ACK_OFFSET));
734
735                 PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
736         }
737
738         if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
739                 PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
740                             status);
741
742         /* re-enable UIO interrupt handling */
743         ret = rte_intr_enable(&pci_dev->intr_handle);
744         if (ret < 0) {
745                 PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
746                             ret);
747                 /* continue */
748         }
749 }
750
751 static int
752 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
753 {
754         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
755         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
756         int ret;
757
758         if (registers == NULL)
759                 return -EINVAL;
760
761         /* enable UIO interrupt handling */
762         ret = rte_intr_enable(&pci_dev->intr_handle);
763         if (ret < 0) {
764                 PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
765                             ret);
766                 return ret;
767         }
768
769         /* inform the device that all interrupts are enabled */
770         AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
771                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
772
773         return 0;
774 }
775
776 static int
777 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
778 {
779         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
780         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
781         int ret;
782
783         if (registers == NULL)
784                 return 0;
785
786         /* inform the device that all interrupts are disabled */
787         AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
788                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
789
790         /* enable UIO interrupt handling */
791         ret = rte_intr_disable(&pci_dev->intr_handle);
792         if (ret < 0) {
793                 PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
794                             ret);
795                 return ret;
796         }
797
798         return 0;
799 }
800
801 static int
802 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
803 {
804         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
805         int ret;
806
807         /* register a callback handler with UIO for interrupt notifications */
808         ret = rte_intr_callback_register(&pci_dev->intr_handle,
809                                          avp_dev_interrupt_handler,
810                                          (void *)eth_dev);
811         if (ret < 0) {
812                 PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
813                             ret);
814                 return ret;
815         }
816
817         /* enable interrupt processing */
818         return avp_dev_enable_interrupts(eth_dev);
819 }
820
821 static int
822 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
823 {
824         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
825         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
826         uint32_t value;
827
828         if (registers == NULL)
829                 return 0;
830
831         value = AVP_READ32(RTE_PTR_ADD(registers,
832                                        RTE_AVP_MIGRATION_STATUS_OFFSET));
833         if (value == RTE_AVP_MIGRATION_DETACHED) {
834                 /* migration is in progress; ack it if we have not already */
835                 AVP_WRITE32(value,
836                             RTE_PTR_ADD(registers,
837                                         RTE_AVP_MIGRATION_ACK_OFFSET));
838                 return 1;
839         }
840         return 0;
841 }
842
843 /*
844  * create a AVP device using the supplied device info by first translating it
845  * to guest address space(s).
846  */
847 static int
848 avp_dev_create(struct rte_pci_device *pci_dev,
849                struct rte_eth_dev *eth_dev)
850 {
851         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
852         struct rte_avp_device_info *host_info;
853         struct rte_mem_resource *resource;
854         unsigned int i;
855
856         resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
857         if (resource->addr == NULL) {
858                 PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
859                             RTE_AVP_PCI_DEVICE_BAR);
860                 return -EFAULT;
861         }
862         host_info = (struct rte_avp_device_info *)resource->addr;
863
864         if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
865                 avp_dev_version_check(host_info->version)) {
866                 PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
867                             host_info->magic, host_info->version,
868                             AVP_DPDK_DRIVER_VERSION);
869                 return -EINVAL;
870         }
871
872         PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
873                     RTE_AVP_GET_RELEASE_VERSION(host_info->version),
874                     RTE_AVP_GET_MAJOR_VERSION(host_info->version),
875                     RTE_AVP_GET_MINOR_VERSION(host_info->version));
876
877         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
878                     host_info->min_tx_queues, host_info->max_tx_queues);
879         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
880                     host_info->min_rx_queues, host_info->max_rx_queues);
881         PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
882                     host_info->features);
883
884         if (avp->magic != AVP_ETHDEV_MAGIC) {
885                 /*
886                  * First time initialization (i.e., not during a VM
887                  * migration)
888                  */
889                 memset(avp, 0, sizeof(*avp));
890                 avp->magic = AVP_ETHDEV_MAGIC;
891                 avp->dev_data = eth_dev->data;
892                 avp->port_id = eth_dev->data->port_id;
893                 avp->host_mbuf_size = host_info->mbuf_size;
894                 avp->host_features = host_info->features;
895                 rte_spinlock_init(&avp->lock);
896                 memcpy(&avp->ethaddr.addr_bytes[0],
897                        host_info->ethaddr, ETHER_ADDR_LEN);
898                 /* adjust max values to not exceed our max */
899                 avp->max_tx_queues =
900                         RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
901                 avp->max_rx_queues =
902                         RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
903         } else {
904                 /* Re-attaching during migration */
905
906                 /* TODO... requires validation of host values */
907                 if ((host_info->features & avp->features) != avp->features) {
908                         PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
909                                     avp->features, host_info->features);
910                         /* this should not be possible; continue for now */
911                 }
912         }
913
914         /* the device id is allowed to change over migrations */
915         avp->device_id = host_info->device_id;
916
917         /* translate incoming host addresses to guest address space */
918         PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
919                     host_info->tx_phys);
920         PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
921                     host_info->alloc_phys);
922         for (i = 0; i < avp->max_tx_queues; i++) {
923                 avp->tx_q[i] = avp_dev_translate_address(eth_dev,
924                         host_info->tx_phys + (i * host_info->tx_size));
925
926                 avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
927                         host_info->alloc_phys + (i * host_info->alloc_size));
928         }
929
930         PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
931                     host_info->rx_phys);
932         PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
933                     host_info->free_phys);
934         for (i = 0; i < avp->max_rx_queues; i++) {
935                 avp->rx_q[i] = avp_dev_translate_address(eth_dev,
936                         host_info->rx_phys + (i * host_info->rx_size));
937                 avp->free_q[i] = avp_dev_translate_address(eth_dev,
938                         host_info->free_phys + (i * host_info->free_size));
939         }
940
941         PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
942                     host_info->req_phys);
943         PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
944                     host_info->resp_phys);
945         PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
946                     host_info->sync_phys);
947         PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
948                     host_info->mbuf_phys);
949         avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
950         avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
951         avp->sync_addr =
952                 avp_dev_translate_address(eth_dev, host_info->sync_phys);
953         avp->mbuf_addr =
954                 avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
955
956         /*
957          * store the host mbuf virtual address so that we can calculate
958          * relative offsets for each mbuf as they are processed
959          */
960         avp->host_mbuf_addr = host_info->mbuf_va;
961         avp->host_sync_addr = host_info->sync_va;
962
963         /*
964          * store the maximum packet length that is supported by the host.
965          */
966         avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
967         PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
968                                 host_info->max_rx_pkt_len);
969
970         return 0;
971 }
972
973 /*
974  * This function is based on probe() function in avp_pci.c
975  * It returns 0 on success.
976  */
977 static int
978 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
979 {
980         struct avp_dev *avp =
981                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
982         struct rte_pci_device *pci_dev;
983         int ret;
984
985         pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
986         eth_dev->dev_ops = &avp_eth_dev_ops;
987         eth_dev->rx_pkt_burst = &avp_recv_pkts;
988         eth_dev->tx_pkt_burst = &avp_xmit_pkts;
989
990         if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
991                 /*
992                  * no setup required on secondary processes.  All data is saved
993                  * in dev_private by the primary process. All resource should
994                  * be mapped to the same virtual address so all pointers should
995                  * be valid.
996                  */
997                 if (eth_dev->data->scattered_rx) {
998                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
999                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1000                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1001                 }
1002                 return 0;
1003         }
1004
1005         rte_eth_copy_pci_info(eth_dev, pci_dev);
1006
1007         /* Check current migration status */
1008         if (avp_dev_migration_pending(eth_dev)) {
1009                 PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
1010                 return -EBUSY;
1011         }
1012
1013         /* Check BAR resources */
1014         ret = avp_dev_check_regions(eth_dev);
1015         if (ret < 0) {
1016                 PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
1017                             ret);
1018                 return ret;
1019         }
1020
1021         /* Enable interrupts */
1022         ret = avp_dev_setup_interrupts(eth_dev);
1023         if (ret < 0) {
1024                 PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1025                 return ret;
1026         }
1027
1028         /* Handle each subtype */
1029         ret = avp_dev_create(pci_dev, eth_dev);
1030         if (ret < 0) {
1031                 PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1032                 return ret;
1033         }
1034
1035         /* Allocate memory for storing MAC addresses */
1036         eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
1037         if (eth_dev->data->mac_addrs == NULL) {
1038                 PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1039                             ETHER_ADDR_LEN);
1040                 return -ENOMEM;
1041         }
1042
1043         /* Get a mac from device config */
1044         ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1045
1046         return 0;
1047 }
1048
1049 static int
1050 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1051 {
1052         int ret;
1053
1054         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1055                 return -EPERM;
1056
1057         if (eth_dev->data == NULL)
1058                 return 0;
1059
1060         ret = avp_dev_disable_interrupts(eth_dev);
1061         if (ret != 0) {
1062                 PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret);
1063                 return ret;
1064         }
1065
1066         if (eth_dev->data->mac_addrs != NULL) {
1067                 rte_free(eth_dev->data->mac_addrs);
1068                 eth_dev->data->mac_addrs = NULL;
1069         }
1070
1071         return 0;
1072 }
1073
1074 static int
1075 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1076                   struct rte_pci_device *pci_dev)
1077 {
1078         struct rte_eth_dev *eth_dev;
1079         int ret;
1080
1081         eth_dev = rte_eth_dev_pci_allocate(pci_dev,
1082                                            sizeof(struct avp_adapter));
1083         if (eth_dev == NULL)
1084                 return -ENOMEM;
1085
1086         ret = eth_avp_dev_init(eth_dev);
1087         if (ret)
1088                 rte_eth_dev_pci_release(eth_dev);
1089
1090         return ret;
1091 }
1092
1093 static int
1094 eth_avp_pci_remove(struct rte_pci_device *pci_dev)
1095 {
1096         return rte_eth_dev_pci_generic_remove(pci_dev,
1097                                               eth_avp_dev_uninit);
1098 }
1099
1100 static struct rte_pci_driver rte_avp_pmd = {
1101         .id_table = pci_id_avp_map,
1102         .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1103         .probe = eth_avp_pci_probe,
1104         .remove = eth_avp_pci_remove,
1105 };
1106
1107 static int
1108 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1109                          struct avp_dev *avp)
1110 {
1111         unsigned int max_rx_pkt_len;
1112
1113         max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1114
1115         if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1116             (max_rx_pkt_len > avp->host_mbuf_size)) {
1117                 /*
1118                  * If the guest MTU is greater than either the host or guest
1119                  * buffers then chained mbufs have to be enabled in the TX
1120                  * direction.  It is assumed that the application will not need
1121                  * to send packets larger than their max_rx_pkt_len (MRU).
1122                  */
1123                 return 1;
1124         }
1125
1126         if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1127             (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1128                 /*
1129                  * If the host MRU is greater than its own mbuf size or the
1130                  * guest mbuf size then chained mbufs have to be enabled in the
1131                  * RX direction.
1132                  */
1133                 return 1;
1134         }
1135
1136         return 0;
1137 }
1138
1139 static int
1140 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1141                        uint16_t rx_queue_id,
1142                        uint16_t nb_rx_desc,
1143                        unsigned int socket_id,
1144                        const struct rte_eth_rxconf *rx_conf,
1145                        struct rte_mempool *pool)
1146 {
1147         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1148         struct rte_pktmbuf_pool_private *mbp_priv;
1149         struct avp_queue *rxq;
1150
1151         if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1152                 PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1153                             rx_queue_id, eth_dev->data->nb_rx_queues);
1154                 return -EINVAL;
1155         }
1156
1157         /* Save mbuf pool pointer */
1158         avp->pool = pool;
1159
1160         /* Save the local mbuf size */
1161         mbp_priv = rte_mempool_get_priv(pool);
1162         avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1163         avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1164
1165         if (avp_dev_enable_scattered(eth_dev, avp)) {
1166                 if (!eth_dev->data->scattered_rx) {
1167                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1168                         eth_dev->data->scattered_rx = 1;
1169                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1170                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1171                 }
1172         }
1173
1174         PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1175                     avp->max_rx_pkt_len,
1176                     eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1177                     avp->host_mbuf_size,
1178                     avp->guest_mbuf_size);
1179
1180         /* allocate a queue object */
1181         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1182                                  RTE_CACHE_LINE_SIZE, socket_id);
1183         if (rxq == NULL) {
1184                 PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1185                 return -ENOMEM;
1186         }
1187
1188         /* save back pointers to AVP and Ethernet devices */
1189         rxq->avp = avp;
1190         rxq->dev_data = eth_dev->data;
1191         eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1192
1193         /* setup the queue receive mapping for the current queue. */
1194         _avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1195
1196         PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1197
1198         (void)nb_rx_desc;
1199         (void)rx_conf;
1200         return 0;
1201 }
1202
1203 static int
1204 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1205                        uint16_t tx_queue_id,
1206                        uint16_t nb_tx_desc,
1207                        unsigned int socket_id,
1208                        const struct rte_eth_txconf *tx_conf)
1209 {
1210         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1211         struct avp_queue *txq;
1212
1213         if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1214                 PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1215                             tx_queue_id, eth_dev->data->nb_tx_queues);
1216                 return -EINVAL;
1217         }
1218
1219         /* allocate a queue object */
1220         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1221                                  RTE_CACHE_LINE_SIZE, socket_id);
1222         if (txq == NULL) {
1223                 PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1224                 return -ENOMEM;
1225         }
1226
1227         /* only the configured set of transmit queues are used */
1228         txq->queue_id = tx_queue_id;
1229         txq->queue_base = tx_queue_id;
1230         txq->queue_limit = tx_queue_id;
1231
1232         /* save back pointers to AVP and Ethernet devices */
1233         txq->avp = avp;
1234         txq->dev_data = eth_dev->data;
1235         eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1236
1237         PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1238
1239         (void)nb_tx_desc;
1240         (void)tx_conf;
1241         return 0;
1242 }
1243
1244 static inline int
1245 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
1246 {
1247         uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1248         uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1249         return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1250 }
1251
1252 static inline int
1253 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1254 {
1255         struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
1256
1257         if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1258                 /* allow all packets destined to our address */
1259                 return 0;
1260         }
1261
1262         if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
1263                 /* allow all broadcast packets */
1264                 return 0;
1265         }
1266
1267         if (likely(is_multicast_ether_addr(&eth->d_addr))) {
1268                 /* allow all multicast packets */
1269                 return 0;
1270         }
1271
1272         if (avp->flags & AVP_F_PROMISC) {
1273                 /* allow all packets when in promiscuous mode */
1274                 return 0;
1275         }
1276
1277         return -1;
1278 }
1279
#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
/*
 * Debug-only consistency check of a host buffer chain.
 *
 * Walks the chain starting at buf and panics if a segment translates to a
 * NULL address, has a NULL translated data pointer or a zero data length, or
 * if the number of segments or the summed data length disagree with the
 * nb_segs / pkt_len recorded in the first segment.
 */
static inline void
__avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
{
        struct rte_avp_desc *first_buf;
        struct rte_avp_desc *pkt_buf;
        unsigned int pkt_len;
        unsigned int nb_segs;
        void *pkt_data;
        unsigned int i;

        i = 0;
        pkt_len = 0;

        /* validate the first segment before reading anything from it */
        first_buf = avp_dev_translate_buffer(avp, buf);
        if (first_buf == NULL)
                rte_panic("bad buffer: segment %u has an invalid address %p\n",
                          i, buf);

        nb_segs = first_buf->nb_segs;
        do {
                /* Adjust pointers for guest addressing */
                pkt_buf = avp_dev_translate_buffer(avp, buf);
                if (pkt_buf == NULL)
                        rte_panic("bad buffer: segment %u has an invalid address %p\n",
                                  i, buf);
                pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
                if (pkt_data == NULL)
                        rte_panic("bad buffer: segment %u has a NULL data pointer\n",
                                  i);
                if (pkt_buf->data_len == 0)
                        rte_panic("bad buffer: segment %u has 0 data length\n",
                                  i);
                pkt_len += pkt_buf->data_len;
                nb_segs--;
                i++;

        } while (nb_segs && (buf = pkt_buf->next) != NULL);

        /* a non-zero remainder means the chain ended early */
        if (nb_segs != 0)
                rte_panic("bad buffer: expected %u segments found %u\n",
                          first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
        if (pkt_len != first_buf->pkt_len)
                rte_panic("bad buffer: expected length %u found %u\n",
                          first_buf->pkt_len, pkt_len);
}

#define avp_dev_buffer_sanity_check(a, b) \
        __avp_dev_buffer_sanity_check((a), (b))

#else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */

/* buffer checks compile to nothing unless the debug option is enabled */
#define avp_dev_buffer_sanity_check(a, b) do {} while (0)

#endif
1331
1332 /*
1333  * Copy a host buffer chain to a set of mbufs.  This function assumes that
1334  * there exactly the required number of mbufs to copy all source bytes.
1335  */
1336 static inline struct rte_mbuf *
1337 avp_dev_copy_from_buffers(struct avp_dev *avp,
1338                           struct rte_avp_desc *buf,
1339                           struct rte_mbuf **mbufs,
1340                           unsigned int count)
1341 {
1342         struct rte_mbuf *m_previous = NULL;
1343         struct rte_avp_desc *pkt_buf;
1344         unsigned int total_length = 0;
1345         unsigned int copy_length;
1346         unsigned int src_offset;
1347         struct rte_mbuf *m;
1348         uint16_t ol_flags;
1349         uint16_t vlan_tci;
1350         void *pkt_data;
1351         unsigned int i;
1352
1353         avp_dev_buffer_sanity_check(avp, buf);
1354
1355         /* setup the first source buffer */
1356         pkt_buf = avp_dev_translate_buffer(avp, buf);
1357         pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1358         total_length = pkt_buf->pkt_len;
1359         src_offset = 0;
1360
1361         if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1362                 ol_flags = PKT_RX_VLAN;
1363                 vlan_tci = pkt_buf->vlan_tci;
1364         } else {
1365                 ol_flags = 0;
1366                 vlan_tci = 0;
1367         }
1368
1369         for (i = 0; (i < count) && (buf != NULL); i++) {
1370                 /* fill each destination buffer */
1371                 m = mbufs[i];
1372
1373                 if (m_previous != NULL)
1374                         m_previous->next = m;
1375
1376                 m_previous = m;
1377
1378                 do {
1379                         /*
1380                          * Copy as many source buffers as will fit in the
1381                          * destination buffer.
1382                          */
1383                         copy_length = RTE_MIN((avp->guest_mbuf_size -
1384                                                rte_pktmbuf_data_len(m)),
1385                                               (pkt_buf->data_len -
1386                                                src_offset));
1387                         rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1388                                                rte_pktmbuf_data_len(m)),
1389                                    RTE_PTR_ADD(pkt_data, src_offset),
1390                                    copy_length);
1391                         rte_pktmbuf_data_len(m) += copy_length;
1392                         src_offset += copy_length;
1393
1394                         if (likely(src_offset == pkt_buf->data_len)) {
1395                                 /* need a new source buffer */
1396                                 buf = pkt_buf->next;
1397                                 if (buf != NULL) {
1398                                         pkt_buf = avp_dev_translate_buffer(
1399                                                 avp, buf);
1400                                         pkt_data = avp_dev_translate_buffer(
1401                                                 avp, pkt_buf->data);
1402                                         src_offset = 0;
1403                                 }
1404                         }
1405
1406                         if (unlikely(rte_pktmbuf_data_len(m) ==
1407                                      avp->guest_mbuf_size)) {
1408                                 /* need a new destination mbuf */
1409                                 break;
1410                         }
1411
1412                 } while (buf != NULL);
1413         }
1414
1415         m = mbufs[0];
1416         m->ol_flags = ol_flags;
1417         m->nb_segs = count;
1418         rte_pktmbuf_pkt_len(m) = total_length;
1419         m->vlan_tci = vlan_tci;
1420
1421         __rte_mbuf_sanity_check(m, 1);
1422
1423         return m;
1424 }
1425
1426 static uint16_t
1427 avp_recv_scattered_pkts(void *rx_queue,
1428                         struct rte_mbuf **rx_pkts,
1429                         uint16_t nb_pkts)
1430 {
1431         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1432         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1433         struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1434         struct avp_dev *avp = rxq->avp;
1435         struct rte_avp_desc *pkt_buf;
1436         struct rte_avp_fifo *free_q;
1437         struct rte_avp_fifo *rx_q;
1438         struct rte_avp_desc *buf;
1439         unsigned int count, avail, n;
1440         unsigned int guest_mbuf_size;
1441         struct rte_mbuf *m;
1442         unsigned int required;
1443         unsigned int buf_len;
1444         unsigned int port_id;
1445         unsigned int i;
1446
1447         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1448                 /* VM live migration in progress */
1449                 return 0;
1450         }
1451
1452         guest_mbuf_size = avp->guest_mbuf_size;
1453         port_id = avp->port_id;
1454         rx_q = avp->rx_q[rxq->queue_id];
1455         free_q = avp->free_q[rxq->queue_id];
1456
1457         /* setup next queue to service */
1458         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1459                 (rxq->queue_id + 1) : rxq->queue_base;
1460
1461         /* determine how many slots are available in the free queue */
1462         count = avp_fifo_free_count(free_q);
1463
1464         /* determine how many packets are available in the rx queue */
1465         avail = avp_fifo_count(rx_q);
1466
1467         /* determine how many packets can be received */
1468         count = RTE_MIN(count, avail);
1469         count = RTE_MIN(count, nb_pkts);
1470         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1471
1472         if (unlikely(count == 0)) {
1473                 /* no free buffers, or no buffers on the rx queue */
1474                 return 0;
1475         }
1476
1477         /* retrieve pending packets */
1478         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1479         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1480                    count, rx_q);
1481
1482         count = 0;
1483         for (i = 0; i < n; i++) {
1484                 /* prefetch next entry while processing current one */
1485                 if (i + 1 < n) {
1486                         pkt_buf = avp_dev_translate_buffer(avp,
1487                                                            avp_bufs[i + 1]);
1488                         rte_prefetch0(pkt_buf);
1489                 }
1490                 buf = avp_bufs[i];
1491
1492                 /* Peek into the first buffer to determine the total length */
1493                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1494                 buf_len = pkt_buf->pkt_len;
1495
1496                 /* Allocate enough mbufs to receive the entire packet */
1497                 required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1498                 if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1499                         rxq->dev_data->rx_mbuf_alloc_failed++;
1500                         continue;
1501                 }
1502
1503                 /* Copy the data from the buffers to our mbufs */
1504                 m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1505
1506                 /* finalize mbuf */
1507                 m->port = port_id;
1508
1509                 if (_avp_mac_filter(avp, m) != 0) {
1510                         /* silently discard packets not destined to our MAC */
1511                         rte_pktmbuf_free(m);
1512                         continue;
1513                 }
1514
1515                 /* return new mbuf to caller */
1516                 rx_pkts[count++] = m;
1517                 rxq->bytes += buf_len;
1518         }
1519
1520         rxq->packets += count;
1521
1522         /* return the buffers to the free queue */
1523         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1524
1525         return count;
1526 }
1527
1528
1529 static uint16_t
1530 avp_recv_pkts(void *rx_queue,
1531               struct rte_mbuf **rx_pkts,
1532               uint16_t nb_pkts)
1533 {
1534         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1535         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1536         struct avp_dev *avp = rxq->avp;
1537         struct rte_avp_desc *pkt_buf;
1538         struct rte_avp_fifo *free_q;
1539         struct rte_avp_fifo *rx_q;
1540         unsigned int count, avail, n;
1541         unsigned int pkt_len;
1542         struct rte_mbuf *m;
1543         char *pkt_data;
1544         unsigned int i;
1545
1546         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1547                 /* VM live migration in progress */
1548                 return 0;
1549         }
1550
1551         rx_q = avp->rx_q[rxq->queue_id];
1552         free_q = avp->free_q[rxq->queue_id];
1553
1554         /* setup next queue to service */
1555         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1556                 (rxq->queue_id + 1) : rxq->queue_base;
1557
1558         /* determine how many slots are available in the free queue */
1559         count = avp_fifo_free_count(free_q);
1560
1561         /* determine how many packets are available in the rx queue */
1562         avail = avp_fifo_count(rx_q);
1563
1564         /* determine how many packets can be received */
1565         count = RTE_MIN(count, avail);
1566         count = RTE_MIN(count, nb_pkts);
1567         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1568
1569         if (unlikely(count == 0)) {
1570                 /* no free buffers, or no buffers on the rx queue */
1571                 return 0;
1572         }
1573
1574         /* retrieve pending packets */
1575         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1576         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1577                    count, rx_q);
1578
1579         count = 0;
1580         for (i = 0; i < n; i++) {
1581                 /* prefetch next entry while processing current one */
1582                 if (i < n - 1) {
1583                         pkt_buf = avp_dev_translate_buffer(avp,
1584                                                            avp_bufs[i + 1]);
1585                         rte_prefetch0(pkt_buf);
1586                 }
1587
1588                 /* Adjust host pointers for guest addressing */
1589                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1590                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1591                 pkt_len = pkt_buf->pkt_len;
1592
1593                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1594                              (pkt_buf->nb_segs > 1))) {
1595                         /*
1596                          * application should be using the scattered receive
1597                          * function
1598                          */
1599                         rxq->errors++;
1600                         continue;
1601                 }
1602
1603                 /* process each packet to be transmitted */
1604                 m = rte_pktmbuf_alloc(avp->pool);
1605                 if (unlikely(m == NULL)) {
1606                         rxq->dev_data->rx_mbuf_alloc_failed++;
1607                         continue;
1608                 }
1609
1610                 /* copy data out of the host buffer to our buffer */
1611                 m->data_off = RTE_PKTMBUF_HEADROOM;
1612                 rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1613
1614                 /* initialize the local mbuf */
1615                 rte_pktmbuf_data_len(m) = pkt_len;
1616                 rte_pktmbuf_pkt_len(m) = pkt_len;
1617                 m->port = avp->port_id;
1618
1619                 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1620                         m->ol_flags = PKT_RX_VLAN;
1621                         m->vlan_tci = pkt_buf->vlan_tci;
1622                 }
1623
1624                 if (_avp_mac_filter(avp, m) != 0) {
1625                         /* silently discard packets not destined to our MAC */
1626                         rte_pktmbuf_free(m);
1627                         continue;
1628                 }
1629
1630                 /* return new mbuf to caller */
1631                 rx_pkts[count++] = m;
1632                 rxq->bytes += pkt_len;
1633         }
1634
1635         rxq->packets += count;
1636
1637         /* return the buffers to the free queue */
1638         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1639
1640         return count;
1641 }
1642
1643 /*
1644  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1645  * there are sufficient destination buffers to contain the entire source
1646  * packet.
1647  */
1648 static inline uint16_t
1649 avp_dev_copy_to_buffers(struct avp_dev *avp,
1650                         struct rte_mbuf *mbuf,
1651                         struct rte_avp_desc **buffers,
1652                         unsigned int count)
1653 {
1654         struct rte_avp_desc *previous_buf = NULL;
1655         struct rte_avp_desc *first_buf = NULL;
1656         struct rte_avp_desc *pkt_buf;
1657         struct rte_avp_desc *buf;
1658         size_t total_length;
1659         struct rte_mbuf *m;
1660         size_t copy_length;
1661         size_t src_offset;
1662         char *pkt_data;
1663         unsigned int i;
1664
1665         __rte_mbuf_sanity_check(mbuf, 1);
1666
1667         m = mbuf;
1668         src_offset = 0;
1669         total_length = rte_pktmbuf_pkt_len(m);
1670         for (i = 0; (i < count) && (m != NULL); i++) {
1671                 /* fill each destination buffer */
1672                 buf = buffers[i];
1673
1674                 if (i < count - 1) {
1675                         /* prefetch next entry while processing this one */
1676                         pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1677                         rte_prefetch0(pkt_buf);
1678                 }
1679
1680                 /* Adjust pointers for guest addressing */
1681                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1682                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1683
1684                 /* setup the buffer chain */
1685                 if (previous_buf != NULL)
1686                         previous_buf->next = buf;
1687                 else
1688                         first_buf = pkt_buf;
1689
1690                 previous_buf = pkt_buf;
1691
1692                 do {
1693                         /*
1694                          * copy as many source mbuf segments as will fit in the
1695                          * destination buffer.
1696                          */
1697                         copy_length = RTE_MIN((avp->host_mbuf_size -
1698                                                pkt_buf->data_len),
1699                                               (rte_pktmbuf_data_len(m) -
1700                                                src_offset));
1701                         rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1702                                    RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1703                                                src_offset),
1704                                    copy_length);
1705                         pkt_buf->data_len += copy_length;
1706                         src_offset += copy_length;
1707
1708                         if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1709                                 /* need a new source buffer */
1710                                 m = m->next;
1711                                 src_offset = 0;
1712                         }
1713
1714                         if (unlikely(pkt_buf->data_len ==
1715                                      avp->host_mbuf_size)) {
1716                                 /* need a new destination buffer */
1717                                 break;
1718                         }
1719
1720                 } while (m != NULL);
1721         }
1722
1723         first_buf->nb_segs = count;
1724         first_buf->pkt_len = total_length;
1725
1726         if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1727                 first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1728                 first_buf->vlan_tci = mbuf->vlan_tci;
1729         }
1730
1731         avp_dev_buffer_sanity_check(avp, buffers[0]);
1732
1733         return total_length;
1734 }
1735
1736
1737 static uint16_t
1738 avp_xmit_scattered_pkts(void *tx_queue,
1739                         struct rte_mbuf **tx_pkts,
1740                         uint16_t nb_pkts)
1741 {
1742         struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1743                                        RTE_AVP_MAX_MBUF_SEGMENTS)];
1744         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1745         struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1746         struct avp_dev *avp = txq->avp;
1747         struct rte_avp_fifo *alloc_q;
1748         struct rte_avp_fifo *tx_q;
1749         unsigned int count, avail, n;
1750         unsigned int orig_nb_pkts;
1751         struct rte_mbuf *m;
1752         unsigned int required;
1753         unsigned int segments;
1754         unsigned int tx_bytes;
1755         unsigned int i;
1756
1757         orig_nb_pkts = nb_pkts;
1758         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1759                 /* VM live migration in progress */
1760                 /* TODO ... buffer for X packets then drop? */
1761                 txq->errors += nb_pkts;
1762                 return 0;
1763         }
1764
1765         tx_q = avp->tx_q[txq->queue_id];
1766         alloc_q = avp->alloc_q[txq->queue_id];
1767
1768         /* limit the number of transmitted packets to the max burst size */
1769         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1770                 nb_pkts = AVP_MAX_TX_BURST;
1771
1772         /* determine how many buffers are available to copy into */
1773         avail = avp_fifo_count(alloc_q);
1774         if (unlikely(avail > (AVP_MAX_TX_BURST *
1775                               RTE_AVP_MAX_MBUF_SEGMENTS)))
1776                 avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1777
1778         /* determine how many slots are available in the transmit queue */
1779         count = avp_fifo_free_count(tx_q);
1780
1781         /* determine how many packets can be sent */
1782         nb_pkts = RTE_MIN(count, nb_pkts);
1783
1784         /* determine how many packets will fit in the available buffers */
1785         count = 0;
1786         segments = 0;
1787         for (i = 0; i < nb_pkts; i++) {
1788                 m = tx_pkts[i];
1789                 if (likely(i < (unsigned int)nb_pkts - 1)) {
1790                         /* prefetch next entry while processing this one */
1791                         rte_prefetch0(tx_pkts[i + 1]);
1792                 }
1793                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1794                         avp->host_mbuf_size;
1795
1796                 if (unlikely((required == 0) ||
1797                              (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1798                         break;
1799                 else if (unlikely(required + segments > avail))
1800                         break;
1801                 segments += required;
1802                 count++;
1803         }
1804         nb_pkts = count;
1805
1806         if (unlikely(nb_pkts == 0)) {
1807                 /* no available buffers, or no space on the tx queue */
1808                 txq->errors += orig_nb_pkts;
1809                 return 0;
1810         }
1811
1812         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1813                    nb_pkts, tx_q);
1814
1815         /* retrieve sufficient send buffers */
1816         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1817         if (unlikely(n != segments)) {
1818                 PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1819                            "n=%u, segments=%u, orig=%u\n",
1820                            n, segments, orig_nb_pkts);
1821                 txq->errors += orig_nb_pkts;
1822                 return 0;
1823         }
1824
1825         tx_bytes = 0;
1826         count = 0;
1827         for (i = 0; i < nb_pkts; i++) {
1828                 /* process each packet to be transmitted */
1829                 m = tx_pkts[i];
1830
1831                 /* determine how many buffers are required for this packet */
1832                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1833                         avp->host_mbuf_size;
1834
1835                 tx_bytes += avp_dev_copy_to_buffers(avp, m,
1836                                                     &avp_bufs[count], required);
1837                 tx_bufs[i] = avp_bufs[count];
1838                 count += required;
1839
1840                 /* free the original mbuf */
1841                 rte_pktmbuf_free(m);
1842         }
1843
1844         txq->packets += nb_pkts;
1845         txq->bytes += tx_bytes;
1846
1847 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1848         for (i = 0; i < nb_pkts; i++)
1849                 avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1850 #endif
1851
1852         /* send the packets */
1853         n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1854         if (unlikely(n != orig_nb_pkts))
1855                 txq->errors += (orig_nb_pkts - n);
1856
1857         return n;
1858 }
1859
1860
1861 static uint16_t
1862 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1863 {
1864         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1865         struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1866         struct avp_dev *avp = txq->avp;
1867         struct rte_avp_desc *pkt_buf;
1868         struct rte_avp_fifo *alloc_q;
1869         struct rte_avp_fifo *tx_q;
1870         unsigned int count, avail, n;
1871         struct rte_mbuf *m;
1872         unsigned int pkt_len;
1873         unsigned int tx_bytes;
1874         char *pkt_data;
1875         unsigned int i;
1876
1877         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1878                 /* VM live migration in progress */
1879                 /* TODO ... buffer for X packets then drop?! */
1880                 txq->errors++;
1881                 return 0;
1882         }
1883
1884         tx_q = avp->tx_q[txq->queue_id];
1885         alloc_q = avp->alloc_q[txq->queue_id];
1886
1887         /* limit the number of transmitted packets to the max burst size */
1888         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1889                 nb_pkts = AVP_MAX_TX_BURST;
1890
1891         /* determine how many buffers are available to copy into */
1892         avail = avp_fifo_count(alloc_q);
1893
1894         /* determine how many slots are available in the transmit queue */
1895         count = avp_fifo_free_count(tx_q);
1896
1897         /* determine how many packets can be sent */
1898         count = RTE_MIN(count, avail);
1899         count = RTE_MIN(count, nb_pkts);
1900
1901         if (unlikely(count == 0)) {
1902                 /* no available buffers, or no space on the tx queue */
1903                 txq->errors += nb_pkts;
1904                 return 0;
1905         }
1906
1907         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1908                    count, tx_q);
1909
1910         /* retrieve sufficient send buffers */
1911         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1912         if (unlikely(n != count)) {
1913                 txq->errors++;
1914                 return 0;
1915         }
1916
1917         tx_bytes = 0;
1918         for (i = 0; i < count; i++) {
1919                 /* prefetch next entry while processing the current one */
1920                 if (i < count - 1) {
1921                         pkt_buf = avp_dev_translate_buffer(avp,
1922                                                            avp_bufs[i + 1]);
1923                         rte_prefetch0(pkt_buf);
1924                 }
1925
1926                 /* process each packet to be transmitted */
1927                 m = tx_pkts[i];
1928
1929                 /* Adjust pointers for guest addressing */
1930                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1931                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1932                 pkt_len = rte_pktmbuf_pkt_len(m);
1933
1934                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1935                                          (pkt_len > avp->host_mbuf_size))) {
1936                         /*
1937                          * application should be using the scattered transmit
1938                          * function; send it truncated to avoid the performance
1939                          * hit of having to manage returning the already
1940                          * allocated buffer to the free list.  This should not
1941                          * happen since the application should have set the
1942                          * max_rx_pkt_len based on its MTU and it should be
1943                          * policing its own packet sizes.
1944                          */
1945                         txq->errors++;
1946                         pkt_len = RTE_MIN(avp->guest_mbuf_size,
1947                                           avp->host_mbuf_size);
1948                 }
1949
1950                 /* copy data out of our mbuf and into the AVP buffer */
1951                 rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1952                 pkt_buf->pkt_len = pkt_len;
1953                 pkt_buf->data_len = pkt_len;
1954                 pkt_buf->nb_segs = 1;
1955                 pkt_buf->next = NULL;
1956
1957                 if (m->ol_flags & PKT_TX_VLAN_PKT) {
1958                         pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1959                         pkt_buf->vlan_tci = m->vlan_tci;
1960                 }
1961
1962                 tx_bytes += pkt_len;
1963
1964                 /* free the original mbuf */
1965                 rte_pktmbuf_free(m);
1966         }
1967
1968         txq->packets += count;
1969         txq->bytes += tx_bytes;
1970
1971         /* send the packets */
1972         n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1973
1974         return n;
1975 }
1976
1977 static void
1978 avp_dev_rx_queue_release(void *rx_queue)
1979 {
1980         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1981         struct avp_dev *avp = rxq->avp;
1982         struct rte_eth_dev_data *data = avp->dev_data;
1983         unsigned int i;
1984
1985         for (i = 0; i < avp->num_rx_queues; i++) {
1986                 if (data->rx_queues[i] == rxq)
1987                         data->rx_queues[i] = NULL;
1988         }
1989 }
1990
1991 static void
1992 avp_dev_tx_queue_release(void *tx_queue)
1993 {
1994         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1995         struct avp_dev *avp = txq->avp;
1996         struct rte_eth_dev_data *data = avp->dev_data;
1997         unsigned int i;
1998
1999         for (i = 0; i < avp->num_tx_queues; i++) {
2000                 if (data->tx_queues[i] == txq)
2001                         data->tx_queues[i] = NULL;
2002         }
2003 }
2004
2005 static int
2006 avp_dev_configure(struct rte_eth_dev *eth_dev)
2007 {
2008         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2009         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2010         struct rte_avp_device_info *host_info;
2011         struct rte_avp_device_config config;
2012         int mask = 0;
2013         void *addr;
2014         int ret;
2015
2016         rte_spinlock_lock(&avp->lock);
2017         if (avp->flags & AVP_F_DETACHED) {
2018                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2019                 ret = -ENOTSUP;
2020                 goto unlock;
2021         }
2022
2023         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2024         host_info = (struct rte_avp_device_info *)addr;
2025
2026         /* Setup required number of queues */
2027         _avp_set_queue_counts(eth_dev);
2028
2029         mask = (ETH_VLAN_STRIP_MASK |
2030                 ETH_VLAN_FILTER_MASK |
2031                 ETH_VLAN_EXTEND_MASK);
2032         ret = avp_vlan_offload_set(eth_dev, mask);
2033         if (ret < 0) {
2034                 PMD_DRV_LOG(ERR, "VLAN offload set failed by host, ret=%d\n",
2035                             ret);
2036                 goto unlock;
2037         }
2038
2039         /* update device config */
2040         memset(&config, 0, sizeof(config));
2041         config.device_id = host_info->device_id;
2042         config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2043         config.driver_version = AVP_DPDK_DRIVER_VERSION;
2044         config.features = avp->features;
2045         config.num_tx_queues = avp->num_tx_queues;
2046         config.num_rx_queues = avp->num_rx_queues;
2047
2048         ret = avp_dev_ctrl_set_config(eth_dev, &config);
2049         if (ret < 0) {
2050                 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2051                             ret);
2052                 goto unlock;
2053         }
2054
2055         avp->flags |= AVP_F_CONFIGURED;
2056         ret = 0;
2057
2058 unlock:
2059         rte_spinlock_unlock(&avp->lock);
2060         return ret;
2061 }
2062
2063 static int
2064 avp_dev_start(struct rte_eth_dev *eth_dev)
2065 {
2066         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2067         int ret;
2068
2069         rte_spinlock_lock(&avp->lock);
2070         if (avp->flags & AVP_F_DETACHED) {
2071                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2072                 ret = -ENOTSUP;
2073                 goto unlock;
2074         }
2075
2076         /* disable features that we do not support */
2077         eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0;
2078         eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0;
2079         eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0;
2080         eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0;
2081
2082         /* update link state */
2083         ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2084         if (ret < 0) {
2085                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2086                             ret);
2087                 goto unlock;
2088         }
2089
2090         /* remember current link state */
2091         avp->flags |= AVP_F_LINKUP;
2092
2093         ret = 0;
2094
2095 unlock:
2096         rte_spinlock_unlock(&avp->lock);
2097         return ret;
2098 }
2099
2100 static void
2101 avp_dev_stop(struct rte_eth_dev *eth_dev)
2102 {
2103         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2104         int ret;
2105
2106         rte_spinlock_lock(&avp->lock);
2107         if (avp->flags & AVP_F_DETACHED) {
2108                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2109                 goto unlock;
2110         }
2111
2112         /* remember current link state */
2113         avp->flags &= ~AVP_F_LINKUP;
2114
2115         /* update link state */
2116         ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2117         if (ret < 0) {
2118                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2119                             ret);
2120         }
2121
2122 unlock:
2123         rte_spinlock_unlock(&avp->lock);
2124 }
2125
2126 static void
2127 avp_dev_close(struct rte_eth_dev *eth_dev)
2128 {
2129         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2130         int ret;
2131
2132         rte_spinlock_lock(&avp->lock);
2133         if (avp->flags & AVP_F_DETACHED) {
2134                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2135                 goto unlock;
2136         }
2137
2138         /* remember current link state */
2139         avp->flags &= ~AVP_F_LINKUP;
2140         avp->flags &= ~AVP_F_CONFIGURED;
2141
2142         ret = avp_dev_disable_interrupts(eth_dev);
2143         if (ret < 0) {
2144                 PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2145                 /* continue */
2146         }
2147
2148         /* update device state */
2149         ret = avp_dev_ctrl_shutdown(eth_dev);
2150         if (ret < 0) {
2151                 PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2152                             ret);
2153                 /* continue */
2154         }
2155
2156 unlock:
2157         rte_spinlock_unlock(&avp->lock);
2158 }
2159
2160 static int
2161 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2162                                         __rte_unused int wait_to_complete)
2163 {
2164         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2165         struct rte_eth_link *link = &eth_dev->data->dev_link;
2166
2167         link->link_speed = ETH_SPEED_NUM_10G;
2168         link->link_duplex = ETH_LINK_FULL_DUPLEX;
2169         link->link_status = !!(avp->flags & AVP_F_LINKUP);
2170
2171         return -1;
2172 }
2173
2174 static void
2175 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2176 {
2177         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2178
2179         rte_spinlock_lock(&avp->lock);
2180         if ((avp->flags & AVP_F_PROMISC) == 0) {
2181                 avp->flags |= AVP_F_PROMISC;
2182                 PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2183                             eth_dev->data->port_id);
2184         }
2185         rte_spinlock_unlock(&avp->lock);
2186 }
2187
2188 static void
2189 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2190 {
2191         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2192
2193         rte_spinlock_lock(&avp->lock);
2194         if ((avp->flags & AVP_F_PROMISC) != 0) {
2195                 avp->flags &= ~AVP_F_PROMISC;
2196                 PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2197                             eth_dev->data->port_id);
2198         }
2199         rte_spinlock_unlock(&avp->lock);
2200 }
2201
2202 static void
2203 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2204                  struct rte_eth_dev_info *dev_info)
2205 {
2206         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2207
2208         dev_info->pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2209         dev_info->max_rx_queues = avp->max_rx_queues;
2210         dev_info->max_tx_queues = avp->max_tx_queues;
2211         dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2212         dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2213         dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2214         if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2215                 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2216                 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2217         }
2218 }
2219
2220 static int
2221 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2222 {
2223         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2224
2225         if (mask & ETH_VLAN_STRIP_MASK) {
2226                 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2227                         if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
2228                                 avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2229                         else
2230                                 avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2231                 } else {
2232                         PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2233                 }
2234         }
2235
2236         if (mask & ETH_VLAN_FILTER_MASK) {
2237                 if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
2238                         PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2239         }
2240
2241         if (mask & ETH_VLAN_EXTEND_MASK) {
2242                 if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
2243                         PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2244         }
2245
2246         return 0;
2247 }
2248
2249 static int
2250 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2251 {
2252         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2253         unsigned int i;
2254
2255         for (i = 0; i < avp->num_rx_queues; i++) {
2256                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2257
2258                 if (rxq) {
2259                         stats->ipackets += rxq->packets;
2260                         stats->ibytes += rxq->bytes;
2261                         stats->ierrors += rxq->errors;
2262
2263                         stats->q_ipackets[i] += rxq->packets;
2264                         stats->q_ibytes[i] += rxq->bytes;
2265                         stats->q_errors[i] += rxq->errors;
2266                 }
2267         }
2268
2269         for (i = 0; i < avp->num_tx_queues; i++) {
2270                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2271
2272                 if (txq) {
2273                         stats->opackets += txq->packets;
2274                         stats->obytes += txq->bytes;
2275                         stats->oerrors += txq->errors;
2276
2277                         stats->q_opackets[i] += txq->packets;
2278                         stats->q_obytes[i] += txq->bytes;
2279                         stats->q_errors[i] += txq->errors;
2280                 }
2281         }
2282
2283         return 0;
2284 }
2285
2286 static void
2287 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2288 {
2289         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2290         unsigned int i;
2291
2292         for (i = 0; i < avp->num_rx_queues; i++) {
2293                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2294
2295                 if (rxq) {
2296                         rxq->bytes = 0;
2297                         rxq->packets = 0;
2298                         rxq->errors = 0;
2299                 }
2300         }
2301
2302         for (i = 0; i < avp->num_tx_queues; i++) {
2303                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2304
2305                 if (txq) {
2306                         txq->bytes = 0;
2307                         txq->packets = 0;
2308                         txq->errors = 0;
2309                 }
2310         }
2311 }
2312
/*
 * Driver registration: both macros register under the driver name net_avp,
 * the first taking the rte_avp_pmd driver object and the second the
 * pci_id_avp_map PCI ID table (both defined elsewhere in this file).
 */
2313 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
2314 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);