[deb_dpdk.git] / drivers / net / avp / avp_ethdev.c
1 /*
2  *   BSD LICENSE
3  *
4  * Copyright (c) 2013-2017, Wind River Systems, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above copyright notice,
10  * this list of conditions and the following disclaimer.
11  *
12  * 2) Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * 3) Neither the name of Wind River Systems nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <stdint.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <unistd.h>
38
39 #include <rte_ethdev.h>
40 #include <rte_ethdev_pci.h>
41 #include <rte_memcpy.h>
42 #include <rte_string_fns.h>
43 #include <rte_memzone.h>
44 #include <rte_malloc.h>
45 #include <rte_atomic.h>
46 #include <rte_branch_prediction.h>
47 #include <rte_pci.h>
48 #include <rte_ether.h>
49 #include <rte_common.h>
50 #include <rte_cycles.h>
51 #include <rte_spinlock.h>
52 #include <rte_byteorder.h>
53 #include <rte_dev.h>
54 #include <rte_memory.h>
55 #include <rte_eal.h>
56 #include <rte_io.h>
57
58 #include "rte_avp_common.h"
59 #include "rte_avp_fifo.h"
60
61 #include "avp_logs.h"
62
63
64 static int avp_dev_create(struct rte_pci_device *pci_dev,
65                           struct rte_eth_dev *eth_dev);
66
67 static int avp_dev_configure(struct rte_eth_dev *dev);
68 static int avp_dev_start(struct rte_eth_dev *dev);
69 static void avp_dev_stop(struct rte_eth_dev *dev);
70 static void avp_dev_close(struct rte_eth_dev *dev);
71 static void avp_dev_info_get(struct rte_eth_dev *dev,
72                              struct rte_eth_dev_info *dev_info);
73 static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
74 static int avp_dev_link_update(struct rte_eth_dev *dev,
75                                __rte_unused int wait_to_complete);
76 static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
77 static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
78
79 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
80                                   uint16_t rx_queue_id,
81                                   uint16_t nb_rx_desc,
82                                   unsigned int socket_id,
83                                   const struct rte_eth_rxconf *rx_conf,
84                                   struct rte_mempool *pool);
85
86 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
87                                   uint16_t tx_queue_id,
88                                   uint16_t nb_tx_desc,
89                                   unsigned int socket_id,
90                                   const struct rte_eth_txconf *tx_conf);
91
92 static uint16_t avp_recv_scattered_pkts(void *rx_queue,
93                                         struct rte_mbuf **rx_pkts,
94                                         uint16_t nb_pkts);
95
96 static uint16_t avp_recv_pkts(void *rx_queue,
97                               struct rte_mbuf **rx_pkts,
98                               uint16_t nb_pkts);
99
100 static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
101                                         struct rte_mbuf **tx_pkts,
102                                         uint16_t nb_pkts);
103
104 static uint16_t avp_xmit_pkts(void *tx_queue,
105                               struct rte_mbuf **tx_pkts,
106                               uint16_t nb_pkts);
107
108 static void avp_dev_rx_queue_release(void *rxq);
109 static void avp_dev_tx_queue_release(void *txq);
110
111 static void avp_dev_stats_get(struct rte_eth_dev *dev,
112                               struct rte_eth_stats *stats);
113 static void avp_dev_stats_reset(struct rte_eth_dev *dev);
114
115
116 #define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)
117
118
119 #define AVP_MAX_RX_BURST 64
120 #define AVP_MAX_TX_BURST 64
121 #define AVP_MAX_MAC_ADDRS 1
122 #define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN
123
124
125 /*
126  * Defines the number of microseconds to wait before checking the response
127  * queue for completion.
128  */
129 #define AVP_REQUEST_DELAY_USECS (5000)
130
131 /*
132  * Defines the number of times to check the response queue for completion before
133  * declaring a timeout.
134  */
135 #define AVP_MAX_REQUEST_RETRY (100)
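/*
 * Taken together, the two values above bound the worst-case wait for a
 * control-plane response at roughly:
 *
 *   AVP_MAX_REQUEST_RETRY * AVP_REQUEST_DELAY_USECS = 100 * 5000us = ~500ms
 *
 * per request (see avp_dev_process_request() below).
 */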
136
137 /* Defines the current PCI driver version number */
138 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
139
140 /*
141  * The set of PCI devices this driver supports
142  */
143 static const struct rte_pci_id pci_id_avp_map[] = {
144         { .vendor_id = RTE_AVP_PCI_VENDOR_ID,
145           .device_id = RTE_AVP_PCI_DEVICE_ID,
146           .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
147           .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
148           .class_id = RTE_CLASS_ANY_ID,
149         },
150
151         { .vendor_id = 0, /* sentinel */
152         },
153 };
154
155 /*
156  * dev_ops for avp, bare necessities for basic operation
157  */
158 static const struct eth_dev_ops avp_eth_dev_ops = {
159         .dev_configure       = avp_dev_configure,
160         .dev_start           = avp_dev_start,
161         .dev_stop            = avp_dev_stop,
162         .dev_close           = avp_dev_close,
163         .dev_infos_get       = avp_dev_info_get,
164         .vlan_offload_set    = avp_vlan_offload_set,
165         .stats_get           = avp_dev_stats_get,
166         .stats_reset         = avp_dev_stats_reset,
167         .link_update         = avp_dev_link_update,
168         .promiscuous_enable  = avp_dev_promiscuous_enable,
169         .promiscuous_disable = avp_dev_promiscuous_disable,
170         .rx_queue_setup      = avp_dev_rx_queue_setup,
171         .rx_queue_release    = avp_dev_rx_queue_release,
172         .tx_queue_setup      = avp_dev_tx_queue_setup,
173         .tx_queue_release    = avp_dev_tx_queue_release,
174 };
175
176 /**@{ AVP device flags */
177 #define AVP_F_PROMISC (1 << 1)
178 #define AVP_F_CONFIGURED (1 << 2)
179 #define AVP_F_LINKUP (1 << 3)
180 #define AVP_F_DETACHED (1 << 4)
181 /**@} */
182
183 /* Ethernet device validation marker */
184 #define AVP_ETHDEV_MAGIC 0x92972862
185
186 /*
187  * Defines the AVP device attributes which are attached to an RTE ethernet
188  * device
189  */
190 struct avp_dev {
191         uint32_t magic; /**< Memory validation marker */
192         uint64_t device_id; /**< Unique system identifier */
193         struct ether_addr ethaddr; /**< Host specified MAC address */
194         struct rte_eth_dev_data *dev_data;
195         /**< Back pointer to ethernet device data */
196         volatile uint32_t flags; /**< Device operational flags */
197         uint8_t port_id; /**< Ethernet port identifier */
198         struct rte_mempool *pool; /**< pkt mbuf mempool */
199         unsigned int guest_mbuf_size; /**< local pool mbuf size */
200         unsigned int host_mbuf_size; /**< host mbuf size */
201         unsigned int max_rx_pkt_len; /**< maximum receive unit */
202         uint32_t host_features; /**< Supported feature bitmap */
203         uint32_t features; /**< Enabled feature bitmap */
204         unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
205         unsigned int max_tx_queues; /**< Maximum number of transmit queues */
206         unsigned int num_rx_queues; /**< Negotiated number of receive queues */
207         unsigned int max_rx_queues; /**< Maximum number of receive queues */
208
209         struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
210         struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
211         struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
212         /**< Allocated mbufs queue */
213         struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
214         /**< To be freed mbufs queue */
215
216         /* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */
217         rte_spinlock_t lock;
218
219         /* For request & response */
220         struct rte_avp_fifo *req_q; /**< Request queue */
221         struct rte_avp_fifo *resp_q; /**< Response queue */
222         void *host_sync_addr; /**< (host) Req/Resp Mem address */
223         void *sync_addr; /**< Req/Resp Mem address */
224         void *host_mbuf_addr; /**< (host) MBUF pool start address */
225         void *mbuf_addr; /**< MBUF pool start address */
226 } __rte_cache_aligned;
227
228 /* RTE ethernet private data */
229 struct avp_adapter {
230         struct avp_dev avp;
231 } __rte_cache_aligned;
232
233
234 /* 32-bit MMIO register write */
235 #define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))
236
237 /* 32-bit MMIO register read */
238 #define AVP_READ32(_addr) rte_read32_relaxed((_addr))
239
240 /* Macro to cast the ethernet device private data to an AVP object */
241 #define AVP_DEV_PRIVATE_TO_HW(adapter) \
242         (&((struct avp_adapter *)adapter)->avp)
243
244 /*
245  * Defines the structure of an AVP device queue for the purpose of handling the
246  * receive and transmit burst callback functions
247  */
248 struct avp_queue {
249         struct rte_eth_dev_data *dev_data;
250         /**< Backpointer to ethernet device data */
251         struct avp_dev *avp; /**< Backpointer to AVP device */
252         uint16_t queue_id;
253         /**< Queue identifier used for indexing current queue */
254         uint16_t queue_base;
255         /**< Base queue identifier for queue servicing */
256         uint16_t queue_limit;
257         /**< Maximum queue identifier for queue servicing */
258
259         uint64_t packets;
260         uint64_t bytes;
261         uint64_t errors;
262 };
263
264 /* send a request and wait for a response
265  *
266  * @warning must be called while holding the avp->lock spinlock.
267  */
268 static int
269 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
270 {
271         unsigned int retry = AVP_MAX_REQUEST_RETRY;
272         void *resp_addr = NULL;
273         unsigned int count;
274         int ret;
275
276         PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
277
278         request->result = -ENOTSUP;
279
280         /* Discard any stale responses before starting a new request */
281         while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
282                 PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
283
284         rte_memcpy(avp->sync_addr, request, sizeof(*request));
285         count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
286         if (count < 1) {
287                 PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
288                             request->req_id);
289                 ret = -EBUSY;
290                 goto done;
291         }
292
293         while (retry--) {
294                 /* wait for a response */
295                 usleep(AVP_REQUEST_DELAY_USECS);
296
297                 count = avp_fifo_count(avp->resp_q);
298                 if (count >= 1) {
299                         /* response received */
300                         break;
301                 }
302
303                 if ((count < 1) && (retry == 0)) {
304                         PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
305                                     request->req_id);
306                         ret = -ETIME;
307                         goto done;
308                 }
309         }
310
311         /* retrieve the response */
312         count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
313         if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
314                 PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
315                             count, resp_addr, avp->host_sync_addr);
316                 ret = -ENODATA;
317                 goto done;
318         }
319
320         /* copy to user buffer */
321         rte_memcpy(request, avp->sync_addr, sizeof(*request));
322         ret = 0;
323
324         PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
325                     request->result, request->req_id);
326
327 done:
328         return ret;
329 }
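/*
 * Note on the request/response handshake above: the request structure is
 * copied into the shared sync area (avp->sync_addr) and only the host's
 * address of that same area (avp->host_sync_addr) is pushed through req_q.
 * The host writes its reply back into the sync area and echoes the pointer
 * on resp_q, which is why the response is validated by comparing the
 * dequeued pointer against avp->host_sync_addr before the result is copied
 * back out.  Callers hold avp->lock so only one request is in flight.
 */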
330
331 static int
332 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
333 {
334         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
335         struct rte_avp_request request;
336         int ret;
337
338         /* setup a link state change request */
339         memset(&request, 0, sizeof(request));
340         request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
341         request.if_up = state;
342
343         ret = avp_dev_process_request(avp, &request);
344
345         return ret == 0 ? request.result : ret;
346 }
347
348 static int
349 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
350                         struct rte_avp_device_config *config)
351 {
352         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
353         struct rte_avp_request request;
354         int ret;
355
356         /* setup a configure request */
357         memset(&request, 0, sizeof(request));
358         request.req_id = RTE_AVP_REQ_CFG_DEVICE;
359         memcpy(&request.config, config, sizeof(request.config));
360
361         ret = avp_dev_process_request(avp, &request);
362
363         return ret == 0 ? request.result : ret;
364 }
365
366 static int
367 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
368 {
369         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
370         struct rte_avp_request request;
371         int ret;
372
373         /* setup a shutdown request */
374         memset(&request, 0, sizeof(request));
375         request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
376
377         ret = avp_dev_process_request(avp, &request);
378
379         return ret == 0 ? request.result : ret;
380 }
381
382 /* translate from host mbuf virtual address to guest virtual address */
383 static inline void *
384 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
385 {
386         return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
387                                        (uintptr_t)avp->host_mbuf_addr),
388                            (uintptr_t)avp->mbuf_addr);
389 }
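/*
 * The translation above relies on the host mbuf pool being mapped
 * contiguously into the guest, so a host pointer is converted by preserving
 * its offset within the pool:
 *
 *   guest_va = (host_va - avp->host_mbuf_addr) + avp->mbuf_addr
 *
 * For example (hypothetical values), with host_mbuf_addr=0x7f0000000000,
 * mbuf_addr=0x400000000 and host_va=0x7f0000001000 the result is
 * 0x400001000, i.e. the same 0x1000 offset into the pool.
 */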
390
391 /* translate from host physical address to guest virtual address */
392 static void *
393 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
394                           phys_addr_t host_phys_addr)
395 {
396         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
397         struct rte_mem_resource *resource;
398         struct rte_avp_memmap_info *info;
399         struct rte_avp_memmap *map;
400         off_t offset;
401         void *addr;
402         unsigned int i;
403
404         addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
405         resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
406         info = (struct rte_avp_memmap_info *)resource->addr;
407
408         offset = 0;
409         for (i = 0; i < info->nb_maps; i++) {
410                 /* search all segments looking for a matching address */
411                 map = &info->maps[i];
412
413                 if ((host_phys_addr >= map->phys_addr) &&
414                         (host_phys_addr < (map->phys_addr + map->length))) {
415                         /* address is within this segment */
416                         offset += (host_phys_addr - map->phys_addr);
417                         addr = RTE_PTR_ADD(addr, offset);
418
419                         PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
420                                     host_phys_addr, addr);
421
422                         return addr;
423                 }
424                 offset += map->length;
425         }
426
427         return NULL;
428 }
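/*
 * Unlike the mbuf translation above, this routine works on host physical
 * addresses: it walks the memmap table exported through the MEMMAP BAR,
 * summing the lengths of the segments that precede the matching one, and
 * adds that cumulative offset to the guest mapping of the MEMORY BAR.  A
 * NULL return therefore means the host supplied an address outside of any
 * exported segment.
 */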
429
430 /* verify that the incoming device version is compatible with our version */
431 static int
432 avp_dev_version_check(uint32_t version)
433 {
434         uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
435         uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
436
437         if (device <= driver) {
438                 /* the host driver version is less than or equal to ours */
439                 return 0;
440         }
441
442         return 1;
443 }
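/*
 * Compatibility ignores the minor version number: RTE_AVP_STRIP_MINOR_VERSION()
 * removes it before the comparison, so a host that differs only in its minor
 * revision is accepted (return 0), while a host with a newer release/major
 * version than this driver produces the non-zero "incompatible" result.
 */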
444
445 /* verify that memory regions have expected version and validation markers */
446 static int
447 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
448 {
449         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
450         struct rte_avp_memmap_info *memmap;
451         struct rte_avp_device_info *info;
452         struct rte_mem_resource *resource;
453         unsigned int i;
454
455         /* Dump resource info for debug */
456         for (i = 0; i < PCI_MAX_RESOURCE; i++) {
457                 resource = &pci_dev->mem_resource[i];
458                 if ((resource->phys_addr == 0) || (resource->len == 0))
459                         continue;
460
461                 PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
462                             i, resource->phys_addr,
463                             resource->len, resource->addr);
464
465                 switch (i) {
466                 case RTE_AVP_PCI_MEMMAP_BAR:
467                         memmap = (struct rte_avp_memmap_info *)resource->addr;
468                         if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
469                             (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
470                                 PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
471                                             memmap->magic, memmap->version);
472                                 return -EINVAL;
473                         }
474                         break;
475
476                 case RTE_AVP_PCI_DEVICE_BAR:
477                         info = (struct rte_avp_device_info *)resource->addr;
478                         if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
479                             avp_dev_version_check(info->version)) {
480                                 PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
481                                             info->magic, info->version,
482                                             AVP_DPDK_DRIVER_VERSION);
483                                 return -EINVAL;
484                         }
485                         break;
486
487                 case RTE_AVP_PCI_MEMORY_BAR:
488                 case RTE_AVP_PCI_MMIO_BAR:
489                         if (resource->addr == NULL) {
490                                 PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
491                                             i);
492                                 return -EINVAL;
493                         }
494                         break;
495
496                 case RTE_AVP_PCI_MSIX_BAR:
497                 default:
498                         /* no validation required */
499                         break;
500                 }
501         }
502
503         return 0;
504 }
505
506 static int
507 avp_dev_detach(struct rte_eth_dev *eth_dev)
508 {
509         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
510         int ret;
511
512         PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
513                     eth_dev->data->port_id, avp->device_id);
514
515         rte_spinlock_lock(&avp->lock);
516
517         if (avp->flags & AVP_F_DETACHED) {
518                 PMD_DRV_LOG(NOTICE, "port %u already detached\n",
519                             eth_dev->data->port_id);
520                 ret = 0;
521                 goto unlock;
522         }
523
524         /* shutdown the device first so the host stops sending us packets. */
525         ret = avp_dev_ctrl_shutdown(eth_dev);
526         if (ret < 0) {
527                 PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
528                             ret);
529                 avp->flags &= ~AVP_F_DETACHED;
530                 goto unlock;
531         }
532
533         avp->flags |= AVP_F_DETACHED;
534         rte_wmb();
535
536         /* wait for queues to acknowledge the presence of the detach flag */
537         rte_delay_ms(1);
538
539         ret = 0;
540
541 unlock:
542         rte_spinlock_unlock(&avp->lock);
543         return ret;
544 }
545
546 static void
547 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
548 {
549         struct avp_dev *avp =
550                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
551         struct avp_queue *rxq;
552         uint16_t queue_count;
553         uint16_t remainder;
554
555         rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
556
557         /*
558          * Must map all AVP fifos as evenly as possible between the configured
559          * device queues.  Each device queue will service a subset of the AVP
560          * fifos. If the AVP fifo count is not an exact multiple of the device
561          * queue count, the first device queues will each service one extra fifo.
562          */
563         queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
564         remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
565         if (rx_queue_id < remainder) {
566                 /* these queues must service one extra FIFO */
567                 rxq->queue_base = rx_queue_id * (queue_count + 1);
568                 rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
569         } else {
570                 /* these queues service the regular number of FIFOs */
571                 rxq->queue_base = ((remainder * (queue_count + 1)) +
572                                    ((rx_queue_id - remainder) * queue_count));
573                 rxq->queue_limit = rxq->queue_base + queue_count - 1;
574         }
575
576         PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
577                     rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
578
579         rxq->queue_id = rxq->queue_base;
580 }
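/*
 * Worked example of the mapping above (hypothetical counts): with
 * avp->num_rx_queues = 5 AVP fifos and nb_rx_queues = 2 device queues,
 * queue_count = 2 and remainder = 1, so device queue 0 services fifos
 * 0..2 (base=0, limit=2) and device queue 1 services fifos 3..4
 * (base=3, limit=4).  The receive path then round-robins queue_id
 * between queue_base and queue_limit on each poll.
 */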
581
582 static void
583 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
584 {
585         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
586         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
587         struct rte_avp_device_info *host_info;
588         void *addr;
589
590         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
591         host_info = (struct rte_avp_device_info *)addr;
592
593         /*
594          * the transmit direction is not negotiated beyond respecting the max
595          * number of queues because the host can handle arbitrary guest tx
596          * queues (host rx queues).
597          */
598         avp->num_tx_queues = eth_dev->data->nb_tx_queues;
599
600         /*
601          * the receive direction is more restrictive.  The host requires a
602          * minimum number of guest rx queues (host tx queues) therefore
603          * negotiate a value that is at least as large as the host minimum
604          * requirement.  If the host and guest values are not identical then a
605          * mapping will be established in the receive_queue_setup function.
606          */
607         avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
608                                      eth_dev->data->nb_rx_queues);
609
610         PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
611                     avp->num_tx_queues, avp->num_rx_queues);
612 }
613
614 static int
615 avp_dev_attach(struct rte_eth_dev *eth_dev)
616 {
617         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
618         struct rte_avp_device_config config;
619         unsigned int i;
620         int ret;
621
622         PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
623                     eth_dev->data->port_id, avp->device_id);
624
625         rte_spinlock_lock(&avp->lock);
626
627         if (!(avp->flags & AVP_F_DETACHED)) {
628                 PMD_DRV_LOG(NOTICE, "port %u already attached\n",
629                             eth_dev->data->port_id);
630                 ret = 0;
631                 goto unlock;
632         }
633
634         /*
635          * make sure that the detached flag is set prior to reconfiguring the
636          * queues.
637          */
638         avp->flags |= AVP_F_DETACHED;
639         rte_wmb();
640
641         /*
642          * re-run the device create utility which will parse the new host info
643          * and setup the AVP device queue pointers.
644          */
645         ret = avp_dev_create(AVP_DEV_TO_PCI(eth_dev), eth_dev);
646         if (ret < 0) {
647                 PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
648                             ret);
649                 goto unlock;
650         }
651
652         if (avp->flags & AVP_F_CONFIGURED) {
653                 /*
654                  * Update the receive queue mapping to handle cases where the
655                  * source and destination hosts have different queue
656                  * requirements.  As long as the DETACHED flag is asserted the
657                  * queue table should not be referenced so it should be safe to
658                  * update it.
659                  */
660                 _avp_set_queue_counts(eth_dev);
661                 for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
662                         _avp_set_rx_queue_mappings(eth_dev, i);
663
664                 /*
665                  * Update the host with our config details so that it knows the
666                  * device is active.
667                  */
668                 memset(&config, 0, sizeof(config));
669                 config.device_id = avp->device_id;
670                 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
671                 config.driver_version = AVP_DPDK_DRIVER_VERSION;
672                 config.features = avp->features;
673                 config.num_tx_queues = avp->num_tx_queues;
674                 config.num_rx_queues = avp->num_rx_queues;
675                 config.if_up = !!(avp->flags & AVP_F_LINKUP);
676
677                 ret = avp_dev_ctrl_set_config(eth_dev, &config);
678                 if (ret < 0) {
679                         PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
680                                     ret);
681                         goto unlock;
682                 }
683         }
684
685         rte_wmb();
686         avp->flags &= ~AVP_F_DETACHED;
687
688         ret = 0;
689
690 unlock:
691         rte_spinlock_unlock(&avp->lock);
692         return ret;
693 }
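/*
 * The detach/attach pair above implements VM live migration support: on a
 * DETACHED notification the guest sends a shutdown request and raises
 * AVP_F_DETACHED so the burst handlers stop touching the queues; on a
 * subsequent ATTACHED notification the device is re-created against the new
 * host's BARs, the rx queue mappings are rebuilt, the configuration is
 * pushed back to the host, and only then is AVP_F_DETACHED cleared.  The
 * interrupt handler below dispatches to these two routines based on the
 * migration status register.
 */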
694
695 static void
696 avp_dev_interrupt_handler(void *data)
697 {
698         struct rte_eth_dev *eth_dev = data;
699         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
700         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
701         uint32_t status, value;
702         int ret;
703
704         if (registers == NULL)
705                 rte_panic("no mapped MMIO register space\n");
706
707         /* read the interrupt status register
708          * note: this register clears on read so all raised interrupts must be
709          *    handled or remembered for later processing
710          */
711         status = AVP_READ32(
712                 RTE_PTR_ADD(registers,
713                             RTE_AVP_INTERRUPT_STATUS_OFFSET));
714
715         if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
716                 /* handle interrupt based on current status */
717                 value = AVP_READ32(
718                         RTE_PTR_ADD(registers,
719                                     RTE_AVP_MIGRATION_STATUS_OFFSET));
720                 switch (value) {
721                 case RTE_AVP_MIGRATION_DETACHED:
722                         ret = avp_dev_detach(eth_dev);
723                         break;
724                 case RTE_AVP_MIGRATION_ATTACHED:
725                         ret = avp_dev_attach(eth_dev);
726                         break;
727                 default:
728                         PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
729                                     value);
730                         ret = -EINVAL;
731                 }
732
733                 /* acknowledge the request by writing out our current status */
734                 value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
735                 AVP_WRITE32(value,
736                             RTE_PTR_ADD(registers,
737                                         RTE_AVP_MIGRATION_ACK_OFFSET));
738
739                 PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
740         }
741
742         if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
743                 PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
744                             status);
745
746         /* re-enable UIO interrupt handling */
747         ret = rte_intr_enable(&pci_dev->intr_handle);
748         if (ret < 0) {
749                 PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
750                             ret);
751                 /* continue */
752         }
753 }
754
755 static int
756 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
757 {
758         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
759         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
760         int ret;
761
762         if (registers == NULL)
763                 return -EINVAL;
764
765         /* enable UIO interrupt handling */
766         ret = rte_intr_enable(&pci_dev->intr_handle);
767         if (ret < 0) {
768                 PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
769                             ret);
770                 return ret;
771         }
772
773         /* inform the device that all interrupts are enabled */
774         AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
775                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
776
777         return 0;
778 }
779
780 static int
781 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
782 {
783         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
784         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
785         int ret;
786
787         if (registers == NULL)
788                 return 0;
789
790         /* inform the device that all interrupts are disabled */
791         AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
792                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
793
794         /* disable UIO interrupt handling */
795         ret = rte_intr_disable(&pci_dev->intr_handle);
796         if (ret < 0) {
797                 PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
798                             ret);
799                 return ret;
800         }
801
802         return 0;
803 }
804
805 static int
806 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
807 {
808         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
809         int ret;
810
811         /* register a callback handler with UIO for interrupt notifications */
812         ret = rte_intr_callback_register(&pci_dev->intr_handle,
813                                          avp_dev_interrupt_handler,
814                                          (void *)eth_dev);
815         if (ret < 0) {
816                 PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
817                             ret);
818                 return ret;
819         }
820
821         /* enable interrupt processing */
822         return avp_dev_enable_interrupts(eth_dev);
823 }
824
825 static int
826 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
827 {
828         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
829         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
830         uint32_t value;
831
832         if (registers == NULL)
833                 return 0;
834
835         value = AVP_READ32(RTE_PTR_ADD(registers,
836                                        RTE_AVP_MIGRATION_STATUS_OFFSET));
837         if (value == RTE_AVP_MIGRATION_DETACHED) {
838                 /* migration is in progress; ack it if we have not already */
839                 AVP_WRITE32(value,
840                             RTE_PTR_ADD(registers,
841                                         RTE_AVP_MIGRATION_ACK_OFFSET));
842                 return 1;
843         }
844         return 0;
845 }
846
847 /*
848  * create an AVP device using the supplied device info by first translating it
849  * to guest address space(s).
850  */
851 static int
852 avp_dev_create(struct rte_pci_device *pci_dev,
853                struct rte_eth_dev *eth_dev)
854 {
855         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
856         struct rte_avp_device_info *host_info;
857         struct rte_mem_resource *resource;
858         unsigned int i;
859
860         resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
861         if (resource->addr == NULL) {
862                 PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
863                             RTE_AVP_PCI_DEVICE_BAR);
864                 return -EFAULT;
865         }
866         host_info = (struct rte_avp_device_info *)resource->addr;
867
868         if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
869                 avp_dev_version_check(host_info->version)) {
870                 PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
871                             host_info->magic, host_info->version,
872                             AVP_DPDK_DRIVER_VERSION);
873                 return -EINVAL;
874         }
875
876         PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
877                     RTE_AVP_GET_RELEASE_VERSION(host_info->version),
878                     RTE_AVP_GET_MAJOR_VERSION(host_info->version),
879                     RTE_AVP_GET_MINOR_VERSION(host_info->version));
880
881         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
882                     host_info->min_tx_queues, host_info->max_tx_queues);
883         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
884                     host_info->min_rx_queues, host_info->max_rx_queues);
885         PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
886                     host_info->features);
887
888         if (avp->magic != AVP_ETHDEV_MAGIC) {
889                 /*
890                  * First time initialization (i.e., not during a VM
891                  * migration)
892                  */
893                 memset(avp, 0, sizeof(*avp));
894                 avp->magic = AVP_ETHDEV_MAGIC;
895                 avp->dev_data = eth_dev->data;
896                 avp->port_id = eth_dev->data->port_id;
897                 avp->host_mbuf_size = host_info->mbuf_size;
898                 avp->host_features = host_info->features;
899                 rte_spinlock_init(&avp->lock);
900                 memcpy(&avp->ethaddr.addr_bytes[0],
901                        host_info->ethaddr, ETHER_ADDR_LEN);
902                 /* adjust max values to not exceed our max */
903                 avp->max_tx_queues =
904                         RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
905                 avp->max_rx_queues =
906                         RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
907         } else {
908                 /* Re-attaching during migration */
909
910                 /* TODO... requires validation of host values */
911                 if ((host_info->features & avp->features) != avp->features) {
912                         PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
913                                     avp->features, host_info->features);
914                         /* this should not be possible; continue for now */
915                 }
916         }
917
918         /* the device id is allowed to change over migrations */
919         avp->device_id = host_info->device_id;
920
921         /* translate incoming host addresses to guest address space */
922         PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
923                     host_info->tx_phys);
924         PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
925                     host_info->alloc_phys);
926         for (i = 0; i < avp->max_tx_queues; i++) {
927                 avp->tx_q[i] = avp_dev_translate_address(eth_dev,
928                         host_info->tx_phys + (i * host_info->tx_size));
929
930                 avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
931                         host_info->alloc_phys + (i * host_info->alloc_size));
932         }
933
934         PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
935                     host_info->rx_phys);
936         PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
937                     host_info->free_phys);
938         for (i = 0; i < avp->max_rx_queues; i++) {
939                 avp->rx_q[i] = avp_dev_translate_address(eth_dev,
940                         host_info->rx_phys + (i * host_info->rx_size));
941                 avp->free_q[i] = avp_dev_translate_address(eth_dev,
942                         host_info->free_phys + (i * host_info->free_size));
943         }
944
945         PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
946                     host_info->req_phys);
947         PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
948                     host_info->resp_phys);
949         PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
950                     host_info->sync_phys);
951         PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
952                     host_info->mbuf_phys);
953         avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
954         avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
955         avp->sync_addr =
956                 avp_dev_translate_address(eth_dev, host_info->sync_phys);
957         avp->mbuf_addr =
958                 avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
959
960         /*
961          * store the host mbuf virtual address so that we can calculate
962          * relative offsets for each mbuf as they are processed
963          */
964         avp->host_mbuf_addr = host_info->mbuf_va;
965         avp->host_sync_addr = host_info->sync_va;
966
967         /*
968          * store the maximum packet length that is supported by the host.
969          */
970         avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
971         PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
972                                 host_info->max_rx_pkt_len);
973
974         return 0;
975 }
976
977 /*
978  * This function is based on probe() function in avp_pci.c
979  * It returns 0 on success.
980  */
981 static int
982 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
983 {
984         struct avp_dev *avp =
985                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
986         struct rte_pci_device *pci_dev;
987         int ret;
988
989         pci_dev = AVP_DEV_TO_PCI(eth_dev);
990         eth_dev->dev_ops = &avp_eth_dev_ops;
991         eth_dev->rx_pkt_burst = &avp_recv_pkts;
992         eth_dev->tx_pkt_burst = &avp_xmit_pkts;
993
994         if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
995                 /*
996                  * no setup required on secondary processes.  All data is saved
997                  * in dev_private by the primary process. All resources should
998                  * be mapped to the same virtual address so all pointers should
999                  * be valid.
1000                  */
1001                 if (eth_dev->data->scattered_rx) {
1002                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1003                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1004                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1005                 }
1006                 return 0;
1007         }
1008
1009         rte_eth_copy_pci_info(eth_dev, pci_dev);
1010
1011         eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
1012
1013         /* Check current migration status */
1014         if (avp_dev_migration_pending(eth_dev)) {
1015                 PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
1016                 return -EBUSY;
1017         }
1018
1019         /* Check BAR resources */
1020         ret = avp_dev_check_regions(eth_dev);
1021         if (ret < 0) {
1022                 PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
1023                             ret);
1024                 return ret;
1025         }
1026
1027         /* Enable interrupts */
1028         ret = avp_dev_setup_interrupts(eth_dev);
1029         if (ret < 0) {
1030                 PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1031                 return ret;
1032         }
1033
1034         /* Handle each subtype */
1035         ret = avp_dev_create(pci_dev, eth_dev);
1036         if (ret < 0) {
1037                 PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1038                 return ret;
1039         }
1040
1041         /* Allocate memory for storing MAC addresses */
1042         eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
1043         if (eth_dev->data->mac_addrs == NULL) {
1044                 PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1045                             ETHER_ADDR_LEN);
1046                 return -ENOMEM;
1047         }
1048
1049         /* Get a mac from device config */
1050         ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1051
1052         return 0;
1053 }
1054
1055 static int
1056 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1057 {
1058         int ret;
1059
1060         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1061                 return -EPERM;
1062
1063         if (eth_dev->data == NULL)
1064                 return 0;
1065
1066         ret = avp_dev_disable_interrupts(eth_dev);
1067         if (ret != 0) {
1068                 PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret);
1069                 return ret;
1070         }
1071
1072         if (eth_dev->data->mac_addrs != NULL) {
1073                 rte_free(eth_dev->data->mac_addrs);
1074                 eth_dev->data->mac_addrs = NULL;
1075         }
1076
1077         return 0;
1078 }
1079
1080 static int
1081 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1082                   struct rte_pci_device *pci_dev)
1083 {
1084         struct rte_eth_dev *eth_dev;
1085         int ret;
1086
1087         eth_dev = rte_eth_dev_pci_allocate(pci_dev,
1088                                            sizeof(struct avp_adapter));
1089         if (eth_dev == NULL)
1090                 return -ENOMEM;
1091
1092         ret = eth_avp_dev_init(eth_dev);
1093         if (ret)
1094                 rte_eth_dev_pci_release(eth_dev);
1095
1096         return ret;
1097 }
1098
1099 static int
1100 eth_avp_pci_remove(struct rte_pci_device *pci_dev)
1101 {
1102         return rte_eth_dev_pci_generic_remove(pci_dev,
1103                                               eth_avp_dev_uninit);
1104 }
1105
1106 static struct rte_pci_driver rte_avp_pmd = {
1107         .id_table = pci_id_avp_map,
1108         .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1109         .probe = eth_avp_pci_probe,
1110         .remove = eth_avp_pci_remove,
1111 };
1112
1113 static int
1114 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1115                          struct avp_dev *avp)
1116 {
1117         unsigned int max_rx_pkt_len;
1118
1119         max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1120
1121         if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1122             (max_rx_pkt_len > avp->host_mbuf_size)) {
1123                 /*
1124                  * If the guest MTU is greater than either the host or guest
1125                  * buffers then chained mbufs have to be enabled in the TX
1126                  * direction.  It is assumed that the application will not need
1127                  * to send packets larger than their max_rx_pkt_len (MRU).
1128                  */
1129                 return 1;
1130         }
1131
1132         if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1133             (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1134                 /*
1135                  * If the host MRU is greater than its own mbuf size or the
1136                  * guest mbuf size then chained mbufs have to be enabled in the
1137                  * RX direction.
1138                  */
1139                 return 1;
1140         }
1141
1142         return 0;
1143 }
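/*
 * Example (hypothetical sizes): with roughly 2KB host and guest mbufs and a
 * configured max_rx_pkt_len of 9000, one of the tests above trips and the
 * scattered (chained-mbuf) receive/transmit handlers are installed by the
 * queue setup code below; with a 1500-byte maximum frame and the same mbuf
 * sizes, the single-mbuf fast paths remain in place.
 */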
1144
1145 static int
1146 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1147                        uint16_t rx_queue_id,
1148                        uint16_t nb_rx_desc,
1149                        unsigned int socket_id,
1150                        const struct rte_eth_rxconf *rx_conf,
1151                        struct rte_mempool *pool)
1152 {
1153         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1154         struct rte_pktmbuf_pool_private *mbp_priv;
1155         struct avp_queue *rxq;
1156
1157         if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1158                 PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1159                             rx_queue_id, eth_dev->data->nb_rx_queues);
1160                 return -EINVAL;
1161         }
1162
1163         /* Save mbuf pool pointer */
1164         avp->pool = pool;
1165
1166         /* Save the local mbuf size */
1167         mbp_priv = rte_mempool_get_priv(pool);
1168         avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1169         avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1170
1171         if (avp_dev_enable_scattered(eth_dev, avp)) {
1172                 if (!eth_dev->data->scattered_rx) {
1173                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1174                         eth_dev->data->scattered_rx = 1;
1175                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1176                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1177                 }
1178         }
1179
1180         PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1181                     avp->max_rx_pkt_len,
1182                     eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1183                     avp->host_mbuf_size,
1184                     avp->guest_mbuf_size);
1185
1186         /* allocate a queue object */
1187         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1188                                  RTE_CACHE_LINE_SIZE, socket_id);
1189         if (rxq == NULL) {
1190                 PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1191                 return -ENOMEM;
1192         }
1193
1194         /* save back pointers to AVP and Ethernet devices */
1195         rxq->avp = avp;
1196         rxq->dev_data = eth_dev->data;
1197         eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1198
1199         /* setup the queue receive mapping for the current queue. */
1200         _avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1201
1202         PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1203
1204         (void)nb_rx_desc;
1205         (void)rx_conf;
1206         return 0;
1207 }
1208
1209 static int
1210 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1211                        uint16_t tx_queue_id,
1212                        uint16_t nb_tx_desc,
1213                        unsigned int socket_id,
1214                        const struct rte_eth_txconf *tx_conf)
1215 {
1216         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1217         struct avp_queue *txq;
1218
1219         if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1220                 PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1221                             tx_queue_id, eth_dev->data->nb_tx_queues);
1222                 return -EINVAL;
1223         }
1224
1225         /* allocate a queue object */
1226         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1227                                  RTE_CACHE_LINE_SIZE, socket_id);
1228         if (txq == NULL) {
1229                 PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1230                 return -ENOMEM;
1231         }
1232
1233         /* only the configured set of transmit queues is used */
1234         txq->queue_id = tx_queue_id;
1235         txq->queue_base = tx_queue_id;
1236         txq->queue_limit = tx_queue_id;
1237
1238         /* save back pointers to AVP and Ethernet devices */
1239         txq->avp = avp;
1240         txq->dev_data = eth_dev->data;
1241         eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1242
1243         PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1244
1245         (void)nb_tx_desc;
1246         (void)tx_conf;
1247         return 0;
1248 }
1249
1250 static inline int
1251 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
1252 {
1253         uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1254         uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1255         return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1256 }
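/*
 * The comparison above treats the 6-byte MAC address as three 16-bit words
 * and ORs together the XOR of each pair, so it returns zero only when all
 * six bytes match; any non-zero result means "different".
 */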
1257
1258 static inline int
1259 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1260 {
1261         struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
1262
1263         if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1264                 /* allow all packets destined to our address */
1265                 return 0;
1266         }
1267
1268         if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
1269                 /* allow all broadcast packets */
1270                 return 0;
1271         }
1272
1273         if (likely(is_multicast_ether_addr(&eth->d_addr))) {
1274                 /* allow all multicast packets */
1275                 return 0;
1276         }
1277
1278         if (avp->flags & AVP_F_PROMISC) {
1279                 /* allow all packets when in promiscuous mode */
1280                 return 0;
1281         }
1282
1283         return -1;
1284 }
1285
1286 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1287 static inline void
1288 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
1289 {
1290         struct rte_avp_desc *first_buf;
1291         struct rte_avp_desc *pkt_buf;
1292         unsigned int pkt_len;
1293         unsigned int nb_segs;
1294         void *pkt_data;
1295         unsigned int i;
1296
1297         first_buf = avp_dev_translate_buffer(avp, buf);
1298
1299         i = 0;
1300         pkt_len = 0;
1301         nb_segs = first_buf->nb_segs;
1302         do {
1303                 /* Adjust pointers for guest addressing */
1304                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1305                 if (pkt_buf == NULL)
1306                         rte_panic("bad buffer: segment %u has an invalid address %p\n",
1307                                   i, buf);
1308                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1309                 if (pkt_data == NULL)
1310                         rte_panic("bad buffer: segment %u has a NULL data pointer\n",
1311                                   i);
1312                 if (pkt_buf->data_len == 0)
1313                         rte_panic("bad buffer: segment %u has 0 data length\n",
1314                                   i);
1315                 pkt_len += pkt_buf->data_len;
1316                 nb_segs--;
1317                 i++;
1318
1319         } while (nb_segs && (buf = pkt_buf->next) != NULL);
1320
1321         if (nb_segs != 0)
1322                 rte_panic("bad buffer: expected %u segments found %u\n",
1323                           first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
1324         if (pkt_len != first_buf->pkt_len)
1325                 rte_panic("bad buffer: expected length %u found %u\n",
1326                           first_buf->pkt_len, pkt_len);
1327 }
1328
1329 #define avp_dev_buffer_sanity_check(a, b) \
1330         __avp_dev_buffer_sanity_check((a), (b))
1331
1332 #else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */
1333
1334 #define avp_dev_buffer_sanity_check(a, b) do {} while (0)
1335
1336 #endif
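/*
 * When RTE_LIBRTE_AVP_DEBUG_BUFFERS is not defined the sanity check above
 * compiles away to an empty statement, so the fast-path receive and
 * transmit routines pay no cost for it in production builds.
 */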
1337
1338 /*
1339  * Copy a host buffer chain to a set of mbufs.  This function assumes that
1340  * there exactly the required number of mbufs to copy all source bytes.
1341  * there are exactly the required number of mbufs to copy all source bytes.
1342 static inline struct rte_mbuf *
1343 avp_dev_copy_from_buffers(struct avp_dev *avp,
1344                           struct rte_avp_desc *buf,
1345                           struct rte_mbuf **mbufs,
1346                           unsigned int count)
1347 {
1348         struct rte_mbuf *m_previous = NULL;
1349         struct rte_avp_desc *pkt_buf;
1350         unsigned int total_length = 0;
1351         unsigned int copy_length;
1352         unsigned int src_offset;
1353         struct rte_mbuf *m;
1354         uint16_t ol_flags;
1355         uint16_t vlan_tci;
1356         void *pkt_data;
1357         unsigned int i;
1358
1359         avp_dev_buffer_sanity_check(avp, buf);
1360
1361         /* setup the first source buffer */
1362         pkt_buf = avp_dev_translate_buffer(avp, buf);
1363         pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1364         total_length = pkt_buf->pkt_len;
1365         src_offset = 0;
1366
1367         if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1368                 ol_flags = PKT_RX_VLAN_PKT;
1369                 vlan_tci = pkt_buf->vlan_tci;
1370         } else {
1371                 ol_flags = 0;
1372                 vlan_tci = 0;
1373         }
1374
1375         for (i = 0; (i < count) && (buf != NULL); i++) {
1376                 /* fill each destination buffer */
1377                 m = mbufs[i];
1378
1379                 if (m_previous != NULL)
1380                         m_previous->next = m;
1381
1382                 m_previous = m;
1383
1384                 do {
1385                         /*
1386                          * Copy as many source buffers as will fit in the
1387                          * destination buffer.
1388                          */
1389                         copy_length = RTE_MIN((avp->guest_mbuf_size -
1390                                                rte_pktmbuf_data_len(m)),
1391                                               (pkt_buf->data_len -
1392                                                src_offset));
1393                         rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1394                                                rte_pktmbuf_data_len(m)),
1395                                    RTE_PTR_ADD(pkt_data, src_offset),
1396                                    copy_length);
1397                         rte_pktmbuf_data_len(m) += copy_length;
1398                         src_offset += copy_length;
1399
1400                         if (likely(src_offset == pkt_buf->data_len)) {
1401                                 /* need a new source buffer */
1402                                 buf = pkt_buf->next;
1403                                 if (buf != NULL) {
1404                                         pkt_buf = avp_dev_translate_buffer(
1405                                                 avp, buf);
1406                                         pkt_data = avp_dev_translate_buffer(
1407                                                 avp, pkt_buf->data);
1408                                         src_offset = 0;
1409                                 }
1410                         }
1411
1412                         if (unlikely(rte_pktmbuf_data_len(m) ==
1413                                      avp->guest_mbuf_size)) {
1414                                 /* need a new destination mbuf */
1415                                 break;
1416                         }
1417
1418                 } while (buf != NULL);
1419         }
1420
1421         m = mbufs[0];
1422         m->ol_flags = ol_flags;
1423         m->nb_segs = count;
1424         rte_pktmbuf_pkt_len(m) = total_length;
1425         m->vlan_tci = vlan_tci;
1426
1427         __rte_mbuf_sanity_check(m, 1);
1428
1429         return m;
1430 }
1431
1432 static uint16_t
1433 avp_recv_scattered_pkts(void *rx_queue,
1434                         struct rte_mbuf **rx_pkts,
1435                         uint16_t nb_pkts)
1436 {
1437         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1438         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1439         struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1440         struct avp_dev *avp = rxq->avp;
1441         struct rte_avp_desc *pkt_buf;
1442         struct rte_avp_fifo *free_q;
1443         struct rte_avp_fifo *rx_q;
1444         struct rte_avp_desc *buf;
1445         unsigned int count, avail, n;
1446         unsigned int guest_mbuf_size;
1447         struct rte_mbuf *m;
1448         unsigned int required;
1449         unsigned int buf_len;
1450         unsigned int port_id;
1451         unsigned int i;
1452
1453         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1454                 /* VM live migration in progress */
1455                 return 0;
1456         }
1457
1458         guest_mbuf_size = avp->guest_mbuf_size;
1459         port_id = avp->port_id;
1460         rx_q = avp->rx_q[rxq->queue_id];
1461         free_q = avp->free_q[rxq->queue_id];
1462
1463         /* setup next queue to service */
1464         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1465                 (rxq->queue_id + 1) : rxq->queue_base;
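             /*
              * i.e. the host receive queues in the range
              * [queue_base, queue_limit] assigned to this guest queue are
              * serviced in round-robin order across successive calls.
              */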
1466
1467         /* determine how many slots are available in the free queue */
1468         count = avp_fifo_free_count(free_q);
1469
1470         /* determine how many packets are available in the rx queue */
1471         avail = avp_fifo_count(rx_q);
1472
1473         /* determine how many packets can be received */
1474         count = RTE_MIN(count, avail);
1475         count = RTE_MIN(count, nb_pkts);
1476         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
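             /*
              * i.e. the burst is bounded by the free-queue space (so the
              * descriptors dequeued from rx_q can be handed back to the host
              * below), by the packets pending on rx_q, by the caller's
              * request, and by the size of the local avp_bufs[] array.
              */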
1477
1478         if (unlikely(count == 0)) {
1479                 /* no free buffers, or no buffers on the rx queue */
1480                 return 0;
1481         }
1482
1483         /* retrieve pending packets */
1484         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1485         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1486                    count, rx_q);
1487
1488         count = 0;
1489         for (i = 0; i < n; i++) {
1490                 /* prefetch next entry while processing current one */
1491                 if (i + 1 < n) {
1492                         pkt_buf = avp_dev_translate_buffer(avp,
1493                                                            avp_bufs[i + 1]);
1494                         rte_prefetch0(pkt_buf);
1495                 }
1496                 buf = avp_bufs[i];
1497
1498                 /* Peek into the first buffer to determine the total length */
1499                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1500                 buf_len = pkt_buf->pkt_len;
1501
1502                 /* Allocate enough mbufs to receive the entire packet */
1503                 required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1504                 if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1505                         rxq->dev_data->rx_mbuf_alloc_failed++;
1506                         continue;
1507                 }
1508
1509                 /* Copy the data from the buffers to our mbufs */
1510                 m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1511
1512                 /* finalize mbuf */
1513                 m->port = port_id;
1514
1515                 if (_avp_mac_filter(avp, m) != 0) {
1516                         /* silently discard packets not destined to our MAC */
1517                         rte_pktmbuf_free(m);
1518                         continue;
1519                 }
1520
1521                 /* return new mbuf to caller */
1522                 rx_pkts[count++] = m;
1523                 rxq->bytes += buf_len;
1524         }
1525
1526         rxq->packets += count;
1527
1528         /* return the buffers to the free queue */
1529         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1530
1531         return count;
1532 }
1533
1534
1535 static uint16_t
1536 avp_recv_pkts(void *rx_queue,
1537               struct rte_mbuf **rx_pkts,
1538               uint16_t nb_pkts)
1539 {
1540         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1541         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1542         struct avp_dev *avp = rxq->avp;
1543         struct rte_avp_desc *pkt_buf;
1544         struct rte_avp_fifo *free_q;
1545         struct rte_avp_fifo *rx_q;
1546         unsigned int count, avail, n;
1547         unsigned int pkt_len;
1548         struct rte_mbuf *m;
1549         char *pkt_data;
1550         unsigned int i;
1551
1552         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1553                 /* VM live migration in progress */
1554                 return 0;
1555         }
1556
1557         rx_q = avp->rx_q[rxq->queue_id];
1558         free_q = avp->free_q[rxq->queue_id];
1559
1560         /* setup next queue to service */
1561         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1562                 (rxq->queue_id + 1) : rxq->queue_base;
1563
1564         /* determine how many slots are available in the free queue */
1565         count = avp_fifo_free_count(free_q);
1566
1567         /* determine how many packets are available in the rx queue */
1568         avail = avp_fifo_count(rx_q);
1569
1570         /* determine how many packets can be received */
1571         count = RTE_MIN(count, avail);
1572         count = RTE_MIN(count, nb_pkts);
1573         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1574
1575         if (unlikely(count == 0)) {
1576                 /* no free buffers, or no buffers on the rx queue */
1577                 return 0;
1578         }
1579
1580         /* retrieve pending packets */
1581         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1582         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1583                    count, rx_q);
1584
1585         count = 0;
1586         for (i = 0; i < n; i++) {
1587                 /* prefetch next entry while processing current one */
1588                 if (i < n - 1) {
1589                         pkt_buf = avp_dev_translate_buffer(avp,
1590                                                            avp_bufs[i + 1]);
1591                         rte_prefetch0(pkt_buf);
1592                 }
1593
1594                 /* Adjust host pointers for guest addressing */
1595                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1596                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1597                 pkt_len = pkt_buf->pkt_len;
1598
1599                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1600                              (pkt_buf->nb_segs > 1))) {
1601                         /*
1602                          * application should be using the scattered receive
1603                          * function
1604                          */
1605                         rxq->errors++;
1606                         continue;
1607                 }
1608
1609         /* allocate an mbuf for the received packet */
1610                 m = rte_pktmbuf_alloc(avp->pool);
1611                 if (unlikely(m == NULL)) {
1612                         rxq->dev_data->rx_mbuf_alloc_failed++;
1613                         continue;
1614                 }
1615
1616                 /* copy data out of the host buffer to our buffer */
1617                 m->data_off = RTE_PKTMBUF_HEADROOM;
1618                 rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1619
1620                 /* initialize the local mbuf */
1621                 rte_pktmbuf_data_len(m) = pkt_len;
1622                 rte_pktmbuf_pkt_len(m) = pkt_len;
1623                 m->port = avp->port_id;
1624
1625                 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1626                         m->ol_flags = PKT_RX_VLAN_PKT;
1627                         m->vlan_tci = pkt_buf->vlan_tci;
1628                 }
1629
1630                 if (_avp_mac_filter(avp, m) != 0) {
1631                         /* silently discard packets not destined to our MAC */
1632                         rte_pktmbuf_free(m);
1633                         continue;
1634                 }
1635
1636                 /* return new mbuf to caller */
1637                 rx_pkts[count++] = m;
1638                 rxq->bytes += pkt_len;
1639         }
1640
1641         rxq->packets += count;
1642
1643         /* return the buffers to the free queue */
1644         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1645
1646         return count;
1647 }
1648
1649 /*
1650  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1651  * there are sufficient destination buffers to contain the entire source
1652  * packet.
1653  */
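     /*
      * Note: callers are expected to size the destination array with a
      * ceiling division of the packet length by the host mbuf size, i.e.
      *
      *   count = (rte_pktmbuf_pkt_len(m) + host_mbuf_size - 1) / host_mbuf_size;
      *
      * as avp_xmit_scattered_pkts() does below.
      */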
1654 static inline uint16_t
1655 avp_dev_copy_to_buffers(struct avp_dev *avp,
1656                         struct rte_mbuf *mbuf,
1657                         struct rte_avp_desc **buffers,
1658                         unsigned int count)
1659 {
1660         struct rte_avp_desc *previous_buf = NULL;
1661         struct rte_avp_desc *first_buf = NULL;
1662         struct rte_avp_desc *pkt_buf;
1663         struct rte_avp_desc *buf;
1664         size_t total_length;
1665         struct rte_mbuf *m;
1666         size_t copy_length;
1667         size_t src_offset;
1668         char *pkt_data;
1669         unsigned int i;
1670
1671         __rte_mbuf_sanity_check(mbuf, 1);
1672
1673         m = mbuf;
1674         src_offset = 0;
1675         total_length = rte_pktmbuf_pkt_len(m);
1676         for (i = 0; (i < count) && (m != NULL); i++) {
1677                 /* fill each destination buffer */
1678                 buf = buffers[i];
1679
1680                 if (i < count - 1) {
1681                         /* prefetch next entry while processing this one */
1682                         pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1683                         rte_prefetch0(pkt_buf);
1684                 }
1685
1686                 /* Adjust pointers for guest addressing */
1687                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1688                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1689
1690                 /* setup the buffer chain */
1691                 if (previous_buf != NULL)
1692                         previous_buf->next = buf;
1693                 else
1694                         first_buf = pkt_buf;
1695
1696                 previous_buf = pkt_buf;
1697
1698                 do {
1699                         /*
1700                          * copy as many source mbuf segments as will fit in the
1701                          * destination buffer.
1702                          */
1703                         copy_length = RTE_MIN((avp->host_mbuf_size -
1704                                                pkt_buf->data_len),
1705                                               (rte_pktmbuf_data_len(m) -
1706                                                src_offset));
1707                         rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1708                                    RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1709                                                src_offset),
1710                                    copy_length);
1711                         pkt_buf->data_len += copy_length;
1712                         src_offset += copy_length;
1713
1714                         if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1715                                 /* need a new source buffer */
1716                                 m = m->next;
1717                                 src_offset = 0;
1718                         }
1719
1720                         if (unlikely(pkt_buf->data_len ==
1721                                      avp->host_mbuf_size)) {
1722                                 /* need a new destination buffer */
1723                                 break;
1724                         }
1725
1726                 } while (m != NULL);
1727         }
1728
1729         first_buf->nb_segs = count;
1730         first_buf->pkt_len = total_length;
1731
1732         if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1733                 first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1734                 first_buf->vlan_tci = mbuf->vlan_tci;
1735         }
1736
1737         avp_dev_buffer_sanity_check(avp, buffers[0]);
1738
1739         return total_length;
1740 }
1741
1742
1743 static uint16_t
1744 avp_xmit_scattered_pkts(void *tx_queue,
1745                         struct rte_mbuf **tx_pkts,
1746                         uint16_t nb_pkts)
1747 {
1748         struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1749                                        RTE_AVP_MAX_MBUF_SEGMENTS)];
1750         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1751         struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1752         struct avp_dev *avp = txq->avp;
1753         struct rte_avp_fifo *alloc_q;
1754         struct rte_avp_fifo *tx_q;
1755         unsigned int count, avail, n;
1756         unsigned int orig_nb_pkts;
1757         struct rte_mbuf *m;
1758         unsigned int required;
1759         unsigned int segments;
1760         unsigned int tx_bytes;
1761         unsigned int i;
1762
1763         orig_nb_pkts = nb_pkts;
1764         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1765                 /* VM live migration in progress */
1766                 /* TODO ... buffer for X packets then drop? */
1767                 txq->errors += nb_pkts;
1768                 return 0;
1769         }
1770
1771         tx_q = avp->tx_q[txq->queue_id];
1772         alloc_q = avp->alloc_q[txq->queue_id];
1773
1774         /* limit the number of transmitted packets to the max burst size */
1775         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1776                 nb_pkts = AVP_MAX_TX_BURST;
1777
1778         /* determine how many buffers are available to copy into */
1779         avail = avp_fifo_count(alloc_q);
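             /* never use more buffers than the local avp_bufs[] array can hold */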
1780         if (unlikely(avail > (AVP_MAX_TX_BURST *
1781                               RTE_AVP_MAX_MBUF_SEGMENTS)))
1782                 avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1783
1784         /* determine how many slots are available in the transmit queue */
1785         count = avp_fifo_free_count(tx_q);
1786
1787         /* determine how many packets can be sent */
1788         nb_pkts = RTE_MIN(count, nb_pkts);
1789
1790         /* determine how many packets will fit in the available buffers */
1791         count = 0;
1792         segments = 0;
1793         for (i = 0; i < nb_pkts; i++) {
1794                 m = tx_pkts[i];
1795                 if (likely(i < (unsigned int)nb_pkts - 1)) {
1796                         /* prefetch next entry while processing this one */
1797                         rte_prefetch0(tx_pkts[i + 1]);
1798                 }
1799                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1800                         avp->host_mbuf_size;
1801
1802                 if (unlikely((required == 0) ||
1803                              (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1804                         break;
1805                 else if (unlikely(required + segments > avail))
1806                         break;
1807                 segments += required;
1808                 count++;
1809         }
1810         nb_pkts = count;
1811
1812         if (unlikely(nb_pkts == 0)) {
1813                 /* no available buffers, or no space on the tx queue */
1814                 txq->errors += orig_nb_pkts;
1815                 return 0;
1816         }
1817
1818         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1819                    nb_pkts, tx_q);
1820
1821         /* retrieve sufficient send buffers */
1822         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1823         if (unlikely(n != segments)) {
1824                 PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1825                            "n=%u, segments=%u, orig=%u\n",
1826                            n, segments, orig_nb_pkts);
1827                 txq->errors += orig_nb_pkts;
1828                 return 0;
1829         }
1830
1831         tx_bytes = 0;
1832         count = 0;
1833         for (i = 0; i < nb_pkts; i++) {
1834                 /* process each packet to be transmitted */
1835                 m = tx_pkts[i];
1836
1837                 /* determine how many buffers are required for this packet */
1838                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1839                         avp->host_mbuf_size;
1840
1841                 tx_bytes += avp_dev_copy_to_buffers(avp, m,
1842                                                     &avp_bufs[count], required);
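                     /*
                      * only the head descriptor of each chain is queued on
                      * tx_q; the remaining (required - 1) descriptors were
                      * linked to it by avp_dev_copy_to_buffers().
                      */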
1843                 tx_bufs[i] = avp_bufs[count];
1844                 count += required;
1845
1846                 /* free the original mbuf */
1847                 rte_pktmbuf_free(m);
1848         }
1849
1850         txq->packets += nb_pkts;
1851         txq->bytes += tx_bytes;
1852
1853 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1854         for (i = 0; i < nb_pkts; i++)
1855                 avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1856 #endif
1857
1858         /* send the packets */
1859         n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1860         if (unlikely(n != orig_nb_pkts))
1861                 txq->errors += (orig_nb_pkts - n);
1862
1863         return n;
1864 }
1865
1866
1867 static uint16_t
1868 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1869 {
1870         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1871         struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1872         struct avp_dev *avp = txq->avp;
1873         struct rte_avp_desc *pkt_buf;
1874         struct rte_avp_fifo *alloc_q;
1875         struct rte_avp_fifo *tx_q;
1876         unsigned int count, avail, n;
1877         struct rte_mbuf *m;
1878         unsigned int pkt_len;
1879         unsigned int tx_bytes;
1880         char *pkt_data;
1881         unsigned int i;
1882
1883         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1884                 /* VM live migration in progress */
1885                 /* TODO ... buffer for X packets then drop?! */
1886                 txq->errors++;
1887                 return 0;
1888         }
1889
1890         tx_q = avp->tx_q[txq->queue_id];
1891         alloc_q = avp->alloc_q[txq->queue_id];
1892
1893         /* limit the number of transmitted packets to the max burst size */
1894         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1895                 nb_pkts = AVP_MAX_TX_BURST;
1896
1897         /* determine how many buffers are available to copy into */
1898         avail = avp_fifo_count(alloc_q);
1899
1900         /* determine how many slots are available in the transmit queue */
1901         count = avp_fifo_free_count(tx_q);
1902
1903         /* determine how many packets can be sent */
1904         count = RTE_MIN(count, avail);
1905         count = RTE_MIN(count, nb_pkts);
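             /*
              * i.e. in this non-scattered path each packet consumes exactly
              * one host buffer and one tx queue slot.
              */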
1906
1907         if (unlikely(count == 0)) {
1908                 /* no available buffers, or no space on the tx queue */
1909                 txq->errors += nb_pkts;
1910                 return 0;
1911         }
1912
1913         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1914                    count, tx_q);
1915
1916         /* retrieve sufficient send buffers */
1917         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1918         if (unlikely(n != count)) {
1919                 txq->errors++;
1920                 return 0;
1921         }
1922
1923         tx_bytes = 0;
1924         for (i = 0; i < count; i++) {
1925                 /* prefetch next entry while processing the current one */
1926                 if (i < count - 1) {
1927                         pkt_buf = avp_dev_translate_buffer(avp,
1928                                                            avp_bufs[i + 1]);
1929                         rte_prefetch0(pkt_buf);
1930                 }
1931
1932                 /* process each packet to be transmitted */
1933                 m = tx_pkts[i];
1934
1935                 /* Adjust pointers for guest addressing */
1936                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1937                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1938                 pkt_len = rte_pktmbuf_pkt_len(m);
1939
1940                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1941                                          (pkt_len > avp->host_mbuf_size))) {
1942                         /*
1943                          * application should be using the scattered transmit
1944                          * function; send it truncated to avoid the performance
1945                          * hit of having to manage returning the already
1946                          * allocated buffer to the free list.  This should not
1947                          * happen since the application should have set the
1948                          * max_rx_pkt_len based on its MTU and it should be
1949                          * policing its own packet sizes.
1950                          */
1951                         txq->errors++;
1952                         pkt_len = RTE_MIN(avp->guest_mbuf_size,
1953                                           avp->host_mbuf_size);
1954                 }
1955
1956                 /* copy data out of our mbuf and into the AVP buffer */
1957                 rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1958                 pkt_buf->pkt_len = pkt_len;
1959                 pkt_buf->data_len = pkt_len;
1960                 pkt_buf->nb_segs = 1;
1961                 pkt_buf->next = NULL;
1962
1963                 if (m->ol_flags & PKT_TX_VLAN_PKT) {
1964                         pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1965                         pkt_buf->vlan_tci = m->vlan_tci;
1966                 }
1967
1968                 tx_bytes += pkt_len;
1969
1970                 /* free the original mbuf */
1971                 rte_pktmbuf_free(m);
1972         }
1973
1974         txq->packets += count;
1975         txq->bytes += tx_bytes;
1976
1977         /* send the packets */
1978         n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1979
1980         return n;
1981 }
1982
1983 static void
1984 avp_dev_rx_queue_release(void *rx_queue)
1985 {
1986         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1987         struct avp_dev *avp = rxq->avp;
1988         struct rte_eth_dev_data *data = avp->dev_data;
1989         unsigned int i;
1990
1991         for (i = 0; i < avp->num_rx_queues; i++) {
1992                 if (data->rx_queues[i] == rxq)
1993                         data->rx_queues[i] = NULL;
1994         }
1995 }
1996
1997 static void
1998 avp_dev_tx_queue_release(void *tx_queue)
1999 {
2000         struct avp_queue *txq = (struct avp_queue *)tx_queue;
2001         struct avp_dev *avp = txq->avp;
2002         struct rte_eth_dev_data *data = avp->dev_data;
2003         unsigned int i;
2004
2005         for (i = 0; i < avp->num_tx_queues; i++) {
2006                 if (data->tx_queues[i] == txq)
2007                         data->tx_queues[i] = NULL;
2008         }
2009 }
2010
2011 static int
2012 avp_dev_configure(struct rte_eth_dev *eth_dev)
2013 {
2014         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
2015         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2016         struct rte_avp_device_info *host_info;
2017         struct rte_avp_device_config config;
2018         int mask = 0;
2019         void *addr;
2020         int ret;
2021
2022         rte_spinlock_lock(&avp->lock);
2023         if (avp->flags & AVP_F_DETACHED) {
2024                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2025                 ret = -ENOTSUP;
2026                 goto unlock;
2027         }
2028
2029         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2030         host_info = (struct rte_avp_device_info *)addr;
2031
2032         /* Setup required number of queues */
2033         _avp_set_queue_counts(eth_dev);
2034
2035         mask = (ETH_VLAN_STRIP_MASK |
2036                 ETH_VLAN_FILTER_MASK |
2037                 ETH_VLAN_EXTEND_MASK);
2038         avp_vlan_offload_set(eth_dev, mask);
2039
2040         /* update device config */
2041         memset(&config, 0, sizeof(config));
2042         config.device_id = host_info->device_id;
2043         config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2044         config.driver_version = AVP_DPDK_DRIVER_VERSION;
2045         config.features = avp->features;
2046         config.num_tx_queues = avp->num_tx_queues;
2047         config.num_rx_queues = avp->num_rx_queues;
2048
2049         ret = avp_dev_ctrl_set_config(eth_dev, &config);
2050         if (ret < 0) {
2051                 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2052                             ret);
2053                 goto unlock;
2054         }
2055
2056         avp->flags |= AVP_F_CONFIGURED;
2057         ret = 0;
2058
2059 unlock:
2060         rte_spinlock_unlock(&avp->lock);
2061         return ret;
2062 }
2063
2064 static int
2065 avp_dev_start(struct rte_eth_dev *eth_dev)
2066 {
2067         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2068         int ret;
2069
2070         rte_spinlock_lock(&avp->lock);
2071         if (avp->flags & AVP_F_DETACHED) {
2072                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2073                 ret = -ENOTSUP;
2074                 goto unlock;
2075         }
2076
2077         /* disable features that we do not support */
2078         eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0;
2079         eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0;
2080         eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0;
2081         eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0;
2082
2083         /* update link state */
2084         ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2085         if (ret < 0) {
2086                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2087                             ret);
2088                 goto unlock;
2089         }
2090
2091         /* remember current link state */
2092         avp->flags |= AVP_F_LINKUP;
2093
2094         ret = 0;
2095
2096 unlock:
2097         rte_spinlock_unlock(&avp->lock);
2098         return ret;
2099 }
2100
2101 static void
2102 avp_dev_stop(struct rte_eth_dev *eth_dev)
2103 {
2104         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2105         int ret;
2106
2107         rte_spinlock_lock(&avp->lock);
2108         if (avp->flags & AVP_F_DETACHED) {
2109                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2110                 goto unlock;
2111         }
2112
2113         /* remember current link state */
2114         avp->flags &= ~AVP_F_LINKUP;
2115
2116         /* update link state */
2117         ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2118         if (ret < 0) {
2119                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2120                             ret);
2121         }
2122
2123 unlock:
2124         rte_spinlock_unlock(&avp->lock);
2125 }
2126
2127 static void
2128 avp_dev_close(struct rte_eth_dev *eth_dev)
2129 {
2130         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2131         int ret;
2132
2133         rte_spinlock_lock(&avp->lock);
2134         if (avp->flags & AVP_F_DETACHED) {
2135                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2136                 goto unlock;
2137         }
2138
2139         /* remember current link state */
2140         avp->flags &= ~AVP_F_LINKUP;
2141         avp->flags &= ~AVP_F_CONFIGURED;
2142
2143         ret = avp_dev_disable_interrupts(eth_dev);
2144         if (ret < 0) {
2145                 PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2146                 /* continue */
2147         }
2148
2149         /* update device state */
2150         ret = avp_dev_ctrl_shutdown(eth_dev);
2151         if (ret < 0) {
2152                 PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2153                             ret);
2154                 /* continue */
2155         }
2156
2157 unlock:
2158         rte_spinlock_unlock(&avp->lock);
2159 }
2160
2161 static int
2162 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2163                                         __rte_unused int wait_to_complete)
2164 {
2165         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2166         struct rte_eth_link *link = &eth_dev->data->dev_link;
2167
2168         link->link_speed = ETH_SPEED_NUM_10G;
2169         link->link_duplex = ETH_LINK_FULL_DUPLEX;
2170         link->link_status = !!(avp->flags & AVP_F_LINKUP);
2171
2172         return -1;
2173 }
2174
2175 static void
2176 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2177 {
2178         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2179
2180         rte_spinlock_lock(&avp->lock);
2181         if ((avp->flags & AVP_F_PROMISC) == 0) {
2182                 avp->flags |= AVP_F_PROMISC;
2183                 PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2184                             eth_dev->data->port_id);
2185         }
2186         rte_spinlock_unlock(&avp->lock);
2187 }
2188
2189 static void
2190 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2191 {
2192         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2193
2194         rte_spinlock_lock(&avp->lock);
2195         if ((avp->flags & AVP_F_PROMISC) != 0) {
2196                 avp->flags &= ~AVP_F_PROMISC;
2197                 PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2198                             eth_dev->data->port_id);
2199         }
2200         rte_spinlock_unlock(&avp->lock);
2201 }
2202
2203 static void
2204 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2205                  struct rte_eth_dev_info *dev_info)
2206 {
2207         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2208
2209         dev_info->driver_name = "rte_avp_pmd";
2210         dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
2211         dev_info->max_rx_queues = avp->max_rx_queues;
2212         dev_info->max_tx_queues = avp->max_tx_queues;
2213         dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2214         dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2215         dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2216         if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2217                 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2218                 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2219         }
2220 }
2221
2222 static void
2223 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2224 {
2225         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2226
2227         if (mask & ETH_VLAN_STRIP_MASK) {
2228                 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2229                         if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
2230                                 avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2231                         else
2232                                 avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2233                 } else {
2234                         PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2235                 }
2236         }
2237
2238         if (mask & ETH_VLAN_FILTER_MASK) {
2239                 if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
2240                         PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2241         }
2242
2243         if (mask & ETH_VLAN_EXTEND_MASK) {
2244                 if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
2245                         PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2246         }
2247 }
2248
2249 static void
2250 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2251 {
2252         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2253         unsigned int i;
2254
2255         for (i = 0; i < avp->num_rx_queues; i++) {
2256                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2257
2258                 if (rxq) {
2259                         stats->ipackets += rxq->packets;
2260                         stats->ibytes += rxq->bytes;
2261                         stats->ierrors += rxq->errors;
2262
2263                         stats->q_ipackets[i] += rxq->packets;
2264                         stats->q_ibytes[i] += rxq->bytes;
2265                         stats->q_errors[i] += rxq->errors;
2266                 }
2267         }
2268
2269         for (i = 0; i < avp->num_tx_queues; i++) {
2270                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2271
2272                 if (txq) {
2273                         stats->opackets += txq->packets;
2274                         stats->obytes += txq->bytes;
2275                         stats->oerrors += txq->errors;
2276
2277                         stats->q_opackets[i] += txq->packets;
2278                         stats->q_obytes[i] += txq->bytes;
2279                         stats->q_errors[i] += txq->errors;
2280                 }
2281         }
2282 }
2283
2284 static void
2285 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2286 {
2287         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2288         unsigned int i;
2289
2290         for (i = 0; i < avp->num_rx_queues; i++) {
2291                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2292
2293                 if (rxq) {
2294                         rxq->bytes = 0;
2295                         rxq->packets = 0;
2296                         rxq->errors = 0;
2297                 }
2298         }
2299
2300         for (i = 0; i < avp->num_tx_queues; i++) {
2301                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2302
2303                 if (txq) {
2304                         txq->bytes = 0;
2305                         txq->packets = 0;
2306                         txq->errors = 0;
2307                 }
2308         }
2309 }
2310
2311 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
2312 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);