[deb_dpdk.git] / drivers / net / vhost / rte_eth_vhost.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) 2016 IGEL Co., Ltd.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of IGEL Co.,Ltd. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <unistd.h>
34 #include <pthread.h>
35 #include <stdbool.h>
#include <errno.h>   /* errno is checked directly in open_queues() */
#include <limits.h>  /* USHRT_MAX is used directly in open_queues() */
36 #ifdef RTE_LIBRTE_VHOST_NUMA
37 #include <numaif.h>
38 #endif
39
40 #include <rte_mbuf.h>
41 #include <rte_ethdev.h>
42 #include <rte_malloc.h>
43 #include <rte_memcpy.h>
44 #include <rte_dev.h>
45 #include <rte_kvargs.h>
46 #include <rte_virtio_net.h>
47 #include <rte_spinlock.h>
48
49 #include "rte_eth_vhost.h"
50
51 #define ETH_VHOST_IFACE_ARG             "iface"
52 #define ETH_VHOST_QUEUES_ARG            "queues"
53
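/*
 * Devargs understood by this PMD.  A port is usually created from the EAL
 * command line, e.g. (illustrative invocation, socket path is hypothetical):
 *   --vdev 'eth_vhost0,iface=/tmp/sock0,queues=1'
 * "iface" names the vhost-user socket to register and "queues" sets the
 * number of rx/tx queue pairs (defaults to 1 when omitted).
 */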
54 static const char *drivername = "VHOST PMD";
55
56 static const char *valid_arguments[] = {
57         ETH_VHOST_IFACE_ARG,
58         ETH_VHOST_QUEUES_ARG,
59         NULL
60 };
61
62 static struct ether_addr base_eth_addr = {
63         .addr_bytes = {
64                 0x56 /* V */,
65                 0x48 /* H */,
66                 0x4F /* O */,
67                 0x53 /* S */,
68                 0x54 /* T */,
69                 0x00
70         }
71 };
72
73 struct vhost_queue {
74         rte_atomic32_t allow_queuing;
75         rte_atomic32_t while_queuing;
76         struct virtio_net *device;
77         struct pmd_internal *internal;
78         struct rte_mempool *mb_pool;
79         uint8_t port;
80         uint16_t virtqueue_id;
81         uint64_t rx_pkts;
82         uint64_t tx_pkts;
83         uint64_t missed_pkts;
84         uint64_t rx_bytes;
85         uint64_t tx_bytes;
86 };
87
88 struct pmd_internal {
89         char *dev_name;
90         char *iface_name;
91         uint16_t max_queues;
92
93         volatile uint16_t once;
94 };
95
96 struct internal_list {
97         TAILQ_ENTRY(internal_list) next;
98         struct rte_eth_dev *eth_dev;
99 };
100
101 TAILQ_HEAD(internal_list_head, internal_list);
102 static struct internal_list_head internal_list =
103         TAILQ_HEAD_INITIALIZER(internal_list);
104
105 static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;
106
107 static rte_atomic16_t nb_started_ports;
108 static pthread_t session_th;
109
110 static struct rte_eth_link pmd_link = {
111                 .link_speed = 10000,
112                 .link_duplex = ETH_LINK_FULL_DUPLEX,
113                 .link_status = ETH_LINK_DOWN
114 };
115
116 struct rte_vhost_vring_state {
117         rte_spinlock_t lock;
118
119         bool cur[RTE_MAX_QUEUES_PER_PORT * 2];
120         bool seen[RTE_MAX_QUEUES_PER_PORT * 2];
121         unsigned int index;
122         unsigned int max_vring;
123 };
124
125 static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];
126
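/*
 * RX burst: drain up to nb_bufs packets from the guest's TX virtqueue into
 * bufs[].  The allow_queuing/while_queuing pair forms a handshake with
 * destroy_device() so the vhost device is never touched while it is being
 * torn down.
 */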
127 static uint16_t
128 eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
129 {
130         struct vhost_queue *r = q;
131         uint16_t i, nb_rx = 0;
132
133         if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
134                 return 0;
135
136         rte_atomic32_set(&r->while_queuing, 1);
137
138         if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
139                 goto out;
140
141         /* Dequeue packets from guest TX queue */
142         nb_rx = rte_vhost_dequeue_burst(r->device,
143                         r->virtqueue_id, r->mb_pool, bufs, nb_bufs);
144
145         r->rx_pkts += nb_rx;
146
147         for (i = 0; likely(i < nb_rx); i++) {
148                 bufs[i]->port = r->port;
149                 r->rx_bytes += bufs[i]->pkt_len;
150         }
151
152 out:
153         rte_atomic32_set(&r->while_queuing, 0);
154
155         return nb_rx;
156 }
157
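/*
 * TX burst: push up to nb_bufs packets into the guest's RX virtqueue.
 * Packets the guest could not take are counted in missed_pkts and left to
 * the caller; successfully enqueued mbufs are freed here, as the vhost
 * library has already copied their contents into guest memory.
 */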
158 static uint16_t
159 eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
160 {
161         struct vhost_queue *r = q;
162         uint16_t i, nb_tx = 0;
163
164         if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
165                 return 0;
166
167         rte_atomic32_set(&r->while_queuing, 1);
168
169         if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
170                 goto out;
171
172         /* Enqueue packets to guest RX queue */
173         nb_tx = rte_vhost_enqueue_burst(r->device,
174                         r->virtqueue_id, bufs, nb_bufs);
175
176         r->tx_pkts += nb_tx;
177         r->missed_pkts += nb_bufs - nb_tx;
178
179         for (i = 0; likely(i < nb_tx); i++)
180                 r->tx_bytes += bufs[i]->pkt_len;
181
182         for (i = 0; likely(i < nb_tx); i++)
183                 rte_pktmbuf_free(bufs[i]);
184 out:
185         rte_atomic32_set(&r->while_queuing, 0);
186
187         return nb_tx;
188 }
189
190 static int
191 eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
192 {
193         return 0;
194 }
195
196 static inline struct internal_list *
197 find_internal_resource(char *ifname)
198 {
199         int found = 0;
200         struct internal_list *list;
201         struct pmd_internal *internal;
202
203         if (ifname == NULL)
204                 return NULL;
205
206         pthread_mutex_lock(&internal_list_lock);
207
208         TAILQ_FOREACH(list, &internal_list, next) {
209                 internal = list->eth_dev->data->dev_private;
210                 if (!strcmp(internal->iface_name, ifname)) {
211                         found = 1;
212                         break;
213                 }
214         }
215
216         pthread_mutex_unlock(&internal_list_lock);
217
218         if (!found)
219                 return NULL;
220
221         return list;
222 }
223
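/*
 * vhost callback: a guest has connected on one of the registered sockets.
 * Find the ethdev owning dev->ifname, attach the virtio_net handle to every
 * configured rx/tx queue, disable guest notifications, mark the link up and
 * finally allow the burst functions to start queuing.
 */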
224 static int
225 new_device(struct virtio_net *dev)
226 {
227         struct rte_eth_dev *eth_dev;
228         struct internal_list *list;
229         struct pmd_internal *internal;
230         struct vhost_queue *vq;
231         unsigned i;
232 #ifdef RTE_LIBRTE_VHOST_NUMA
233         int newnode, ret;
234 #endif
235
236         if (dev == NULL) {
237                 RTE_LOG(INFO, PMD, "Invalid argument\n");
238                 return -1;
239         }
240
241         list = find_internal_resource(dev->ifname);
242         if (list == NULL) {
243                 RTE_LOG(INFO, PMD, "Invalid device name\n");
244                 return -1;
245         }
246
247         eth_dev = list->eth_dev;
248         internal = eth_dev->data->dev_private;
249
250 #ifdef RTE_LIBRTE_VHOST_NUMA
251         ret  = get_mempolicy(&newnode, NULL, 0, dev,
252                         MPOL_F_NODE | MPOL_F_ADDR);
253         if (ret < 0) {
254                 RTE_LOG(ERR, PMD, "Unknown numa node\n");
255                 return -1;
256         }
257
258         eth_dev->data->numa_node = newnode;
259 #endif
260
261         for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
262                 vq = eth_dev->data->rx_queues[i];
263                 if (vq == NULL)
264                         continue;
265                 vq->device = dev;
266                 vq->internal = internal;
267                 vq->port = eth_dev->data->port_id;
268         }
269         for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
270                 vq = eth_dev->data->tx_queues[i];
271                 if (vq == NULL)
272                         continue;
273                 vq->device = dev;
274                 vq->internal = internal;
275                 vq->port = eth_dev->data->port_id;
276         }
277
278         for (i = 0; i < dev->virt_qp_nb * VIRTIO_QNUM; i++)
279                 rte_vhost_enable_guest_notification(dev, i, 0);
280
281         dev->flags |= VIRTIO_DEV_RUNNING;
282         dev->priv = eth_dev;
283         eth_dev->data->dev_link.link_status = ETH_LINK_UP;
284
285         for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
286                 vq = eth_dev->data->rx_queues[i];
287                 if (vq == NULL)
288                         continue;
289                 rte_atomic32_set(&vq->allow_queuing, 1);
290         }
291         for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
292                 vq = eth_dev->data->tx_queues[i];
293                 if (vq == NULL)
294                         continue;
295                 rte_atomic32_set(&vq->allow_queuing, 1);
296         }
297
298         RTE_LOG(INFO, PMD, "New connection established\n");
299
300         _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC);
301
302         return 0;
303 }
304
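/*
 * vhost callback: the guest connection is being torn down.  Queuing is
 * disabled first and each queue is polled until any in-flight rx/tx burst
 * has left its critical section; only then are the device pointers cleared
 * and the link reported down.
 */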
305 static void
306 destroy_device(volatile struct virtio_net *dev)
307 {
308         struct rte_eth_dev *eth_dev;
309         struct vhost_queue *vq;
310         unsigned i;
311
312         if (dev == NULL) {
313                 RTE_LOG(INFO, PMD, "Invalid argument\n");
314                 return;
315         }
316
317         eth_dev = (struct rte_eth_dev *)dev->priv;
318         if (eth_dev == NULL) {
319                 RTE_LOG(INFO, PMD, "Failed to find an ethdev\n");
320                 return;
321         }
322
323         /* Wait until rx/tx_pkt_burst stops accessing vhost device */
324         for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
325                 vq = eth_dev->data->rx_queues[i];
326                 if (vq == NULL)
327                         continue;
328                 rte_atomic32_set(&vq->allow_queuing, 0);
329                 while (rte_atomic32_read(&vq->while_queuing))
330                         rte_pause();
331         }
332         for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
333                 vq = eth_dev->data->tx_queues[i];
334                 if (vq == NULL)
335                         continue;
336                 rte_atomic32_set(&vq->allow_queuing, 0);
337                 while (rte_atomic32_read(&vq->while_queuing))
338                         rte_pause();
339         }
340
341         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
342
343         dev->priv = NULL;
344         dev->flags &= ~VIRTIO_DEV_RUNNING;
345
346         for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
347                 vq = eth_dev->data->rx_queues[i];
348                 if (vq == NULL)
349                         continue;
350                 vq->device = NULL;
351         }
352         for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
353                 vq = eth_dev->data->tx_queues[i];
354                 if (vq == NULL)
355                         continue;
356                 vq->device = NULL;
357         }
358
359         RTE_LOG(INFO, PMD, "Connection closed\n");
360
361         _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC);
362 }
363
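/*
 * vhost callback: a single vring was enabled or disabled by the guest.  The
 * new state is recorded in the per-port vring_states entry so that
 * rte_eth_vhost_get_queue_event() can report it, and a QUEUE_STATE event is
 * raised on the ethdev.
 */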
364 static int
365 vring_state_changed(struct virtio_net *dev, uint16_t vring, int enable)
366 {
367         struct rte_vhost_vring_state *state;
368         struct rte_eth_dev *eth_dev;
369         struct internal_list *list;
370
371         if (dev == NULL) {
372                 RTE_LOG(ERR, PMD, "Invalid argument\n");
373                 return -1;
374         }
375
376         list = find_internal_resource(dev->ifname);
377         if (list == NULL) {
378                 RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", dev->ifname);
379                 return -1;
380         }
381
382         eth_dev = list->eth_dev;
383         /* won't be NULL */
384         state = vring_states[eth_dev->data->port_id];
385         rte_spinlock_lock(&state->lock);
386         state->cur[vring] = enable;
387         state->max_vring = RTE_MAX(vring, state->max_vring);
388         rte_spinlock_unlock(&state->lock);
389
390         RTE_LOG(INFO, PMD, "vring%u is %s\n",
391                         vring, enable ? "enabled" : "disabled");
392
393         _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE);
394
395         return 0;
396 }
397
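/*
 * Public helper: report one not-yet-seen vring state change on the port.
 * Vrings are scanned round-robin from the last reported index; odd vring
 * ids belong to rx queues and even ids to tx queues, with idx / 2 giving
 * the queue id.  Returns -1 when there is nothing new to report.
 *
 * Illustrative caller loop (not part of this file):
 *
 *	struct rte_eth_vhost_queue_event ev;
 *	while (rte_eth_vhost_get_queue_event(port_id, &ev) == 0)
 *		printf("queue %u %s %s\n", ev.queue_id,
 *		       ev.rx ? "rx" : "tx",
 *		       ev.enable ? "enabled" : "disabled");
 */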
398 int
399 rte_eth_vhost_get_queue_event(uint8_t port_id,
400                 struct rte_eth_vhost_queue_event *event)
401 {
402         struct rte_vhost_vring_state *state;
403         unsigned int i;
404         int idx;
405
406         if (port_id >= RTE_MAX_ETHPORTS) {
407                 RTE_LOG(ERR, PMD, "Invalid port id\n");
408                 return -1;
409         }
410
411         state = vring_states[port_id];
412         if (!state) {
413                 RTE_LOG(ERR, PMD, "Unused port\n");
414                 return -1;
415         }
416
417         rte_spinlock_lock(&state->lock);
418         for (i = 0; i <= state->max_vring; i++) {
419                 idx = state->index++ % (state->max_vring + 1);
420
421                 if (state->cur[idx] != state->seen[idx]) {
422                         state->seen[idx] = state->cur[idx];
423                         event->queue_id = idx / 2;
424                         event->rx = idx & 1;
425                         event->enable = state->cur[idx];
426                         rte_spinlock_unlock(&state->lock);
427                         return 0;
428                 }
429         }
430         rte_spinlock_unlock(&state->lock);
431
432         return -1;
433 }
434
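/*
 * Body of the shared session thread: register the new_device /
 * destroy_device / vring_state_changed callbacks with the vhost library,
 * then block in rte_vhost_driver_session_start(), which handles vhost-user
 * messages until the thread is cancelled by vhost_driver_session_stop().
 */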
435 static void *
436 vhost_driver_session(void *param __rte_unused)
437 {
438         static struct virtio_net_device_ops vhost_ops;
439
440         /* set up the vhost driver callbacks */
441         vhost_ops.new_device = new_device;
442         vhost_ops.destroy_device = destroy_device;
443         vhost_ops.vring_state_changed = vring_state_changed;
444         if (rte_vhost_driver_callback_register(&vhost_ops) < 0)
445                 RTE_LOG(ERR, PMD, "Can't register callbacks\n");
446
447         /* start event handling */
448         rte_vhost_driver_session_start();
449
450         return NULL;
451 }
452
453 static int
454 vhost_driver_session_start(void)
455 {
456         int ret;
457
458         ret = pthread_create(&session_th,
459                         NULL, vhost_driver_session, NULL);
460         if (ret)
461                 RTE_LOG(ERR, PMD, "Can't create a thread\n");
462
463         return ret;
464 }
465
466 static void
467 vhost_driver_session_stop(void)
468 {
469         int ret;
470
471         ret = pthread_cancel(session_th);
472         if (ret)
473                 RTE_LOG(ERR, PMD, "Can't cancel the thread\n");
474
475         ret = pthread_join(session_th, NULL);
476         if (ret)
477                 RTE_LOG(ERR, PMD, "Can't join the thread\n");
478 }
479
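/*
 * dev_start: register the port's vhost-user socket the first time the port
 * is started and, when this is the first vhost port started overall, spawn
 * the single session thread that services every vhost port.
 */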
480 static int
481 eth_dev_start(struct rte_eth_dev *dev)
482 {
483         struct pmd_internal *internal = dev->data->dev_private;
484         int ret = 0;
485
486         if (rte_atomic16_cmpset(&internal->once, 0, 1)) {
487                 ret = rte_vhost_driver_register(internal->iface_name);
488                 if (ret)
489                         return ret;
490         }
491
492         /* We need only one message handling thread */
493         if (rte_atomic16_add_return(&nb_started_ports, 1) == 1)
494                 ret = vhost_driver_session_start();
495
496         return ret;
497 }
498
499 static void
500 eth_dev_stop(struct rte_eth_dev *dev)
501 {
502         struct pmd_internal *internal = dev->data->dev_private;
503
504         if (rte_atomic16_cmpset(&internal->once, 1, 0))
505                 rte_vhost_driver_unregister(internal->iface_name);
506
507         if (rte_atomic16_sub_return(&nb_started_ports, 1) == 0)
508                 vhost_driver_session_stop();
509 }
510
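/*
 * Queue setup only allocates the per-queue context; the virtio device is
 * attached later in new_device().  Rx queue N drains guest TX virtqueue
 * 2*N + 1, while tx queue N feeds guest RX virtqueue 2*N.
 */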
511 static int
512 eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
513                    uint16_t nb_rx_desc __rte_unused,
514                    unsigned int socket_id,
515                    const struct rte_eth_rxconf *rx_conf __rte_unused,
516                    struct rte_mempool *mb_pool)
517 {
518         struct vhost_queue *vq;
519
520         vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
521                         RTE_CACHE_LINE_SIZE, socket_id);
522         if (vq == NULL) {
523                 RTE_LOG(ERR, PMD, "Failed to allocate memory for rx queue\n");
524                 return -ENOMEM;
525         }
526
527         vq->mb_pool = mb_pool;
528         vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
529         dev->data->rx_queues[rx_queue_id] = vq;
530
531         return 0;
532 }
533
534 static int
535 eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
536                    uint16_t nb_tx_desc __rte_unused,
537                    unsigned int socket_id,
538                    const struct rte_eth_txconf *tx_conf __rte_unused)
539 {
540         struct vhost_queue *vq;
541
542         vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
543                         RTE_CACHE_LINE_SIZE, socket_id);
544         if (vq == NULL) {
545                 RTE_LOG(ERR, PMD, "Failed to allocate memory for tx queue\n");
546                 return -ENOMEM;
547         }
548
549         vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
550         dev->data->tx_queues[tx_queue_id] = vq;
551
552         return 0;
553 }
554
555 static void
556 eth_dev_info(struct rte_eth_dev *dev,
557              struct rte_eth_dev_info *dev_info)
558 {
559         struct pmd_internal *internal;
560
561         internal = dev->data->dev_private;
562         if (internal == NULL) {
563                 RTE_LOG(ERR, PMD, "Invalid device specified\n");
564                 return;
565         }
566
567         dev_info->driver_name = drivername;
568         dev_info->max_mac_addrs = 1;
569         dev_info->max_rx_pktlen = (uint32_t)-1;
570         dev_info->max_rx_queues = internal->max_queues;
571         dev_info->max_tx_queues = internal->max_queues;
572         dev_info->min_rx_bufsize = 0;
573 }
574
575 static void
576 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
577 {
578         unsigned i;
579         unsigned long rx_total = 0, tx_total = 0, tx_missed_total = 0;
580         unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
581         struct vhost_queue *vq;
582
583         for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
584                         i < dev->data->nb_rx_queues; i++) {
585                 if (dev->data->rx_queues[i] == NULL)
586                         continue;
587                 vq = dev->data->rx_queues[i];
588                 stats->q_ipackets[i] = vq->rx_pkts;
589                 rx_total += stats->q_ipackets[i];
590
591                 stats->q_ibytes[i] = vq->rx_bytes;
592                 rx_total_bytes += stats->q_ibytes[i];
593         }
594
595         for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
596                         i < dev->data->nb_tx_queues; i++) {
597                 if (dev->data->tx_queues[i] == NULL)
598                         continue;
599                 vq = dev->data->tx_queues[i];
600                 stats->q_opackets[i] = vq->tx_pkts;
601                 tx_missed_total += vq->missed_pkts;
602                 tx_total += stats->q_opackets[i];
603
604                 stats->q_obytes[i] = vq->tx_bytes;
605                 tx_total_bytes += stats->q_obytes[i];
606         }
607
608         stats->ipackets = rx_total;
609         stats->opackets = tx_total;
610         stats->imissed = tx_missed_total;
611         stats->ibytes = rx_total_bytes;
612         stats->obytes = tx_total_bytes;
613 }
614
615 static void
616 eth_stats_reset(struct rte_eth_dev *dev)
617 {
618         struct vhost_queue *vq;
619         unsigned i;
620
621         for (i = 0; i < dev->data->nb_rx_queues; i++) {
622                 if (dev->data->rx_queues[i] == NULL)
623                         continue;
624                 vq = dev->data->rx_queues[i];
625                 vq->rx_pkts = 0;
626                 vq->rx_bytes = 0;
627         }
628         for (i = 0; i < dev->data->nb_tx_queues; i++) {
629                 if (dev->data->tx_queues[i] == NULL)
630                         continue;
631                 vq = dev->data->tx_queues[i];
632                 vq->tx_pkts = 0;
633                 vq->tx_bytes = 0;
634                 vq->missed_pkts = 0;
635         }
636 }
637
638 static void
639 eth_queue_release(void *q)
640 {
641         rte_free(q);
642 }
643
644 static int
645 eth_link_update(struct rte_eth_dev *dev __rte_unused,
646                 int wait_to_complete __rte_unused)
647 {
648         return 0;
649 }
650
651 /**
652  * Disable features in feature_mask. Returns 0 on success.
653  */
654 int
655 rte_eth_vhost_feature_disable(uint64_t feature_mask)
656 {
657         return rte_vhost_feature_disable(feature_mask);
658 }
659
660 /**
661  * Enable features in feature_mask. Returns 0 on success.
662  */
663 int
664 rte_eth_vhost_feature_enable(uint64_t feature_mask)
665 {
666         return rte_vhost_feature_enable(feature_mask);
667 }
668
669 /* Returns currently supported vhost features */
670 uint64_t
671 rte_eth_vhost_feature_get(void)
672 {
673         return rte_vhost_feature_get();
674 }
675
676 static const struct eth_dev_ops ops = {
677         .dev_start = eth_dev_start,
678         .dev_stop = eth_dev_stop,
679         .dev_configure = eth_dev_configure,
680         .dev_infos_get = eth_dev_info,
681         .rx_queue_setup = eth_rx_queue_setup,
682         .tx_queue_setup = eth_tx_queue_setup,
683         .rx_queue_release = eth_queue_release,
684         .tx_queue_release = eth_queue_release,
685         .link_update = eth_link_update,
686         .stats_get = eth_stats_get,
687         .stats_reset = eth_stats_reset,
688 };
689
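/*
 * Allocate and wire up a new vhost ethdev: private data, per-port vring
 * state, a locally administered MAC of the form 56:48:4f:53:54:<port_id>
 * ("VHOST" in ASCII) and the rx/tx burst handlers.  Returns the new port id,
 * or -1 on any allocation failure.
 */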
690 static int
691 eth_dev_vhost_create(const char *name, char *iface_name, int16_t queues,
692                      const unsigned numa_node)
693 {
694         struct rte_eth_dev_data *data = NULL;
695         struct pmd_internal *internal = NULL;
696         struct rte_eth_dev *eth_dev = NULL;
697         struct ether_addr *eth_addr = NULL;
698         struct rte_vhost_vring_state *vring_state = NULL;
699         struct internal_list *list = NULL;
700
701         RTE_LOG(INFO, PMD, "Creating VHOST-USER backend on numa socket %u\n",
702                 numa_node);
703
704         /* now do all data allocation - for the eth_dev structure
705          * and the internal (private) data
706          */
707         data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
708         if (data == NULL)
709                 goto error;
710
711         internal = rte_zmalloc_socket(name, sizeof(*internal), 0, numa_node);
712         if (internal == NULL)
713                 goto error;
714
715         list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
716         if (list == NULL)
717                 goto error;
718
719         /* reserve an ethdev entry */
720         eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
721         if (eth_dev == NULL)
722                 goto error;
723
724         eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
725         if (eth_addr == NULL)
726                 goto error;
727         *eth_addr = base_eth_addr;
728         eth_addr->addr_bytes[5] = eth_dev->data->port_id;
729
730         vring_state = rte_zmalloc_socket(name,
731                         sizeof(*vring_state), 0, numa_node);
732         if (vring_state == NULL)
733                 goto error;
734
735         TAILQ_INIT(&eth_dev->link_intr_cbs);
736
737         /* now put it all together
738          * - store queue data in internal,
739          * - store numa_node info in ethdev data
740          * - point eth_dev_data to internals
741          * - and point eth_dev structure to new eth_dev_data structure
742          */
743         internal->dev_name = strdup(name);
744         if (internal->dev_name == NULL)
745                 goto error;
746         internal->iface_name = strdup(iface_name);
747         if (internal->iface_name == NULL)
748                 goto error;
749
750         list->eth_dev = eth_dev;
751         pthread_mutex_lock(&internal_list_lock);
752         TAILQ_INSERT_TAIL(&internal_list, list, next);
753         pthread_mutex_unlock(&internal_list_lock);
754
755         rte_spinlock_init(&vring_state->lock);
756         vring_states[eth_dev->data->port_id] = vring_state;
757
758         data->dev_private = internal;
759         data->port_id = eth_dev->data->port_id;
760         memmove(data->name, eth_dev->data->name, sizeof(data->name));
761         data->nb_rx_queues = queues;
762         data->nb_tx_queues = queues;
763         internal->max_queues = queues;
764         data->dev_link = pmd_link;
765         data->mac_addrs = eth_addr;
766
767         /* We'll replace the 'data' originally allocated by eth_dev, so the
768          * vhost PMD resources won't be shared across multiple processes.
769          */
770         eth_dev->data = data;
771         eth_dev->dev_ops = &ops;
772         eth_dev->driver = NULL;
773         data->dev_flags =
774                 RTE_ETH_DEV_DETACHABLE | RTE_ETH_DEV_INTR_LSC;
775         data->kdrv = RTE_KDRV_NONE;
776         data->drv_name = internal->dev_name;
777         data->numa_node = numa_node;
778
779         /* finally assign rx and tx ops */
780         eth_dev->rx_pkt_burst = eth_vhost_rx;
781         eth_dev->tx_pkt_burst = eth_vhost_tx;
782
783         return data->port_id;
784
785 error:
786         if (internal)
787                 free(internal->dev_name);
788         rte_free(vring_state);
789         rte_free(eth_addr);
790         if (eth_dev)
791                 rte_eth_dev_release_port(eth_dev);
792         rte_free(internal);
793         rte_free(list);
794         rte_free(data);
795
796         return -1;
797 }
798
799 static inline int
800 open_iface(const char *key __rte_unused, const char *value, void *extra_args)
801 {
802         const char **iface_name = extra_args;
803
804         if (value == NULL)
805                 return -1;
806
807         *iface_name = value;
808
809         return 0;
810 }
811
812 static inline int
813 open_queues(const char *key __rte_unused, const char *value, void *extra_args)
814 {
815         uint16_t *q = extra_args;
816
817         if (value == NULL || extra_args == NULL)
818                 return -EINVAL;
819
820         *q = (uint16_t)strtoul(value, NULL, 0);
821         if (*q == USHRT_MAX && errno == ERANGE)
822                 return -1;
823
824         if (*q > RTE_MAX_QUEUES_PER_PORT)
825                 return -1;
826
827         return 0;
828 }
829
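/*
 * vdev init hook: parse the "iface" (mandatory) and "queues" (optional,
 * default 1) arguments and create the ethdev on the caller's NUMA socket.
 */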
830 static int
831 rte_pmd_vhost_devinit(const char *name, const char *params)
832 {
833         struct rte_kvargs *kvlist = NULL;
834         int ret = 0;
835         char *iface_name;
836         uint16_t queues;
837
838         RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n", name);
839
840         kvlist = rte_kvargs_parse(params, valid_arguments);
841         if (kvlist == NULL)
842                 return -1;
843
844         if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) {
845                 ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG,
846                                          &open_iface, &iface_name);
847                 if (ret < 0)
848                         goto out_free;
849         } else {
850                 ret = -1;
851                 goto out_free;
852         }
853
854         if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
855                 ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
856                                          &open_queues, &queues);
857                 if (ret < 0)
858                         goto out_free;
859
860         } else
861                 queues = 1;
862
863         eth_dev_vhost_create(name, iface_name, queues, rte_socket_id());
864
865 out_free:
866         rte_kvargs_free(kvlist);
867         return ret;
868 }
869
870 static int
871 rte_pmd_vhost_devuninit(const char *name)
872 {
873         struct rte_eth_dev *eth_dev = NULL;
874         struct pmd_internal *internal;
875         struct internal_list *list;
876         unsigned int i;
877
878         RTE_LOG(INFO, PMD, "Un-Initializing pmd_vhost for %s\n", name);
879
880         /* find an ethdev entry */
881         eth_dev = rte_eth_dev_allocated(name);
882         if (eth_dev == NULL)
883                 return -ENODEV;
884
885         internal = eth_dev->data->dev_private;
886         if (internal == NULL)
887                 return -ENODEV;
888
889         list = find_internal_resource(internal->iface_name);
890         if (list == NULL)
891                 return -ENODEV;
892
893         pthread_mutex_lock(&internal_list_lock);
894         TAILQ_REMOVE(&internal_list, list, next);
895         pthread_mutex_unlock(&internal_list_lock);
896         rte_free(list);
897
898         eth_dev_stop(eth_dev);
899
900         rte_free(vring_states[eth_dev->data->port_id]);
901         vring_states[eth_dev->data->port_id] = NULL;
902
903         free(internal->dev_name);
904         free(internal->iface_name);
905
906         for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
907                 rte_free(eth_dev->data->rx_queues[i]);
908         for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
909                 rte_free(eth_dev->data->tx_queues[i]);
910
911         rte_free(eth_dev->data->mac_addrs);
912         rte_free(eth_dev->data);
913         rte_free(internal);
914
915         rte_eth_dev_release_port(eth_dev);
916
917         return 0;
918 }
919
920 static struct rte_driver pmd_vhost_drv = {
921         .name = "eth_vhost",
922         .type = PMD_VDEV,
923         .init = rte_pmd_vhost_devinit,
924         .uninit = rte_pmd_vhost_devuninit,
925 };
926
927 PMD_REGISTER_DRIVER(pmd_vhost_drv);