drivers/net/vhost/rte_eth_vhost.c (Imported Upstream version 16.07-rc1, deb_dpdk.git)
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) 2016 IGEL Co., Ltd.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of IGEL Co.,Ltd. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <unistd.h>
#include <pthread.h>
#include <stdbool.h>
#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_mbuf.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_dev.h>
#include <rte_kvargs.h>
#include <rte_virtio_net.h>
#include <rte_spinlock.h>

#include "rte_eth_vhost.h"

#define ETH_VHOST_IFACE_ARG             "iface"
#define ETH_VHOST_QUEUES_ARG            "queues"
#define ETH_VHOST_CLIENT_ARG            "client"

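/*
 * Example usage (the socket path below is illustrative, not part of this
 * driver): the PMD is instantiated as a virtual device from the EAL
 * command line, e.g.
 *
 *   --vdev 'eth_vhost0,iface=/tmp/sock0,queues=2,client=1'
 *
 * where "iface" is the vhost-user socket path, "queues" the number of
 * rx/tx queue pairs (default 1), and "client" selects vhost-user client
 * mode.
 */
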
static const char *drivername = "VHOST PMD";

static const char *valid_arguments[] = {
        ETH_VHOST_IFACE_ARG,
        ETH_VHOST_QUEUES_ARG,
        ETH_VHOST_CLIENT_ARG,
        NULL
};

static struct ether_addr base_eth_addr = {
        .addr_bytes = {
                0x56 /* V */,
                0x48 /* H */,
                0x4F /* O */,
                0x53 /* S */,
                0x54 /* T */,
                0x00
        }
};

struct vhost_queue {
        int vid;
        rte_atomic32_t allow_queuing;
        rte_atomic32_t while_queuing;
        struct pmd_internal *internal;
        struct rte_mempool *mb_pool;
        uint8_t port;
        uint16_t virtqueue_id;
        uint64_t rx_pkts;
        uint64_t tx_pkts;
        uint64_t missed_pkts;
        uint64_t rx_bytes;
        uint64_t tx_bytes;
};

struct pmd_internal {
        char *dev_name;
        char *iface_name;
        uint16_t max_queues;
        uint64_t flags;

        volatile uint16_t once;
};

struct internal_list {
        TAILQ_ENTRY(internal_list) next;
        struct rte_eth_dev *eth_dev;
};

TAILQ_HEAD(internal_list_head, internal_list);
static struct internal_list_head internal_list =
        TAILQ_HEAD_INITIALIZER(internal_list);

static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;

static rte_atomic16_t nb_started_ports;
static pthread_t session_th;

static struct rte_eth_link pmd_link = {
                .link_speed = 10000,
                .link_duplex = ETH_LINK_FULL_DUPLEX,
                .link_status = ETH_LINK_DOWN
};

struct rte_vhost_vring_state {
        rte_spinlock_t lock;

        bool cur[RTE_MAX_QUEUES_PER_PORT * 2];
        bool seen[RTE_MAX_QUEUES_PER_PORT * 2];
        unsigned int index;
        unsigned int max_vring;
};

static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];

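/*
 * Burst receive/transmit handlers.
 *
 * eth_vhost_rx() dequeues mbufs from the guest TX virtqueue and
 * eth_vhost_tx() enqueues mbufs to the guest RX virtqueue. Both are gated
 * by the per-queue allow_queuing flag; while_queuing marks a burst in
 * flight so that destroy_device() can wait for the datapath to drain
 * before the vhost device goes away.
 */
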
static uint16_t
eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
        struct vhost_queue *r = q;
        uint16_t i, nb_rx = 0;

        if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
                return 0;

        rte_atomic32_set(&r->while_queuing, 1);

        if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
                goto out;

        /* Dequeue packets from guest TX queue */
        nb_rx = rte_vhost_dequeue_burst(r->vid,
                        r->virtqueue_id, r->mb_pool, bufs, nb_bufs);

        r->rx_pkts += nb_rx;

        for (i = 0; likely(i < nb_rx); i++) {
                bufs[i]->port = r->port;
                r->rx_bytes += bufs[i]->pkt_len;
        }

out:
        rte_atomic32_set(&r->while_queuing, 0);

        return nb_rx;
}

static uint16_t
eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
        struct vhost_queue *r = q;
        uint16_t i, nb_tx = 0;

        if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
                return 0;

        rte_atomic32_set(&r->while_queuing, 1);

        if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
                goto out;

        /* Enqueue packets to guest RX queue */
        nb_tx = rte_vhost_enqueue_burst(r->vid,
                        r->virtqueue_id, bufs, nb_bufs);

        r->tx_pkts += nb_tx;
        r->missed_pkts += nb_bufs - nb_tx;

        for (i = 0; likely(i < nb_tx); i++)
                r->tx_bytes += bufs[i]->pkt_len;

        for (i = 0; likely(i < nb_tx); i++)
                rte_pktmbuf_free(bufs[i]);
out:
        rte_atomic32_set(&r->while_queuing, 0);

        return nb_tx;
}

static int
eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
{
        return 0;
}

static inline struct internal_list *
find_internal_resource(char *ifname)
{
        int found = 0;
        struct internal_list *list;
        struct pmd_internal *internal;

        if (ifname == NULL)
                return NULL;

        pthread_mutex_lock(&internal_list_lock);

        TAILQ_FOREACH(list, &internal_list, next) {
                internal = list->eth_dev->data->dev_private;
                if (!strcmp(internal->iface_name, ifname)) {
                        found = 1;
                        break;
                }
        }

        pthread_mutex_unlock(&internal_list_lock);

        if (!found)
                return NULL;

        return list;
}

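/*
 * Vhost callback invoked when a guest establishes a new connection on the
 * socket: look up the ethdev by interface name, bind the vhost device id to
 * every configured queue, disable guest notifications, mark the link up and
 * re-enable queuing before reporting the link state change.
 */
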
static int
new_device(int vid)
{
        struct rte_eth_dev *eth_dev;
        struct internal_list *list;
        struct pmd_internal *internal;
        struct vhost_queue *vq;
        unsigned i;
        char ifname[PATH_MAX];
#ifdef RTE_LIBRTE_VHOST_NUMA
        int newnode;
#endif

        rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
        list = find_internal_resource(ifname);
        if (list == NULL) {
                RTE_LOG(INFO, PMD, "Invalid device name: %s\n", ifname);
                return -1;
        }

        eth_dev = list->eth_dev;
        internal = eth_dev->data->dev_private;

#ifdef RTE_LIBRTE_VHOST_NUMA
        newnode = rte_vhost_get_numa_node(vid);
        if (newnode >= 0)
                eth_dev->data->numa_node = newnode;
#endif

        for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
                vq = eth_dev->data->rx_queues[i];
                if (vq == NULL)
                        continue;
                vq->vid = vid;
                vq->internal = internal;
                vq->port = eth_dev->data->port_id;
        }
        for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
                vq = eth_dev->data->tx_queues[i];
                if (vq == NULL)
                        continue;
                vq->vid = vid;
                vq->internal = internal;
                vq->port = eth_dev->data->port_id;
        }

        for (i = 0; i < rte_vhost_get_queue_num(vid) * VIRTIO_QNUM; i++)
                rte_vhost_enable_guest_notification(vid, i, 0);

        eth_dev->data->dev_link.link_status = ETH_LINK_UP;

        for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
                vq = eth_dev->data->rx_queues[i];
                if (vq == NULL)
                        continue;
                rte_atomic32_set(&vq->allow_queuing, 1);
        }
        for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
                vq = eth_dev->data->tx_queues[i];
                if (vq == NULL)
                        continue;
                rte_atomic32_set(&vq->allow_queuing, 1);
        }

        RTE_LOG(INFO, PMD, "New connection established\n");

        _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC);

        return 0;
}

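/*
 * Vhost callback invoked when the connection is closed: stop queuing, wait
 * for in-flight bursts to finish, invalidate the device id on every queue
 * and report the link as down.
 */
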
static void
destroy_device(int vid)
{
        struct rte_eth_dev *eth_dev;
        struct vhost_queue *vq;
        struct internal_list *list;
        char ifname[PATH_MAX];
        unsigned i;

        rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
        list = find_internal_resource(ifname);
        if (list == NULL) {
                RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname);
                return;
        }
        eth_dev = list->eth_dev;

        /* Wait until rx/tx_pkt_burst stops accessing vhost device */
        for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
                vq = eth_dev->data->rx_queues[i];
                if (vq == NULL)
                        continue;
                rte_atomic32_set(&vq->allow_queuing, 0);
                while (rte_atomic32_read(&vq->while_queuing))
                        rte_pause();
        }
        for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
                vq = eth_dev->data->tx_queues[i];
                if (vq == NULL)
                        continue;
                rte_atomic32_set(&vq->allow_queuing, 0);
                while (rte_atomic32_read(&vq->while_queuing))
                        rte_pause();
        }

        eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;

        for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
                vq = eth_dev->data->rx_queues[i];
                if (vq == NULL)
                        continue;
                vq->vid = -1;
        }
        for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
                vq = eth_dev->data->tx_queues[i];
                if (vq == NULL)
                        continue;
                vq->vid = -1;
        }

        RTE_LOG(INFO, PMD, "Connection closed\n");

        _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC);
}

static int
vring_state_changed(int vid, uint16_t vring, int enable)
{
        struct rte_vhost_vring_state *state;
        struct rte_eth_dev *eth_dev;
        struct internal_list *list;
        char ifname[PATH_MAX];

        rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
        list = find_internal_resource(ifname);
        if (list == NULL) {
                RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname);
                return -1;
        }

        eth_dev = list->eth_dev;
        /* won't be NULL */
        state = vring_states[eth_dev->data->port_id];
        rte_spinlock_lock(&state->lock);
        state->cur[vring] = enable;
        state->max_vring = RTE_MAX(vring, state->max_vring);
        rte_spinlock_unlock(&state->lock);

        RTE_LOG(INFO, PMD, "vring%u is %s\n",
                        vring, enable ? "enabled" : "disabled");

        _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE);

        return 0;
}

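/*
 * Minimal usage sketch (the function and variable names below are
 * illustrative, not part of this driver): an application typically
 * registers a callback for RTE_ETH_EVENT_QUEUE_STATE and drains the
 * pending events with rte_eth_vhost_get_queue_event() until it returns a
 * negative value.
 *
 *   static void
 *   queue_state_cb(uint8_t port_id, enum rte_eth_event_type type __rte_unused,
 *                  void *param __rte_unused)
 *   {
 *           struct rte_eth_vhost_queue_event event;
 *
 *           while (rte_eth_vhost_get_queue_event(port_id, &event) == 0)
 *                   RTE_LOG(INFO, PMD, "port %u %s queue %u %s\n",
 *                           port_id, event.rx ? "rx" : "tx",
 *                           event.queue_id,
 *                           event.enable ? "enabled" : "disabled");
 *   }
 *
 *   rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_QUEUE_STATE,
 *                                 queue_state_cb, NULL);
 */
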
int
rte_eth_vhost_get_queue_event(uint8_t port_id,
                struct rte_eth_vhost_queue_event *event)
{
        struct rte_vhost_vring_state *state;
        unsigned int i;
        int idx;

        if (port_id >= RTE_MAX_ETHPORTS) {
                RTE_LOG(ERR, PMD, "Invalid port id\n");
                return -1;
        }

        state = vring_states[port_id];
        if (!state) {
                RTE_LOG(ERR, PMD, "Unused port\n");
                return -1;
        }

        rte_spinlock_lock(&state->lock);
        for (i = 0; i <= state->max_vring; i++) {
                idx = state->index++ % (state->max_vring + 1);

                if (state->cur[idx] != state->seen[idx]) {
                        state->seen[idx] = state->cur[idx];
                        event->queue_id = idx / 2;
                        event->rx = idx & 1;
                        event->enable = state->cur[idx];
                        rte_spinlock_unlock(&state->lock);
                        return 0;
                }
        }
        rte_spinlock_unlock(&state->lock);

        return -1;
}

static void *
vhost_driver_session(void *param __rte_unused)
{
        static struct virtio_net_device_ops vhost_ops;

        /* set vhost arguments */
        vhost_ops.new_device = new_device;
        vhost_ops.destroy_device = destroy_device;
        vhost_ops.vring_state_changed = vring_state_changed;
        if (rte_vhost_driver_callback_register(&vhost_ops) < 0)
                RTE_LOG(ERR, PMD, "Can't register callbacks\n");

        /* start event handling */
        rte_vhost_driver_session_start();

        return NULL;
}

static int
vhost_driver_session_start(void)
{
        int ret;

        ret = pthread_create(&session_th,
                        NULL, vhost_driver_session, NULL);
        if (ret)
                RTE_LOG(ERR, PMD, "Can't create a thread\n");

        return ret;
}

static void
vhost_driver_session_stop(void)
{
        int ret;

        ret = pthread_cancel(session_th);
        if (ret)
                RTE_LOG(ERR, PMD, "Can't cancel the thread\n");

        ret = pthread_join(session_th, NULL);
        if (ret)
                RTE_LOG(ERR, PMD, "Can't join the thread\n");
}

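/*
 * Each port registers its vhost-user socket once (guarded by
 * internal->once); a single session thread servicing all vhost connections
 * is spawned when the first port starts and stopped when the last port
 * stops.
 */
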
static int
eth_dev_start(struct rte_eth_dev *dev)
{
        struct pmd_internal *internal = dev->data->dev_private;
        int ret = 0;

        if (rte_atomic16_cmpset(&internal->once, 0, 1)) {
                ret = rte_vhost_driver_register(internal->iface_name,
                                                internal->flags);
                if (ret)
                        return ret;
        }

        /* We need only one message handling thread */
        if (rte_atomic16_add_return(&nb_started_ports, 1) == 1)
                ret = vhost_driver_session_start();

        return ret;
}

static void
eth_dev_stop(struct rte_eth_dev *dev)
{
        struct pmd_internal *internal = dev->data->dev_private;

        if (rte_atomic16_cmpset(&internal->once, 1, 0))
                rte_vhost_driver_unregister(internal->iface_name);

        if (rte_atomic16_sub_return(&nb_started_ports, 1) == 0)
                vhost_driver_session_stop();
}

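/*
 * Queue setup: ethdev rx queue i maps to virtqueue 2 * i + 1 (the guest TX
 * ring we dequeue from) and tx queue i maps to virtqueue 2 * i (the guest
 * RX ring we enqueue to). The queues are allocated on the requested socket
 * and are bound to a vhost device id later, in new_device().
 */
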
static int
eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
                   uint16_t nb_rx_desc __rte_unused,
                   unsigned int socket_id,
                   const struct rte_eth_rxconf *rx_conf __rte_unused,
                   struct rte_mempool *mb_pool)
{
        struct vhost_queue *vq;

        vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
                        RTE_CACHE_LINE_SIZE, socket_id);
        if (vq == NULL) {
                RTE_LOG(ERR, PMD, "Failed to allocate memory for rx queue\n");
                return -ENOMEM;
        }

        vq->mb_pool = mb_pool;
        vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
        dev->data->rx_queues[rx_queue_id] = vq;

        return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
                   uint16_t nb_tx_desc __rte_unused,
                   unsigned int socket_id,
                   const struct rte_eth_txconf *tx_conf __rte_unused)
{
        struct vhost_queue *vq;

        vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
                        RTE_CACHE_LINE_SIZE, socket_id);
        if (vq == NULL) {
                RTE_LOG(ERR, PMD, "Failed to allocate memory for tx queue\n");
                return -ENOMEM;
        }

        vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
        dev->data->tx_queues[tx_queue_id] = vq;

        return 0;
}

static void
eth_dev_info(struct rte_eth_dev *dev,
             struct rte_eth_dev_info *dev_info)
{
        struct pmd_internal *internal;

        internal = dev->data->dev_private;
        if (internal == NULL) {
                RTE_LOG(ERR, PMD, "Invalid device specified\n");
                return;
        }

        dev_info->driver_name = drivername;
        dev_info->max_mac_addrs = 1;
        dev_info->max_rx_pktlen = (uint32_t)-1;
        dev_info->max_rx_queues = internal->max_queues;
        dev_info->max_tx_queues = internal->max_queues;
        dev_info->min_rx_bufsize = 0;
}

static void
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        unsigned i;
        unsigned long rx_total = 0, tx_total = 0, tx_missed_total = 0;
        unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
        struct vhost_queue *vq;

        for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
                        i < dev->data->nb_rx_queues; i++) {
                if (dev->data->rx_queues[i] == NULL)
                        continue;
                vq = dev->data->rx_queues[i];
                stats->q_ipackets[i] = vq->rx_pkts;
                rx_total += stats->q_ipackets[i];

                stats->q_ibytes[i] = vq->rx_bytes;
                rx_total_bytes += stats->q_ibytes[i];
        }

        for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
                        i < dev->data->nb_tx_queues; i++) {
                if (dev->data->tx_queues[i] == NULL)
                        continue;
                vq = dev->data->tx_queues[i];
                stats->q_opackets[i] = vq->tx_pkts;
                tx_missed_total += vq->missed_pkts;
                tx_total += stats->q_opackets[i];

                stats->q_obytes[i] = vq->tx_bytes;
                tx_total_bytes += stats->q_obytes[i];
        }

        stats->ipackets = rx_total;
        stats->opackets = tx_total;
        stats->oerrors = tx_missed_total;
        stats->ibytes = rx_total_bytes;
        stats->obytes = tx_total_bytes;
}

static void
eth_stats_reset(struct rte_eth_dev *dev)
{
        struct vhost_queue *vq;
        unsigned i;

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                if (dev->data->rx_queues[i] == NULL)
                        continue;
                vq = dev->data->rx_queues[i];
                vq->rx_pkts = 0;
                vq->rx_bytes = 0;
        }
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                if (dev->data->tx_queues[i] == NULL)
                        continue;
                vq = dev->data->tx_queues[i];
                vq->tx_pkts = 0;
                vq->tx_bytes = 0;
                vq->missed_pkts = 0;
        }
}

static void
eth_queue_release(void *q)
{
        rte_free(q);
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
                int wait_to_complete __rte_unused)
{
        return 0;
}

/**
 * Disable features in feature_mask. Returns 0 on success.
 */
int
rte_eth_vhost_feature_disable(uint64_t feature_mask)
{
        return rte_vhost_feature_disable(feature_mask);
}

/**
 * Enable features in feature_mask. Returns 0 on success.
 */
int
rte_eth_vhost_feature_enable(uint64_t feature_mask)
{
        return rte_vhost_feature_enable(feature_mask);
}

/* Returns currently supported vhost features */
uint64_t
rte_eth_vhost_feature_get(void)
{
        return rte_vhost_feature_get();
}

static const struct eth_dev_ops ops = {
        .dev_start = eth_dev_start,
        .dev_stop = eth_dev_stop,
        .dev_configure = eth_dev_configure,
        .dev_infos_get = eth_dev_info,
        .rx_queue_setup = eth_rx_queue_setup,
        .tx_queue_setup = eth_tx_queue_setup,
        .rx_queue_release = eth_queue_release,
        .tx_queue_release = eth_queue_release,
        .link_update = eth_link_update,
        .stats_get = eth_stats_get,
        .stats_reset = eth_stats_reset,
};

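/*
 * Allocate and wire up a new vhost ethdev: private data, MAC address
 * derived from base_eth_addr plus the port id, vring state tracking and the
 * internal_list entry used to find the port from the vhost callbacks.
 * Returns the new port id, or -1 on allocation failure.
 */
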
static int
eth_dev_vhost_create(const char *name, char *iface_name, int16_t queues,
                     const unsigned numa_node, uint64_t flags)
{
        struct rte_eth_dev_data *data = NULL;
        struct pmd_internal *internal = NULL;
        struct rte_eth_dev *eth_dev = NULL;
        struct ether_addr *eth_addr = NULL;
        struct rte_vhost_vring_state *vring_state = NULL;
        struct internal_list *list = NULL;

        RTE_LOG(INFO, PMD, "Creating VHOST-USER backend on numa socket %u\n",
                numa_node);

        /* now do all data allocation - for eth_dev structure, dummy pci driver
         * and internal (private) data
         */
        data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
        if (data == NULL)
                goto error;

        internal = rte_zmalloc_socket(name, sizeof(*internal), 0, numa_node);
        if (internal == NULL)
                goto error;

        list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
        if (list == NULL)
                goto error;

        /* reserve an ethdev entry */
        eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
        if (eth_dev == NULL)
                goto error;

        eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
        if (eth_addr == NULL)
                goto error;
        *eth_addr = base_eth_addr;
        eth_addr->addr_bytes[5] = eth_dev->data->port_id;

        vring_state = rte_zmalloc_socket(name,
                        sizeof(*vring_state), 0, numa_node);
        if (vring_state == NULL)
                goto error;

        TAILQ_INIT(&eth_dev->link_intr_cbs);

        /* now put it all together
         * - store queue data in internal,
         * - store numa_node info in ethdev data
         * - point eth_dev_data to internals
         * - and point eth_dev structure to new eth_dev_data structure
         */
        internal->dev_name = strdup(name);
        if (internal->dev_name == NULL)
                goto error;
        internal->iface_name = strdup(iface_name);
        if (internal->iface_name == NULL)
                goto error;
        internal->flags = flags;

        list->eth_dev = eth_dev;
        pthread_mutex_lock(&internal_list_lock);
        TAILQ_INSERT_TAIL(&internal_list, list, next);
        pthread_mutex_unlock(&internal_list_lock);

        rte_spinlock_init(&vring_state->lock);
        vring_states[eth_dev->data->port_id] = vring_state;

        data->dev_private = internal;
        data->port_id = eth_dev->data->port_id;
        memmove(data->name, eth_dev->data->name, sizeof(data->name));
        data->nb_rx_queues = queues;
        data->nb_tx_queues = queues;
        internal->max_queues = queues;
        data->dev_link = pmd_link;
        data->mac_addrs = eth_addr;

        /* We'll replace the 'data' originally allocated by eth_dev. So the
         * vhost PMD resources won't be shared between multi processes.
         */
        eth_dev->data = data;
        eth_dev->dev_ops = &ops;
        eth_dev->driver = NULL;
        data->dev_flags =
                RTE_ETH_DEV_DETACHABLE | RTE_ETH_DEV_INTR_LSC;
        data->kdrv = RTE_KDRV_NONE;
        data->drv_name = internal->dev_name;
        data->numa_node = numa_node;

        /* finally assign rx and tx ops */
        eth_dev->rx_pkt_burst = eth_vhost_rx;
        eth_dev->tx_pkt_burst = eth_vhost_tx;

        return data->port_id;

error:
        if (internal)
                free(internal->dev_name);
        rte_free(vring_state);
        rte_free(eth_addr);
        if (eth_dev)
                rte_eth_dev_release_port(eth_dev);
        rte_free(internal);
        rte_free(list);
        rte_free(data);

        return -1;
}

static inline int
open_iface(const char *key __rte_unused, const char *value, void *extra_args)
{
        const char **iface_name = extra_args;

        if (value == NULL)
                return -1;

        *iface_name = value;

        return 0;
}

static inline int
open_int(const char *key __rte_unused, const char *value, void *extra_args)
{
        uint16_t *n = extra_args;

        if (value == NULL || extra_args == NULL)
                return -EINVAL;

        *n = (uint16_t)strtoul(value, NULL, 0);
        if (*n == USHRT_MAX && errno == ERANGE)
                return -1;

        return 0;
}

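/*
 * Virtual device init handler: parses the "iface", "queues" and "client"
 * kvargs ("iface" is mandatory, "queues" defaults to 1) and creates the
 * ethdev on the caller's NUMA socket. A non-zero "client" value makes the
 * PMD connect to an existing vhost-user socket instead of creating one.
 */
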
static int
rte_pmd_vhost_devinit(const char *name, const char *params)
{
        struct rte_kvargs *kvlist = NULL;
        int ret = 0;
        char *iface_name;
        uint16_t queues;
        uint64_t flags = 0;
        int client_mode = 0;

        RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n", name);

        kvlist = rte_kvargs_parse(params, valid_arguments);
        if (kvlist == NULL)
                return -1;

        if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) {
                ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG,
                                         &open_iface, &iface_name);
                if (ret < 0)
                        goto out_free;
        } else {
                ret = -1;
                goto out_free;
        }

        if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
                ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
                                         &open_int, &queues);
                if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
                        goto out_free;

        } else
                queues = 1;

        if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
                ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
                                         &open_int, &client_mode);
                if (ret < 0)
                        goto out_free;

                if (client_mode)
                        flags |= RTE_VHOST_USER_CLIENT;
        }

        eth_dev_vhost_create(name, iface_name, queues, rte_socket_id(), flags);

out_free:
        rte_kvargs_free(kvlist);
        return ret;
}

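/*
 * Virtual device uninit handler: removes the port from the internal list,
 * stops it (which unregisters the socket and, for the last started port,
 * stops the session thread), then frees the queues, MAC address, private
 * data and the ethdev entry.
 */
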
static int
rte_pmd_vhost_devuninit(const char *name)
{
        struct rte_eth_dev *eth_dev = NULL;
        struct pmd_internal *internal;
        struct internal_list *list;
        unsigned int i;

        RTE_LOG(INFO, PMD, "Un-Initializing pmd_vhost for %s\n", name);

        /* find an ethdev entry */
        eth_dev = rte_eth_dev_allocated(name);
        if (eth_dev == NULL)
                return -ENODEV;

        internal = eth_dev->data->dev_private;
        if (internal == NULL)
                return -ENODEV;

        list = find_internal_resource(internal->iface_name);
        if (list == NULL)
                return -ENODEV;

        pthread_mutex_lock(&internal_list_lock);
        TAILQ_REMOVE(&internal_list, list, next);
        pthread_mutex_unlock(&internal_list_lock);
        rte_free(list);

        eth_dev_stop(eth_dev);

        rte_free(vring_states[eth_dev->data->port_id]);
        vring_states[eth_dev->data->port_id] = NULL;

        free(internal->dev_name);
        free(internal->iface_name);

        for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
                rte_free(eth_dev->data->rx_queues[i]);
        for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
                rte_free(eth_dev->data->tx_queues[i]);

        rte_free(eth_dev->data->mac_addrs);
        rte_free(eth_dev->data);
        rte_free(internal);

        rte_eth_dev_release_port(eth_dev);

        return 0;
}

static struct rte_driver pmd_vhost_drv = {
        .name = "eth_vhost",
        .type = PMD_VDEV,
        .init = rte_pmd_vhost_devinit,
        .uninit = rte_pmd_vhost_devuninit,
};

PMD_REGISTER_DRIVER(pmd_vhost_drv);