/*
 * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/virtio_ring.h>
#include <sys/eventfd.h>
#include <sys/param.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_string_fns.h>
#include <rte_malloc.h>
#include <rte_virtio_net.h>
#define MAX_QUEUES 128

/* the maximum number of external ports supported */
#define MAX_SUP_PORTS 1

#define MBUF_CACHE_SIZE 128
#define MBUF_DATA_SIZE  RTE_MBUF_DEFAULT_BUF_SIZE

#define MAX_PKT_BURST 32      /* Max burst size for RX/TX */
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */
#define BURST_RX_RETRIES 4  /* Number of retries on RX. */
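
/*
 * Illustration (not part of the original source): with the defaults above,
 * a full RX retry cycle waits at most
 *     BURST_RX_RETRIES * BURST_RX_WAIT_US = 4 * 15 = 60 us
 * for the guest RX ring to drain before the burst is dropped.
 */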
#define JUMBO_FRAME_MAX_SIZE 0x2600

/* State of virtio device. */
#define DEVICE_MAC_LEARNING 0
#define DEVICE_RX           1
#define DEVICE_SAFE_REMOVE  2

/* Configurable number of RX/TX ring descriptors */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 512

#define INVALID_PORT_ID 0xFF

/* Max number of devices. Limited by vmdq. */
#define MAX_DEVICES 64

/* Size of buffers used for snprintfs. */
#define MAX_PRINT_BUFF 6072

/* Maximum long option length for option parsing. */
#define MAX_LONG_OPT_SZ 64
/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;

/* Promiscuous mode */
static uint32_t promiscuous;

/* number of devices/queues to support */
static uint32_t num_queues = 0;
static uint32_t num_devices;

static struct rte_mempool *mbuf_pool;
static int mergeable;

/*
 * Enable VM2VM communications. If this is disabled then the MAC address
 * compare is skipped.
 */
static vm2vm_type vm2vm_mode = VM2VM_SOFTWARE;

/* Enable stats. */
static uint32_t enable_stats = 0;
/* Enable retries on RX. */
static uint32_t enable_retry = 1;

/* Disable TX checksum offload */
static uint32_t enable_tx_csum;

/* Disable TSO offload */
static uint32_t enable_tso;

static int client_mode;
static int dequeue_zero_copy;

/* Specify timeout (in microseconds) between retries on RX. */
static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
/* Specify the number of retries on RX. */
static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;

/* Socket file paths. Can be set by user */
static char *socket_files;
static int nb_sockets;
/* Empty vmdq configuration structure. Filled in programmatically */
static struct rte_eth_conf vmdq_conf_default = {
    .rxmode = {
        .mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
        .header_split   = 0, /**< Header Split disabled */
        .hw_ip_checksum = 0, /**< IP checksum offload disabled */
        .hw_vlan_filter = 0, /**< VLAN filtering disabled */
        /*
         * This is necessary for 1G NICs such as the I350;
         * it fixes a bug where IPv4 forwarding in the guest
         * cannot forward packets from one virtio device to another.
         */
        .hw_vlan_strip  = 1, /**< VLAN strip enabled. */
        .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
        .hw_strip_crc   = 1, /**< CRC stripped by hardware */
    },

    .txmode = {
        .mq_mode = ETH_MQ_TX_NONE,
    },
    .rx_adv_conf = {
        /*
         * should be overridden separately in code with
         * appropriate values
         */
        .vmdq_rx_conf = {
            .nb_queue_pools = ETH_8_POOLS,
            .enable_default_pool = 0,
            .pool_map = {{0, 0},},
        },
    },
};
static unsigned lcore_ids[RTE_MAX_LCORE];
static uint8_t ports[RTE_MAX_ETHPORTS];
static unsigned num_ports = 0; /**< The number of ports specified in command line */
static uint16_t num_pf_queues, num_vmdq_queues;
static uint16_t vmdq_pool_base, vmdq_queue_base;
static uint16_t queues_per_pool;

const uint16_t vlan_tags[] = {
    1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,
    1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015,
    1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
    1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
    1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,
    1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
    1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
    1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
};

/* ethernet addresses of ports */
static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];

static struct vhost_dev_tailq_list vhost_dev_list =
    TAILQ_HEAD_INITIALIZER(vhost_dev_list);

static struct lcore_info lcore_info[RTE_MAX_LCORE];
/* Used for queueing bursts of TX packets. */
struct mbuf_table {
    unsigned len;
    unsigned txq_id;
    struct rte_mbuf *m_table[MAX_PKT_BURST];
};

/* TX queue for each data core. */
struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];

#define MBUF_TABLE_DRAIN_TSC ((rte_get_tsc_hz() + US_PER_S - 1) \
                              / US_PER_S * BURST_TX_DRAIN_US)
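
/*
 * Worked example (illustrative, not from the original source): assuming a
 * hypothetical TSC frequency of 2.5 GHz, rte_get_tsc_hz() = 2500000000, so
 *     (2500000000 + 999999) / 1000000 * 100 = 250000 cycles, i.e. ~100 us.
 * The "+ US_PER_S - 1" term rounds the cycles-per-microsecond division up,
 * so the drain period is never shorter than BURST_TX_DRAIN_US.
 */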
/*
 * Builds up the correct configuration for VMDQ VLAN pool map
 * according to the pool & queue limits.
 */
static inline int
get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
{
    struct rte_eth_vmdq_rx_conf conf;
    struct rte_eth_vmdq_rx_conf *def_conf =
        &vmdq_conf_default.rx_adv_conf.vmdq_rx_conf;
    unsigned i;

    memset(&conf, 0, sizeof(conf));
    conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;
    conf.nb_pool_maps = num_devices;
    conf.enable_loop_back = def_conf->enable_loop_back;
    conf.rx_mode = def_conf->rx_mode;

    for (i = 0; i < conf.nb_pool_maps; i++) {
        conf.pool_map[i].vlan_id = vlan_tags[i];
        conf.pool_map[i].pools = (1UL << i);
    }

    (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
    (void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
           sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
    return 0;
}
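
/*
 * Illustration (not in the original source): with num_devices = 8, the loop
 * in get_eth_conf() pairs pool i with vlan_tags[i], i.e. pool 0 <-> vlan
 * 1000, pool 1 <-> vlan 1001, ..., pool 7 <-> vlan 1007, and sets
 * conf.pool_map[i].pools to the one-hot mask (1UL << i), so each VLAN tag
 * maps to exactly one VMDQ pool (one virtio device).
 */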
/*
 * Validate the device number according to the max pool number obtained from
 * dev_info. If the device number is invalid, print an error message and
 * return -1. Each device must have its own pool.
 */
static int
validate_num_devices(uint32_t max_nb_devices)
{
    if (num_devices > max_nb_devices) {
        RTE_LOG(ERR, VHOST_PORT, "invalid number of devices\n");
        return -1;
    }
    return 0;
}
/*
 * Initialises a given port using global settings and with the rx buffers
 * coming from the mbuf_pool passed as parameter.
 */
static inline int
port_init(uint8_t port)
{
    struct rte_eth_dev_info dev_info;
    struct rte_eth_conf port_conf;
    struct rte_eth_rxconf *rxconf;
    struct rte_eth_txconf *txconf;
    int16_t rx_rings, tx_rings;
    uint16_t rx_ring_size, tx_ring_size;
    int retval;
    uint16_t q;

    /*
     * The max pool number from dev_info will be used to validate the pool
     * number specified in the command line.
     */
    rte_eth_dev_info_get(port, &dev_info);

    rxconf = &dev_info.default_rxconf;
    txconf = &dev_info.default_txconf;
    rxconf->rx_drop_en = 1;

    /* Enable vlan offload */
    txconf->txq_flags &= ~ETH_TXQ_FLAGS_NOVLANOFFL;

    /* Configure the number of supported virtio devices based on VMDQ limits */
    num_devices = dev_info.max_vmdq_pools;

    rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
    tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
    /*
     * When dequeue zero copy is enabled, the guest Tx used vring will be
     * updated only when the corresponding mbuf is freed. Thus, the nb_tx_desc
     * (tx_ring_size here) must be small enough so that the driver will
     * hit the free threshold easily and free mbufs in a timely manner.
     * Otherwise, the guest Tx vring would be starved.
     */
    if (dequeue_zero_copy)
        tx_ring_size = 64;
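
    /*
     * Sizing note (illustrative, not from the original source): 64
     * descriptors is small next to RTE_TEST_TX_DESC_DEFAULT (512), so the
     * PMD crosses its free threshold -- and therefore recycles mbufs and
     * guest Tx ring entries -- far more frequently.
     */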
    tx_rings = (uint16_t)rte_lcore_count();

    retval = validate_num_devices(MAX_DEVICES);
    if (retval < 0)
        return retval;

    /* Get port configuration. */
    retval = get_eth_conf(&port_conf, num_devices);
    if (retval < 0)
        return retval;

    /* NIC queues are divided into pf queues and vmdq queues. */
    num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
    queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
    num_vmdq_queues = num_devices * queues_per_pool;
    num_queues = num_pf_queues + num_vmdq_queues;
    vmdq_queue_base = dev_info.vmdq_queue_base;
    vmdq_pool_base = dev_info.vmdq_pool_base;
    printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool has %u queues\n",
        num_pf_queues, num_devices, queues_per_pool);
    if (port >= rte_eth_dev_count())
        return -1;

    if (enable_tx_csum == 0)
        rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_CSUM);

    if (enable_tso == 0) {
        rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4);
        rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6);
        rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_GUEST_TSO4);
        rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_GUEST_TSO6);
    }

    rx_rings = (uint16_t)dev_info.max_rx_queues;
    /* Configure ethernet device. */
    retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
    if (retval != 0) {
        RTE_LOG(ERR, VHOST_PORT, "Failed to configure port %u: %s.\n",
            port, strerror(-retval));
        return retval;
    }
    /* Setup the queues. */
    for (q = 0; q < rx_rings; q++) {
        retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
                rte_eth_dev_socket_id(port),
                rxconf, mbuf_pool);
        if (retval < 0) {
            RTE_LOG(ERR, VHOST_PORT,
                "Failed to setup rx queue %u of port %u: %s.\n",
                q, port, strerror(-retval));
            return retval;
        }
    }
    for (q = 0; q < tx_rings; q++) {
        retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
                rte_eth_dev_socket_id(port),
                txconf);
        if (retval < 0) {
            RTE_LOG(ERR, VHOST_PORT,
                "Failed to setup tx queue %u of port %u: %s.\n",
                q, port, strerror(-retval));
            return retval;
        }
    }
    /* Start the device. */
    retval = rte_eth_dev_start(port);
    if (retval < 0) {
        RTE_LOG(ERR, VHOST_PORT, "Failed to start port %u: %s\n",
            port, strerror(-retval));
        return retval;
    }

    if (promiscuous)
        rte_eth_promiscuous_enable(port);

    rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
    RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
    RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
        " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
        (unsigned)port,
        vmdq_ports_eth_addr[port].addr_bytes[0],
        vmdq_ports_eth_addr[port].addr_bytes[1],
        vmdq_ports_eth_addr[port].addr_bytes[2],
        vmdq_ports_eth_addr[port].addr_bytes[3],
        vmdq_ports_eth_addr[port].addr_bytes[4],
        vmdq_ports_eth_addr[port].addr_bytes[5]);

    return 0;
}
/* Set socket file path. */
static int
us_vhost_parse_socket_path(const char *q_arg)
{
    /* parse socket path string */
    if (strnlen(q_arg, PATH_MAX) == PATH_MAX)
        return -1;
    socket_files = realloc(socket_files, PATH_MAX * (nb_sockets + 1));
    snprintf(socket_files + nb_sockets * PATH_MAX, PATH_MAX, "%s", q_arg);
    nb_sockets++;
    return 0;
}
/* Parse the portmask provided at run time. */
static int
parse_portmask(const char *portmask)
{
    char *end = NULL;
    unsigned long pm;

    errno = 0;
    /* parse hexadecimal string */
    pm = strtoul(portmask, &end, 16);
    if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
        return -1;
    return pm;
}
/* Parse num options at run time. */
static int
parse_num_opt(const char *q_arg, uint32_t max_valid_value)
{
    char *end = NULL;
    unsigned long num;

    errno = 0;
    /* parse unsigned int string */
    num = strtoul(q_arg, &end, 10);
    if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
        return -1;
    if (num > max_valid_value)
        return -1;
    return num;
}
static void
us_vhost_usage(const char *prgname)
{
    RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
    "    --rx_retry [0|1] --mergeable [0|1] --stats [0-N]\n"
    "    --socket-file <path>\n"
    "    -p PORTMASK: Set mask for ports to be used by application\n"
    "    --vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n"
    "    --rx-retry [0|1]: disable/enable(default) retries on rx. Enable retry if destination queue is full\n"
    "    --rx-retry-delay [0-N]: timeout (in microseconds) between retries on RX. Only takes effect when rx retries are enabled\n"
    "    --rx-retry-num [0-N]: the number of retries on rx. Only takes effect when rx retries are enabled\n"
    "    --mergeable [0|1]: disable(default)/enable RX mergeable buffers\n"
    "    --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
    "    --socket-file: The path of the socket file.\n"
    "    --tx-csum [0|1] disable/enable TX checksum offload.\n"
    "    --tso [0|1] disable/enable TCP segmentation offload.\n"
    "    --client register a vhost-user socket as client mode.\n"
    "    --dequeue-zero-copy enables dequeue zero copy\n",
    prgname);
}
/*
 * Parse the arguments given in the command line of the application.
 */
static int
us_vhost_parse_args(int argc, char **argv)
{
    int opt, ret;
    int option_index;
    unsigned i;
    const char *prgname = argv[0];
    static struct option long_option[] = {
        {"vm2vm", required_argument, NULL, 0},
        {"rx-retry", required_argument, NULL, 0},
        {"rx-retry-delay", required_argument, NULL, 0},
        {"rx-retry-num", required_argument, NULL, 0},
        {"mergeable", required_argument, NULL, 0},
        {"stats", required_argument, NULL, 0},
        {"socket-file", required_argument, NULL, 0},
        {"tx-csum", required_argument, NULL, 0},
        {"tso", required_argument, NULL, 0},
        {"client", no_argument, &client_mode, 1},
        {"dequeue-zero-copy", no_argument, &dequeue_zero_copy, 1},
        {NULL, 0, 0, 0},
    };

    /* Parse command line */
    while ((opt = getopt_long(argc, argv, "p:P",
            long_option, &option_index)) != EOF) {
        switch (opt) {
        /* Portmask */
        case 'p':
            enabled_port_mask = parse_portmask(optarg);
            if (enabled_port_mask == 0) {
                RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n");
                us_vhost_usage(prgname);
                return -1;
            }
            break;

        case 'P':
            promiscuous = 1;
            vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.rx_mode =
                ETH_VMDQ_ACCEPT_BROADCAST |
                ETH_VMDQ_ACCEPT_MULTICAST;
            rte_vhost_feature_enable(1ULL << VIRTIO_NET_F_CTRL_RX);
            break;
        case 0:
            /* Enable/disable vm2vm comms. */
            if (!strncmp(long_option[option_index].name, "vm2vm",
                    MAX_LONG_OPT_SZ)) {
                ret = parse_num_opt(optarg, (VM2VM_LAST - 1));
                if (ret == -1) {
                    RTE_LOG(INFO, VHOST_CONFIG,
                        "Invalid argument for "
                        "vm2vm [0|1|2]\n");
                    us_vhost_usage(prgname);
                    return -1;
                }
                vm2vm_mode = (vm2vm_type)ret;
            }

            /* Enable/disable retries on RX. */
            if (!strncmp(long_option[option_index].name, "rx-retry", MAX_LONG_OPT_SZ)) {
                ret = parse_num_opt(optarg, 1);
                if (ret == -1) {
                    RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry [0|1]\n");
                    us_vhost_usage(prgname);
                    return -1;
                }
                enable_retry = ret;
            }
            /* Enable/disable TX checksum offload. */
            if (!strncmp(long_option[option_index].name, "tx-csum", MAX_LONG_OPT_SZ)) {
                ret = parse_num_opt(optarg, 1);
                if (ret == -1) {
                    RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n");
                    us_vhost_usage(prgname);
                    return -1;
                }
                enable_tx_csum = ret;
            }

            /* Enable/disable TSO offload. */
            if (!strncmp(long_option[option_index].name, "tso", MAX_LONG_OPT_SZ)) {
                ret = parse_num_opt(optarg, 1);
                if (ret == -1) {
                    RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n");
                    us_vhost_usage(prgname);
                    return -1;
                }
                enable_tso = ret;
            }
            /* Specify the retry delay time (in microseconds) on RX. */
            if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) {
                ret = parse_num_opt(optarg, INT32_MAX);
                if (ret == -1) {
                    RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-delay [0-N]\n");
                    us_vhost_usage(prgname);
                    return -1;
                }
                burst_rx_delay_time = ret;
            }

            /* Specify the number of retries on RX. */
            if (!strncmp(long_option[option_index].name, "rx-retry-num", MAX_LONG_OPT_SZ)) {
                ret = parse_num_opt(optarg, INT32_MAX);
                if (ret == -1) {
                    RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-num [0-N]\n");
                    us_vhost_usage(prgname);
                    return -1;
                }
                burst_rx_retry_num = ret;
            }
            /* Enable/disable RX mergeable buffers. */
            if (!strncmp(long_option[option_index].name, "mergeable", MAX_LONG_OPT_SZ)) {
                ret = parse_num_opt(optarg, 1);
                if (ret == -1) {
                    RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for mergeable [0|1]\n");
                    us_vhost_usage(prgname);
                    return -1;
                }
                mergeable = !!ret;
                if (ret) {
                    vmdq_conf_default.rxmode.jumbo_frame = 1;
                    vmdq_conf_default.rxmode.max_rx_pkt_len
                        = JUMBO_FRAME_MAX_SIZE;
                }
            }
            /* Enable/disable stats. */
            if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) {
                ret = parse_num_opt(optarg, INT32_MAX);
                if (ret == -1) {
                    RTE_LOG(INFO, VHOST_CONFIG,
                        "Invalid argument for stats [0..N]\n");
                    us_vhost_usage(prgname);
                    return -1;
                }
                enable_stats = ret;
            }

            /* Set socket file path. */
            if (!strncmp(long_option[option_index].name,
                    "socket-file", MAX_LONG_OPT_SZ)) {
                if (us_vhost_parse_socket_path(optarg) == -1) {
                    RTE_LOG(INFO, VHOST_CONFIG,
                        "Invalid argument for socket name (Max %d characters)\n",
                        PATH_MAX);
                    us_vhost_usage(prgname);
                    return -1;
                }
            }
            break;
        /* Invalid option - print options. */
        default:
            us_vhost_usage(prgname);
            return -1;
        }
    }

    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
        if (enabled_port_mask & (1 << i))
            ports[num_ports++] = (uint8_t)i;
    }

    if ((num_ports == 0) || (num_ports > MAX_SUP_PORTS)) {
        RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u, "
            "but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
        return -1;
    }
    return 0;
}
/*
 * Update the global vars num_ports and ports according to the
 * number of system ports and return the number of valid ports.
 */
static unsigned check_ports_num(unsigned nb_ports)
{
    unsigned valid_num_ports = num_ports;
    unsigned portid;

    if (num_ports > nb_ports) {
        RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) exceeds total system port number(%u)\n",
            num_ports, nb_ports);
        num_ports = nb_ports;
    }

    for (portid = 0; portid < num_ports; portid++) {
        if (ports[portid] >= nb_ports) {
            RTE_LOG(INFO, VHOST_PORT, "\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
                ports[portid], (nb_ports - 1));
            ports[portid] = INVALID_PORT_ID;
            valid_num_ports--;
        }
    }
    return valid_num_ports;
}
static inline struct vhost_dev *__attribute__((always_inline))
find_vhost_dev(struct ether_addr *mac)
{
    struct vhost_dev *vdev;

    TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
        if (vdev->ready == DEVICE_RX &&
            is_same_ether_addr(mac, &vdev->mac_address))
            return vdev;
    }
    return NULL;
}
/*
 * This function learns the MAC address of the device and registers this
 * along with a vlan tag to a VMDQ.
 */
static int
link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)
{
    struct ether_hdr *pkt_hdr;
    int i, ret;

    /* Learn MAC address of guest device from packet */
    pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

    if (find_vhost_dev(&pkt_hdr->s_addr)) {
        RTE_LOG(ERR, VHOST_DATA,
            "(%d) device is using a registered MAC!\n",
            vdev->vid);
        return -1;
    }

    for (i = 0; i < ETHER_ADDR_LEN; i++)
        vdev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];

    /* vlan_tag currently uses the device_id. */
    vdev->vlan_tag = vlan_tags[vdev->vid];

    /* Print out VMDQ registration info. */
    RTE_LOG(INFO, VHOST_DATA,
        "(%d) mac %02x:%02x:%02x:%02x:%02x:%02x and vlan %d registered\n",
        vdev->vid,
        vdev->mac_address.addr_bytes[0], vdev->mac_address.addr_bytes[1],
        vdev->mac_address.addr_bytes[2], vdev->mac_address.addr_bytes[3],
        vdev->mac_address.addr_bytes[4], vdev->mac_address.addr_bytes[5],
        vdev->vlan_tag);

    /* Register the MAC address. */
    ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
        (uint32_t)vdev->vid + vmdq_pool_base);
    if (ret)
        RTE_LOG(ERR, VHOST_DATA,
            "(%d) failed to add device MAC address to VMDQ\n",
            vdev->vid);

    rte_eth_dev_set_vlan_strip_on_queue(ports[0], vdev->vmdq_rx_q, 1);

    /* Set device as ready for RX. */
    vdev->ready = DEVICE_RX;

    return 0;
}
/*
 * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding
 * buffers to the RX queue before disabling RX on the device.
 */
static inline void
unlink_vmdq(struct vhost_dev *vdev)
{
    unsigned i = 0;
    unsigned rx_count;
    struct rte_mbuf *pkts_burst[MAX_PKT_BURST];

    if (vdev->ready == DEVICE_RX) {
        /* clear MAC and VLAN settings */
        rte_eth_dev_mac_addr_remove(ports[0], &vdev->mac_address);
        for (i = 0; i < 6; i++)
            vdev->mac_address.addr_bytes[i] = 0;

        /* Clear out the receive buffers */
        rx_count = rte_eth_rx_burst(ports[0],
            (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);

        while (rx_count) {
            for (i = 0; i < rx_count; i++)
                rte_pktmbuf_free(pkts_burst[i]);

            rx_count = rte_eth_rx_burst(ports[0],
                (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
        }

        vdev->ready = DEVICE_MAC_LEARNING;
    }
}
static inline void __attribute__((always_inline))
virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
        struct rte_mbuf *m)
{
    uint16_t ret;

    ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
    if (enable_stats) {
        rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
        rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
        src_vdev->stats.tx_total++;
        src_vdev->stats.tx += ret;
    }
}
/*
 * Check if the packet destination MAC address is for a local device. If so
 * then put the packet on that device's RX queue. If not then return.
 */
static inline int __attribute__((always_inline))
virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
{
    struct ether_hdr *pkt_hdr;
    struct vhost_dev *dst_vdev;

    pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

    dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
    if (!dst_vdev)
        return -1;

    if (vdev->vid == dst_vdev->vid) {
        RTE_LOG(DEBUG, VHOST_DATA,
            "(%d) TX: src and dst MAC are the same. Dropping packet.\n",
            vdev->vid);
        return 0;
    }

    RTE_LOG(DEBUG, VHOST_DATA,
        "(%d) TX: MAC address is local\n", dst_vdev->vid);

    if (unlikely(dst_vdev->remove)) {
        RTE_LOG(DEBUG, VHOST_DATA,
            "(%d) device is marked for removal\n", dst_vdev->vid);
        return 0;
    }

    virtio_xmit(dst_vdev, vdev, m);
    return 0;
}
/*
 * Check if the destination MAC of a packet is one local VM,
 * and get its vlan tag, and offset if it is.
 */
static inline int __attribute__((always_inline))
find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
    uint32_t *offset, uint16_t *vlan_tag)
{
    struct vhost_dev *dst_vdev;
    struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

    dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
    if (!dst_vdev)
        return 0;

    if (vdev->vid == dst_vdev->vid) {
        RTE_LOG(DEBUG, VHOST_DATA,
            "(%d) TX: src and dst MAC are the same. Dropping packet.\n",
            vdev->vid);
        return -1;
    }

    /*
     * HW vlan strip reduces the packet length by the length of the
     * vlan tag, so we need to restore the packet length by adding
     * the tag length back.
     */
    *offset = VLAN_HLEN;
    *vlan_tag = vlan_tags[vdev->vid];

    RTE_LOG(DEBUG, VHOST_DATA,
        "(%d) TX: pkt to local VM device id: (%d), vlan tag: %u.\n",
        vdev->vid, dst_vdev->vid, *vlan_tag);

    return 0;
}
static uint16_t
get_psd_sum(void *l3_hdr, uint64_t ol_flags)
{
    if (ol_flags & PKT_TX_IPV4)
        return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
    else /* assume ethertype == ETHER_TYPE_IPv6 */
        return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
}
static void virtio_tx_offload(struct rte_mbuf *m)
{
    void *l3_hdr;
    struct ipv4_hdr *ipv4_hdr = NULL;
    struct tcp_hdr *tcp_hdr = NULL;
    struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

    l3_hdr = (char *)eth_hdr + m->l2_len;

    if (m->ol_flags & PKT_TX_IPV4) {
        ipv4_hdr = l3_hdr;
        ipv4_hdr->hdr_checksum = 0;
        m->ol_flags |= PKT_TX_IP_CKSUM;
    }

    tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + m->l3_len);
    tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
}
static inline void
free_pkts(struct rte_mbuf **pkts, uint16_t n)
{
    while (n--)
        rte_pktmbuf_free(pkts[n]);
}

static inline void __attribute__((always_inline))
do_drain_mbuf_table(struct mbuf_table *tx_q)
{
    uint16_t count = rte_eth_tx_burst(ports[0], tx_q->txq_id,
            tx_q->m_table, tx_q->len);

    if (unlikely(count < tx_q->len))
        free_pkts(&tx_q->m_table[count], tx_q->len - count);
    tx_q->len = 0;
}
/*
 * This function routes the TX packet to the correct interface. This
 * may be a local device or the physical port.
 */
static inline void __attribute__((always_inline))
virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
{
    struct mbuf_table *tx_q;
    unsigned offset = 0;
    const uint16_t lcore_id = rte_lcore_id();
    struct ether_hdr *nh;

    nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
    if (unlikely(is_broadcast_ether_addr(&nh->d_addr))) {
        struct vhost_dev *vdev2;

        TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
            virtio_xmit(vdev2, vdev, m);
        }
        goto queue2nic;
    }

    /* check if destination is local VM */
    if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
        rte_pktmbuf_free(m);
        return;
    }

    if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
        if (unlikely(find_local_dest(vdev, m, &offset,
                &vlan_tag) != 0)) {
            rte_pktmbuf_free(m);
            return;
        }
    }

    RTE_LOG(DEBUG, VHOST_DATA,
        "(%d) TX: MAC address is external\n", vdev->vid);

queue2nic:

    /* Add packet to the port tx queue */
    tx_q = &lcore_tx_queue[lcore_id];

    nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
    if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) {
        /* Guest has inserted the vlan tag. */
        struct vlan_hdr *vh = (struct vlan_hdr *) (nh + 1);
        uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag);
        if ((vm2vm_mode == VM2VM_HARDWARE) &&
            (vh->vlan_tci != vlan_tag_be))
            vh->vlan_tci = vlan_tag_be;
    } else {
        m->ol_flags |= PKT_TX_VLAN_PKT;

        /*
         * Find the right seg to adjust the data len when offset is
         * bigger than tail room size.
         */
        if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
            if (likely(offset <= rte_pktmbuf_tailroom(m)))
                m->data_len += offset;
            else {
                struct rte_mbuf *seg = m;

                while ((seg->next != NULL) &&
                    (offset > rte_pktmbuf_tailroom(seg)))
                    seg = seg->next;

                seg->data_len += offset;
            }
            m->pkt_len += offset;
        }

        m->vlan_tci = vlan_tag;
    }

    if (m->ol_flags & PKT_TX_TCP_SEG)
        virtio_tx_offload(m);

    tx_q->m_table[tx_q->len++] = m;
    if (enable_stats) {
        vdev->stats.tx_total++;
        vdev->stats.tx++;
    }

    if (unlikely(tx_q->len == MAX_PKT_BURST))
        do_drain_mbuf_table(tx_q);
}
static inline void __attribute__((always_inline))
drain_mbuf_table(struct mbuf_table *tx_q)
{
    static uint64_t prev_tsc;
    uint64_t cur_tsc;

    if (tx_q->len == 0)
        return;

    cur_tsc = rte_rdtsc();
    if (unlikely(cur_tsc - prev_tsc > MBUF_TABLE_DRAIN_TSC)) {
        prev_tsc = cur_tsc;
        RTE_LOG(DEBUG, VHOST_DATA,
            "TX queue drained after timeout with burst size %u\n",
            tx_q->len);
        do_drain_mbuf_table(tx_q);
    }
}
static inline void __attribute__((always_inline))
drain_eth_rx(struct vhost_dev *vdev)
{
    uint16_t rx_count, enqueue_count;
    struct rte_mbuf *pkts[MAX_PKT_BURST];

    rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
        pkts, MAX_PKT_BURST);
    if (!rx_count)
        return;

    /*
     * When "enable_retry" is set, here we wait and retry when there
     * are not enough free slots in the queue to hold @rx_count packets,
     * to diminish packet loss.
     */
    if (enable_retry &&
        unlikely(rx_count > rte_vhost_avail_entries(vdev->vid,
            VIRTIO_RXQ))) {
        uint32_t retry;

        for (retry = 0; retry < burst_rx_retry_num; retry++) {
            rte_delay_us(burst_rx_delay_time);
            if (rx_count <= rte_vhost_avail_entries(vdev->vid,
                    VIRTIO_RXQ))
                break;
        }
    }

    enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
        pkts, rx_count);
    if (enable_stats) {
        rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
        rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
    }

    free_pkts(pkts, rx_count);
}
static inline void __attribute__((always_inline))
drain_virtio_tx(struct vhost_dev *vdev)
{
    struct rte_mbuf *pkts[MAX_PKT_BURST];
    uint16_t count;
    uint16_t i;

    count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ, mbuf_pool,
        pkts, MAX_PKT_BURST);

    /* setup VMDq for the first packet */
    if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
        if (vdev->remove || link_vmdq(vdev, pkts[0]) == -1)
            free_pkts(pkts, count);
    }

    for (i = 0; i < count; ++i)
        virtio_tx_route(vdev, pkts[i], vlan_tags[vdev->vid]);
}
/*
 * Main function of vhost-switch. It basically does:
 *
 * for each vhost device {
 *    - drain_eth_rx()
 *
 *      Which drains the host eth Rx queue linked to the vhost device,
 *      and delivers all of the packets to the guest virtio Rx ring
 *      associated with this vhost device.
 *
 *    - drain_virtio_tx()
 *
 *      Which drains the guest virtio Tx queue and delivers all of the
 *      packets to the target, which could be another vhost device, or the
 *      physical eth dev. The route is done in function "virtio_tx_route".
 * }
 */
static int
switch_worker(void *arg __rte_unused)
{
    unsigned i;
    unsigned lcore_id = rte_lcore_id();
    struct vhost_dev *vdev;
    struct mbuf_table *tx_q;

    RTE_LOG(INFO, VHOST_DATA, "Processing on Core %u started\n", lcore_id);

    tx_q = &lcore_tx_queue[lcore_id];
    for (i = 0; i < rte_lcore_count(); i++) {
        if (lcore_ids[i] == lcore_id) {
            tx_q->txq_id = i;
            break;
        }
    }

    while (1) {
        drain_mbuf_table(tx_q);

        /*
         * Inform the configuration core that we have exited the
         * linked list and that no devices are in use if requested.
         */
        if (lcore_info[lcore_id].dev_removal_flag == REQUEST_DEV_REMOVAL)
            lcore_info[lcore_id].dev_removal_flag = ACK_DEV_REMOVAL;

        /*
         * Process vhost devices
         */
        TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list,
                lcore_vdev_entry) {
            if (unlikely(vdev->remove)) {
                unlink_vmdq(vdev);
                vdev->ready = DEVICE_SAFE_REMOVE;
                continue;
            }

            if (likely(vdev->ready == DEVICE_RX))
                drain_eth_rx(vdev);

            if (likely(!vdev->remove))
                drain_virtio_tx(vdev);
        }
    }

    return 0;
}
/*
 * Remove a device from the specific data core linked list and from the
 * main linked list. Synchronization occurs through the use of the
 * lcore dev_removal_flag. Device is made volatile here to avoid re-ordering
 * of dev->remove=1, which can cause an infinite loop in the rte_pause loop.
 */
static void
destroy_device(int vid)
{
    struct vhost_dev *vdev = NULL;
    int lcore;

    TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
        if (vdev->vid == vid)
            break;
    }
    if (!vdev)
        return;

    /* set the remove flag. */
    vdev->remove = 1;
    while (vdev->ready != DEVICE_SAFE_REMOVE)
        rte_pause();

    TAILQ_REMOVE(&lcore_info[vdev->coreid].vdev_list, vdev,
        lcore_vdev_entry);
    TAILQ_REMOVE(&vhost_dev_list, vdev, global_vdev_entry);

    /* Set the dev_removal_flag on each lcore. */
    RTE_LCORE_FOREACH_SLAVE(lcore)
        lcore_info[lcore].dev_removal_flag = REQUEST_DEV_REMOVAL;

    /*
     * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL
     * we can be sure that they can no longer access the device removed
     * from the linked lists and that the devices are no longer in use.
     */
    RTE_LCORE_FOREACH_SLAVE(lcore) {
        while (lcore_info[lcore].dev_removal_flag != ACK_DEV_REMOVAL)
            rte_pause();
    }

    lcore_info[vdev->coreid].device_num--;

    RTE_LOG(INFO, VHOST_DATA,
        "(%d) device has been removed from data core\n",
        vdev->vid);

    rte_free(vdev);
}
/*
 * A new device is added to a data core. First the device is added to the
 * main linked list and then allocated to a specific data core.
 */
static int
new_device(int vid)
{
    int lcore, core_add = 0;
    uint32_t device_num_min = num_devices;
    struct vhost_dev *vdev;

    vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
    if (vdev == NULL) {
        RTE_LOG(INFO, VHOST_DATA,
            "(%d) couldn't allocate memory for vhost dev\n",
            vid);
        return -1;
    }
    vdev->vid = vid;

    TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, global_vdev_entry);
    vdev->vmdq_rx_q = vid * queues_per_pool + vmdq_queue_base;

    /* reset ready flag */
    vdev->ready = DEVICE_MAC_LEARNING;
    vdev->remove = 0;

    /* Find a suitable lcore to add the device. */
    RTE_LCORE_FOREACH_SLAVE(lcore) {
        if (lcore_info[lcore].device_num < device_num_min) {
            device_num_min = lcore_info[lcore].device_num;
            core_add = lcore;
        }
    }
    vdev->coreid = core_add;

    TAILQ_INSERT_TAIL(&lcore_info[vdev->coreid].vdev_list, vdev,
        lcore_vdev_entry);
    lcore_info[vdev->coreid].device_num++;

    /* Disable notifications. */
    rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0);
    rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0);

    RTE_LOG(INFO, VHOST_DATA,
        "(%d) device has been added to data core %d\n",
        vid, vdev->coreid);

    return 0;
}
/*
 * These callbacks allow devices to be added to the data core when
 * configuration has been fully completed.
 */
static const struct virtio_net_device_ops virtio_net_device_ops =
{
    .new_device = new_device,
    .destroy_device = destroy_device,
};
/*
 * This is a thread that wakes up after a period to print stats if the
 * user has enabled them.
 */
static void
print_stats(void)
{
    struct vhost_dev *vdev;
    uint64_t tx_dropped, rx_dropped;
    uint64_t tx, tx_total, rx, rx_total;
    const char clr[] = { 27, '[', '2', 'J', '\0' };
    const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };

    while (1) {
        sleep(enable_stats);

        /* Clear screen and move to top left */
        printf("%s%s\n", clr, top_left);
        printf("Device statistics =================================\n");

        TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
            tx_total   = vdev->stats.tx_total;
            tx         = vdev->stats.tx;
            tx_dropped = tx_total - tx;

            rx_total   = rte_atomic64_read(&vdev->stats.rx_total_atomic);
            rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
            rx_dropped = rx_total - rx;

            printf("Statistics for device %d\n"
                "-----------------------\n"
                "TX total:      %" PRIu64 "\n"
                "TX dropped:    %" PRIu64 "\n"
                "TX successful: %" PRIu64 "\n"
                "RX total:      %" PRIu64 "\n"
                "RX dropped:    %" PRIu64 "\n"
                "RX successful: %" PRIu64 "\n",
                vdev->vid,
                tx_total, tx_dropped, tx,
                rx_total, rx_dropped, rx);
        }

        printf("===================================================\n");
    }
}
static void
unregister_drivers(int socket_num)
{
    int i, ret;

    for (i = 0; i < socket_num; i++) {
        ret = rte_vhost_driver_unregister(socket_files + i * PATH_MAX);
        if (ret != 0)
            RTE_LOG(ERR, VHOST_CONFIG,
                "Failed to unregister vhost driver for %s.\n",
                socket_files + i * PATH_MAX);
    }
}

/* When we receive an INT signal, unregister the vhost driver */
static void
sigint_handler(__rte_unused int signum)
{
    /* Unregister vhost driver. */
    unregister_drivers(nb_sockets);
    exit(0);
}
/*
 * While creating an mbuf pool, one key thing is to figure out how
 * many mbuf entries are enough for our use. FYI, here are some
 * guidelines:
 *
 * - Each rx queue would reserve @nr_rx_desc mbufs at queue setup stage
 *
 * - For each switch core (a CPU core that does the packet switching), we
 *   also need to make some reservation for receiving the packets from the
 *   virtio Tx queue. How many is enough depends on the usage. It's
 *   normally a simple calculation like the following:
 *
 *       MAX_PKT_BURST * max packet size / mbuf size
 *
 *   So, we definitely need to allocate more mbufs when TSO is enabled.
 *
 * - Similarly, for each switching core, we should reserve @nr_rx_desc
 *   mbufs for receiving the packets from the physical NIC device.
 *
 * - We also need to make sure, for each switch core, we have allocated
 *   enough mbufs to fill up the mbuf cache.
 */
static void
create_mbuf_pool(uint16_t nr_port, uint32_t nr_switch_core, uint32_t mbuf_size,
    uint32_t nr_queues, uint32_t nr_rx_desc, uint32_t nr_mbuf_cache)
{
    uint32_t nr_mbufs;
    uint32_t nr_mbufs_per_core;
    uint32_t mtu = 1500;

    nr_mbufs_per_core  = (mtu + mbuf_size) * MAX_PKT_BURST /
            (mbuf_size - RTE_PKTMBUF_HEADROOM);
    nr_mbufs_per_core += nr_rx_desc;
    nr_mbufs_per_core  = RTE_MAX(nr_mbufs_per_core, nr_mbuf_cache);

    nr_mbufs  = nr_queues * nr_rx_desc;
    nr_mbufs += nr_mbufs_per_core * nr_switch_core;
    nr_mbufs *= nr_port;

    mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", nr_mbufs,
        nr_mbuf_cache, 0, mbuf_size,
        rte_socket_id());
    if (mbuf_pool == NULL)
        rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
}
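
/*
 * Worked example (illustrative, not from the original source): with the
 * arguments main() passes below -- mbuf_size = MBUF_DATA_SIZE (2048 +
 * RTE_PKTMBUF_HEADROOM = 2176, assuming the default 128-byte headroom),
 * mtu = 1500, nr_rx_desc = 1024, nr_queues = 128, one port, and a
 * hypothetical nr_switch_core = 3 -- the per-core reservation is
 *     (1500 + 2176) * 32 / (2176 - 128) = 57 mbufs
 * plus 1024 for the NIC ring, i.e. 1081 (above the 128 mbuf cache size),
 * and the pool totals 128 * 1024 + 3 * 1081 = 134315 mbufs.
 */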
/*
 * Main function, does initialisation and calls the per-lcore functions.
 */
int
main(int argc, char *argv[])
{
    unsigned lcore_id, core_id = 0;
    unsigned nb_ports, valid_num_ports;
    int ret, i;
    uint8_t portid;
    static pthread_t tid;
    char thread_name[RTE_MAX_THREAD_NAME_LEN];
    uint64_t flags = 0;

    signal(SIGINT, sigint_handler);

    /* init EAL */
    ret = rte_eal_init(argc, argv);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
    argc -= ret;
    argv += ret;

    /* parse app arguments */
    ret = us_vhost_parse_args(argc, argv);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Invalid argument\n");

    for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
        TAILQ_INIT(&lcore_info[lcore_id].vdev_list);

        if (rte_lcore_is_enabled(lcore_id))
            lcore_ids[core_id++] = lcore_id;
    }

    if (rte_lcore_count() > RTE_MAX_LCORE)
        rte_exit(EXIT_FAILURE, "Not enough cores\n");

    /* Get the number of physical ports. */
    nb_ports = rte_eth_dev_count();

    /*
     * Update the global vars num_ports and ports according to the
     * number of system ports and get the number of valid ports.
     */
    valid_num_ports = check_ports_num(nb_ports);

    if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) {
        RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u, "
            "but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
        return -1;
    }

    /*
     * FIXME: here we are trying to allocate mbufs big enough for
     * @MAX_QUEUES, but the truth is we're never going to use that
     * many queues here. We probably should only do allocation for
     * those queues we are going to use.
     */
    create_mbuf_pool(valid_num_ports, rte_lcore_count() - 1, MBUF_DATA_SIZE,
        MAX_QUEUES, RTE_TEST_RX_DESC_DEFAULT, MBUF_CACHE_SIZE);
    if (vm2vm_mode == VM2VM_HARDWARE) {
        /* Enable VT loop back to let the L2 switch do it. */
        vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;
        RTE_LOG(DEBUG, VHOST_CONFIG,
            "Enable loop back for L2 switch in vmdq.\n");
    }

    /* initialize all ports */
    for (portid = 0; portid < nb_ports; portid++) {
        /* skip ports that are not enabled */
        if ((enabled_port_mask & (1 << portid)) == 0) {
            RTE_LOG(INFO, VHOST_PORT,
                "Skipping disabled port %d\n", portid);
            continue;
        }
        if (port_init(portid) != 0)
            rte_exit(EXIT_FAILURE,
                "Cannot initialize network ports\n");
    }
    /* Enable stats if the user option is set. */
    if (enable_stats) {
        ret = pthread_create(&tid, NULL, (void *)print_stats, NULL);
        if (ret != 0)
            rte_exit(EXIT_FAILURE,
                "Cannot create print-stats thread\n");

        /* Set thread_name for aid in debugging. */
        snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats");
        ret = rte_thread_setname(tid, thread_name);
        if (ret != 0)
            RTE_LOG(DEBUG, VHOST_CONFIG,
                "Cannot set print-stats name\n");
    }
    /* Launch all data cores. */
    RTE_LCORE_FOREACH_SLAVE(lcore_id)
        rte_eal_remote_launch(switch_worker, NULL, lcore_id);

    if (mergeable == 0)
        rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);

    if (client_mode)
        flags |= RTE_VHOST_USER_CLIENT;

    if (dequeue_zero_copy)
        flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;

    /* Register vhost user driver to handle vhost messages. */
    for (i = 0; i < nb_sockets; i++) {
        ret = rte_vhost_driver_register
            (socket_files + i * PATH_MAX, flags);
        if (ret != 0) {
            unregister_drivers(i);
            rte_exit(EXIT_FAILURE,
                "vhost driver register failure.\n");
        }
    }

    rte_vhost_driver_callback_register(&virtio_net_device_ops);

    rte_vhost_driver_session_start();
    return 0;
}