New upstream version 17.11.5
[deb_dpdk.git] / examples / vhost / main.c
index eddaf92..82660a6 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
 #include <rte_log.h>
 #include <rte_string_fns.h>
 #include <rte_malloc.h>
-#include <rte_virtio_net.h>
+#include <rte_vhost.h>
 #include <rte_ip.h>
 #include <rte_tcp.h>
+#include <rte_pause.h>
 
 #include "main.h"
 
@@ -65,7 +66,6 @@
 #define MBUF_CACHE_SIZE        128
 #define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE
 
-#define MAX_PKT_BURST 32               /* Max burst size for RX/TX */
 #define BURST_TX_DRAIN_US 100  /* TX drain every ~100us */
 
 #define BURST_RX_WAIT_US 15    /* Defines how long we wait between retries on RX */
@@ -87,9 +87,6 @@
 /* Max number of devices. Limited by vmdq. */
 #define MAX_DEVICES 64
 
-/* Size of buffers used for snprintfs. */
-#define MAX_PRINT_BUFF 6072
-
 /* Maximum long option length for option parsing. */
 #define MAX_LONG_OPT_SZ 64
 
@@ -129,6 +126,8 @@ static uint32_t enable_tso;
 static int client_mode;
 static int dequeue_zero_copy;
 
+static int builtin_net_driver;
+
 /* Specify timeout (in useconds) between retries on RX. */
 static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
 /* Specify the number of retries on RX. */
@@ -153,7 +152,7 @@ static struct rte_eth_conf vmdq_conf_default = {
                 */
                .hw_vlan_strip  = 1, /**< VLAN strip enabled. */
                .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
-               .hw_strip_crc   = 0, /**< CRC stripped by hardware */
+               .hw_strip_crc   = 1, /**< CRC stripped by hardware */
        },
 
        .txmode = {
@@ -175,7 +174,7 @@ static struct rte_eth_conf vmdq_conf_default = {
 };
 
 static unsigned lcore_ids[RTE_MAX_LCORE];
-static uint8_t ports[RTE_MAX_ETHPORTS];
+static uint16_t ports[RTE_MAX_ETHPORTS];
 static unsigned num_ports = 0; /**< The number of ports specified in command line */
 static uint16_t num_pf_queues, num_vmdq_queues;
 static uint16_t vmdq_pool_base, vmdq_queue_base;
@@ -263,7 +262,7 @@ validate_num_devices(uint32_t max_nb_devices)
  * coming from the mbuf_pool passed as parameter
  */
 static inline int
-port_init(uint8_t port)
+port_init(uint16_t port)
 {
        struct rte_eth_dev_info dev_info;
        struct rte_eth_conf port_conf;
@@ -277,12 +276,6 @@ port_init(uint8_t port)
        /* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
        rte_eth_dev_info_get (port, &dev_info);
 
-       if (dev_info.max_rx_queues > MAX_QUEUES) {
-               rte_exit(EXIT_FAILURE,
-                       "please define MAX_QUEUES no less than %u in %s\n",
-                       dev_info.max_rx_queues, __FILE__);
-       }
-
        rxconf = &dev_info.default_rxconf;
        txconf = &dev_info.default_txconf;
        rxconf->rx_drop_en = 1;
@@ -328,16 +321,6 @@ port_init(uint8_t port)
 
        if (port >= rte_eth_dev_count()) return -1;
 
-       if (enable_tx_csum == 0)
-               rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_CSUM);
-
-       if (enable_tso == 0) {
-               rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4);
-               rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6);
-               rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_GUEST_TSO4);
-               rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_GUEST_TSO6);
-       }
-
        rx_rings = (uint16_t)dev_info.max_rx_queues;
        /* Configure ethernet device. */
        retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
@@ -347,6 +330,19 @@ port_init(uint8_t port)
                return retval;
        }
 
+       retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rx_ring_size,
+               &tx_ring_size);
+       if (retval != 0) {
+               RTE_LOG(ERR, VHOST_PORT, "Failed to adjust number of descriptors "
+                       "for port %u: %s.\n", port, strerror(-retval));
+               return retval;
+       }
+       if (rx_ring_size > RTE_TEST_RX_DESC_DEFAULT) {
+               RTE_LOG(ERR, VHOST_PORT, "Mbuf pool has an insufficient size "
+                       "for Rx queues on port %u.\n", port);
+               return -1;
+       }
+
        /* Setup the queues. */
        for (q = 0; q < rx_rings; q ++) {
                retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
@@ -387,7 +383,7 @@ port_init(uint8_t port)
        RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
        RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
                        " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
-                       (unsigned)port,
+                       port,
                        vmdq_ports_eth_addr[port].addr_bytes[0],
                        vmdq_ports_eth_addr[port].addr_bytes[1],
                        vmdq_ports_eth_addr[port].addr_bytes[2],
@@ -405,7 +401,7 @@ static int
 us_vhost_parse_socket_path(const char *q_arg)
 {
        /* parse number string */
-       if (strnlen(q_arg, PATH_MAX) > PATH_MAX)
+       if (strnlen(q_arg, PATH_MAX) == PATH_MAX)
                return -1;
 
        socket_files = realloc(socket_files, PATH_MAX * (nb_sockets + 1));
@@ -509,6 +505,7 @@ us_vhost_parse_args(int argc, char **argv)
                {"tso", required_argument, NULL, 0},
                {"client", no_argument, &client_mode, 1},
                {"dequeue-zero-copy", no_argument, &dequeue_zero_copy, 1},
+               {"builtin-net-driver", no_argument, &builtin_net_driver, 1},
                {NULL, 0, 0, 0},
        };
 
@@ -531,7 +528,6 @@ us_vhost_parse_args(int argc, char **argv)
                        vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.rx_mode =
                                ETH_VMDQ_ACCEPT_BROADCAST |
                                ETH_VMDQ_ACCEPT_MULTICAST;
-                       rte_vhost_feature_enable(1ULL << VIRTIO_NET_F_CTRL_RX);
 
                        break;
 
@@ -662,7 +658,7 @@ us_vhost_parse_args(int argc, char **argv)
 
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (enabled_port_mask & (1 << i))
-                       ports[num_ports++] = (uint8_t)i;
+                       ports[num_ports++] = i;
        }
 
        if ((num_ports ==  0) || (num_ports > MAX_SUP_PORTS)) {
@@ -700,7 +696,7 @@ static unsigned check_ports_num(unsigned nb_ports)
        return valid_num_ports;
 }
 
-static inline struct vhost_dev *__attribute__((always_inline))
+static __rte_always_inline struct vhost_dev *
 find_vhost_dev(struct ether_addr *mac)
 {
        struct vhost_dev *vdev;
@@ -800,13 +796,18 @@ unlink_vmdq(struct vhost_dev *vdev)
        }
 }
 
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
 virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
            struct rte_mbuf *m)
 {
        uint16_t ret;
 
-       ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
+       if (builtin_net_driver) {
+               ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
+       } else {
+               ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
+       }
+
        if (enable_stats) {
                rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
                rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
@@ -819,7 +820,7 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
  * Check if the packet destination MAC address is for a local device. If so then put
  * the packet on that devices RX queue. If not then return.
  */
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
 virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
        struct ether_hdr *pkt_hdr;
@@ -832,17 +833,17 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
                return -1;
 
        if (vdev->vid == dst_vdev->vid) {
-               RTE_LOG(DEBUG, VHOST_DATA,
+               RTE_LOG_DP(DEBUG, VHOST_DATA,
                        "(%d) TX: src and dst MAC is same. Dropping packet.\n",
                        vdev->vid);
                return 0;
        }
 
-       RTE_LOG(DEBUG, VHOST_DATA,
+       RTE_LOG_DP(DEBUG, VHOST_DATA,
                "(%d) TX: MAC address is local\n", dst_vdev->vid);
 
        if (unlikely(dst_vdev->remove)) {
-               RTE_LOG(DEBUG, VHOST_DATA,
+               RTE_LOG_DP(DEBUG, VHOST_DATA,
                        "(%d) device is marked for removal\n", dst_vdev->vid);
                return 0;
        }
@@ -855,7 +856,7 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
  * Check if the destination MAC of a packet is one local VM,
  * and get its vlan tag, and offset if it is.
  */
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
 find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
        uint32_t *offset, uint16_t *vlan_tag)
 {
@@ -867,7 +868,7 @@ find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
                return 0;
 
        if (vdev->vid == dst_vdev->vid) {
-               RTE_LOG(DEBUG, VHOST_DATA,
+               RTE_LOG_DP(DEBUG, VHOST_DATA,
                        "(%d) TX: src and dst MAC is same. Dropping packet.\n",
                        vdev->vid);
                return -1;
@@ -881,7 +882,7 @@ find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
        *offset  = VLAN_HLEN;
        *vlan_tag = vlan_tags[vdev->vid];
 
-       RTE_LOG(DEBUG, VHOST_DATA,
+       RTE_LOG_DP(DEBUG, VHOST_DATA,
                "(%d) TX: pkt to local VM device id: (%d), vlan tag: %u.\n",
                vdev->vid, dst_vdev->vid, *vlan_tag);
 
@@ -923,7 +924,7 @@ free_pkts(struct rte_mbuf **pkts, uint16_t n)
                rte_pktmbuf_free(pkts[n]);
 }
 
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
 do_drain_mbuf_table(struct mbuf_table *tx_q)
 {
        uint16_t count;
@@ -940,7 +941,7 @@ do_drain_mbuf_table(struct mbuf_table *tx_q)
  * This function routes the TX packet to the correct interface. This
  * may be a local device or the physical port.
  */
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
 virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 {
        struct mbuf_table *tx_q;
@@ -954,7 +955,8 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
                struct vhost_dev *vdev2;
 
                TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
-                       virtio_xmit(vdev2, vdev, m);
+                       if (vdev2 != vdev)
+                               virtio_xmit(vdev2, vdev, m);
                }
                goto queue2nic;
        }
@@ -973,7 +975,7 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
                }
        }
 
-       RTE_LOG(DEBUG, VHOST_DATA,
+       RTE_LOG_DP(DEBUG, VHOST_DATA,
                "(%d) TX: MAC address is external\n", vdev->vid);
 
 queue2nic:
@@ -1028,7 +1030,7 @@ queue2nic:
 }
 
 
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
 drain_mbuf_table(struct mbuf_table *tx_q)
 {
        static uint64_t prev_tsc;
@@ -1041,14 +1043,14 @@ drain_mbuf_table(struct mbuf_table *tx_q)
        if (unlikely(cur_tsc - prev_tsc > MBUF_TABLE_DRAIN_TSC)) {
                prev_tsc = cur_tsc;
 
-               RTE_LOG(DEBUG, VHOST_DATA,
+               RTE_LOG_DP(DEBUG, VHOST_DATA,
                        "TX queue drained after timeout with burst size %u\n",
                        tx_q->len);
                do_drain_mbuf_table(tx_q);
        }
 }
 
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
        uint16_t rx_count, enqueue_count;
@@ -1077,8 +1079,13 @@ drain_eth_rx(struct vhost_dev *vdev)
                }
        }
 
-       enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+       if (builtin_net_driver) {
+               enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
                                                pkts, rx_count);
+       } else {
+               enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+                                               pkts, rx_count);
+       }
        if (enable_stats) {
                rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
                rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
@@ -1087,15 +1094,20 @@ drain_eth_rx(struct vhost_dev *vdev)
        free_pkts(pkts, rx_count);
 }
 
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
 drain_virtio_tx(struct vhost_dev *vdev)
 {
        struct rte_mbuf *pkts[MAX_PKT_BURST];
        uint16_t count;
        uint16_t i;
 
-       count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ, mbuf_pool,
+       if (builtin_net_driver) {
+               count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool,
                                        pkts, MAX_PKT_BURST);
+       } else {
+               count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ,
+                                       mbuf_pool, pkts, MAX_PKT_BURST);
+       }
 
        /* setup VMDq for the first packet */
        if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
@@ -1198,6 +1210,9 @@ destroy_device(int vid)
                rte_pause();
        }
 
+       if (builtin_net_driver)
+               vs_vhost_net_remove(vdev);
+
        TAILQ_REMOVE(&lcore_info[vdev->coreid].vdev_list, vdev,
                     lcore_vdev_entry);
        TAILQ_REMOVE(&vhost_dev_list, vdev, global_vdev_entry);
@@ -1246,6 +1261,9 @@ new_device(int vid)
        }
        vdev->vid = vid;
 
+       if (builtin_net_driver)
+               vs_vhost_net_setup(vdev);
+
        TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, global_vdev_entry);
        vdev->vmdq_rx_q = vid * queues_per_pool + vmdq_queue_base;
 
@@ -1281,7 +1299,7 @@ new_device(int vid)
  * These callback allow devices to be added to the data core when configuration
  * has been fully complete.
  */
-static const struct virtio_net_device_ops virtio_net_device_ops =
+static const struct vhost_device_ops virtio_net_device_ops =
 {
        .new_device =  new_device,
        .destroy_device = destroy_device,
@@ -1417,7 +1435,7 @@ main(int argc, char *argv[])
        unsigned lcore_id, core_id = 0;
        unsigned nb_ports, valid_num_ports;
        int ret, i;
-       uint8_t portid;
+       uint16_t portid;
        static pthread_t tid;
        char thread_name[RTE_MAX_THREAD_NAME_LEN];
        uint64_t flags = 0;
@@ -1509,9 +1527,6 @@ main(int argc, char *argv[])
        RTE_LCORE_FOREACH_SLAVE(lcore_id)
                rte_eal_remote_launch(switch_worker, NULL, lcore_id);
 
-       if (mergeable == 0)
-               rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
-
        if (client_mode)
                flags |= RTE_VHOST_USER_CLIENT;
 
@@ -1520,18 +1535,59 @@ main(int argc, char *argv[])
 
        /* Register vhost user driver to handle vhost messages. */
        for (i = 0; i < nb_sockets; i++) {
-               ret = rte_vhost_driver_register
-                               (socket_files + i * PATH_MAX, flags);
+               char *file = socket_files + i * PATH_MAX;
+               ret = rte_vhost_driver_register(file, flags);
                if (ret != 0) {
                        unregister_drivers(i);
                        rte_exit(EXIT_FAILURE,
                                "vhost driver register failure.\n");
                }
+
+               if (builtin_net_driver)
+                       rte_vhost_driver_set_features(file, VIRTIO_NET_FEATURES);
+
+               if (mergeable == 0) {
+                       rte_vhost_driver_disable_features(file,
+                               1ULL << VIRTIO_NET_F_MRG_RXBUF);
+               }
+
+               if (enable_tx_csum == 0) {
+                       rte_vhost_driver_disable_features(file,
+                               1ULL << VIRTIO_NET_F_CSUM);
+               }
+
+               if (enable_tso == 0) {
+                       rte_vhost_driver_disable_features(file,
+                               1ULL << VIRTIO_NET_F_HOST_TSO4);
+                       rte_vhost_driver_disable_features(file,
+                               1ULL << VIRTIO_NET_F_HOST_TSO6);
+                       rte_vhost_driver_disable_features(file,
+                               1ULL << VIRTIO_NET_F_GUEST_TSO4);
+                       rte_vhost_driver_disable_features(file,
+                               1ULL << VIRTIO_NET_F_GUEST_TSO6);
+               }
+
+               if (promiscuous) {
+                       rte_vhost_driver_enable_features(file,
+                               1ULL << VIRTIO_NET_F_CTRL_RX);
+               }
+
+               ret = rte_vhost_driver_callback_register(file,
+                       &virtio_net_device_ops);
+               if (ret != 0) {
+                       rte_exit(EXIT_FAILURE,
+                               "failed to register vhost driver callbacks.\n");
+               }
+
+               if (rte_vhost_driver_start(file) < 0) {
+                       rte_exit(EXIT_FAILURE,
+                               "failed to start vhost driver.\n");
+               }
        }
 
-       rte_vhost_driver_callback_register(&virtio_net_device_ops);
+       RTE_LCORE_FOREACH_SLAVE(lcore_id)
+               rte_eal_wait_lcore(lcore_id);
 
-       rte_vhost_driver_session_start();
        return 0;
 
 }