4 * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 * Interface to vhost-user
43 #include <sys/eventfd.h>
45 #include <rte_memory.h>
46 #include <rte_mempool.h>
52 /* These are not C++-aware. */
53 #include <linux/vhost.h>
54 #include <linux/virtio_ring.h>
/*
 * Single-bit unsigned long long masks occupying bits 0-3. Because the bits
 * are distinct, any combination of them can be OR-ed into one 64-bit word.
 * NOTE(review): presumably these are the values accepted by the `flags`
 * argument of rte_vhost_driver_register() - confirm against the
 * implementation.
 */
56 #define RTE_VHOST_USER_CLIENT (1ULL << 0)
57 #define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
58 #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
59 #define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3)
62 * Information relating to memory regions including offsets to
63 * addresses in QEMU's memory file.
/*
 * NOTE(review): only the first three members are visible in this excerpt.
 * The lookup helpers rte_vhost_gpa_to_vva() and
 * rte_vhost_va_from_guest_pa() also read a `size` member of this struct.
 */
65 struct rte_vhost_mem_region {
/* Inclusive lower bound of the region's guest-physical range, as used by the lookup helpers. */
66 uint64_t guest_phys_addr;
/* NOTE(review): presumably the region's address in the guest process (QEMU) address space - confirm. */
67 uint64_t guest_user_addr;
/* NOTE(review): presumably the region's address in this process's address space - confirm. */
68 uint64_t host_user_addr;
76 * Memory structure includes region and mapping information.
/*
 * NOTE(review): the lookup helpers iterate `regions` using an `nregions`
 * member of this struct whose declaration is not visible in this excerpt.
 */
78 struct rte_vhost_memory {
/* Flexible array member: the region descriptors are stored inline after the struct's other members. */
80 struct rte_vhost_mem_region regions[];
/*
 * Per-vring information record; rte_vhost_get_vhost_vring() takes a pointer
 * to one of these as the structure that receives the requested vring info.
 * NOTE(review): the end of this struct is not visible in this excerpt.
 */
83 struct rte_vhost_vring {
/* Pointers to the three parts of a split virtqueue: descriptor table, available ring, used ring. */
84 struct vring_desc *desc;
85 struct vring_avail *avail;
86 struct vring_used *used;
/* NOTE(review): presumably the guest address used for dirty logging of this ring - confirm. */
87 uint64_t log_guest_addr;
95 * Device and vring operations.
/*
 * Table of application callbacks; every callback receives the device id
 * `vid` as its first argument. A pointer to such a table is what
 * rte_vhost_driver_callback_register() takes as its `ops` argument.
 */
97 struct vhost_device_ops {
98 int (*new_device)(int vid); /**< Add device. */
99 void (*destroy_device)(int vid); /**< Remove device. */
101 int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< Triggered when a vring is enabled or disabled. */
104 * Features could be changed after the feature negotiation.
105 * For example, VHOST_F_LOG_ALL will be set/cleared at the
106 * start/end of live migration, respectively. This callback
107 * is used to inform the application on such change.
109 int (*features_changed)(int vid, uint64_t features);
/*
 * NOTE(review): the next two callbacks carry no documentation in this file.
 * Presumably they are invoked when a vhost-user connection is established
 * and torn down - confirm against the implementation.
 */
111 int (*new_connection)(int vid);
112 void (*destroy_connection)(int vid);
114 void *reserved[2]; /**< Reserved for future extension */
118 * Convert guest physical address to host virtual address
120 * This function is deprecated because it is unsafe.
121 * The new rte_vhost_va_from_guest_pa() should be used instead to ensure
122 * guest physical ranges are fully and contiguously mapped into
123 * process virtual address space.
126 * the guest memory regions
128 * the guest physical address for querying
130 * the host virtual address on success, 0 on failure
/*
 * Linear scan over mem->regions[0 .. nregions-1]; the first region whose
 * guest-physical window contains gpa is used for the translation.
 * No length is passed in, so nothing here checks that a multi-byte access
 * starting at gpa stays inside the matched region.
 * NOTE(review): the body is only partially visible in this excerpt. The
 * declaration of `i`, the tail of the return expression and the not-found
 * path are not shown - confirm against the full file.
 */
133 static __rte_always_inline uint64_t
134 rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
136 struct rte_vhost_mem_region *reg;
139 for (i = 0; i < mem->nregions; i++) {
140 reg = &mem->regions[i];
/*
 * Half-open range test: region start inclusive, region end exclusive.
 * NOTE(review): guest_phys_addr + size is a 64-bit sum; if it could wrap
 * around, the upper-bound comparison would be wrong - confirm that regions
 * are validated before they reach this table.
 */
141 if (gpa >= reg->guest_phys_addr &&
142 gpa < reg->guest_phys_addr + reg->size) {
/* Offset of gpa inside the matched region, added to a base term that is not visible here. */
143 return gpa - reg->guest_phys_addr +
152 * Convert guest physical address to host virtual address safely
154 * This variant of rte_vhost_gpa_to_vva() takes care that all the
155 * requested length is mapped and contiguous in process address
159 * the guest memory regions
161 * the guest physical address for querying
163 * the size of the requested area to map, updated with actual size mapped
165 * the host virtual address on success, 0 on failure
/*
 * Linear scan over mem->regions[0 .. nregions-1]; the first region whose
 * guest-physical window contains gpa is used for the translation.
 * `len` is an in/out argument: on a match it is reduced, if necessary, to
 * the number of bytes between gpa and the end of the matched region.
 * NOTE(review): the body is only partially visible in this excerpt. The
 * declaration of `i`, the tail of the return expression and the not-found
 * path are not shown - confirm against the full file.
 */
167 static __rte_always_inline uint64_t
168 rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem,
169 uint64_t gpa, uint64_t *len)
171 struct rte_vhost_mem_region *r;
174 for (i = 0; i < mem->nregions; i++) {
175 r = &mem->regions[i];
/*
 * Half-open range test: region start inclusive, region end exclusive.
 * NOTE(review): guest_phys_addr + size is a 64-bit sum; if it could wrap
 * around, this comparison and the clamp below would be wrong - confirm that
 * regions are validated before they reach this table.
 */
176 if (gpa >= r->guest_phys_addr &&
177 gpa < r->guest_phys_addr + r->size) {
/* Clamp the caller's length to what remains of this region past gpa. */
179 if (unlikely(*len > r->guest_phys_addr + r->size - gpa))
180 *len = r->guest_phys_addr + r->size - gpa;
/* Offset of gpa inside the matched region, added to a base term that is not visible here. */
182 return gpa - r->guest_phys_addr +
/*
 * Evaluates to a non-zero value when bit number VHOST_F_LOG_ALL is set in
 * `features`, and to 0 otherwise. The result is the masked bit itself, not
 * a normalised 0/1, so use it only in a boolean context. The argument is
 * parenthesised and expanded exactly once.
 */
191 #define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL))
194 * Log the memory write starting at the given address.
196 * This function only needs to be invoked when live migration starts.
197 * Therefore, it does not need to be called at all most of the time. To
198 * keep the performance impact to a minimum, it is suggested to do a
199 * check before calling it:
201 * if (unlikely(RTE_VHOST_NEED_LOG(features)))
202 * rte_vhost_log_write(vid, addr, len);
207 * the starting address for write
209 * the length to write
211 void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len);
214 * Log the used ring update starting at the given offset.
216 * Same as rte_vhost_log_write, it's suggested to do a check before
219 * if (unlikely(RTE_VHOST_NEED_LOG(features)))
220 * rte_vhost_log_used_vring(vid, vring_idx, offset, len);
227 * the offset inside the used ring
229 * the length to write
231 void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
232 uint64_t offset, uint64_t len);
234 int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
237 * Register vhost driver. path could be different for multiple
240 int rte_vhost_driver_register(const char *path, uint64_t flags);
242 /* Unregister vhost driver. This is only meaningful to vhost user. */
243 int rte_vhost_driver_unregister(const char *path);
246 * Set the feature bits the vhost-user driver supports.
249 * The vhost-user socket file path
253 * 0 on success, -1 on failure
255 int rte_vhost_driver_set_features(const char *path, uint64_t features);
258 * Enable vhost-user driver features.
261 * - the param features should be a subset of the feature bits provided
262 * by rte_vhost_driver_set_features().
263 * - it must be invoked before vhost-user negotiation starts.
266 * The vhost-user socket file path
270 * 0 on success, -1 on failure
272 int rte_vhost_driver_enable_features(const char *path, uint64_t features);
275 * Disable vhost-user driver features.
277 * The two notes at rte_vhost_driver_enable_features() also apply here.
280 * The vhost-user socket file path
282 * Features to disable
284 * 0 on success, -1 on failure
286 int rte_vhost_driver_disable_features(const char *path, uint64_t features);
289 * Get the feature bits before feature negotiation.
292 * The vhost-user socket file path
294 * A pointer to store the queried feature bits
296 * 0 on success, -1 on failure
298 int rte_vhost_driver_get_features(const char *path, uint64_t *features);
301 * Get the feature bits after negotiation
306 * A pointer to store the queried feature bits
308 * 0 on success, -1 on failure
310 int rte_vhost_get_negotiated_features(int vid, uint64_t *features);
312 /* Register callbacks. */
313 int rte_vhost_driver_callback_register(const char *path,
314 struct vhost_device_ops const * const ops);
318 * Start the vhost-user driver.
320 * This function triggers the vhost-user negotiation.
323 * The vhost-user socket file path
325 * 0 on success, -1 on failure
327 int rte_vhost_driver_start(const char *path);
330 * Get the MTU value of the device if set in QEMU.
333 * virtio-net device ID
335 * The variable to store the MTU value
339 * -EAGAIN: device not yet started
340 * -ENOTSUP: device does not support MTU feature
342 int rte_vhost_get_mtu(int vid, uint16_t *mtu);
345 * Get the numa node from which the virtio net device's memory
352 * The numa node, -1 on failure
354 int rte_vhost_get_numa_node(int vid);
358 * Get the number of queues the device supports.
360 * Note this function is deprecated, as it returns a queue pair number,
361 * which is vhost specific. Instead, rte_vhost_get_vring_num should
368 * The number of queues, 0 on failure
371 uint32_t rte_vhost_get_queue_num(int vid);
374 * Get the number of vrings the device supports.
380 * The number of vrings, 0 on failure
382 uint16_t rte_vhost_get_vring_num(int vid);
385 * Get the virtio net device's ifname, which is the vhost-user socket
391 * The buffer to store the queried ifname
396 * 0 on success, -1 on failure
398 int rte_vhost_get_ifname(int vid, char *buf, size_t len);
401 * Get how many avail entries are left in the queue
409 * num of avail entries left
411 uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
416 * This function adds buffers to the virtio device's RX virtqueue. Buffers can
417 * be received from the physical port or from another virtual device. A packet
418 * count is returned to indicate the number of packets that were successfully
419 * added to the RX queue.
423 * virtio queue index in mq case
425 * array to contain packets to be enqueued
427 * packets num to be enqueued
429 * num of packets enqueued
431 uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
432 struct rte_mbuf **pkts, uint16_t count);
435 * This function gets guest buffers from the virtio device TX virtqueue,
436 * constructs host mbufs, copies guest buffer content to host mbufs and
437 * stores them in pkts to be processed.
441 * virtio queue index in mq case
443 * mbuf_pool where host mbuf is allocated.
445 * array to contain packets to be dequeued
447 * packets num to be dequeued
449 * num of packets dequeued
451 uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
452 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
455 * Get guest mem table: a list of memory regions.
457 * An rte_vhost_memory object will be allocated internally, to hold the
458 * guest memory regions. Application should free it at destroy_device()
464 * To store the returned mem regions
466 * 0 on success, -1 on failure
468 int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
471 * Get guest vring info, including the vring address, vring size, etc.
478 * the structure to hold the requested vring info
480 * 0 on success, -1 on failure
482 int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
483 struct rte_vhost_vring *vring);
486 * Get vhost RX queue avail count.
491 * virtio queue index in mq case
493 * num of desc available
495 uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
501 #endif /* _RTE_VHOST_H_ */