1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
10 * Interface to vhost-user
14 #include <sys/eventfd.h>
16 #include <rte_memory.h>
17 #include <rte_mempool.h>
23 /* These are not C++-aware. */
24 #include <linux/vhost.h>
25 #include <linux/virtio_ring.h>
27 #define RTE_VHOST_USER_CLIENT (1ULL << 0)
28 #define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
29 #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
30 #define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3)
33 * Information relating to memory regions including offsets to
34 * addresses in QEMU's memory file.
36 struct rte_vhost_mem_region {
37 uint64_t guest_phys_addr;
38 uint64_t guest_user_addr;
39 uint64_t host_user_addr;
47 * Memory structure includes region and mapping information.
49 struct rte_vhost_memory {
51 struct rte_vhost_mem_region regions[];
54 struct rte_vhost_vring {
55 struct vring_desc *desc;
56 struct vring_avail *avail;
57 struct vring_used *used;
58 uint64_t log_guest_addr;
60 /** Deprecated, use rte_vhost_vring_call() instead. */
68 * Device and vring operations.
70 struct vhost_device_ops {
71 int (*new_device)(int vid); /**< Add device. */
72 void (*destroy_device)(int vid); /**< Remove device. */
74 int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
77 * Features could be changed after the feature negotiation.
78 * For example, VHOST_F_LOG_ALL will be set/cleared at the
79 * start/end of live migration, respectively. This callback
80 * is used to inform the application on such change.
82 int (*features_changed)(int vid, uint64_t features);
84 int (*new_connection)(int vid);
85 void (*destroy_connection)(int vid);
87 void *reserved[2]; /**< Reserved for future extension */
91 * Convert guest physical address to host virtual address
93 * This function is deprecated because it is unsafe.
94 * New rte_vhost_va_from_guest_pa() should be used instead to ensure
95 * guest physical ranges are fully and contiguously mapped into
96 * process virtual address space.
99 * the guest memory regions
101 * the guest physical address for querying
103 * the host virtual address on success, 0 on failure
106 static __rte_always_inline uint64_t
107 rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
109 struct rte_vhost_mem_region *reg;
/* Walk the region table; the first region whose guest-physical range contains gpa is used. */
112 for (i = 0; i < mem->nregions; i++) {
113 reg = &mem->regions[i];
/* Half-open range test: [guest_phys_addr, guest_phys_addr + size). */
114 if (gpa >= reg->guest_phys_addr &&
115 gpa < reg->guest_phys_addr + reg->size) {
/* The result starts from gpa's byte offset inside the matching region. */
116 return gpa - reg->guest_phys_addr +
125 * Convert guest physical address to host virtual address safely
127 * This variant of rte_vhost_gpa_to_vva() takes care that all the
128 * requested length is mapped and contiguous in process address
132 * the guest memory regions
134 * the guest physical address for querying
136 * the size of the requested area to map, updated with actual size mapped
138 * the host virtual address on success, 0 on failure
140 static __rte_always_inline uint64_t
141 rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem,
142 uint64_t gpa, uint64_t *len)
144 struct rte_vhost_mem_region *r;
/* Walk the region table looking for the region whose guest-physical range contains gpa. */
147 for (i = 0; i < mem->nregions; i++) {
148 r = &mem->regions[i];
/* Half-open range test: [guest_phys_addr, guest_phys_addr + size). */
149 if (gpa >= r->guest_phys_addr &&
150 gpa < r->guest_phys_addr + r->size) {
/*
 * *len is in/out: if the request runs past the end of this region,
 * shrink it to the bytes remaining in the region from gpa, so the
 * caller can tell how much of the request this region covers.
 */
152 if (unlikely(*len > r->guest_phys_addr + r->size - gpa))
153 *len = r->guest_phys_addr + r->size - gpa;
/* The result starts from gpa's byte offset inside the matching region. */
155 return gpa - r->guest_phys_addr +
164 #define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL))
167 * Log the memory write starting at the given address.
169 * This function only needs to be invoked when live migration starts.
170 * Therefore, it does not need to be called at all most of the time. To
171 * keep the performance impact minimal, it is suggested to do a
172 * check before calling it:
174 * if (unlikely(RTE_VHOST_NEED_LOG(features)))
175 * rte_vhost_log_write(vid, addr, len);
180 * the starting address for write
182 * the length to write
184 void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len);
187 * Log the used ring update starting at the given offset.
189 * Same as rte_vhost_log_write, it's suggested to do a check before
192 * if (unlikely(RTE_VHOST_NEED_LOG(features)))
193 * rte_vhost_log_used_vring(vid, vring_idx, offset, len);
200 * the offset inside the used ring
202 * the length to write
204 void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
205 uint64_t offset, uint64_t len);
207 int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
210 * Register vhost driver. path could be different for multiple
213 int rte_vhost_driver_register(const char *path, uint64_t flags);
215 /* Unregister vhost driver. This is only meaningful to vhost user. */
216 int rte_vhost_driver_unregister(const char *path);
219 * Set the feature bits the vhost-user driver supports.
222 * The vhost-user socket file path
226 * 0 on success, -1 on failure
228 int rte_vhost_driver_set_features(const char *path, uint64_t features);
231 * Enable vhost-user driver features.
234 * - the param features should be a subset of the feature bits provided
235 * by rte_vhost_driver_set_features().
236 * - it must be invoked before vhost-user negotiation starts.
239 * The vhost-user socket file path
243 * 0 on success, -1 on failure
245 int rte_vhost_driver_enable_features(const char *path, uint64_t features);
248 * Disable vhost-user driver features.
250 * The two notes at rte_vhost_driver_enable_features() also apply here.
253 * The vhost-user socket file path
255 * Features to disable
257 * 0 on success, -1 on failure
259 int rte_vhost_driver_disable_features(const char *path, uint64_t features);
262 * Get the feature bits before feature negotiation.
265 * The vhost-user socket file path
267 * A pointer to store the queried feature bits
269 * 0 on success, -1 on failure
271 int rte_vhost_driver_get_features(const char *path, uint64_t *features);
274 * Get the feature bits after negotiation
279 * A pointer to store the queried feature bits
281 * 0 on success, -1 on failure
283 int rte_vhost_get_negotiated_features(int vid, uint64_t *features);
285 /* Register callbacks. */
286 int rte_vhost_driver_callback_register(const char *path,
287 struct vhost_device_ops const * const ops);
291 * Start the vhost-user driver.
293 * This function triggers the vhost-user negotiation.
296 * The vhost-user socket file path
298 * 0 on success, -1 on failure
300 int rte_vhost_driver_start(const char *path);
303 * Get the MTU value of the device if set in QEMU.
306 * virtio-net device ID
308 * The variable to store the MTU value
312 * -EAGAIN: device not yet started
313 * -ENOTSUP: device does not support MTU feature
315 int rte_vhost_get_mtu(int vid, uint16_t *mtu);
318 * Get the numa node from which the virtio net device's memory
325 * The numa node, -1 on failure
327 int rte_vhost_get_numa_node(int vid);
331 * Get the number of queues the device supports.
333 * Note this function is deprecated, as it returns a queue pair number,
334 * which is vhost specific. Instead, rte_vhost_get_vring_num should
341 * The number of queues, 0 on failure
344 uint32_t rte_vhost_get_queue_num(int vid);
347 * Get the number of vrings the device supports.
353 * The number of vrings, 0 on failure
355 uint16_t rte_vhost_get_vring_num(int vid);
358 * Get the virtio net device's ifname, which is the vhost-user socket
364 * The buffer to store the queried ifname
369 * 0 on success, -1 on failure
371 int rte_vhost_get_ifname(int vid, char *buf, size_t len);
374 * Get how many avail entries are left in the queue
382 * num of avail entries left
384 uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
389 * This function adds buffers to the virtio device's RX virtqueue. Buffers can
390 * be received from the physical port or from another virtual device. A packet
391 * count is returned to indicate the number of packets that were successfully
392 * added to the RX queue.
396 * virtio queue index in mq case
398 * array to contain packets to be enqueued
400 * packets num to be enqueued
402 * num of packets enqueued
404 uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
405 struct rte_mbuf **pkts, uint16_t count);
408 * This function gets guest buffers from the virtio device TX virtqueue,
409 * constructs host mbufs, copies guest buffer content to host mbufs and
410 * stores them in pkts to be processed.
414 * virtio queue index in mq case
416 * mbuf_pool where host mbuf is allocated.
418 * array to contain packets to be dequeued
420 * packets num to be dequeued
422 * num of packets dequeued
424 uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
425 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
428 * Get guest mem table: a list of memory regions.
430 * An rte_vhost_memory object will be allocated internally, to hold the
431 * guest memory regions. Application should free it at destroy_device()
437 * To store the returned mem regions
439 * 0 on success, -1 on failure
441 int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
444 * Get guest vring info, including the vring address, vring size, etc.
451 * the structure to hold the requested vring info
453 * 0 on success, -1 on failure
455 int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
456 struct rte_vhost_vring *vring);
459 * Notify the guest that used descriptors have been added to the vring. This
460 * function acts as a memory barrier.
467 * 0 on success, -1 on failure
469 int rte_vhost_vring_call(int vid, uint16_t vring_idx);
472 * Get vhost RX queue avail count.
477 * virtio queue index in mq case
479 * num of desc available
481 uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
487 #endif /* _RTE_VHOST_H_ */