2 *------------------------------------------------------------------
5 * Copyright (c) 2014 Cisco and/or its affiliates.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *------------------------------------------------------------------
20 #include <fcntl.h> /* for open */
21 #include <sys/ioctl.h>
22 #include <sys/socket.h>
25 #include <sys/types.h>
26 #include <sys/uio.h> /* for iovec */
27 #include <netinet/in.h>
30 #include <linux/if_arp.h>
31 #include <linux/if_tun.h>
33 #include <vlib/vlib.h>
34 #include <vlib/unix/unix.h>
36 #include <vnet/ip/ip.h>
38 #include <vnet/ethernet/ethernet.h>
39 #include <vnet/devices/devices.h>
40 #include <vnet/feature/feature.h>
42 #include <vnet/devices/virtio/vhost-user.h>
46 * @brief vHost User Device Driver.
48 * This file contains the source code for vHost User interface.
52 #define VHOST_USER_DEBUG_SOCKET 0
53 #define VHOST_DEBUG_VQ 0
55 #if VHOST_USER_DEBUG_SOCKET == 1
56 #define DBG_SOCK(args...) clib_warning(args);
58 #define DBG_SOCK(args...)
61 #if VHOST_DEBUG_VQ == 1
62 #define DBG_VQ(args...) clib_warning(args);
64 #define DBG_VQ(args...)
67 #define UNIX_GET_FD(unixfd_idx) \
68 (unixfd_idx != ~0) ? \
69 pool_elt_at_index (unix_main.file_pool, \
70 unixfd_idx)->file_descriptor : -1;
72 #define foreach_virtio_trace_flags \
73 _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \
74 _ (SINGLE_DESC, 1, "Single descriptor packet") \
75 _ (INDIRECT, 2, "Indirect descriptor") \
76 _ (MAP_ERROR, 4, "Memory mapping error")
80 #define _(n,i,s) VIRTIO_TRACE_F_##n,
81 foreach_virtio_trace_flags
83 } virtio_trace_flag_t;
85 vlib_node_registration_t vhost_user_input_node;
87 #define foreach_vhost_user_tx_func_error \
89 _(NOT_READY, "vhost user state error") \
90 _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \
91 _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)") \
92 _(MMAP_FAIL, "mmap failure") \
93 _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")
97 #define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
98 foreach_vhost_user_tx_func_error
100 VHOST_USER_TX_FUNC_N_ERROR,
101 } vhost_user_tx_func_error_t;
103 static char *vhost_user_tx_func_error_strings[] = {
105 foreach_vhost_user_tx_func_error
109 #define foreach_vhost_user_input_func_error \
110 _(NO_ERROR, "no error") \
111 _(NO_BUFFER, "no available buffer") \
112 _(MMAP_FAIL, "mmap failure") \
113 _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \
114 _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
115 _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")
119 #define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
120 foreach_vhost_user_input_func_error
122 VHOST_USER_INPUT_FUNC_N_ERROR,
123 } vhost_user_input_func_error_t;
125 static char *vhost_user_input_func_error_strings[] = {
127 foreach_vhost_user_input_func_error
132 static vhost_user_main_t vhost_user_main = {
136 VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = {
137 .name = "vhost-user",
/* Format callback: renders the interface name "VirtualEthernet0/0/<n>".
 * If the device was renumbered (see vhost_user_name_renumber), the
 * show_dev_instance mapping overrides the real dev instance. */
142 format_vhost_user_interface_name (u8 * s, va_list * args)
144 u32 i = va_arg (*args, u32);
145 u32 show_dev_instance = ~0;
146 vhost_user_main_t *vum = &vhost_user_main;
148 if (i < vec_len (vum->show_dev_instance_by_real_dev_instance))
149 show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];
/* ~0 means "not renumbered"; otherwise display the user-chosen instance */
151 if (show_dev_instance != ~0)
152 i = show_dev_instance;
154 s = format (s, "VirtualEthernet0/0/%d", i);
/* Record a user-requested display renumbering for this interface.
 * Only updates the show_dev_instance mapping consumed by
 * format_vhost_user_interface_name; the real dev_instance is unchanged. */
159 vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
161 // FIXME: check if the new dev instance is already used
162 vhost_user_main_t *vum = &vhost_user_main;
/* grow the vector on demand, initializing new slots to ~0 ("unset") */
163 vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance,
164 hi->dev_instance, ~0);
166 vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] =
169 DBG_SOCK ("renumbered vhost-user interface dev_instance %d to %d",
170 hi->dev_instance, new_dev_instance);
/* Translate a guest physical address into a VPP process virtual address
 * using the mmap'ed memory regions negotiated via VHOST_USER_SET_MEM_TABLE.
 * 'hint' caches the last matching region index so consecutive lookups
 * usually hit on the first comparison. Returns NULL-ish failure path via
 * DBG_VQ at the bottom (exact return not visible here - extraction dropped
 * lines; TODO confirm against full source). */
175 static_always_inline void *
176 map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint)
/* fast path: re-check the hinted region first */
179 if (PREDICT_TRUE ((vui->regions[i].guest_phys_addr <= addr) &&
180 ((vui->regions[i].guest_phys_addr +
181 vui->regions[i].memory_size) > addr)))
183 return (void *) (vui->region_mmap_addr[i] + addr -
184 vui->regions[i].guest_phys_addr);
/* SSE path: compare 'addr' against all region [lo, hi) bounds in parallel,
 * two 64-bit regions per 128-bit compare. 'al' holds addr+1 so that
 * cmpgt(al, lo) behaves as addr >= lo. Assumes region_guest_addr_lo/hi
 * are padded to 8 entries - presumably sized VHOST_MEMORY_MAX_NREGIONS;
 * TODO confirm. */
187 __m128i rl, rh, al, ah, r;
188 al = _mm_set1_epi64x (addr + 1);
189 ah = _mm_set1_epi64x (addr);
191 rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[0]);
192 rl = _mm_cmpgt_epi64 (al, rl);
193 rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[0]);
194 rh = _mm_cmpgt_epi64 (rh, ah);
195 r = _mm_and_si128 (rl, rh);
197 rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[2]);
198 rl = _mm_cmpgt_epi64 (al, rl);
199 rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[2]);
200 rh = _mm_cmpgt_epi64 (rh, ah);
/* merge each pair of results into distinct 16-bit lanes of 'r' */
201 r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x22);
203 rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[4]);
204 rl = _mm_cmpgt_epi64 (al, rl);
205 rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[4]);
206 rh = _mm_cmpgt_epi64 (rh, ah);
207 r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x44);
209 rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[6]);
210 rl = _mm_cmpgt_epi64 (al, rl);
211 rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[6]);
212 rh = _mm_cmpgt_epi64 (rh, ah);
213 r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88);
/* gather one byte per region and take the index of the first match */
215 r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800));
216 i = __builtin_ctzll (_mm_movemask_epi8 (r));
218 if (i < vui->nregions)
221 return (void *) (vui->region_mmap_addr[i] + addr -
222 vui->regions[i].guest_phys_addr);
/* scalar fallback: linear scan over all regions */
226 for (i = 0; i < vui->nregions; i++)
228 if ((vui->regions[i].guest_phys_addr <= addr) &&
229 ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) >
233 return (void *) (vui->region_mmap_addr[i] + addr -
234 vui->regions[i].guest_phys_addr);
238 DBG_VQ ("failed to map guest mem addr %llx", addr);
/* Translate a qemu userspace address (as carried in vring addresses)
 * into a VPP process virtual address via linear region scan. */
244 map_user_mem (vhost_user_intf_t * vui, uword addr)
247 for (i = 0; i < vui->nregions; i++)
249 if ((vui->regions[i].userspace_addr <= addr) &&
250 ((vui->regions[i].userspace_addr + vui->regions[i].memory_size) >
253 return (void *) (vui->region_mmap_addr[i] + addr -
254 vui->regions[i].userspace_addr);
/* Query the page size backing 'fd' (body not visible here - presumably
 * fstatfs/f_bsize on the hugetlbfs fd; TODO confirm against full source). */
261 get_huge_page_size (int fd)
/* munmap and close every guest memory region previously mapped by
 * VHOST_USER_SET_MEM_TABLE. (void *)-1 marks an unmapped slot. */
269 unmap_all_mem_regions (vhost_user_intf_t * vui)
272 for (i = 0; i < vui->nregions; i++)
274 if (vui->region_mmap_addr[i] != (void *) -1)
277 long page_sz = get_huge_page_size (vui->region_mmap_fd[i]);
/* round the mapped length up to the backing page size, matching the
 * size used at mmap time (includes the mmap_offset prefix) */
279 ssize_t map_sz = (vui->regions[i].memory_size +
280 vui->regions[i].mmap_offset +
281 page_sz) & ~(page_sz - 1);
/* region_mmap_addr was advanced past mmap_offset after mapping, so
 * back it out to get the address originally returned by mmap */
284 munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset,
288 ("unmap memory region %d addr 0x%lx len 0x%lx page_sz 0x%x", i,
289 vui->region_mmap_addr[i], map_sz, page_sz);
291 vui->region_mmap_addr[i] = (void *) -1;
295 clib_warning ("failed to unmap memory region (errno %d)",
298 close (vui->region_mmap_fd[i]);
/* Assign a TX queue (RX vring from the guest's point of view) to each
 * VPP thread. If there are fewer usable queues than threads, queues are
 * shared and use_tx_spinlock is set so concurrent senders serialize. */
305 vhost_user_tx_thread_placement (vhost_user_intf_t * vui)
307 //Let's try to assign one queue to each thread
310 vui->use_tx_spinlock = 0;
313 for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
315 vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
/* skip queues the guest has not started/enabled */
316 if (!rxvq->started || !rxvq->enabled)
319 vui->per_cpu_tx_qid[cpu_index] = qid;
/* all threads have a queue: done */
321 if (cpu_index == vlib_get_thread_main ()->n_vlib_mains)
324 //We need to loop, meaning the spinlock has to be used
325 vui->use_tx_spinlock = 1;
328 //Could not find a single valid one
330 cpu_index < vlib_get_thread_main ()->n_vlib_mains; cpu_index++)
332 vui->per_cpu_tx_qid[cpu_index] = 0;
/* (Re)distribute all vhost-user RX queues (guest TX vrings) over the
 * input worker threads, round-robin. Rebuilds each per-cpu rx_queues
 * vector from scratch and sets the input node POLLING on cpus that got
 * at least one queue, DISABLED otherwise. */
340 vhost_user_rx_thread_placement ()
342 vhost_user_main_t *vum = &vhost_user_main;
343 vhost_user_intf_t *vui;
347 //Let's list all workers cpu indexes
349 for (i = vum->input_cpu_first_index;
350 i < vum->input_cpu_first_index + vum->input_cpu_count; i++)
/* start from a clean slate: disable the input node everywhere */
352 vlib_node_set_state (vlib_mains ? vlib_mains[i] : &vlib_global_main,
353 vhost_user_input_node.index,
354 VLIB_NODE_STATE_DISABLED);
355 vec_add1 (workers, i);
358 vec_foreach (vhc, vum->cpus)
360 vec_reset_length (vhc->rx_queues);
364 vhost_iface_and_queue_t iaq;
366 pool_foreach (vui, vum->vhost_user_interfaces, {
/* an interface may pin specific workers; otherwise use them all */
367 u32 *vui_workers = vec_len (vui->workers) ? vui->workers : workers;
369 for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
371 vhost_user_vring_t *txvq =
372 &vui->vrings[VHOST_VRING_IDX_TX (qid)];
/* round-robin queue -> worker assignment */
376 i %= vec_len (vui_workers);
377 u32 cpu_index = vui_workers[i];
379 vhc = &vum->cpus[cpu_index];
382 iaq.vhost_iface_index = vui - vum->vhost_user_interfaces;
383 vec_add1 (vhc->rx_queues, iaq);
384 vlib_node_set_state (vlib_mains ? vlib_mains[cpu_index] :
385 &vlib_global_main, vhost_user_input_node.index,
386 VLIB_NODE_STATE_POLLING);
/* Add (del==0) or remove (del!=0) a worker thread from the set pinned to
 * a vhost-user interface, then recompute global RX placement.
 * Rejects worker indexes outside the configured input cpu range. */
393 vhost_user_thread_placement (u32 sw_if_index, u32 worker_thread_index, u8 del)
395 vhost_user_main_t *vum = &vhost_user_main;
396 vhost_user_intf_t *vui;
397 vnet_hw_interface_t *hw;
399 if (worker_thread_index < vum->input_cpu_first_index ||
400 worker_thread_index >=
401 vum->input_cpu_first_index + vum->input_cpu_count)
404 if (!(hw = vnet_get_sup_hw_interface (vnet_get_main (), sw_if_index)))
407 vui = pool_elt_at_index (vum->vhost_user_interfaces, hw->dev_instance);
/* look up whether this worker is already in the interface's list */
409 vec_foreach (w, vui->workers)
411 if (*w == worker_thread_index)
413 found = w - vui->workers;
422 vec_del1 (vui->workers, found);
/* add only if not already present */
424 else if (found == ~0)
426 vec_add1 (vui->workers, worker_thread_index);
429 vhost_user_rx_thread_placement ();
433 /** @brief Returns whether at least one TX and one RX vring are enabled */
435 vhost_user_intf_ready (vhost_user_intf_t * vui)
437 int i, found[2] = { }; //RX + TX
/* even/odd vring indexes map to the two directions; a vring counts
 * only when the guest both started (kicked) and enabled it */
439 for (i = 0; i < VHOST_VRING_MAX_N; i++)
440 if (vui->vrings[i].started && vui->vrings[i].enabled)
443 return found[0] && found[1];
/* Recompute the interface's readiness and, on a change, flip the hw link
 * state accordingly; always refresh RX and TX thread placement. */
447 vhost_user_update_iface_state (vhost_user_intf_t * vui)
449 /* if we have pointers to descriptor table, go up */
450 int is_up = vhost_user_intf_ready (vui);
451 if (is_up != vui->is_up)
453 DBG_SOCK ("interface %d %s", vui->sw_if_index,
454 is_up ? "ready" : "down");
455 vnet_hw_interface_set_flags (vnet_get_main (), vui->hw_if_index,
456 is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP :
460 vhost_user_rx_thread_placement ();
461 vhost_user_tx_thread_placement (vui);
464 static clib_error_t *
/* Drain a callfd eventfd notification; the 8-byte read just clears the
 * event counter, the value itself is unused. */
465 vhost_user_callfd_read_ready (unix_file_t * uf)
467 __attribute__ ((unused)) int n;
469 n = read (uf->file_descriptor, ((char *) &buff), 8);
473 static clib_error_t *
/* Guest kicked a vring: mark it started and re-evaluate interface state.
 * private_data packs (interface index << 8) | queue id. */
474 vhost_user_kickfd_read_ready (unix_file_t * uf)
476 __attribute__ ((unused)) int n;
478 vhost_user_intf_t *vui =
479 pool_elt_at_index (vhost_user_main.vhost_user_interfaces,
480 uf->private_data >> 8);
481 u32 qid = uf->private_data & 0xff;
/* drain the eventfd counter */
482 n = read (uf->file_descriptor, ((char *) &buff), 8);
483 DBG_SOCK ("if %d KICK queue %d", uf->private_data >> 8, qid);
/* state change touches worker-visible data; pause workers around it */
485 vlib_worker_thread_barrier_sync (vlib_get_main ());
486 vui->vrings[qid].started = 1;
487 vhost_user_update_iface_state (vui);
488 vlib_worker_thread_barrier_release (vlib_get_main ());
493 * @brief Try once to lock the vring
494 * @return 0 on success, non-zero on failure.
497 vhost_user_vring_try_lock (vhost_user_intf_t * vui, u32 qid)
/* atomic test-and-set; returns the previous lock value (0 == acquired) */
499 return __sync_lock_test_and_set (vui->vring_locks[qid], 1);
503 * @brief Spin until the vring is successfully locked
506 vhost_user_vring_lock (vhost_user_intf_t * vui, u32 qid)
508 while (vhost_user_vring_try_lock (vui, qid))
513 * @brief Unlock the vring lock
516 vhost_user_vring_unlock (vhost_user_intf_t * vui, u32 qid)
/* plain store release; pairs with the test-and-set acquire above */
518 *vui->vring_locks[qid] = 0;
/* Reset a vring to its pristine state: zeroed, no kick/call file
 * (~0 == "no unix file index"), and queue pair 0 pre-enabled per the
 * vhost-user spec's "one queue pair is enabled initially" rule. */
522 vhost_user_vring_init (vhost_user_intf_t * vui, u32 qid)
524 vhost_user_vring_t *vring = &vui->vrings[qid];
525 memset (vring, 0, sizeof (*vring));
526 vring->kickfd_idx = ~0;
527 vring->callfd_idx = ~0;
531 * We have a bug with some qemu 2.5, and this may be a fix.
532 * Feel like interpretation holy text, but this is from vhost-user.txt.
534 * One queue pair is enabled initially. More queues are enabled
535 * dynamically, by sending message VHOST_USER_SET_VRING_ENABLE.
537 * Don't know who's right, but this is what DPDK does.
539 if (qid == 0 || qid == 1)
/* Tear down a vring: unregister kick/call unix files, close the error
 * fd if any, then re-init the vring to its default state. */
544 vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid)
546 vhost_user_vring_t *vring = &vui->vrings[qid];
547 if (vring->kickfd_idx != ~0)
549 unix_file_t *uf = pool_elt_at_index (unix_main.file_pool,
551 unix_file_del (&unix_main, uf);
552 vring->kickfd_idx = ~0;
554 if (vring->callfd_idx != ~0)
556 unix_file_t *uf = pool_elt_at_index (unix_main.file_pool,
558 unix_file_del (&unix_main, uf);
559 vring->callfd_idx = ~0;
/* errfd is a raw fd (-1 == unset), not a unix_file index */
561 if (vring->errfd != -1)
562 close (vring->errfd);
563 vhost_user_vring_init (vui, qid);
/* Full disconnect: take the hw interface down, drop the control socket's
 * unix file, close every vring, and unmap all guest memory regions. */
567 vhost_user_if_disconnect (vhost_user_intf_t * vui)
569 vnet_main_t *vnm = vnet_get_main ();
572 vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
574 if (vui->unix_file_index != ~0)
576 unix_file_del (&unix_main, unix_main.file_pool + vui->unix_file_index);
577 vui->unix_file_index = ~0;
582 for (q = 0; q < VHOST_VRING_MAX_N; q++)
583 vhost_user_vring_close (vui, q);
585 unmap_all_mem_regions (vui);
586 DBG_SOCK ("interface ifindex %d disconnected", vui->sw_if_index);
589 #define VHOST_LOG_PAGE 0x1000
/* Mark [addr, addr+len) dirty in the shared log bitmap (one bit per
 * VHOST_LOG_PAGE), used by qemu for live migration. No-op unless a log
 * base is mapped and VHOST_F_LOG_ALL was negotiated. */
591 vhost_user_log_dirty_pages (vhost_user_intf_t * vui, u64 addr, u64 len)
593 if (PREDICT_TRUE (vui->log_base_addr == 0
594 || !(vui->features & (1 << FEAT_VHOST_F_LOG_ALL))))
/* bounds-check against the mapped log size before touching the bitmap */
598 if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size))
600 DBG_SOCK ("vhost_user_log_dirty_pages(): out of range\n");
/* ensure data writes are visible before the dirty bits are published */
604 CLIB_MEMORY_BARRIER ();
605 u64 page = addr / VHOST_LOG_PAGE;
606 while (page * VHOST_LOG_PAGE < addr + len)
608 ((u8 *) vui->log_base_addr)[page / 8] |= 1 << page % 8;
/* Convenience wrapper: log the used-ring field 'member' as dirty when the
 * vring has VHOST_VRING_F_LOG set. */
613 #define vhost_user_log_dirty_ring(vui, vq, member) \
614 if (PREDICT_FALSE(vq->log_used)) { \
615 vhost_user_log_dirty_pages(vui, vq->log_guest_addr + STRUCT_OFFSET_OF(vring_used_t, member), \
616 sizeof(vq->used->member)); \
619 static clib_error_t *
/* Control-plane message pump: read one vhost-user protocol message (plus
 * any SCM_RIGHTS fds) from the qemu socket, dispatch on msg.request, and
 * optionally send a reply. Workers are barrier-synced for the duration
 * since handlers mutate state the data plane reads. */
620 vhost_user_socket_read (unix_file_t * uf)
623 int fd, number_of_fds = 0;
624 int fds[VHOST_MEMORY_MAX_NREGIONS];
625 vhost_user_msg_t msg;
628 vhost_user_main_t *vum = &vhost_user_main;
629 vhost_user_intf_t *vui;
630 struct cmsghdr *cmsg;
632 unix_file_t template = { 0 };
633 vnet_main_t *vnm = vnet_get_main ();
635 vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
/* ancillary-data buffer sized for the max number of passed fds */
637 char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))];
639 memset (&mh, 0, sizeof (mh));
640 memset (control, 0, sizeof (control));
642 for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++)
645 /* set the payload */
646 iov[0].iov_base = (void *) &msg;
647 iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;
651 mh.msg_control = control;
652 mh.msg_controllen = sizeof (control);
/* read only the fixed header first; the body follows below */
654 n = recvmsg (uf->file_descriptor, &mh, 0);
656 /* Stop workers to avoid end of the world */
657 vlib_worker_thread_barrier_sync (vlib_get_main ());
659 if (n != VHOST_USER_MSG_HDR_SZ)
663 DBG_SOCK ("recvmsg returned error %d %s", errno, strerror (errno));
667 DBG_SOCK ("n (%d) != VHOST_USER_MSG_HDR_SZ (%d)",
668 n, VHOST_USER_MSG_HDR_SZ);
673 if (mh.msg_flags & MSG_CTRUNC)
675 DBG_SOCK ("MSG_CTRUNC is set");
/* extract any fds passed via SCM_RIGHTS ancillary data */
679 cmsg = CMSG_FIRSTHDR (&mh);
681 if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
682 (cmsg->cmsg_type == SCM_RIGHTS) &&
683 (cmsg->cmsg_len - CMSG_LEN (0) <=
684 VHOST_MEMORY_MAX_NREGIONS * sizeof (int)))
686 number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int);
687 clib_memcpy (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int));
690 /* version 1, no reply bit set */
691 if ((msg.flags & 7) != 1)
693 DBG_SOCK ("malformed message received. closing socket");
/* read the variable-size message body announced by the header */
700 read (uf->file_descriptor, ((char *) &msg) + VHOST_USER_MSG_HDR_SZ,
704 DBG_SOCK ("read failed %s", strerror (errno));
707 else if (rv != msg.size)
709 DBG_SOCK ("message too short (read %dB should be %dB)", rv, msg.size);
/* advertise the feature set we support, masked by the configured
 * feature_mask for this interface */
716 case VHOST_USER_GET_FEATURES:
718 msg.u64 = (1ULL << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
719 (1ULL << FEAT_VIRTIO_NET_F_CTRL_VQ) |
720 (1ULL << FEAT_VIRTIO_F_ANY_LAYOUT) |
721 (1ULL << FEAT_VIRTIO_F_INDIRECT_DESC) |
722 (1ULL << FEAT_VHOST_F_LOG_ALL) |
723 (1ULL << FEAT_VIRTIO_NET_F_GUEST_ANNOUNCE) |
724 (1ULL << FEAT_VIRTIO_NET_F_MQ) |
725 (1ULL << FEAT_VHOST_USER_F_PROTOCOL_FEATURES) |
726 (1ULL << FEAT_VIRTIO_F_VERSION_1);
727 msg.u64 &= vui->feature_mask;
728 msg.size = sizeof (msg.u64);
729 DBG_SOCK ("if %d msg VHOST_USER_GET_FEATURES - reply 0x%016llx",
730 vui->hw_if_index, msg.u64);
733 case VHOST_USER_SET_FEATURES:
734 DBG_SOCK ("if %d msg VHOST_USER_SET_FEATURES features 0x%016llx",
735 vui->hw_if_index, msg.u64);
737 vui->features = msg.u64;
/* mergeable rx buffers / virtio 1.0 use the 12-byte net header,
 * legacy uses 10 bytes */
740 ((1 << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
741 (1ULL << FEAT_VIRTIO_F_VERSION_1)))
742 vui->virtio_net_hdr_sz = 12;
744 vui->virtio_net_hdr_sz = 10;
747 (vui->features & (1 << FEAT_VIRTIO_F_ANY_LAYOUT)) ? 1 : 0;
749 ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE);
750 vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
753 /*for (q = 0; q < VHOST_VRING_MAX_N; q++)
754 vhost_user_vring_close(&vui->vrings[q]); */
758 case VHOST_USER_SET_MEM_TABLE:
759 DBG_SOCK ("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
760 vui->hw_if_index, msg.memory.nregions);
762 if ((msg.memory.nregions < 1) ||
763 (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
766 DBG_SOCK ("number of mem regions must be between 1 and %i",
767 VHOST_MEMORY_MAX_NREGIONS);
/* one passed fd is required per region */
772 if (msg.memory.nregions != number_of_fds)
774 DBG_SOCK ("each memory region must have FD");
777 unmap_all_mem_regions (vui);
778 for (i = 0; i < msg.memory.nregions; i++)
780 clib_memcpy (&(vui->regions[i]), &msg.memory.regions[i],
781 sizeof (vhost_user_memory_region_t));
783 long page_sz = get_huge_page_size (fds[i]);
785 /* align size to 2M page */
786 ssize_t map_sz = (vui->regions[i].memory_size +
787 vui->regions[i].mmap_offset +
788 page_sz) & ~(page_sz - 1);
790 vui->region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE,
791 MAP_SHARED, fds[i], 0);
/* cache [lo, hi) bounds for the SSE lookup in map_guest_mem */
792 vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr;
793 vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr +
794 vui->regions[i].memory_size;
797 ("map memory region %d addr 0 len 0x%lx fd %d mapped 0x%lx "
798 "page_sz 0x%x", i, map_sz, fds[i], vui->region_mmap_addr[i],
801 if (vui->region_mmap_addr[i] == MAP_FAILED)
803 clib_warning ("failed to map memory. errno is %d", errno);
/* advance past the in-file offset so the pointer addresses GPA 0
 * of the region directly */
806 vui->region_mmap_addr[i] += vui->regions[i].mmap_offset;
807 vui->region_mmap_fd[i] = fds[i];
809 vui->nregions = msg.memory.nregions;
812 case VHOST_USER_SET_VRING_NUM:
813 DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
814 vui->hw_if_index, msg.state.index, msg.state.num);
816 if ((msg.state.num > 32768) || /* maximum ring size is 32768 */
817 (msg.state.num == 0) || /* it cannot be zero */
818 ((msg.state.num - 1) & msg.state.num)) /* must be power of 2 */
820 vui->vrings[msg.state.index].qsz = msg.state.num;
823 case VHOST_USER_SET_VRING_ADDR:
824 DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
825 vui->hw_if_index, msg.state.index);
827 if (msg.state.index >= VHOST_VRING_MAX_N)
829 DBG_SOCK ("invalid vring index VHOST_USER_SET_VRING_ADDR:"
830 " %d >= %d", msg.state.index, VHOST_VRING_MAX_N);
834 if (msg.size < sizeof (msg.addr))
836 DBG_SOCK ("vhost message is too short (%d < %d)",
837 msg.size, sizeof (msg.addr));
/* the message carries qemu userspace addresses; translate them to
 * our own mappings via map_user_mem */
841 vui->vrings[msg.state.index].desc = (vring_desc_t *)
842 map_user_mem (vui, msg.addr.desc_user_addr);
843 vui->vrings[msg.state.index].used = (vring_used_t *)
844 map_user_mem (vui, msg.addr.used_user_addr);
845 vui->vrings[msg.state.index].avail = (vring_avail_t *)
846 map_user_mem (vui, msg.addr.avail_user_addr);
848 if ((vui->vrings[msg.state.index].desc == NULL) ||
849 (vui->vrings[msg.state.index].used == NULL) ||
850 (vui->vrings[msg.state.index].avail == NULL))
852 DBG_SOCK ("failed to map user memory for hw_if_index %d",
857 vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr;
858 vui->vrings[msg.state.index].log_used =
859 (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0;
861 /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated,
862 the ring is initialized in an enabled state. */
863 if (!(vui->features & (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES)))
865 vui->vrings[msg.state.index].enabled = 1;
/* resume from where the guest's used index stands */
868 vui->vrings[msg.state.index].last_used_idx =
869 vui->vrings[msg.state.index].last_avail_idx =
870 vui->vrings[msg.state.index].used->idx;
872 /* tell driver that we don't want interrupts */
873 vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
876 case VHOST_USER_SET_OWNER:
877 DBG_SOCK ("if %d msg VHOST_USER_SET_OWNER", vui->hw_if_index);
880 case VHOST_USER_RESET_OWNER:
881 DBG_SOCK ("if %d msg VHOST_USER_RESET_OWNER", vui->hw_if_index);
884 case VHOST_USER_SET_VRING_CALL:
885 DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL u64 %d",
886 vui->hw_if_index, msg.u64);
/* low byte is the vring index; bit 8 set means "no fd provided" */
888 q = (u8) (msg.u64 & 0xFF);
890 /* if there is old fd, delete and close it */
891 if (vui->vrings[q].callfd_idx != ~0)
893 unix_file_t *uf = pool_elt_at_index (unix_main.file_pool,
894 vui->vrings[q].callfd_idx);
895 unix_file_del (&unix_main, uf);
896 vui->vrings[q].callfd_idx = ~0;
899 if (!(msg.u64 & 0x100))
901 if (number_of_fds != 1)
903 DBG_SOCK ("More than one fd received !");
907 template.read_function = vhost_user_callfd_read_ready;
908 template.file_descriptor = fds[0];
909 template.private_data =
910 ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q;
911 vui->vrings[q].callfd_idx = unix_file_add (&unix_main, &template);
914 vui->vrings[q].callfd_idx = ~0;
917 case VHOST_USER_SET_VRING_KICK:
918 DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK u64 %d",
919 vui->hw_if_index, msg.u64);
921 q = (u8) (msg.u64 & 0xFF);
/* replace any previously registered kick fd */
923 if (vui->vrings[q].kickfd_idx != ~0)
925 unix_file_t *uf = pool_elt_at_index (unix_main.file_pool,
926 vui->vrings[q].kickfd_idx);
927 unix_file_del (&unix_main, uf);
928 vui->vrings[q].kickfd_idx = ~0;
931 if (!(msg.u64 & 0x100))
933 if (number_of_fds != 1)
935 DBG_SOCK ("More than one fd received !");
939 template.read_function = vhost_user_kickfd_read_ready;
940 template.file_descriptor = fds[0];
941 template.private_data =
942 (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) +
944 vui->vrings[q].kickfd_idx = unix_file_add (&unix_main, &template);
948 //When no kickfd is set, the queue is initialized as started
949 vui->vrings[q].kickfd_idx = ~0;
950 vui->vrings[q].started = 1;
955 case VHOST_USER_SET_VRING_ERR:
956 DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR u64 %d",
957 vui->hw_if_index, msg.u64);
959 q = (u8) (msg.u64 & 0xFF);
961 if (vui->vrings[q].errfd != -1)
962 close (vui->vrings[q].errfd);
964 if (!(msg.u64 & 0x100))
966 if (number_of_fds != 1)
969 vui->vrings[q].errfd = fds[0];
972 vui->vrings[q].errfd = -1;
976 case VHOST_USER_SET_VRING_BASE:
977 DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d",
978 vui->hw_if_index, msg.state.index, msg.state.num);
980 vui->vrings[msg.state.index].last_avail_idx = msg.state.num;
983 case VHOST_USER_GET_VRING_BASE:
984 DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d",
985 vui->hw_if_index, msg.state.index, msg.state.num);
987 if (msg.state.index >= VHOST_VRING_MAX_N)
989 DBG_SOCK ("invalid vring index VHOST_USER_GET_VRING_BASE:"
990 " %d >= %d", msg.state.index, VHOST_VRING_MAX_N);
994 /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. */
995 vhost_user_vring_close (vui, msg.state.index);
997 msg.state.num = vui->vrings[msg.state.index].last_avail_idx;
999 msg.size = sizeof (msg.state);
1002 case VHOST_USER_NONE:
1003 DBG_SOCK ("if %d msg VHOST_USER_NONE", vui->hw_if_index);
1007 case VHOST_USER_SET_LOG_BASE:
1009 DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE", vui->hw_if_index);
1011 if (msg.size != sizeof (msg.log))
1014 ("invalid msg size for VHOST_USER_SET_LOG_BASE: %d instead of %d",
1015 msg.size, sizeof (msg.log));
/* a log base only makes sense if LOG_SHMFD was negotiated */
1020 (vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD)))
1023 ("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received");
1028 /* align size to 2M page */
1029 long page_sz = get_huge_page_size (fd);
1031 (msg.log.size + msg.log.offset + page_sz) & ~(page_sz - 1);
1033 vui->log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
1037 ("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped 0x%lx",
1038 map_sz, msg.log.offset, fd, vui->log_base_addr);
1040 if (vui->log_base_addr == MAP_FAILED)
1042 clib_warning ("failed to map memory. errno is %d", errno);
1046 vui->log_base_addr += msg.log.offset;
1047 vui->log_size = msg.log.size;
1050 msg.size = sizeof (msg.u64);
1055 case VHOST_USER_SET_LOG_FD:
1056 DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_FD", vui->hw_if_index);
1060 case VHOST_USER_GET_PROTOCOL_FEATURES:
1061 DBG_SOCK ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES",
1065 msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |
1066 (1 << VHOST_USER_PROTOCOL_F_MQ);
1067 msg.size = sizeof (msg.u64);
1070 case VHOST_USER_SET_PROTOCOL_FEATURES:
1071 DBG_SOCK ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%lx",
1072 vui->hw_if_index, msg.u64);
1074 vui->protocol_features = msg.u64;
1078 case VHOST_USER_GET_QUEUE_NUM:
1079 DBG_SOCK ("if %d msg VHOST_USER_GET_QUEUE_NUM", vui->hw_if_index);
1081 msg.u64 = VHOST_VRING_MAX_N;
1082 msg.size = sizeof (msg.u64);
1085 case VHOST_USER_SET_VRING_ENABLE:
1086 DBG_SOCK ("if %d VHOST_USER_SET_VRING_ENABLE: %s queue %d",
1087 vui->hw_if_index, msg.state.num ? "enable" : "disable",
1089 if (msg.state.index >= VHOST_VRING_MAX_N)
1091 DBG_SOCK ("invalid vring index VHOST_USER_SET_VRING_ENABLE:"
1092 " %d >= %d", msg.state.index, VHOST_VRING_MAX_N);
1096 vui->vrings[msg.state.index].enabled = msg.state.num;
1100 DBG_SOCK ("unknown vhost-user message %d received. closing socket",
1105 /* if we need to reply */
1109 send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
1110 if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
1112 DBG_SOCK ("could not send message response");
/* success path: refresh interface state and resume workers */
1117 vhost_user_update_iface_state (vui);
1118 vlib_worker_thread_barrier_release (vlib_get_main ());
/* error path (goto target, presumably 'close_socket'; the label itself
 * is not visible in this extraction - TODO confirm): disconnect and
 * resume workers */
1122 vhost_user_if_disconnect (vui);
1123 vhost_user_update_iface_state (vui);
1124 vlib_worker_thread_barrier_release (vlib_get_main ());
1128 static clib_error_t *
/* Socket error callback: tear the interface down and rebalance RX
 * placement, under a worker barrier. */
1129 vhost_user_socket_error (unix_file_t * uf)
1131 vlib_main_t *vm = vlib_get_main ();
1132 vhost_user_main_t *vum = &vhost_user_main;
1133 vhost_user_intf_t *vui =
1134 pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
1136 DBG_SOCK ("socket error on if %d", vui->sw_if_index);
1137 vlib_worker_thread_barrier_sync (vm);
1138 vhost_user_if_disconnect (vui);
1139 vhost_user_rx_thread_placement ();
1140 vlib_worker_thread_barrier_release (vm);
1144 static clib_error_t *
/* Server-mode listener callback: accept a new qemu client connection and
 * register it as a unix file whose reads feed vhost_user_socket_read. */
1145 vhost_user_socksvr_accept_ready (unix_file_t * uf)
1147 int client_fd, client_len;
1148 struct sockaddr_un client;
1149 unix_file_t template = { 0 };
1150 vhost_user_main_t *vum = &vhost_user_main;
1151 vhost_user_intf_t *vui;
1153 vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
1155 client_len = sizeof (client);
1156 client_fd = accept (uf->file_descriptor,
1157 (struct sockaddr *) &client,
1158 (socklen_t *) & client_len);
1161 return clib_error_return_unix (0, "accept");
1163 DBG_SOCK ("New client socket for vhost interface %d", vui->sw_if_index);
1164 template.read_function = vhost_user_socket_read;
1165 template.error_function = vhost_user_socket_error;
1166 template.file_descriptor = client_fd;
/* private_data carries the interface pool index for the callbacks */
1167 template.private_data = vui - vhost_user_main.vhost_user_interfaces;
1168 vui->unix_file_index = unix_file_add (&unix_main, &template);
1172 static clib_error_t *
/* One-time init: set coalescing defaults, size per-thread state, and
 * discover which cpu range belongs to the "workers" thread registration
 * (falls back to cpu 0 / count 1 when there are no workers). */
1173 vhost_user_init (vlib_main_t * vm)
1175 clib_error_t *error;
1176 vhost_user_main_t *vum = &vhost_user_main;
1177 vlib_thread_main_t *tm = vlib_get_thread_main ();
1178 vlib_thread_registration_t *tr;
1181 error = vlib_call_init_function (vm, ip4_init);
/* interrupt-coalescing defaults: up to 32 frames or 1 ms */
1185 vum->coalesce_frames = 32;
1186 vum->coalesce_time = 1e-3;
1188 vec_validate_aligned (vum->rx_buffers, tm->n_vlib_mains - 1,
1189 CLIB_CACHE_LINE_BYTES);
1190 vec_validate (vum->cpus, tm->n_vlib_mains - 1);
1192 /* find out which cpus will be used for input */
1193 vum->input_cpu_first_index = 0;
1194 vum->input_cpu_count = 1;
1195 p = hash_get_mem (tm->thread_registrations_by_name, "workers");
1196 tr = p ? (vlib_thread_registration_t *) p[0] : 0;
1198 if (tr && tr->count > 0)
1200 vum->input_cpu_first_index = tr->first_index;
1201 vum->input_cpu_count = tr->count;
1204 vum->random = random_default_seed ();
1209 VLIB_INIT_FUNCTION (vhost_user_init);
1211 static clib_error_t *
/* Main-loop exit hook (body not visible in this extraction - presumably
 * deletes interfaces / unlinks sockets; TODO confirm against full source). */
1212 vhost_user_exit (vlib_main_t * vm)
1218 VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit);
/* Per-packet trace record captured by the vhost-user input path. */
1222 u16 qid; /**< The interface queue index (Not the virtio vring idx) */
1223 u16 device_index; /**< The device index */
1224 u32 virtio_ring_flags; /**< Runtime queue flags (VIRTIO_TRACE_F_*) */
1225 u16 first_desc_len; /**< Length of the first data descriptor */
1226 virtio_net_hdr_mrg_rxbuf_t hdr; /**< Virtio header */
/* Format callback for vhost_trace_t records: prints interface/queue,
 * decoded virtio trace flags, and the captured virtio net header. */
1230 format_vhost_trace (u8 * s, va_list * va)
1232 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
1233 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
1234 CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main ();
1235 vhost_user_main_t *vum = &vhost_user_main;
1236 vhost_trace_t *t = va_arg (*va, vhost_trace_t *);
1237 vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
1240 vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, vui->sw_if_index);
1242 uword indent = format_get_indent (s);
1244 s = format (s, "%U %U queue %d\n", format_white_space, indent,
1245 format_vnet_sw_interface_name, vnm, sw, t->qid);
1247 s = format (s, "%U virtio flags:\n", format_white_space, indent);
/* expand one line per set VIRTIO_TRACE_F_* flag */
1249 if (t->virtio_ring_flags & (1 << VIRTIO_TRACE_F_##n)) \
1250 s = format (s, "%U %s %s\n", format_white_space, indent, #n, st);
1251 foreach_virtio_trace_flags
1253 s = format (s, "%U virtio_net_hdr first_desc_len %u\n",
1254 format_white_space, indent, t->first_desc_len);
1256 s = format (s, "%U flags 0x%02x gso_type %u\n",
1257 format_white_space, indent,
1258 t->hdr.hdr.flags, t->hdr.hdr.gso_type);
/* num_buffers is only meaningful with the 12-byte (mergeable) header */
1260 if (vui->virtio_net_hdr_sz == 12)
1261 s = format (s, "%U num_buff %u",
1262 format_white_space, indent, t->hdr.num_buffers);
/* Fill a vhost_trace_t for one received packet: classify the descriptor
 * layout (single / chained / indirect) and capture the virtio net header. */
1268 vhost_user_rx_trace (vhost_trace_t * t,
1269 vhost_user_intf_t * vui, u16 qid,
1270 vlib_buffer_t * b, vhost_user_vring_t * txvq)
1272 vhost_user_main_t *vum = &vhost_user_main;
/* qsz is a power of 2 (enforced in SET_VRING_NUM), so mask works */
1273 u32 qsz_mask = txvq->qsz - 1;
1274 u32 last_avail_idx = txvq->last_avail_idx;
1275 u32 desc_current = txvq->avail->ring[last_avail_idx & qsz_mask];
1276 vring_desc_t *hdr_desc = 0;
1277 virtio_net_hdr_mrg_rxbuf_t *hdr;
1280 memset (t, 0, sizeof (*t));
1281 t->device_index = vui - vum->vhost_user_interfaces;
1284 hdr_desc = &txvq->desc[desc_current];
1285 if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
1287 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
1288 //Header is the first here
1289 hdr_desc = map_guest_mem (vui, txvq->desc[desc_current].addr, &hint);
1291 if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
1293 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
1295 if (!(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
1296 !(txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
1298 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
1301 t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
1303 if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint)))
1305 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR;
/* copy at most the negotiated header size, bounded by the descriptor */
1309 u32 len = vui->virtio_net_hdr_sz;
1310 memcpy (&t->hdr, hdr, len > hdr_desc->len ? hdr_desc->len : len);
/*
 * Signal the guest ("call") for vring 'vq' by writing to its callfd
 * eventfd, then reset the interrupt-coalescing state (packet counter and
 * deadline).  The write() result is deliberately ignored (see TODO);
 * UNIX_GET_FD yields -1 when callfd_idx is ~0.
 * NOTE(review): the declaration of 'x' (the eventfd counter value written)
 * is on a line missing from this extraction.
 */
1315 vhost_user_send_call (vlib_main_t * vm, vhost_user_vring_t * vq)
1317 vhost_user_main_t *vum = &vhost_user_main;
1319 int fd = UNIX_GET_FD (vq->callfd_idx);
1320 int rv __attribute__ ((unused));
1321 /* TODO: pay attention to rv */
1322 rv = write (fd, &x, sizeof (x));
1323 vq->n_since_last_int = 0;
/* Next interrupt is due no later than coalesce_time from now. */
1324 vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
/*
 * Per-queue RX worker: dequeue packets that the guest placed on its TX
 * vring 'txvq', copy them into VPP buffers, and enqueue them to the next
 * graph node (ethernet-input or drop).  Returns the number of packets
 * received.  Also services pending coalesced interrupts for both
 * directions of this queue pair.
 * NOTE(review): extraction gaps — several declarations (n_left, next_index,
 * drops init, loop braces, copy loop body) are on missing lines; code kept
 * byte-identical.
 */
1329 vhost_user_if_input (vlib_main_t * vm,
1330 vhost_user_main_t * vum,
1331 vhost_user_intf_t * vui,
1332 u16 qid, vlib_node_runtime_t * node)
1334 vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
1335 vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
1336 uword n_rx_packets = 0, n_rx_bytes = 0;
1338 u32 n_left_to_next, *to_next;
1341 uword n_trace = vlib_get_trace_count (vm, node);
1343 u32 cpu_index, rx_len, drops, flush;
1344 f64 now = vlib_time_now (vm);
/* Separate mapping hints for direct vs indirect descriptor tables. */
1345 u32 map_guest_hint_desc = 0;
1346 u32 map_guest_hint_indirect = 0;
1347 u32 *map_guest_hint_p = &map_guest_hint_desc;
1349 /* do we have pending interrupts ? */
1350 if ((txvq->n_since_last_int) && (txvq->int_deadline < now))
1351 vhost_user_send_call (vm, txvq);
1353 if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now))
1354 vhost_user_send_call (vm, rxvq);
/* Any avail flag bit other than NO_INTERRUPT (bit 0) is unexpected. */
1356 if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
/* Free-running 16-bit ring indices; subtraction wraps correctly in u16. */
1359 n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx);
1362 if (PREDICT_FALSE (n_left == 0))
1365 if (PREDICT_FALSE (!vui->admin_up || !(txvq->enabled)))
1368 * Discard input packet if interface is admin down or vring is not
1370 * "For example, for a networking device, in the disabled state
1371 * client must not supply any new RX packets, but must process
1372 * and discard any TX packets."
/* Drop everything: mark the whole ring consumed and notify the guest. */
1375 txvq->last_avail_idx = txvq->last_used_idx = txvq->avail->idx;
1376 CLIB_MEMORY_BARRIER ();
1377 txvq->used->idx = txvq->last_used_idx;
1378 vhost_user_log_dirty_ring (vui, txvq, idx);
1379 vhost_user_send_call (vm, txvq);
1383 if (PREDICT_FALSE (n_left == txvq->qsz))
1385 //Informational error logging when VPP is not receiving packets fast enough
1386 vlib_error_count (vm, node->node_index,
1387 VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
/* qsz assumed power of two — index masking instead of modulo. */
1390 qsz_mask = txvq->qsz - 1;
1391 cpu_index = os_get_cpu_number ();
1395 if (n_left > VLIB_FRAME_SIZE)
1396 n_left = VLIB_FRAME_SIZE;
1398 /* Allocate some buffers.
1399 * Note that buffers that are chained for jumbo
1400 * frames are allocated separately using a slower path.
1401 * The idea is to be certain to have enough buffers at least
1402 * to cycle through the descriptors without having to check for errors.
1403 * For jumbo frames, the bottleneck is memory copy anyway.
1405 if (PREDICT_FALSE (!vum->rx_buffers[cpu_index]))
1407 vec_alloc (vum->rx_buffers[cpu_index], 2 * VLIB_FRAME_SIZE);
1409 if (PREDICT_FALSE (!vum->rx_buffers[cpu_index]))
1410 flush = n_left; //Drop all input
/* Top up the per-cpu buffer cache to 2 frames worth. */
1413 if (PREDICT_FALSE (_vec_len (vum->rx_buffers[cpu_index]) < n_left))
1415 u32 curr_len = _vec_len (vum->rx_buffers[cpu_index]);
1416 _vec_len (vum->rx_buffers[cpu_index]) +=
1417 vlib_buffer_alloc_from_free_list (vm,
1418 vum->rx_buffers[cpu_index] +
1420 2 * VLIB_FRAME_SIZE - curr_len,
1421 VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
/* If allocation still fell short, the excess input must be flushed. */
1423 if (PREDICT_FALSE (n_left > _vec_len (vum->rx_buffers[cpu_index])))
1424 flush = n_left - _vec_len (vum->rx_buffers[cpu_index]);
1427 if (PREDICT_FALSE (flush))
1429 //Remove some input buffers
1432 vlib_error_count (vm, vhost_user_input_node.index,
1433 VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
/* Consume (and immediately return as used, len 0) the flushed
 * descriptor chains so the ring keeps moving. */
1436 u16 desc_chain_head =
1437 txvq->avail->ring[txvq->last_avail_idx & qsz_mask];
1438 txvq->last_avail_idx++;
1439 txvq->used->ring[txvq->last_used_idx & qsz_mask].id =
1441 txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0;
1442 vhost_user_log_dirty_ring (vui, txvq,
1443 ring[txvq->last_used_idx & qsz_mask]);
1444 txvq->last_used_idx++;
1449 rx_len = vec_len (vum->rx_buffers[cpu_index]); //vector might be null
1452 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Main per-packet copy loop. */
1454 while (n_left > 0 && n_left_to_next > 0)
1456 vlib_buffer_t *b_head, *b_current;
1457 u32 bi_head, bi_current;
1458 u16 desc_chain_head, desc_current;
1459 u8 error = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
/* Prefetch next packet's guest data and next VPP buffer. */
1461 if (PREDICT_TRUE (n_left > 1))
1464 txvq->avail->ring[(txvq->last_avail_idx + 1) & qsz_mask];
1466 map_guest_mem (vui, txvq->desc[next_desc].addr,
1467 &map_guest_hint_desc);
1468 if (PREDICT_TRUE (buffer_addr != 0))
1469 CLIB_PREFETCH (buffer_addr, 64, STORE);
1471 u32 bi = vum->rx_buffers[cpu_index][rx_len - 2];
1472 vlib_prefetch_buffer_with_index (vm, bi, STORE);
1473 CLIB_PREFETCH (vlib_get_buffer (vm, bi)->data, 128, STORE);
1476 desc_chain_head = desc_current =
1477 txvq->avail->ring[txvq->last_avail_idx & qsz_mask];
/* Take a buffer from the tail of the per-cpu cache. */
1478 bi_head = bi_current = vum->rx_buffers[cpu_index][--rx_len];
1479 b_head = b_current = vlib_get_buffer (vm, bi_head);
1480 vlib_buffer_chain_init (b_head);
1481 if (PREDICT_FALSE (n_trace))
1483 vlib_trace_buffer (vm, node, next_index, b_head,
1484 /* follow_chain */ 0);
1486 vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
1487 vhost_user_rx_trace (t0, vui, qid, b_head, txvq);
1489 vlib_set_trace_count (vm, node, n_trace);
/* Decide where payload starts: with ANY_LAYOUT (or a single-buffer
 * packet) the net header and data share the first descriptor. */
1493 if (PREDICT_TRUE (vui->is_any_layout) ||
1494 (!(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
1495 !(txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)))
1497 /* ANYLAYOUT or single buffer */
1498 offset = vui->virtio_net_hdr_sz;
1502 /* CSR case without ANYLAYOUT, skip 1st buffer */
1503 offset = txvq->desc[desc_current].len;
1506 vring_desc_t *desc_table = txvq->desc;
1507 u32 desc_index = desc_current;
1508 map_guest_hint_p = &map_guest_hint_desc;
/* Indirect descriptor: switch to the guest-resident table. */
1510 if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
1512 desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr,
1513 &map_guest_hint_desc);
1515 map_guest_hint_p = &map_guest_hint_indirect;
1516 if (PREDICT_FALSE (desc_table == 0))
1518 error = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
1526 map_guest_mem (vui, desc_table[desc_index].addr,
1528 if (PREDICT_FALSE (buffer_addr == 0))
1530 error = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
1535 (desc_table[desc_index].flags & VIRTQ_DESC_F_NEXT))
1537 CLIB_PREFETCH (&desc_table[desc_table[desc_index].next],
1538 sizeof (vring_desc_t), STORE);
/* Copy this descriptor's payload (past 'offset') into the VPP
 * buffer chain, allocating extra buffers for jumbo frames. */
1541 if (desc_table[desc_index].len > offset)
1543 u16 len = desc_table[desc_index].len - offset;
1544 u16 copied = vlib_buffer_chain_append_data_with_alloc (vm,
1545 VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX,
1554 error = VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER;
1560 /* if next flag is set, take next desc in the chain */
1561 if ((desc_table[desc_index].flags & VIRTQ_DESC_F_NEXT))
1562 desc_index = desc_table[desc_index].next;
1568 /* consume the descriptor and return it as used */
1569 txvq->last_avail_idx++;
1570 txvq->used->ring[txvq->last_used_idx & qsz_mask].id =
1572 txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0;
1573 vhost_user_log_dirty_ring (vui, txvq,
1574 ring[txvq->last_used_idx & qsz_mask]);
1575 txvq->last_used_idx++;
1577 //It is important to free RX as fast as possible such that the TX
1578 //process does not drop packets
1579 if ((txvq->last_used_idx & 0x3f) == 0) // Every 64 packets
1580 txvq->used->idx = txvq->last_used_idx;
/* 14 = minimum ethernet header length. */
1582 if (PREDICT_FALSE (b_head->current_length < 14 &&
1583 error == VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
1584 error = VHOST_USER_INPUT_FUNC_ERROR_UNDERSIZED_FRAME;
1586 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head);
1588 vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
1589 vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1590 b_head->error = node->errors[error];
1592 if (PREDICT_FALSE (error))
1595 next0 = VNET_DEVICE_INPUT_NEXT_DROP;
1600 b_head->current_length +
1601 b_head->total_length_not_including_first_buffer;
1603 next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
1606 to_next[0] = bi_head;
1610 /* redirect if feature path enabled */
1611 vnet_feature_start_device_input_x1 (vui->sw_if_index, &next0,
1614 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1615 to_next, n_left_to_next,
/* Ran out of the budgeted work but ring may still have packets:
 * extend n_left, bounded by frame size and remaining buffers. */
1618 if (PREDICT_FALSE (!n_left))
1620 // I NEED SOME MORE !
1621 u32 remain = (u16) (txvq->avail->idx - txvq->last_avail_idx);
1622 remain = (remain > VLIB_FRAME_SIZE - n_rx_packets) ?
1623 VLIB_FRAME_SIZE - n_rx_packets : remain;
1624 remain = (remain > rx_len) ? rx_len : remain;
1629 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Return unused cached buffers to the vector length. */
1632 if (PREDICT_TRUE (vum->rx_buffers[cpu_index] != 0))
1633 _vec_len (vum->rx_buffers[cpu_index]) = rx_len;
1635 /* give buffers back to driver */
1636 CLIB_MEMORY_BARRIER ();
1637 txvq->used->idx = txvq->last_used_idx;
1638 vhost_user_log_dirty_ring (vui, txvq, idx);
1640 /* interrupt (call) handling */
/* avail->flags bit 0 = VRING_AVAIL_F_NO_INTERRUPT. */
1641 if ((txvq->callfd_idx != ~0) && !(txvq->avail->flags & 1))
1643 txvq->n_since_last_int += n_rx_packets;
1645 if (txvq->n_since_last_int > vum->coalesce_frames)
1646 vhost_user_send_call (vm, txvq);
1649 if (PREDICT_FALSE (drops))
1651 vlib_increment_simple_counter
1652 (vnet_main.interface_main.sw_if_counters
1653 + VNET_INTERFACE_COUNTER_DROP, os_get_cpu_number (),
1654 vui->sw_if_index, drops);
1657 /* increase rx counters */
1658 vlib_increment_combined_counter
1659 (vnet_main.interface_main.combined_sw_if_counters
1660 + VNET_INTERFACE_COUNTER_RX,
1661 os_get_cpu_number (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
1663 return n_rx_packets;
/*
 * Input node dispatch function: iterate over the (interface, queue) pairs
 * assigned to this worker cpu and poll each via vhost_user_if_input.
 * Returns the total number of packets received this invocation.
 */
1667 vhost_user_input (vlib_main_t * vm,
1668 vlib_node_runtime_t * node, vlib_frame_t * f)
1670 vhost_user_main_t *vum = &vhost_user_main;
1671 uword n_rx_packets = 0;
1672 u32 cpu_index = os_get_cpu_number ();
1675 vhost_iface_and_queue_t *vhiq;
/* rx_queues holds this cpu's static placement of interface queues. */
1676 vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
1678 vhost_user_intf_t *vui =
1679 &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
1680 n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node);
1683 //TODO: One call might return more than 256 packets here.
1684 //But this is supposed to be the vector size.
1685 return n_rx_packets;
/*
 * Graph node registration for the vhost-user RX polling node.  Disabled
 * by default; enabled when a vhost-user interface is created.  Shares
 * next-node arcs with "device-input" via .sibling_of.
 */
1689 VLIB_REGISTER_NODE (vhost_user_input_node) = {
1690 .function = vhost_user_input,
1691 .type = VLIB_NODE_TYPE_INPUT,
1692 .name = "vhost-user-input",
1693 .sibling_of = "device-input",
1695 /* Will be enabled if/when hardware is detected. */
1696 .state = VLIB_NODE_STATE_DISABLED,
1698 .format_buffer = format_ethernet_header_with_length,
1699 .format_trace = format_vhost_trace,
1701 .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR,
1702 .error_strings = vhost_user_input_func_error_strings,
/* Generate per-architecture (multiarch) variants of the node function. */
1705 VLIB_NODE_FUNCTION_MULTIARCH (vhost_user_input_node, vhost_user_input)
/*
 * Fill in trace record 't' for a packet about to be copied to the guest
 * RX vring 'rxvq' (TX from VPP's point of view).  Mirrors
 * vhost_user_rx_trace, but does not copy the virtio net header (that is
 * recorded later by the TX path once the header is written).
 * NOTE(review): the declaration of 'hint' is on a line missing from this
 * extraction; code lines are kept byte-identical.
 */
1710 vhost_user_tx_trace (vhost_trace_t * t,
1711 vhost_user_intf_t * vui, u16 qid,
1712 vlib_buffer_t * b, vhost_user_vring_t * rxvq)
1714 vhost_user_main_t *vum = &vhost_user_main;
/* qsz assumed power of two — masking instead of modulo. */
1715 u32 qsz_mask = rxvq->qsz - 1;
1716 u32 last_avail_idx = rxvq->last_avail_idx;
1717 u32 desc_current = rxvq->avail->ring[last_avail_idx & qsz_mask];
1718 vring_desc_t *hdr_desc = 0;
1721 memset (t, 0, sizeof (*t));
1722 t->device_index = vui - vum->vhost_user_interfaces;
1725 hdr_desc = &rxvq->desc[desc_current];
1726 if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
1728 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
1729 //Header is the first here
/* For indirect chains the header descriptor is the first entry of the
 * guest-resident indirect table. */
1730 hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint);
1732 if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
1734 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
1736 if (!(rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
1737 !(rxvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
1739 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
1742 t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
/*
 * Device TX function: copy each VPP buffer chain in 'frame' into guest
 * descriptors on the per-cpu RX vring, write the virtio net header,
 * update the used ring, and optionally signal the guest.  Always frees
 * the frame's buffers and returns frame->n_vectors; failed packets are
 * accounted via node errors and the interface drop counter.
 * NOTE(review): extraction gaps — several declarations (qid, qsz_mask,
 * n_left, n_retry, buffer_addr/buffer_len, goto labels/braces) are on
 * missing lines; code kept byte-identical.
 */
1746 vhost_user_tx (vlib_main_t * vm,
1747 vlib_node_runtime_t * node, vlib_frame_t * frame)
1749 u32 *buffers = vlib_frame_args (frame);
1751 vhost_user_main_t *vum = &vhost_user_main;
1752 uword n_packets = 0;
1753 vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
1754 vhost_user_intf_t *vui =
1755 pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
1757 vhost_user_vring_t *rxvq;
1759 u8 error = VHOST_USER_TX_FUNC_ERROR_NONE;
1760 u32 cpu_index = os_get_cpu_number ();
1761 n_left = n_packets = frame->n_vectors;
/* Separate mapping hints for direct vs indirect descriptor tables. */
1762 u32 map_guest_hint_desc = 0;
1763 u32 map_guest_hint_indirect = 0;
1764 u32 *map_guest_hint_p = &map_guest_hint_desc;
1765 vhost_trace_t *current_trace = 0;
1768 if (PREDICT_FALSE (!vui->is_up || !vui->admin_up))
1770 error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
/* Select this cpu's assigned guest RX queue. */
1775 VHOST_VRING_IDX_RX (*vec_elt_at_index (vui->per_cpu_tx_qid, cpu_index));
1776 rxvq = &vui->vrings[qid];
/* Spinlock only needed when several worker threads share one queue. */
1777 if (PREDICT_FALSE (vui->use_tx_spinlock))
1778 vhost_user_vring_lock (vui, qid);
1780 if (PREDICT_FALSE ((rxvq->avail->idx == rxvq->last_avail_idx)))
1782 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
1786 qsz_mask = rxvq->qsz - 1; /* qsz is always power of 2 */
1789 while (n_left > 0 && n_retry--)
1794 vlib_buffer_t *b0, *current_b0;
1795 u16 desc_head, desc_index, desc_len;
1796 vring_desc_t *desc_table;
1800 b0 = vlib_get_buffer (vm, buffers[0]);
1802 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
1804 current_trace = vlib_add_trace (vm, node, b0,
1805 sizeof (*current_trace));
1806 vhost_user_tx_trace (current_trace, vui, qid / 2, b0, rxvq);
1809 if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
1811 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
/* Start a new guest descriptor chain for this packet. */
1815 desc_table = rxvq->desc;
1816 map_guest_hint_p = &map_guest_hint_desc;
1817 desc_head = desc_index =
1818 rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask];
1819 if (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT)
/* An indirect descriptor must be large enough for >= 1 table entry. */
1822 (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
1824 error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
1829 map_guest_mem (vui, rxvq->desc[desc_index].addr,
1830 &map_guest_hint_desc))))
1832 error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
1836 map_guest_hint_p = &map_guest_hint_indirect;
/* desc_len tracks bytes written to the current chain (starts with
 * the virtio net header). */
1839 desc_len = vui->virtio_net_hdr_sz;
1843 map_guest_mem (vui, desc_table[desc_index].addr,
1844 map_guest_hint_p))))
1846 error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
1849 buffer_len = desc_table[desc_index].len;
1851 CLIB_PREFETCH (buffer_addr, CLIB_CACHE_LINE_BYTES, STORE);
/* Write the virtio net header at the start of the first buffer. */
1853 virtio_net_hdr_mrg_rxbuf_t *hdr =
1854 (virtio_net_hdr_mrg_rxbuf_t *) buffer_addr;
1856 hdr->hdr.gso_type = 0;
/* hdr_sz == 12 means MRG_RXBUF negotiated: num_buffers is present. */
1857 if (vui->virtio_net_hdr_sz == 12)
1858 hdr->num_buffers = 1;
1860 vhost_user_log_dirty_pages (vui, desc_table[desc_index].addr,
1861 vui->virtio_net_hdr_sz);
1863 u16 bytes_left = b0->current_length;
1864 buffer_addr += vui->virtio_net_hdr_sz;
1865 buffer_len -= vui->virtio_net_hdr_sz;
/* Walk the VPP buffer chain as the source of the copy. */
1871 if (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT)
1874 vlib_get_buffer (vm, current_b0->next_buffer);
1875 bytes_left = current_b0->current_length;
/* Current guest buffer exhausted: advance to the next descriptor
 * in the chain, or grab a fresh avail entry (MRG_RXBUF only). */
1884 if (buffer_len == 0)
1886 if (desc_table[desc_index].flags & VIRTQ_DESC_F_NEXT)
1888 //Next one is chained
1889 desc_index = desc_table[desc_index].next;
1892 map_guest_mem (vui, desc_table[desc_index].addr,
1893 map_guest_hint_p))))
/* Roll back the descriptors already consumed for this packet. */
1895 rxvq->last_used_idx -= hdr->num_buffers - 1;
1896 rxvq->last_avail_idx -= hdr->num_buffers - 1;
1897 error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
1900 buffer_len = desc_table[desc_index].len;
1902 else if (vui->virtio_net_hdr_sz == 12) //MRG is available
1904 //Move from available to used buffer
1905 rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id =
1907 rxvq->used->ring[rxvq->last_used_idx & qsz_mask].len =
1909 vhost_user_log_dirty_ring (vui, rxvq,
1910 ring[rxvq->last_used_idx &
1912 rxvq->last_avail_idx++;
1913 rxvq->last_used_idx++;
1917 (rxvq->last_avail_idx == rxvq->avail->idx))
1919 //Dequeue queued descriptors for this packet
1920 rxvq->last_used_idx -= hdr->num_buffers - 1;
1921 rxvq->last_avail_idx -= hdr->num_buffers - 1;
1922 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
1926 desc_table = rxvq->desc;
1927 map_guest_hint_p = &map_guest_hint_desc;
1928 desc_head = desc_index =
1929 rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask];
1932 desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT))
1934 //It is seriously unlikely that a driver will put indirect descriptor
1935 //after non-indirect descriptor.
1937 (rxvq->desc[desc_head].len <
1938 sizeof (vring_desc_t)))
1941 VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
1947 rxvq->desc[desc_index].addr,
1948 &map_guest_hint_desc))))
1950 error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
1954 map_guest_hint_p = &map_guest_hint_indirect;
1959 map_guest_mem (vui, desc_table[desc_index].addr,
1960 map_guest_hint_p))))
1962 error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
1965 buffer_len = desc_table[desc_index].len;
1966 CLIB_PREFETCH (buffer_addr, CLIB_CACHE_LINE_BYTES,
/* No MRG_RXBUF and chain exhausted: packet cannot fit. */
1971 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
/* Copy as much as fits into the current guest buffer. */
1976 u16 bytes_to_copy = bytes_left;
1978 (bytes_to_copy > buffer_len) ? buffer_len : bytes_to_copy;
1979 clib_memcpy (buffer_addr,
1980 vlib_buffer_get_current (current_b0) +
1981 current_b0->current_length - bytes_left,
/* Dirty-page logging for live migration. */
1984 vhost_user_log_dirty_pages (vui,
1985 desc_table[desc_index].addr +
1986 desc_table[desc_index].len -
1987 bytes_left - bytes_to_copy,
1990 CLIB_PREFETCH (rxvq, sizeof (*rxvq), STORE);
1991 bytes_left -= bytes_to_copy;
1992 buffer_len -= bytes_to_copy;
1993 buffer_addr += bytes_to_copy;
1994 desc_len += bytes_to_copy;
1997 //Move from available to used ring
1998 rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id = desc_head;
1999 rxvq->used->ring[rxvq->last_used_idx & qsz_mask].len = desc_len;
2000 vhost_user_log_dirty_ring (vui, rxvq,
2001 ring[rxvq->last_used_idx & qsz_mask]);
2003 rxvq->last_avail_idx++;
2004 rxvq->last_used_idx++;
/* Snapshot the header we actually wrote into the trace record. */
2006 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2007 current_trace->hdr = *hdr;
2010 n_left--; //At the end for error counting when 'goto done' is invoked
/* Publish used entries to the guest only after all stores above. */
2014 CLIB_MEMORY_BARRIER ();
2015 rxvq->used->idx = rxvq->last_used_idx;
2016 vhost_user_log_dirty_ring (vui, rxvq, idx);
2019 /* interrupt (call) handling */
/* avail->flags bit 0 = VRING_AVAIL_F_NO_INTERRUPT. */
2020 if ((rxvq->callfd_idx != ~0) && !(rxvq->avail->flags & 1))
2022 rxvq->n_since_last_int += n_packets - n_left;
2024 if (rxvq->n_since_last_int > vum->coalesce_frames)
2025 vhost_user_send_call (vm, rxvq);
2029 vhost_user_vring_unlock (vui, qid);
/* Account remaining (undelivered) packets as drops of 'error' kind. */
2032 if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
2034 vlib_error_count (vm, node->node_index, error, n_left);
2035 vlib_increment_simple_counter
2036 (vnet_main.interface_main.sw_if_counters
2037 + VNET_INTERFACE_COUNTER_DROP,
2038 os_get_cpu_number (), vui->sw_if_index, n_left);
/* Copy semantics: the frame's buffers are always freed here. */
2041 vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
2042 return frame->n_vectors;
/*
 * Admin up/down callback for the device class.  Records the admin state
 * in the interface structure and updates hardware link flags.
 * NOTE(review): the 'flags' parameter line and surrounding braces are on
 * lines missing from this extraction.
 */
2045 static clib_error_t *
2046 vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
2049 vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
2050 uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
2051 vhost_user_main_t *vum = &vhost_user_main;
2052 vhost_user_intf_t *vui =
2053 pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
/* The RX path (vhost_user_if_input) consults admin_up to drop traffic. */
2055 vui->admin_up = is_up;
2058 vnet_hw_interface_set_flags (vnm, vui->hw_if_index,
2059 VNET_HW_INTERFACE_FLAG_LINK_UP);
2061 return /* no error */ 0;
/*
 * Device class registration: ties the TX function, error strings, CLI
 * formatting and admin up/down handling to "vhost-user" interfaces.
 */
2065 VNET_DEVICE_CLASS (vhost_user_dev_class,static) = {
2066 .name = "vhost-user",
2067 .tx_function = vhost_user_tx,
2068 .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
2069 .tx_function_error_strings = vhost_user_tx_func_error_strings,
2070 .format_device_name = format_vhost_user_interface_name,
2071 .name_renumber = vhost_user_name_renumber,
2072 .admin_up_down_function = vhost_user_interface_admin_up_down,
2073 .format_tx_trace = format_vhost_trace,
/* Per-architecture variants of the TX function. */
2076 VLIB_DEVICE_TX_FUNCTION_MULTIARCH (vhost_user_dev_class,
/*
 * Background process node: periodically (and on signal) walks all
 * vhost-user interfaces.  For client-mode interfaces it retries the
 * unix-socket connect(); for already-connected ones it polls SO_ERROR to
 * detect a dead peer and disconnects.  Server-mode interfaces are handled
 * by the accept() path and skipped here.
 * NOTE(review): 'sun' is a stack variable with no visible zero-init, and
 * strncpy with sizeof-1 does not guarantee NUL termination for a max-length
 * sock_filename — confirm sock_filename length is bounded upstream.
 */
2081 vhost_user_process (vlib_main_t * vm,
2082 vlib_node_runtime_t * rt, vlib_frame_t * f)
2084 vhost_user_main_t *vum = &vhost_user_main;
2085 vhost_user_intf_t *vui;
2086 struct sockaddr_un sun;
2088 unix_file_t template = { 0 };
2089 f64 timeout = 3153600000.0 /* 100 years */ ;
2090 uword *event_data = 0;
/* One pre-opened socket is kept ready for the next connect() attempt. */
2092 sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
2093 sun.sun_family = AF_UNIX;
2094 template.read_function = vhost_user_socket_read;
2095 template.error_function = vhost_user_socket_error;
/* Main loop: wake on event (e.g. interface created) or timer. */
2102 vlib_process_wait_for_event_or_clock (vm, timeout);
2103 vlib_process_get_events (vm, &event_data);
2104 vec_reset_length (event_data);
2109 pool_foreach (vui, vum->vhost_user_interfaces, {
2111 if (vui->unix_server_index == ~0) { //Nothing to do for server sockets
2112 if (vui->unix_file_index == ~0)
2114 /* try to connect */
2115 strncpy (sun.sun_path, (char *) vui->sock_filename,
2116 sizeof (sun.sun_path) - 1);
2118 if (connect (sockfd, (struct sockaddr *) &sun,
2119 sizeof (struct sockaddr_un)) == 0)
2121 vui->sock_errno = 0;
/* Hand the connected fd to the unix file poller; ownership moves
 * to unix_main.file_pool. */
2122 template.file_descriptor = sockfd;
2123 template.private_data =
2124 vui - vhost_user_main.vhost_user_interfaces;
2125 vui->unix_file_index = unix_file_add (&unix_main, &template);
2127 //Re-open for next connect
2128 if ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) {
2129 clib_warning("Critical: Could not open unix socket");
/* connect() failed: remember errno for 'show vhost-user'. */
2135 vui->sock_errno = errno;
2140 /* check if socket is alive */
2142 socklen_t len = sizeof (error);
2143 int fd = UNIX_GET_FD(vui->unix_file_index);
2145 getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len);
2149 DBG_SOCK ("getsockopt returned %d", retval);
2150 vhost_user_if_disconnect (vui);
/* Registration of the connection-management background process above. */
2161 VLIB_REGISTER_NODE (vhost_user_process_node,static) = {
2162 .function = vhost_user_process,
2163 .type = VLIB_NODE_TYPE_PROCESS,
2164 .name = "vhost-user-process",
/**
 * @brief Disable and reset an interface structure.
2169 * Disables and reset interface structure.
2170 * It can then be either init again, or removed from used interfaces.
 */
2173 vhost_user_term_if (vhost_user_intf_t * vui)
2175 // Delete configured thread pinning
2176 vec_reset_length (vui->workers);
2177 // disconnect interface sockets
2178 vhost_user_if_disconnect (vui);
2179 vhost_user_update_iface_state (vui);
/* Server-mode only: also tear down the listening socket. */
2181 if (vui->unix_server_index != ~0)
2183 //Close server socket
2184 unix_file_t *uf = pool_elt_at_index (unix_main.file_pool,
2185 vui->unix_server_index);
2186 unix_file_del (&unix_main, uf);
2187 vui->unix_server_index = ~0;
/*
 * Delete the vhost-user interface identified by sw_if_index.  Validates
 * that the interface belongs to this device class, tears down sockets and
 * vrings, returns the pool slot, and removes the ethernet interface.
 * NOTE(review): 'vui' is dereferenced (vui->hw_if_index) after pool_put —
 * works only because pool_put does not release the element memory;
 * consider caching hw_if_index before the pool_put.
 */
2192 vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
2194 vhost_user_main_t *vum = &vhost_user_main;
2195 vhost_user_intf_t *vui;
2197 vnet_hw_interface_t *hwif;
2199 if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
2200 hwif->dev_class_index != vhost_user_dev_class.index)
2201 return VNET_API_ERROR_INVALID_SW_IF_INDEX;
2203 DBG_SOCK ("Deleting vhost-user interface %s (instance %d)",
2204 hwif->name, hwif->dev_instance);
2206 vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
2208 // Disable and reset interface
2209 vhost_user_term_if (vui);
2212 pool_put (vum->vhost_user_interfaces, vui);
2214 // Reset renumbered iface
2215 if (hwif->dev_instance <
2216 vec_len (vum->show_dev_instance_by_real_dev_instance))
2217 vum->show_dev_instance_by_real_dev_instance[hwif->dev_instance] = ~0;
2219 // Delete ethernet interface
2220 ethernet_delete_interface (vnm, vui->hw_if_index);
/**
 * @brief Open a listening (server-mode) unix socket on sock_filename.
2225 * Open server unix socket on specified sock_filename.
 * On success the listening fd is returned through *sock_fd; on failure a
 * VNET_API_ERROR_SYSCALL_ERROR_* code is returned.
 * NOTE(review): 'fd'/'rv' declarations and the error-path cleanup are on
 * lines missing from this extraction.
 */
2228 vhost_user_init_server_sock (const char *sock_filename, int *sock_fd)
/* Zero-init guarantees sun_path is NUL-terminated after the bounded copy. */
2231 struct sockaddr_un un = { };
2233 /* create listening socket */
2234 if ((fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
2235 return VNET_API_ERROR_SYSCALL_ERROR_1;
2237 un.sun_family = AF_UNIX;
2238 strncpy ((char *) un.sun_path, (char *) sock_filename,
2239 sizeof (un.sun_path) - 1);
2241 /* remove if exists */
/* Stale socket files from a previous run would make bind() fail. */
2242 unlink ((char *) sock_filename);
2244 if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1)
2246 rv = VNET_API_ERROR_SYSCALL_ERROR_2;
2250 if (listen (fd, 1) == -1)
2252 rv = VNET_API_ERROR_SYSCALL_ERROR_3;
/**
 * @brief Create the ethernet interface backing a vhost-user interface.
2265 * Create ethernet interface for vhost user interface.
 * Uses the caller-supplied MAC when given, otherwise generates a random
 * one.  Stores the resulting hw_if_index into vui and raises the L3 MTU
 * to 9000 bytes.
 * NOTE(review): a registration error is reported (clib_error_report) but
 * not propagated to the caller — the function continues regardless.
 */
2268 vhost_user_create_ethernet (vnet_main_t * vnm, vlib_main_t * vm,
2269 vhost_user_intf_t * vui, u8 * hwaddress)
2271 vhost_user_main_t *vum = &vhost_user_main;
2273 clib_error_t *error;
2275 /* create hw and sw interface */
2278 clib_memcpy (hwaddr, hwaddress, 6);
/* No MAC supplied: randomize the low 4 bytes. */
2282 random_u32 (&vum->random);
2283 clib_memcpy (hwaddr + 2, &vum->random, sizeof (vum->random));
2288 error = ethernet_register_interface
2290 vhost_user_dev_class.index,
2291 vui - vum->vhost_user_interfaces /* device instance */ ,
2292 hwaddr /* ethernet address */ ,
2293 &vui->hw_if_index, 0 /* flag change */ );
2296 clib_error_report (error);
2298 vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, vui->hw_if_index);
/* Allow jumbo frames on both directions. */
2299 hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
/**
 * @brief Initialize a vhost_user_intf_t with the supplied attributes.
2303 * Initialize vui with specified attributes
 * Registers the server socket (if any) with the unix file poller, resets
 * all vrings, allocates per-vring spinlocks, and computes the TX thread
 * placement.  Returns the sw_if_index through *sw_if_index.
 * NOTE(review): the 'server_sock_fd' parameter line and loop variable 'q'
 * declaration are on lines missing from this extraction.
 */
2306 vhost_user_vui_init (vnet_main_t * vnm,
2307 vhost_user_intf_t * vui,
2309 const char *sock_filename,
2310 u64 feature_mask, u32 * sw_if_index)
2312 vnet_sw_interface_t *sw;
2313 sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
/* Server mode: listen fd was opened by vhost_user_init_server_sock. */
2316 if (server_sock_fd != -1)
2318 unix_file_t template = { 0 };
2319 template.read_function = vhost_user_socksvr_accept_ready;
2320 template.file_descriptor = server_sock_fd;
2321 template.private_data = vui - vhost_user_main.vhost_user_interfaces; //hw index
2322 vui->unix_server_index = unix_file_add (&unix_main, &template);
/* Client mode: ~0 tells vhost_user_process to attempt connect(). */
2326 vui->unix_server_index = ~0;
2329 vui->sw_if_index = sw->sw_if_index;
2330 strncpy (vui->sock_filename, sock_filename,
2331 ARRAY_LEN (vui->sock_filename) - 1);
2332 vui->sock_errno = 0;
2334 vui->feature_mask = feature_mask;
2335 vui->unix_file_index = ~0;
2336 vui->log_base_addr = 0;
2338 for (q = 0; q < VHOST_VRING_MAX_N; q++)
2339 vhost_user_vring_init (vui, q);
/* Link stays down until the vhost-user session is established. */
2341 vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
2344 *sw_if_index = vui->sw_if_index;
/* One cache-line-aligned spinlock per vring for shared TX queues. */
2346 for (q = 0; q < VHOST_VRING_MAX_N; q++)
2348 vui->vring_locks[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
2349 CLIB_CACHE_LINE_BYTES);
2350 memset ((void *) vui->vring_locks[q], 0, CLIB_CACHE_LINE_BYTES);
2353 vec_validate (vui->per_cpu_tx_qid,
2354 vlib_get_thread_main ()->n_vlib_mains - 1);
2355 vhost_user_tx_thread_placement (vui);
/*
 * Create a new vhost-user interface: open the server socket (server mode),
 * allocate a pool slot, create the ethernet interface, initialize the vui
 * and signal the process node so a client-mode connect can begin.
 * NOTE(review): the 'is_server' parameter and the early-return on socket
 * failure are on lines missing from this extraction.
 */
2359 vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
2360 const char *sock_filename,
2364 u8 renumber, u32 custom_dev_instance, u8 * hwaddr)
2366 vhost_user_intf_t *vui = NULL;
2369 int server_sock_fd = -1;
2374 vhost_user_init_server_sock (sock_filename, &server_sock_fd)) != 0)
2380 pool_get (vhost_user_main.vhost_user_interfaces, vui);
2382 vhost_user_create_ethernet (vnm, vm, vui, hwaddr);
2383 vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename,
2384 feature_mask, &sw_if_idx);
/* Optional user-chosen display instance number. */
2387 vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
2390 *sw_if_index = sw_if_idx;
2392 // Process node must connect
2393 vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
/*
 * Reconfigure an existing vhost-user interface in place: open the new
 * server socket first (so failure leaves the old state intact), then
 * terminate and re-initialize the vui with the new parameters.
 * NOTE(review): uses vec_elt_at_index on the interfaces pool while
 * vhost_user_delete_if uses pool_elt_at_index — confirm intentional.
 */
2398 vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
2399 const char *sock_filename,
2402 u64 feature_mask, u8 renumber, u32 custom_dev_instance)
2404 vhost_user_main_t *vum = &vhost_user_main;
2405 vhost_user_intf_t *vui = NULL;
2407 int server_sock_fd = -1;
2409 vnet_hw_interface_t *hwif;
2411 if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
2412 hwif->dev_class_index != vhost_user_dev_class.index)
2413 return VNET_API_ERROR_INVALID_SW_IF_INDEX;
2415 vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
2417 // First try to open server socket
2419 if ((rv = vhost_user_init_server_sock (sock_filename,
2420 &server_sock_fd)) != 0)
2423 vhost_user_term_if (vui);
2424 vhost_user_vui_init (vnm, vui, server_sock_fd,
2425 sock_filename, feature_mask, &sw_if_idx);
2428 vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
2430 // Process node must connect
2431 vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
/*
 * CLI handler: "create vhost-user socket <path> [server] [feature-mask X]
 * [hwaddr M] [renumber N]".  Parses the line and calls
 * vhost_user_create_if, printing the new interface name on success.
 * NOTE(review): the early return on unknown input does not vec_free
 * sock_filename — possible small leak on a bad command line; confirm.
 */
2436 vhost_user_connect_command_fn (vlib_main_t * vm,
2437 unformat_input_t * input,
2438 vlib_cli_command_t * cmd)
2440 unformat_input_t _line_input, *line_input = &_line_input;
2441 u8 *sock_filename = NULL;
/* Default: offer every feature; mask bits clear to disable features. */
2444 u64 feature_mask = (u64) ~ (0ULL);
2446 u32 custom_dev_instance = ~0;
2450 /* Get a line of input. */
2451 if (!unformat_user (input, unformat_line_input, line_input))
2454 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
2456 if (unformat (line_input, "socket %s", &sock_filename))
2458 else if (unformat (line_input, "server"))
2460 else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
2464 (line_input, "hwaddr %U", unformat_ethernet_address, hwaddr))
2466 else if (unformat (line_input, "renumber %d", &custom_dev_instance))
2471 return clib_error_return (0, "unknown input `%U'",
2472 format_unformat_error, input);
2474 unformat_free (line_input);
2476 vnet_main_t *vnm = vnet_get_main ();
2479 if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename,
2480 is_server, &sw_if_index, feature_mask,
2481 renumber, custom_dev_instance, hw)))
2483 vec_free (sock_filename);
2484 return clib_error_return (0, "vhost_user_create_if returned %d", rv);
2487 vec_free (sock_filename);
2488 vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
/*
 * CLI handler: "delete vhost-user {<interface> | sw_if_index <n>}".
 * Resolves the target interface, verifies it belongs to the vhost-user
 * device class, and deletes it via vhost_user_delete_if.
 */
2494 vhost_user_delete_command_fn (vlib_main_t * vm,
2495 unformat_input_t * input,
2496 vlib_cli_command_t * cmd)
2498 unformat_input_t _line_input, *line_input = &_line_input;
2499 u32 sw_if_index = ~0;
2500 vnet_main_t *vnm = vnet_get_main ();
2502 /* Get a line of input. */
2503 if (!unformat_user (input, unformat_line_input, line_input))
2506 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
2508 if (unformat (line_input, "sw_if_index %d", &sw_if_index))
2511 (line_input, "%U", unformat_vnet_sw_interface, vnm,
/* When named by interface, double-check the device class up front. */
2514 vnet_hw_interface_t *hwif =
2515 vnet_get_sup_hw_interface (vnm, sw_if_index);
2517 vhost_user_dev_class.index != hwif->dev_class_index)
2518 return clib_error_return (0, "Not a vhost interface");
2521 return clib_error_return (0, "unknown input `%U'",
2522 format_unformat_error, input);
2524 unformat_free (line_input);
2525 vhost_user_delete_if (vnm, vm, sw_if_index);
/*
 * Collect per-interface details for all vhost-user interfaces into a
 * newly allocated vector returned through *out_vuids (caller frees).
 * Used by the binary API dump handler.
 * NOTE(review): the strncpy calls copy at most len-1 bytes but the
 * destination's last byte is only guaranteed NUL if vec_add2 memory is
 * zeroed — confirm vhost_user_intf_details_t fields are pre-cleared.
 */
2530 vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
2531 vhost_user_intf_details_t ** out_vuids)
2534 vhost_user_main_t *vum = &vhost_user_main;
2535 vhost_user_intf_t *vui;
2536 vhost_user_intf_details_t *r_vuids = NULL;
2537 vhost_user_intf_details_t *vuid = NULL;
2538 u32 *hw_if_indices = 0;
2539 vnet_hw_interface_t *hi;
/* First pass: snapshot hw_if_index of every interface in the pool. */
2546 pool_foreach (vui, vum->vhost_user_interfaces,
2547 vec_add1 (hw_if_indices, vui->hw_if_index);
2550 for (i = 0; i < vec_len (hw_if_indices); i++)
2552 hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
2553 vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
2555 vec_add2 (r_vuids, vuid, 1);
2556 vuid->sw_if_index = vui->sw_if_index;
2557 vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
2558 vuid->features = vui->features;
2559 vuid->num_regions = vui->nregions;
2560 vuid->sock_errno = vui->sock_errno;
2561 strncpy ((char *) vuid->sock_filename, (char *) vui->sock_filename,
2562 ARRAY_LEN (vuid->sock_filename) - 1);
/* "%v%c" with 0 appends a NUL so the vec can be used as a C string. */
2564 s = format (s, "%v%c", hi->name, 0);
2566 strncpy ((char *) vuid->if_name, (char *) s,
2567 ARRAY_LEN (vuid->if_name) - 1);
2572 vec_free (hw_if_indices);
2574 *out_vuids = r_vuids;
/* CLI handler for "show vhost-user [<interface> ...] [descriptors]".
 * Prints global coalesce settings, then per-interface features, socket
 * state, rx/tx thread placement, memory regions, and per-virtqueue
 * state; with "descriptors" it also dumps each vring's descriptor table. */
2580 show_vhost_user_command_fn (vlib_main_t * vm,
2581 unformat_input_t * input,
2582 vlib_cli_command_t * cmd)
2584 clib_error_t *error = 0;
2585 vnet_main_t *vnm = vnet_get_main ();
2586 vhost_user_main_t *vum = &vhost_user_main;
2587 vhost_user_intf_t *vui;
2588 u32 hw_if_index, *hw_if_indices = 0;
2589 vnet_hw_interface_t *hi;
2591 vhost_iface_and_queue_t *vhiq;
2601 struct feat_struct *feat_entry;
/* Name/bit table for virtio-net features, built via the X-macro list. */
2603 static struct feat_struct feat_array[] = {
2604 #define _(s,b) { .str = #s, .bit = b, },
2605 foreach_virtio_net_feature
/* Vhost-user protocol features supported by this driver. */
2610 #define foreach_protocol_feature \
2611 _(VHOST_USER_PROTOCOL_F_MQ) \
2612 _(VHOST_USER_PROTOCOL_F_LOG_SHMFD)
2614 static struct feat_struct proto_feat_array[] = {
2615 #define _(s) { .str = #s, .bit = s},
2616 foreach_protocol_feature
/* Parse optional interface names and the "descriptors"/"desc" flag. */
2621 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2624 (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
2626 vec_add1 (hw_if_indices, hw_if_index);
2628 else if (unformat (input, "descriptors") || unformat (input, "desc"))
2632 error = clib_error_return (0, "unknown input `%U'",
2633 format_unformat_error, input);
/* No interface given: show every vhost-user interface. */
2637 if (vec_len (hw_if_indices) == 0)
2639 pool_foreach (vui, vum->vhost_user_interfaces,
2640 vec_add1 (hw_if_indices, vui->hw_if_index);
2643 vlib_cli_output (vm, "Virtio vhost-user interfaces");
2644 vlib_cli_output (vm, "Global:\n coalesce frames %d time %e",
2645 vum->coalesce_frames, vum->coalesce_time);
/* Per-interface report. */
2647 for (i = 0; i < vec_len (hw_if_indices); i++)
2649 hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
2650 vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
2651 vlib_cli_output (vm, "Interface: %s (ifindex %d)",
2652 hi->name, hw_if_indices[i]);
2654 vlib_cli_output (vm, "virtio_net_hdr_sz %d\n"
2655 " features mask (0x%llx): \n"
2656 " features (0x%llx): \n",
2657 vui->virtio_net_hdr_sz, vui->feature_mask,
/* List the names of all negotiated virtio-net feature bits. */
2660 feat_entry = (struct feat_struct *) &feat_array;
2661 while (feat_entry->str)
2663 if (vui->features & (1ULL << feat_entry->bit))
2664 vlib_cli_output (vm, " %s (%d)", feat_entry->str,
/* Same again for the vhost-user protocol feature bits. */
2669 vlib_cli_output (vm, " protocol features (0x%llx)",
2670 vui->protocol_features);
2671 feat_entry = (struct feat_struct *) &proto_feat_array;
2672 while (feat_entry->str)
2674 if (vui->protocol_features & (1ULL << feat_entry->bit))
2675 vlib_cli_output (vm, " %s (%d)", feat_entry->str,
2680 vlib_cli_output (vm, "\n");
/* Socket role: a valid unix_server_index means VPP is the server. */
2682 vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n",
2684 (vui->unix_server_index != ~0) ? "server" : "client",
2685 strerror (vui->sock_errno));
/* RX placement: which worker thread polls which of our vrings. */
2687 vlib_cli_output (vm, " rx placement: ");
2688 vec_foreach (vhc, vum->cpus)
2690 vec_foreach (vhiq, vhc->rx_queues)
2692 if (vhiq->vhost_iface_index == vui - vum->vhost_user_interfaces)
2693 vlib_cli_output (vm, " thread %d on vring %d\n",
2694 vhc - vum->cpus, VHOST_VRING_IDX_TX (vhiq->qid));
/* TX placement: spinlock is needed when threads share a tx queue. */
2698 vlib_cli_output (vm, " tx placement: %s\n",
2699 vui->use_tx_spinlock ? "spin-lock" : "lock-free");
2701 vec_foreach_index (ci, vui->per_cpu_tx_qid)
2703 vlib_cli_output (vm, " thread %d on vring %d\n", ci,
2704 VHOST_VRING_IDX_RX (vui->per_cpu_tx_qid[ci]));
2707 vlib_cli_output (vm, "\n");
/* Guest memory regions shared over the vhost-user socket. */
2709 vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions);
2713 vlib_cli_output (vm,
2714 " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
2715 vlib_cli_output (vm,
2716 " ====== ===== ================== ================== ================== ================== ==================\n");
2718 for (j = 0; j < vui->nregions; j++)
2720 vlib_cli_output (vm,
2721 " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
2722 j, vui->region_mmap_fd[j],
2723 vui->regions[j].guest_phys_addr,
2724 vui->regions[j].memory_size,
2725 vui->regions[j].userspace_addr,
2726 vui->regions[j].mmap_offset,
2727 pointer_to_uword (vui->region_mmap_addr[j]));
/* Per-virtqueue state; only started vrings are shown. */
2729 for (q = 0; q < VHOST_VRING_MAX_N; q++)
2731 if (!vui->vrings[q].started)
/* Odd queue index = RX vring, even = TX vring (VPP's perspective). */
2734 vlib_cli_output (vm, "\n Virtqueue %d (%s%s)\n", q,
2735 (q & 1) ? "RX" : "TX",
2736 vui->vrings[q].enabled ? "" : " disabled");
2738 vlib_cli_output (vm,
2739 " qsz %d last_avail_idx %d last_used_idx %d\n",
2740 vui->vrings[q].qsz, vui->vrings[q].last_avail_idx,
2741 vui->vrings[q].last_used_idx);
/* Shared ring pointers may be unmapped; guard before dereferencing. */
2743 if (vui->vrings[q].avail && vui->vrings[q].used)
2744 vlib_cli_output (vm,
2745 " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
2746 vui->vrings[q].avail->flags,
2747 vui->vrings[q].avail->idx,
2748 vui->vrings[q].used->flags,
2749 vui->vrings[q].used->idx);
/* UNIX_GET_FD maps a unix file-pool index to its fd, or -1 if unset.
 * NOTE(review): the macro expansion ends in ';', yielding a harmless
 * empty statement here — consider dropping it from the macro. */
2751 int kickfd = UNIX_GET_FD (vui->vrings[q].kickfd_idx);
2752 int callfd = UNIX_GET_FD (vui->vrings[q].callfd_idx);
2753 vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n",
2754 kickfd, callfd, vui->vrings[q].errfd);
/* Optional full descriptor-table dump ("descriptors" keyword). */
2758 vlib_cli_output (vm, "\n descriptor table:\n");
2759 vlib_cli_output (vm,
2760 " id addr len flags next user_addr\n");
2761 vlib_cli_output (vm,
2762 " ===== ================== ===== ====== ===== ==================\n");
2763 for (j = 0; j < vui->vrings[q].qsz; j++)
2766 vlib_cli_output (vm,
2767 " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
2768 j, vui->vrings[q].desc[j].addr,
2769 vui->vrings[q].desc[j].len,
2770 vui->vrings[q].desc[j].flags,
2771 vui->vrings[q].desc[j].next,
/* Translate the guest-physical descriptor address to our mapping. */
2772 pointer_to_uword (map_guest_mem
2774 vui->vrings[q].desc[j].
2779 vlib_cli_output (vm, "\n");
2782 vec_free (hw_if_indices);
2791 * Create a vHost User interface. Once created, a new virtual interface
2792 * will exist with the name '<em>VirtualEthernet0/0/x</em>', where '<em>x</em>'
2793 * is the next free index.
2795 * There are several parameters associated with a vHost interface:
2797 * - <b>socket <socket-filename></b> - Name of the linux socket used by QEMU/VM and
2798 * VPP to manage the vHost interface. If socket does not already exist, VPP will
2799 * create the socket.
2801 * - <b>server</b> - Optional flag to indicate that VPP should be the server for the
2802 * linux socket. If not provided, VPP will be the client.
2804 * - <b>feature-mask <hex></b> - Optional virtio/vhost feature set negotiated at
2805 * startup. By default, all supported features will be advertised. Otherwise,
2806 * provide the set of features desired.
2807 * - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF
2808 * - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ
2809 * - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE
2810 * - 0x000400000 (22) - VIRTIO_NET_F_MQ
2811 * - 0x004000000 (26) - VHOST_F_LOG_ALL
2812 * - 0x008000000 (27) - VIRTIO_F_ANY_LAYOUT
2813 * - 0x010000000 (28) - VIRTIO_F_INDIRECT_DESC
2814 * - 0x040000000 (30) - VHOST_USER_F_PROTOCOL_FEATURES
2815 * - 0x100000000 (32) - VIRTIO_F_VERSION_1
2817 * - <b>hwaddr <mac-addr></b> - Optional ethernet address, can be in either
2818 * X:X:X:X:X:X unix or X.X.X cisco format.
2820 * - <b>renumber <dev_instance></b> - Optional parameter which allows the instance
2821 * in the name to be specified. If instance already exists, name will be used
2822 * anyway and multiple instances will have the same name. Use with caution.
2825 * Example of how to create a vhost interface with VPP as the client and all features enabled:
2826 * @cliexstart{create vhost-user socket /tmp/vhost1.sock}
2827 * VirtualEthernet0/0/0
2829 * Example of how to create a vhost interface with VPP as the server and with just
2830 * multiple queues enabled:
2831 * @cliexstart{create vhost-user socket /tmp/vhost2.sock server feature-mask 0x40400000}
2832 * VirtualEthernet0/0/1
2834 * Once the vHost interface is created, enable the interface using:
2835 * @cliexcmd{set interface state VirtualEthernet0/0/0 up}
/* CLI registration: "create vhost-user" — see the doxygen block above. */
2838 VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
2839 .path = "create vhost-user",
2840 .short_help = "create vhost-user socket <socket-filename> [server] [feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>]",
2841 .function = vhost_user_connect_command_fn,
2846 * Delete a vHost User interface using the interface name or the
2847 * software interface index. Use the '<em>show interfaces</em>'
2848 * command to determine the software interface index. On deletion,
2849 * the linux socket will not be deleted.
2852 * Example of how to delete a vhost interface by name:
2853 * @cliexcmd{delete vhost-user VirtualEthernet0/0/1}
2854 * Example of how to delete a vhost interface by software interface index:
2855 * @cliexcmd{delete vhost-user sw_if_index 1}
/* CLI registration: "delete vhost-user" — the linux socket is kept. */
2858 VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
2859 .path = "delete vhost-user",
2860 .short_help = "delete vhost-user {<interface> | sw_if_index <sw_idx>}",
2861 .function = vhost_user_delete_command_fn,
2865 * Display the attributes of a single vHost User interface (provide interface
2866 * name), multiple vHost User interfaces (provide a list of interface names separated
2867 * by spaces) or all vHost User interfaces (omit an interface name to display all
2868 * vHost interfaces).
2872 * Example of how to display a vhost interface:
2873 * @cliexstart{show vhost-user VirtualEthernet0/0/0}
2874 * Virtio vhost-user interfaces
2876 * coalesce frames 32 time 1e-3
2877 * Interface: VirtualEthernet0/0/0 (ifindex 1)
2878 * virtio_net_hdr_sz 12
2879 * features mask (0xffffffffffffffff):
2880 * features (0x50408000):
2881 * VIRTIO_NET_F_MRG_RXBUF (15)
2882 * VIRTIO_NET_F_MQ (22)
2883 * VIRTIO_F_INDIRECT_DESC (28)
2884 * VHOST_USER_F_PROTOCOL_FEATURES (30)
2885 * protocol features (0x3)
2886 * VHOST_USER_PROTOCOL_F_MQ (0)
2887 * VHOST_USER_PROTOCOL_F_LOG_SHMFD (1)
2889 * socket filename /tmp/vhost1.sock type client errno "Success"
2892 * thread 1 on vring 1
2893 * thread 1 on vring 5
2894 * thread 2 on vring 3
2895 * thread 2 on vring 7
2896 * tx placement: spin-lock
2897 * thread 0 on vring 0
2898 * thread 1 on vring 2
2899 * thread 2 on vring 0
2901 * Memory regions (total 2)
2902 * region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr
2903 * ====== ===== ================== ================== ================== ================== ==================
2904 * 0 60 0x0000000000000000 0x00000000000a0000 0x00002aaaaac00000 0x0000000000000000 0x00002aab2b400000
2905 * 1 61 0x00000000000c0000 0x000000003ff40000 0x00002aaaaacc0000 0x00000000000c0000 0x00002aababcc0000
2908 * qsz 256 last_avail_idx 0 last_used_idx 0
2909 * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
2910 * kickfd 62 callfd 64 errfd -1
2913 * qsz 256 last_avail_idx 0 last_used_idx 0
2914 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
2915 * kickfd 65 callfd 66 errfd -1
2918 * qsz 256 last_avail_idx 0 last_used_idx 0
2919 * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
2920 * kickfd 63 callfd 70 errfd -1
2923 * qsz 256 last_avail_idx 0 last_used_idx 0
2924 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
2925 * kickfd 72 callfd 74 errfd -1
2927 * Virtqueue 4 (TX disabled)
2928 * qsz 256 last_avail_idx 0 last_used_idx 0
2929 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
2930 * kickfd 76 callfd 78 errfd -1
2932 * Virtqueue 5 (RX disabled)
2933 * qsz 256 last_avail_idx 0 last_used_idx 0
2934 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
2935 * kickfd 80 callfd 82 errfd -1
2937 * Virtqueue 6 (TX disabled)
2938 * qsz 256 last_avail_idx 0 last_used_idx 0
2939 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
2940 * kickfd 84 callfd 86 errfd -1
2942 * Virtqueue 7 (RX disabled)
2943 * qsz 256 last_avail_idx 0 last_used_idx 0
2944 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
2945 * kickfd 88 callfd 90 errfd -1
2949 * The optional '<em>descriptors</em>' parameter will display the same output as
2950 * the previous example but will include the descriptor table for each queue.
2951 * The output is truncated below:
2952 * @cliexstart{show vhost-user VirtualEthernet0/0/0 descriptors}
2953 * Virtio vhost-user interfaces
2955 * coalesce frames 32 time 1e-3
2956 * Interface: VirtualEthernet0/0/0 (ifindex 1)
2957 * virtio_net_hdr_sz 12
2958 * features mask (0xffffffffffffffff):
2959 * features (0x50408000):
2960 * VIRTIO_NET_F_MRG_RXBUF (15)
2961 * VIRTIO_NET_F_MQ (22)
2964 * qsz 256 last_avail_idx 0 last_used_idx 0
2965 * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
2966 * kickfd 62 callfd 64 errfd -1
2969 * id addr len flags next user_addr
2970 * ===== ================== ===== ====== ===== ==================
2971 * 0 0x0000000010b6e974 2060 0x0002 1 0x00002aabbc76e974
2972 * 1 0x0000000010b6e034 2060 0x0002 2 0x00002aabbc76e034
2973 * 2 0x0000000010b6d6f4 2060 0x0002 3 0x00002aabbc76d6f4
2974 * 3 0x0000000010b6cdb4 2060 0x0002 4 0x00002aabbc76cdb4
2975 * 4 0x0000000010b6c474 2060 0x0002 5 0x00002aabbc76c474
2976 * 5 0x0000000010b6bb34 2060 0x0002 6 0x00002aabbc76bb34
2977 * 6 0x0000000010b6b1f4 2060 0x0002 7 0x00002aabbc76b1f4
2978 * 7 0x0000000010b6a8b4 2060 0x0002 8 0x00002aabbc76a8b4
2979 * 8 0x0000000010b69f74 2060 0x0002 9 0x00002aabbc769f74
2980 * 9 0x0000000010b69634 2060 0x0002 10 0x00002aabbc769634
2981 * 10 0x0000000010b68cf4 2060 0x0002 11 0x00002aabbc768cf4
2983 * 249 0x0000000000000000 0 0x0000 250 0x00002aab2b400000
2984 * 250 0x0000000000000000 0 0x0000 251 0x00002aab2b400000
2985 * 251 0x0000000000000000 0 0x0000 252 0x00002aab2b400000
2986 * 252 0x0000000000000000 0 0x0000 253 0x00002aab2b400000
2987 * 253 0x0000000000000000 0 0x0000 254 0x00002aab2b400000
2988 * 254 0x0000000000000000 0 0x0000 255 0x00002aab2b400000
2989 * 255 0x0000000000000000 0 0x0000 32768 0x00002aab2b400000
2992 * qsz 256 last_avail_idx 0 last_used_idx 0
/* CLI registration: "show vhost-user" — handled by
 * show_vhost_user_command_fn; see the doxygen examples above. */
2998 VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
2999 .path = "show vhost-user",
3000 .short_help = "show vhost-user [<interface> [<interface> [..]]] [descriptors]",
3001 .function = show_vhost_user_command_fn,
/* Startup-config handler for the "vhost-user { ... }" stanza.
 * Recognized keys: coalesce-frames <n>, coalesce-time <f>, and
 * dont-dump-memory (exclude guest regions from core dumps). */
3005 static clib_error_t *
3006 vhost_user_config (vlib_main_t * vm, unformat_input_t * input)
3008 vhost_user_main_t *vum = &vhost_user_main;
3010 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3012 if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames))
3014 else if (unformat (input, "coalesce-time %f", &vum->coalesce_time))
3016 else if (unformat (input, "dont-dump-memory"))
3017 vum->dont_dump_vhost_user_memory = 1;
/* Any unrecognized token aborts startup with an error. */
3019 return clib_error_return (0, "unknown input `%U'",
3020 format_unformat_error, input);
3026 /* vhost-user { ... } configuration. */
3027 VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user");
/* Unmap all guest memory regions of every vhost-user interface when
 * "dont-dump-memory" was configured — presumably invoked before a core
 * dump so guest memory is excluded (confirm against the caller). */
3030 vhost_user_unmap_all (void)
3032 vhost_user_main_t *vum = &vhost_user_main;
3033 vhost_user_intf_t *vui;
3035 if (vum->dont_dump_vhost_user_memory)
3037 pool_foreach (vui, vum->vhost_user_interfaces,
3038 unmap_all_mem_regions (vui);
/* CLI handler for "vhost thread <iface> <worker-index> [del]".
 * Pins (or with "del", unpins) RX processing of the given interface to
 * the given worker thread via vhost_user_thread_placement(). */
3043 static clib_error_t *
3044 vhost_thread_command_fn (vlib_main_t * vm,
3045 unformat_input_t * input, vlib_cli_command_t * cmd)
3047 unformat_input_t _line_input, *line_input = &_line_input;
3048 u32 worker_thread_index;
3053 /* Get a line of input. */
3054 if (!unformat_user (input, unformat_line_input, line_input))
/* Mandatory "<interface> <worker-index>" pair; anything else is an error. */
3058 (line_input, "%U %d", unformat_vnet_sw_interface, vnet_get_main (),
3059 &sw_if_index, &worker_thread_index))
3061 unformat_free (line_input);
3062 return clib_error_return (0, "unknown input `%U'",
3063 format_unformat_error, input);
/* Optional trailing "del" removes the forced placement. */
3066 if (unformat (line_input, "del"))
/* Non-zero return from the placement helper is surfaced to the user. */
3070 vhost_user_thread_placement (sw_if_index, worker_thread_index, del)))
3071 return clib_error_return (0, "vhost_user_thread_placement returned %d",
3078 * This command is used to move the RX processing for the given
3079 * interfaces to the provided thread. If the '<em>del</em>' option is used,
3080 * the forced thread assignment is removed and the thread assignment is
3081 * reassigned automatically. Use '<em>show vhost-user <interface></em>'
3082 * to see the thread assignment.
3085 * Example of how to move the RX processing for a given interface to a given thread:
3086 * @cliexcmd{vhost thread VirtualEthernet0/0/0 1}
3087 * Example of how to remove the forced thread assignment for a given interface:
3088 * @cliexcmd{vhost thread VirtualEthernet0/0/0 1 del}
/* CLI registration: "vhost thread" — forces RX placement to a worker. */
3091 VLIB_CLI_COMMAND (vhost_user_thread_command, static) = {
3092 .path = "vhost thread",
3093 .short_help = "vhost thread <iface> <worker-index> [del]",
3094 .function = vhost_thread_command_fn,
3099 * fd.io coding-style-patch-verification: ON
3102 * eval: (c-set-style "gnu")