X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fdevices%2Fvirtio%2Fvhost-user.c;h=5460f10b74e1072347ca8a6019a2ee46ae958035;hb=fe7d4a2;hp=4f4f038a38b50325b4f12caee409d72200243fe1;hpb=53129423a6f4e43b39f7547424fbaea99e56f7e2;p=vpp.git diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 4f4f038a38b..5460f10b74e 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -49,14 +49,14 @@ */ -#define VHOST_USER_DEBUG_SOCKET 0 #define VHOST_DEBUG_VQ 0 -#if VHOST_USER_DEBUG_SOCKET == 1 -#define DBG_SOCK(args...) clib_warning(args); -#else -#define DBG_SOCK(args...) -#endif +#define DBG_SOCK(args...) \ + { \ + vhost_user_main_t *_vum = &vhost_user_main; \ + if (_vum->debug) \ + clib_warning(args); \ + }; #if VHOST_DEBUG_VQ == 1 #define DBG_VQ(args...) clib_warning(args); @@ -86,11 +86,22 @@ * The value 64 was obtained by testing (48 and 128 were not as good). */ #define VHOST_USER_RX_COPY_THRESHOLD 64 +/* + * On the transmit side, we keep processing the buffers from vlib in the while + * loop and prepare the copy order to be executed later. However, the static + * array which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N + * entries. In order to not corrupt memory, we have to do the copy when the + * static array reaches the copy threshold. We subtract 40 in case the code + * goes into the inner loop for a maximum of 64k frames which may require + * more array entries. + */ +#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 40) -#define UNIX_GET_FD(unixfd_idx) \ - (unixfd_idx != ~0) ? \ - pool_elt_at_index (unix_main.file_pool, \ - unixfd_idx)->file_descriptor : -1; +#define UNIX_GET_FD(unixfd_idx) ({ \ + typeof(unixfd_idx) __unixfd_idx = (unixfd_idx); \ + (__unixfd_idx != ~0) ? \ + pool_elt_at_index (file_main.file_pool, \ + __unixfd_idx)->file_descriptor : -1; }) #define foreach_virtio_trace_flags \ _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \ @@ -246,7 +257,76 @@ map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint) return (void *) (vui->region_mmap_addr[i] + addr - vui->regions[i].guest_phys_addr); } +#elif __aarch64__ && __ARM_NEON + uint64x2_t al, ah, rl, rh, r; + uint32_t u32 = 0; + + al = vdupq_n_u64 (addr + 1); + ah = vdupq_n_u64 (addr); + + /*First Iteration */ + rl = vld1q_u64 (&vui->region_guest_addr_lo[0]); + rl = vcgtq_u64 (al, rl); + rh = vld1q_u64 (&vui->region_guest_addr_hi[0]); + rh = vcgtq_u64 (rh, ah); + r = vandq_u64 (rl, rh); + u32 |= (vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1); + u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 1); + + if (u32) + { + i = __builtin_ctzll (u32); + goto vhost_map_guest_mem_done; + } + + /*Second Iteration */ + rl = vld1q_u64 (&vui->region_guest_addr_lo[2]); + rl = vcgtq_u64 (al, rl); + rh = vld1q_u64 (&vui->region_guest_addr_hi[2]); + rh = vcgtq_u64 (rh, ah); + r = vandq_u64 (rl, rh); + u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 2); + u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 3); + + if (u32) + { + i = __builtin_ctzll (u32); + goto vhost_map_guest_mem_done; + } + + /*Third Iteration */ + rl = vld1q_u64 (&vui->region_guest_addr_lo[4]); + rl = vcgtq_u64 (al, rl); + rh = vld1q_u64 (&vui->region_guest_addr_hi[4]); + rh = vcgtq_u64 (rh, ah); + r = vandq_u64 (rl, rh); + u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 4); + u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 5); + if (u32) + { + i = __builtin_ctzll (u32); + goto vhost_map_guest_mem_done; + } + + /*Fourth Iteration */ + rl = vld1q_u64 (&vui->region_guest_addr_lo[6]); + rl = vcgtq_u64 (al, rl); + rh = vld1q_u64 (&vui->region_guest_addr_hi[6]); + rh = vcgtq_u64 (rh, ah); + r = vandq_u64 (rl, rh); + u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 6); + u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 7); + + i = __builtin_ctzll (u32 | (1 << VHOST_MEMORY_MAX_NREGIONS)); + +vhost_map_guest_mem_done: + if (i < vui->nregions) + { + *hint = i; + return (void *) (vui->region_mmap_addr[i] + addr - + vui->regions[i].guest_phys_addr); + } #else for (i = 0; i < vui->nregions; i++) { @@ -296,14 +376,14 @@ unmap_all_mem_regions (vhost_user_intf_t * vui) int i, r; for (i = 0; i < vui->nregions; i++) { - if (vui->region_mmap_addr[i] != (void *) -1) + if (vui->region_mmap_addr[i] != MAP_FAILED) { long page_sz = get_huge_page_size (vui->region_mmap_fd[i]); ssize_t map_sz = (vui->regions[i].memory_size + vui->regions[i].mmap_offset + - page_sz) & ~(page_sz - 1); + page_sz - 1) & ~(page_sz - 1); r = munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset, @@ -313,7 +393,7 @@ unmap_all_mem_regions (vhost_user_intf_t * vui) ("unmap memory region %d addr 0x%lx len 0x%lx page_sz 0x%x", i, vui->region_mmap_addr[i], map_sz, page_sz); - vui->region_mmap_addr[i] = (void *) -1; + vui->region_mmap_addr[i] = MAP_FAILED; if (r == -1) { @@ -362,140 +442,71 @@ vhost_user_tx_thread_placement (vhost_user_intf_t * vui) } } +/** + * @brief Unassign existing interface/queue to thread mappings and re-assign + * new interface/queue to thread mappings + */ static void vhost_user_rx_thread_placement () { vhost_user_main_t *vum = &vhost_user_main; vhost_user_intf_t *vui; - vhost_cpu_t *vhc; - u32 *workers = 0; - u32 thread_index; - vlib_main_t *vm; - - //Let's list all workers cpu indexes - u32 i; - for (i = vum->input_cpu_first_index; - i < vum->input_cpu_first_index + vum->input_cpu_count; i++) - { - vlib_node_set_state (vlib_mains[i], vhost_user_input_node.index, - VLIB_NODE_STATE_DISABLED); - vec_add1 (workers, i); - } - - vec_foreach (vhc, vum->cpus) - { - vec_reset_length (vhc->rx_queues); - } + vhost_user_vring_t *txvq; + vnet_main_t *vnm = vnet_get_main (); + u32 qid; + int rv; + u16 *queue; - i = 0; - vhost_iface_and_queue_t iaq; + // Scrap all existing mappings for all interfaces/queues /* *INDENT-OFF* */ pool_foreach (vui, vum->vhost_user_interfaces, { - u32 *vui_workers = vec_len (vui->workers) ? vui->workers : workers; - u32 qid; - for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++) + vec_foreach (queue, vui->rx_queues) { - vhost_user_vring_t *txvq = - &vui->vrings[VHOST_VRING_IDX_TX (qid)]; - if (!txvq->started) - continue; - - i %= vec_len (vui_workers); - thread_index = vui_workers[i]; - i++; - vhc = &vum->cpus[thread_index]; - txvq->interrupt_thread_index = thread_index; - - iaq.qid = qid; - iaq.vhost_iface_index = vui - vum->vhost_user_interfaces; - vec_add1 (vhc->rx_queues, iaq); + rv = vnet_hw_interface_unassign_rx_thread (vnm, vui->hw_if_index, + *queue); + if (rv) + clib_warning ("Warning: unable to unassign interface %d, " + "queue %d: rc=%d", vui->hw_if_index, *queue, rv); } + vec_reset_length (vui->rx_queues); }); /* *INDENT-ON* */ - vec_foreach (vhc, vum->cpus) - { - vhost_iface_and_queue_t *vhiq; - u8 mode = VHOST_USER_INTERRUPT_MODE; - - vec_foreach (vhiq, vhc->rx_queues) - { - vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; - if (vui->operation_mode == VHOST_USER_POLLING_MODE) + // Create the rx_queues for all interfaces + /* *INDENT-OFF* */ + pool_foreach (vui, vum->vhost_user_interfaces, { + for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++) { - /* At least one interface is polling, cpu is set to polling */ - mode = VHOST_USER_POLLING_MODE; - break; + txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; + if (txvq->started) + { + if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN) + /* Set polling as the default */ + txvq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING; + vec_add1 (vui->rx_queues, qid); + } } - } - vhc->operation_mode = mode; - } - - for (thread_index = vum->input_cpu_first_index; - thread_index < vum->input_cpu_first_index + vum->input_cpu_count; - thread_index++) - { - vlib_node_state_t state = VLIB_NODE_STATE_POLLING; + }); + /* *INDENT-ON* */ - vhc = &vum->cpus[thread_index]; - vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main; - switch (vhc->operation_mode) + // Assign new mappings for all interfaces/queues + /* *INDENT-OFF* */ + pool_foreach (vui, vum->vhost_user_interfaces, { + vnet_hw_interface_set_input_node (vnm, vui->hw_if_index, + vhost_user_input_node.index); + vec_foreach (queue, vui->rx_queues) { - case VHOST_USER_INTERRUPT_MODE: - state = VLIB_NODE_STATE_INTERRUPT; - break; - case VHOST_USER_POLLING_MODE: - state = VLIB_NODE_STATE_POLLING; - break; - default: - clib_warning ("BUG: bad operation mode %d", vhc->operation_mode); - break; + vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, *queue, + ~0); + txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)]; + rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, *queue, + txvq->mode); + if (rv) + clib_warning ("Warning: unable to set rx mode for interface %d, " + "queue %d: rc=%d", vui->hw_if_index, *queue, rv); } - vlib_node_set_state (vm, vhost_user_input_node.index, state); - } - - vec_free (workers); -} - -static int -vhost_user_thread_placement (u32 sw_if_index, u32 worker_thread_index, u8 del) -{ - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui; - vnet_hw_interface_t *hw; - - if (worker_thread_index < vum->input_cpu_first_index || - worker_thread_index >= - vum->input_cpu_first_index + vum->input_cpu_count) - return -1; - - if (!(hw = vnet_get_sup_hw_interface (vnet_get_main (), sw_if_index))) - return -2; - - vui = pool_elt_at_index (vum->vhost_user_interfaces, hw->dev_instance); - u32 found = ~0, *w; - vec_foreach (w, vui->workers) - { - if (*w == worker_thread_index) - { - found = w - vui->workers; - break; - } - } - - if (del) - { - if (found == ~0) - return -3; - vec_del1 (vui->workers, found); - } - else if (found == ~0) - { - vec_add1 (vui->workers, worker_thread_index); - } - - vhost_user_rx_thread_placement (); - return 0; + }); + /* *INDENT-ON* */ } /** @brief Returns whether at least one TX and one RX vring are enabled */ @@ -532,58 +543,32 @@ vhost_user_update_iface_state (vhost_user_intf_t * vui) static void vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq) { - vhost_user_main_t *vum = &vhost_user_main; - vhost_cpu_t *vhc; - u32 thread_index; - vlib_main_t *vm; - u32 ifq2, qid; - vhost_user_vring_t *txvq; + u32 qid; + vnet_main_t *vnm = vnet_get_main (); qid = ifq & 0xff; - if ((qid % 2) == 0) - /* Only care about the odd number virtqueue which is TX */ + if ((qid & 1) == 0) + /* Only care about the odd number, or TX, virtqueue */ return; if (vhost_user_intf_ready (vui)) - { - txvq = &vui->vrings[qid]; - thread_index = txvq->interrupt_thread_index; - vhc = &vum->cpus[thread_index]; - if (vhc->operation_mode == VHOST_USER_INTERRUPT_MODE) - { - vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main; - /* - * Convert virtqueue number in the lower byte to vring - * queue index for the input node process. Top bytes contain - * the interface, lower byte contains the queue index. - */ - ifq2 = ((ifq >> 8) << 8) | qid / 2; - vhc->pending_input_bitmap = - clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1); - vlib_node_set_interrupt_pending (vm, vhost_user_input_node.index); - } - } + // qid >> 1 is to convert virtqueue number to vring queue index + vnet_device_input_set_interrupt_pending (vnm, vui->hw_if_index, qid >> 1); } static clib_error_t * -vhost_user_callfd_read_ready (unix_file_t * uf) +vhost_user_callfd_read_ready (clib_file_t * uf) { __attribute__ ((unused)) int n; u8 buff[8]; - vhost_user_intf_t *vui = - pool_elt_at_index (vhost_user_main.vhost_user_interfaces, - uf->private_data >> 8); n = read (uf->file_descriptor, ((char *) &buff), 8); - DBG_SOCK ("if %d CALL queue %d", uf->private_data >> 8, - uf->private_data & 0xff); - vhost_user_set_interrupt_pending (vui, uf->private_data); return 0; } static clib_error_t * -vhost_user_kickfd_read_ready (unix_file_t * uf) +vhost_user_kickfd_read_ready (clib_file_t * uf) { __attribute__ ((unused)) int n; u8 buff[8]; @@ -664,16 +649,16 @@ vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid) vhost_user_vring_t *vring = &vui->vrings[qid]; if (vring->kickfd_idx != ~0) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vring->kickfd_idx); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vring->kickfd_idx = ~0; } if (vring->callfd_idx != ~0) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vring->callfd_idx); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vring->callfd_idx = ~0; } if (vring->errfd != -1) @@ -692,10 +677,10 @@ vhost_user_if_disconnect (vhost_user_intf_t * vui) vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); - if (vui->unix_file_index != ~0) + if (vui->clib_file_index != ~0) { - unix_file_del (&unix_main, unix_main.file_pool + vui->unix_file_index); - vui->unix_file_index = ~0; + clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index); + vui->clib_file_index = ~0; } vui->is_up = 0; @@ -719,7 +704,7 @@ vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui, } if (is_host_address) { - addr = (u64) map_user_mem (vui, (uword) addr); + addr = pointer_to_uword (map_user_mem (vui, (uword) addr)); } if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size)) { @@ -749,7 +734,7 @@ vhost_user_log_dirty_pages (vhost_user_intf_t * vui, u64 addr, u64 len) } static clib_error_t * -vhost_user_socket_read (unix_file_t * uf) +vhost_user_socket_read (clib_file_t * uf) { int n, i; int fd, number_of_fds = 0; @@ -761,7 +746,7 @@ vhost_user_socket_read (unix_file_t * uf) vhost_user_intf_t *vui; struct cmsghdr *cmsg; u8 q; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; vnet_main_t *vnm = vnet_get_main (); vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data); @@ -914,10 +899,10 @@ vhost_user_socket_read (unix_file_t * uf) long page_sz = get_huge_page_size (fds[i]); - /* align size to 2M page */ + /* align size to page */ ssize_t map_sz = (vui->regions[i].memory_size + vui->regions[i].mmap_offset + - page_sz) & ~(page_sz - 1); + page_sz - 1) & ~(page_sz - 1); vui->region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE, MAP_SHARED, fds[i], 0); @@ -937,8 +922,9 @@ vhost_user_socket_read (unix_file_t * uf) } vui->region_mmap_addr[i] += vui->regions[i].mmap_offset; vui->region_mmap_fd[i] = fds[i]; + + vui->nregions++; } - vui->nregions = msg.memory.nregions; break; case VHOST_USER_SET_VRING_NUM: @@ -949,7 +935,7 @@ vhost_user_socket_read (unix_file_t * uf) (msg.state.num == 0) || /* it cannot be zero */ ((msg.state.num - 1) & msg.state.num)) /* must be power of 2 */ goto close_socket; - vui->vrings[msg.state.index].qsz = msg.state.num; + vui->vrings[msg.state.index].qsz_mask = msg.state.num - 1; break; case VHOST_USER_SET_VRING_ADDR: @@ -1001,12 +987,8 @@ vhost_user_socket_read (unix_file_t * uf) vui->vrings[msg.state.index].last_avail_idx = vui->vrings[msg.state.index].used->idx; - if (vui->operation_mode == VHOST_USER_POLLING_MODE) - /* tell driver that we don't want interrupts */ - vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY; - else - /* tell driver that we want interrupts */ - vui->vrings[msg.state.index].used->flags = 0; + /* tell driver that we don't want interrupts */ + vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY; break; case VHOST_USER_SET_OWNER: @@ -1018,7 +1000,7 @@ vhost_user_socket_read (unix_file_t * uf) break; case VHOST_USER_SET_VRING_CALL: - DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL u64 %d", + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL %d", vui->hw_if_index, msg.u64); q = (u8) (msg.u64 & 0xFF); @@ -1026,13 +1008,13 @@ vhost_user_socket_read (unix_file_t * uf) /* if there is old fd, delete and close it */ if (vui->vrings[q].callfd_idx != ~0) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vui->vrings[q].callfd_idx); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vui->vrings[q].callfd_idx = ~0; } - if (!(msg.u64 & 0x100)) + if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK)) { if (number_of_fds != 1) { @@ -1044,27 +1026,27 @@ vhost_user_socket_read (unix_file_t * uf) template.file_descriptor = fds[0]; template.private_data = ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q; - vui->vrings[q].callfd_idx = unix_file_add (&unix_main, &template); + vui->vrings[q].callfd_idx = clib_file_add (&file_main, &template); } else vui->vrings[q].callfd_idx = ~0; break; case VHOST_USER_SET_VRING_KICK: - DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK u64 %d", + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK %d", vui->hw_if_index, msg.u64); q = (u8) (msg.u64 & 0xFF); if (vui->vrings[q].kickfd_idx != ~0) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vui->vrings[q].kickfd_idx); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vui->vrings[q].kickfd_idx = ~0; } - if (!(msg.u64 & 0x100)) + if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK)) { if (number_of_fds != 1) { @@ -1077,7 +1059,7 @@ vhost_user_socket_read (unix_file_t * uf) template.private_data = (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) + q; - vui->vrings[q].kickfd_idx = unix_file_add (&unix_main, &template); + vui->vrings[q].kickfd_idx = clib_file_add (&file_main, &template); } else { @@ -1089,7 +1071,7 @@ vhost_user_socket_read (unix_file_t * uf) break; case VHOST_USER_SET_VRING_ERR: - DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR u64 %d", + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR %d", vui->hw_if_index, msg.u64); q = (u8) (msg.u64 & 0xFF); @@ -1097,7 +1079,7 @@ vhost_user_socket_read (unix_file_t * uf) if (vui->vrings[q].errfd != -1) close (vui->vrings[q].errfd); - if (!(msg.u64 & 0x100)) + if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK)) { if (number_of_fds != 1) goto close_socket; @@ -1117,9 +1099,6 @@ vhost_user_socket_read (unix_file_t * uf) break; case VHOST_USER_GET_VRING_BASE: - DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d", - vui->hw_if_index, msg.state.index, msg.state.num); - if (msg.state.index >= VHOST_VRING_MAX_N) { DBG_SOCK ("invalid vring index VHOST_USER_GET_VRING_BASE:" @@ -1137,6 +1116,8 @@ vhost_user_socket_read (unix_file_t * uf) /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. */ vhost_user_vring_close (vui, msg.state.index); + DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d", + vui->hw_if_index, msg.state.index, msg.state.num); break; case VHOST_USER_NONE: @@ -1165,10 +1146,10 @@ vhost_user_socket_read (unix_file_t * uf) } fd = fds[0]; - /* align size to 2M page */ + /* align size to page */ long page_sz = get_huge_page_size (fd); ssize_t map_sz = - (msg.log.size + msg.log.offset + page_sz) & ~(page_sz - 1); + (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1); vui->log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); @@ -1198,28 +1179,30 @@ vhost_user_socket_read (unix_file_t * uf) break; case VHOST_USER_GET_PROTOCOL_FEATURES: - DBG_SOCK ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES", - vui->hw_if_index); - msg.flags |= 4; msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | (1 << VHOST_USER_PROTOCOL_F_MQ); msg.size = sizeof (msg.u64); + DBG_SOCK + ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES - reply 0x%016llx", + vui->hw_if_index, msg.u64); break; case VHOST_USER_SET_PROTOCOL_FEATURES: - DBG_SOCK ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%lx", - vui->hw_if_index, msg.u64); + DBG_SOCK + ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%016llx", + vui->hw_if_index, msg.u64); vui->protocol_features = msg.u64; break; case VHOST_USER_GET_QUEUE_NUM: - DBG_SOCK ("if %d msg VHOST_USER_GET_QUEUE_NUM", vui->hw_if_index); msg.flags |= 4; msg.u64 = VHOST_VRING_MAX_N; msg.size = sizeof (msg.u64); + DBG_SOCK ("if %d msg VHOST_USER_GET_QUEUE_NUM - reply %d", + vui->hw_if_index, msg.u64); break; case VHOST_USER_SET_VRING_ENABLE: @@ -1266,7 +1249,7 @@ close_socket: } static clib_error_t * -vhost_user_socket_error (unix_file_t * uf) +vhost_user_socket_error (clib_file_t * uf) { vlib_main_t *vm = vlib_get_main (); vhost_user_main_t *vum = &vhost_user_main; @@ -1282,11 +1265,11 @@ vhost_user_socket_error (unix_file_t * uf) } static clib_error_t * -vhost_user_socksvr_accept_ready (unix_file_t * uf) +vhost_user_socksvr_accept_ready (clib_file_t * uf) { int client_fd, client_len; struct sockaddr_un client; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; vhost_user_main_t *vum = &vhost_user_main; vhost_user_intf_t *vui; @@ -1300,12 +1283,20 @@ vhost_user_socksvr_accept_ready (unix_file_t * uf) if (client_fd < 0) return clib_error_return_unix (0, "accept"); - DBG_SOCK ("New client socket for vhost interface %d", vui->sw_if_index); + if (vui->clib_file_index != ~0) + { + DBG_SOCK ("Close client socket for vhost interface %d, fd %d", + vui->sw_if_index, UNIX_GET_FD (vui->clib_file_index)); + clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index); + } + + DBG_SOCK ("New client socket for vhost interface %d, fd %d", + vui->sw_if_index, client_fd); template.read_function = vhost_user_socket_read; template.error_function = vhost_user_socket_error; template.file_descriptor = client_fd; template.private_data = vui - vhost_user_main.vhost_user_interfaces; - vui->unix_file_index = unix_file_add (&unix_main, &template); + vui->clib_file_index = clib_file_add (&file_main, &template); return 0; } @@ -1315,8 +1306,6 @@ vhost_user_init (vlib_main_t * vm) clib_error_t *error; vhost_user_main_t *vum = &vhost_user_main; vlib_thread_main_t *tm = vlib_get_thread_main (); - vlib_thread_registration_t *tr; - uword *p; error = vlib_call_init_function (vm, ip4_init); if (error) @@ -1335,20 +1324,10 @@ vhost_user_init (vlib_main_t * vm) cpu->rx_buffers_len = 0; } - /* find out which cpus will be used for input */ - vum->input_cpu_first_index = 0; - vum->input_cpu_count = 1; - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - vum->input_cpu_first_index = tr->first_index; - vum->input_cpu_count = tr->count; - } - vum->random = random_default_seed (); + mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword)); + return 0; } @@ -1367,7 +1346,7 @@ format_vhost_trace (u8 * s, va_list * va) vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, vui->sw_if_index); - uword indent = format_get_indent (s); + u32 indent = format_get_indent (s); s = format (s, "%U %U queue %d\n", format_white_space, indent, format_vnet_sw_interface_name, vnm, sw, t->qid); @@ -1398,9 +1377,8 @@ vhost_user_rx_trace (vhost_trace_t * t, vlib_buffer_t * b, vhost_user_vring_t * txvq) { vhost_user_main_t *vum = &vhost_user_main; - u32 qsz_mask = txvq->qsz - 1; u32 last_avail_idx = txvq->last_avail_idx; - u32 desc_current = txvq->avail->ring[last_avail_idx & qsz_mask]; + u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask]; vring_desc_t *hdr_desc = 0; virtio_net_hdr_mrg_rxbuf_t *hdr; u32 hint = 0; @@ -1445,9 +1423,16 @@ vhost_user_send_call (vlib_main_t * vm, vhost_user_vring_t * vq) vhost_user_main_t *vum = &vhost_user_main; u64 x = 1; int fd = UNIX_GET_FD (vq->callfd_idx); - int rv __attribute__ ((unused)); - /* TODO: pay attention to rv */ + int rv; + rv = write (fd, &x, sizeof (x)); + if (rv <= 0) + { + clib_unix_warning + ("Error: Could not write to unix socket for callfd %d", fd); + return; + } + vq->n_since_last_int = 0; vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time; } @@ -1512,19 +1497,19 @@ vhost_user_rx_discard_packet (vlib_main_t * vm, */ u32 discarded_packets = 0; u32 avail_idx = txvq->avail->idx; - u16 qsz_mask = txvq->qsz - 1; while (discarded_packets != discard_max) { if (avail_idx == txvq->last_avail_idx) goto out; u16 desc_chain_head = - txvq->avail->ring[txvq->last_avail_idx & qsz_mask]; + txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask]; txvq->last_avail_idx++; - txvq->used->ring[txvq->last_used_idx & qsz_mask].id = desc_chain_head; - txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0; + txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id = + desc_chain_head; + txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0; vhost_user_log_dirty_ring (vui, txvq, - ring[txvq->last_used_idx & qsz_mask]); + ring[txvq->last_used_idx & txvq->qsz_mask]); txvq->last_used_idx++; discarded_packets++; } @@ -1555,13 +1540,15 @@ vhost_user_input_rewind_buffers (vlib_main_t * vm, b_current->current_length = 0; b_current->flags = 0; } + cpu->rx_buffers_len++; } static u32 vhost_user_if_input (vlib_main_t * vm, vhost_user_main_t * vum, vhost_user_intf_t * vui, - u16 qid, vlib_node_runtime_t * node) + u16 qid, vlib_node_runtime_t * node, + vnet_hw_interface_rx_mode mode) { vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; u16 n_rx_packets = 0; @@ -1570,7 +1557,6 @@ vhost_user_if_input (vlib_main_t * vm, u32 n_left_to_next, *to_next; u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; u32 n_trace = vlib_get_trace_count (vm, node); - u16 qsz_mask; u32 map_hint = 0; u16 thread_index = vlib_get_thread_index (); u16 copy_len = 0; @@ -1587,6 +1573,26 @@ vhost_user_if_input (vlib_main_t * vm, vhost_user_send_call (vm, rxvq); } + /* + * For adaptive mode, it is optimized to reduce interrupts. + * If the scheduler switches the input node to polling due + * to burst of traffic, we tell the driver no interrupt. + * When the traffic subsides, the scheduler switches the node back to + * interrupt mode. We must tell the driver we want interrupt. + */ + if (PREDICT_FALSE (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) + { + if ((node->flags & + VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) || + !(node->flags & + VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) + /* Tell driver we want notification */ + txvq->used->flags = 0; + else + /* Tell driver we don't want notification */ + txvq->used->flags = VRING_USED_F_NO_NOTIFY; + } + if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE)) return 0; @@ -1610,7 +1616,7 @@ vhost_user_if_input (vlib_main_t * vm, return 0; } - if (PREDICT_FALSE (n_left == txvq->qsz)) + if (PREDICT_FALSE (n_left == (txvq->qsz_mask + 1))) { /* * Informational error logging when VPP is not @@ -1620,8 +1626,6 @@ vhost_user_if_input (vlib_main_t * vm, VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1); } - qsz_mask = txvq->qsz - 1; - if (n_left > VLIB_FRAME_SIZE) n_left = VLIB_FRAME_SIZE; @@ -1630,8 +1634,10 @@ vhost_user_if_input (vlib_main_t * vm, * per packet. In case packets are bigger, we will just yeld at some point * in the loop and come back later. This is not an issue as for big packet, * processing cost really comes from the memory copy. + * The assumption is that big packets will fit in 40 buffers. */ - if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1)) + if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1 || + vum->cpus[thread_index].rx_buffers_len < 40)) { u32 curr_len = vum->cpus[thread_index].rx_buffers_len; vum->cpus[thread_index].rx_buffers_len += @@ -1686,7 +1692,8 @@ vhost_user_if_input (vlib_main_t * vm, break; } - desc_current = txvq->avail->ring[txvq->last_avail_idx & qsz_mask]; + desc_current = + txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask]; vum->cpus[thread_index].rx_buffers_len--; bi_current = (vum->cpus[thread_index].rx_buffers) [vum->cpus[thread_index].rx_buffers_len]; @@ -1702,10 +1709,12 @@ vhost_user_if_input (vlib_main_t * vm, rx_buffers_len - 1], LOAD); /* Just preset the used descriptor id and length for later */ - txvq->used->ring[txvq->last_used_idx & qsz_mask].id = desc_current; - txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0; + txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id = + desc_current; + txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0; vhost_user_log_dirty_ring (vui, txvq, - ring[txvq->last_used_idx & qsz_mask]); + ring[txvq->last_used_idx & + txvq->qsz_mask]); /* The buffer should already be initialized */ b_head->total_length_not_including_first_buffer = 0; @@ -1733,7 +1742,8 @@ vhost_user_if_input (vlib_main_t * vm, desc_current = 0; if (PREDICT_FALSE (desc_table == 0)) { - //FIXME: Handle error by shutdown the queue + vlib_error_count (vm, node->node_index, + VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); goto out; } } @@ -1811,7 +1821,8 @@ vhost_user_if_input (vlib_main_t * vm, desc_table[desc_current].len - desc_data_offset; cpy->len = VLIB_BUFFER_DATA_SIZE - b_current->current_length; cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len; - cpy->dst = (uword) vlib_buffer_get_current (b_current); + cpy->dst = (uword) (vlib_buffer_get_current (b_current) + + b_current->current_length); cpy->src = desc_table[desc_current].addr + desc_data_offset; desc_data_offset += cpy->len; @@ -1865,13 +1876,8 @@ vhost_user_if_input (vlib_main_t * vm, (vhost_user_input_copy (vui, vum->cpus[thread_index].copy, copy_len, &map_hint))) { - clib_warning - ("Memory mapping error on interface hw_if_index=%d " - "(Shutting down - Switch interface down and up to restart)", - vui->hw_if_index); - vui->admin_up = 0; - copy_len = 0; - break; + vlib_error_count (vm, node->node_index, + VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); } copy_len = 0; @@ -1890,10 +1896,8 @@ vhost_user_if_input (vlib_main_t * vm, (vhost_user_input_copy (vui, vum->cpus[thread_index].copy, copy_len, &map_hint))) { - clib_warning ("Memory mapping error on interface hw_if_index=%d " - "(Shutting down - Switch interface down and up to restart)", - vui->hw_if_index); - vui->admin_up = 0; + vlib_error_count (vm, node->node_index, + VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); } /* give buffers back to driver */ @@ -1928,34 +1932,23 @@ vhost_user_input (vlib_main_t * vm, { vhost_user_main_t *vum = &vhost_user_main; uword n_rx_packets = 0; - u32 thread_index = vlib_get_thread_index (); - vhost_iface_and_queue_t *vhiq; vhost_user_intf_t *vui; - vhost_cpu_t *vhc; + vnet_device_input_runtime_t *rt = + (vnet_device_input_runtime_t *) node->runtime_data; + vnet_device_and_queue_t *dq; - vhc = &vum->cpus[thread_index]; - if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE)) - { - vec_foreach (vhiq, vum->cpus[thread_index].rx_queues) + vec_foreach (dq, rt->devices_and_queues) + { + if (clib_smp_swap (&dq->interrupt_pending, 0) || + (node->state == VLIB_NODE_STATE_POLLING)) { - vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; - n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node); + vui = + pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance); + n_rx_packets = vhost_user_if_input (vm, vum, vui, dq->queue_id, node, + dq->mode); } - } - else - { - int i; - - /* *INDENT-OFF* */ - clib_bitmap_foreach (i, vhc->pending_input_bitmap, ({ - int qid = i & 0xff; + } - clib_bitmap_set (vhc->pending_input_bitmap, i, 0); - vui = pool_elt_at_index (vum->vhost_user_interfaces, i >> 8); - n_rx_packets += vhost_user_if_input (vm, vum, vui, qid, node); - })); - /* *INDENT-ON* */ - } return n_rx_packets; } @@ -1986,9 +1979,8 @@ vhost_user_tx_trace (vhost_trace_t * t, vlib_buffer_t * b, vhost_user_vring_t * rxvq) { vhost_user_main_t *vum = &vhost_user_main; - u32 qsz_mask = rxvq->qsz - 1; u32 last_avail_idx = rxvq->last_avail_idx; - u32 desc_current = rxvq->avail->ring[last_avail_idx & qsz_mask]; + u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask]; vring_desc_t *hdr_desc = 0; u32 hint = 0; @@ -2076,7 +2068,6 @@ vhost_user_tx (vlib_main_t * vm, pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance); u32 qid = ~0; vhost_user_vring_t *rxvq; - u16 qsz_mask; u8 error; u32 thread_index = vlib_get_thread_index (); u32 map_hint = 0; @@ -2098,13 +2089,11 @@ vhost_user_tx (vlib_main_t * vm, qid = VHOST_VRING_IDX_RX (*vec_elt_at_index - (vui->per_cpu_tx_qid, vlib_get_thread_index ())); + (vui->per_cpu_tx_qid, thread_index)); rxvq = &vui->vrings[qid]; if (PREDICT_FALSE (vui->use_tx_spinlock)) vhost_user_vring_lock (vui, qid); - qsz_mask = rxvq->qsz - 1; /* qsz is always power of 2 */ - retry: error = VHOST_USER_TX_FUNC_ERROR_NONE; tx_headers_len = 0; @@ -2140,7 +2129,7 @@ retry: desc_table = rxvq->desc; desc_head = desc_index = - rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask]; + rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask]; /* Go deeper in case of indirect descriptor * I don't know of any driver providing indirect for RX. */ @@ -2205,13 +2194,13 @@ retry: &vum->cpus[thread_index].tx_headers[tx_headers_len - 1]; //Move from available to used buffer - rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id = + rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head; - rxvq->used->ring[rxvq->last_used_idx & qsz_mask].len = + rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len; vhost_user_log_dirty_ring (vui, rxvq, ring[rxvq->last_used_idx & - qsz_mask]); + rxvq->qsz_mask]); rxvq->last_avail_idx++; rxvq->last_used_idx++; @@ -2230,7 +2219,7 @@ retry: desc_table = rxvq->desc; desc_head = desc_index = - rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask]; + rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask]; if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT)) { @@ -2298,10 +2287,10 @@ retry: } //Move from available to used ring - rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id = desc_head; - rxvq->used->ring[rxvq->last_used_idx & qsz_mask].len = desc_len; + rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head; + rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len; vhost_user_log_dirty_ring (vui, rxvq, - ring[rxvq->last_used_idx & qsz_mask]); + ring[rxvq->last_used_idx & rxvq->qsz_mask]); rxvq->last_avail_idx++; rxvq->last_used_idx++; @@ -2312,6 +2301,27 @@ retry: } n_left--; //At the end for error counting when 'goto done' is invoked + + /* + * Do the copy periodically to prevent + * vum->cpus[thread_index].copy array overflow and corrupt memory + */ + if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD)) + { + if (PREDICT_FALSE + (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy, + copy_len, &map_hint))) + { + vlib_error_count (vm, node->node_index, + VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); + } + copy_len = 0; + + /* give buffers back to driver */ + CLIB_MEMORY_BARRIER (); + rxvq->used->idx = rxvq->last_used_idx; + vhost_user_log_dirty_ring (vui, rxvq, idx); + } buffers++; } @@ -2321,10 +2331,8 @@ done: (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy, copy_len, &map_hint))) { - clib_warning ("Memory mapping error on interface hw_if_index=%d " - "(Shutting down - Switch interface down and up to restart)", - vui->hw_if_index); - vui->admin_up = 0; + vlib_error_count (vm, node->node_index, + VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); } CLIB_MEMORY_BARRIER (); @@ -2368,13 +2376,173 @@ done3: vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + VNET_INTERFACE_COUNTER_DROP, - vlib_get_thread_index (), vui->sw_if_index, n_left); + thread_index, vui->sw_if_index, n_left); } vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); return frame->n_vectors; } +static uword +vhost_user_send_interrupt_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + vhost_user_intf_t *vui; + f64 timeout = 3153600000.0 /* 100 years */ ; + uword event_type, *event_data = 0; + vhost_user_main_t *vum = &vhost_user_main; + u16 *queue; + f64 now, poll_time_remaining; + f64 next_timeout; + u8 stop_timer = 0; + + while (1) + { + poll_time_remaining = + vlib_process_wait_for_event_or_clock (vm, timeout); + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + + /* + * Use the remaining timeout if it is less than coalesce time to avoid + * resetting the existing timer in the middle of expiration + */ + timeout = poll_time_remaining; + if (vlib_process_suspend_time_is_zero (timeout) || + (timeout > vum->coalesce_time)) + timeout = vum->coalesce_time; + + now = vlib_time_now (vm); + switch (event_type) + { + case VHOST_USER_EVENT_STOP_TIMER: + stop_timer = 1; + break; + + case VHOST_USER_EVENT_START_TIMER: + stop_timer = 0; + if (!vlib_process_suspend_time_is_zero (poll_time_remaining)) + break; + /* fall through */ + + case ~0: + /* *INDENT-OFF* */ + pool_foreach (vui, vum->vhost_user_interfaces, { + next_timeout = timeout; + vec_foreach (queue, vui->rx_queues) + { + vhost_user_vring_t *rxvq = + &vui->vrings[VHOST_VRING_IDX_RX (*queue)]; + vhost_user_vring_t *txvq = + &vui->vrings[VHOST_VRING_IDX_TX (*queue)]; + + if (txvq->n_since_last_int) + { + if (now >= txvq->int_deadline) + vhost_user_send_call (vm, txvq); + else + next_timeout = txvq->int_deadline - now; + } + + if (rxvq->n_since_last_int) + { + if (now >= rxvq->int_deadline) + vhost_user_send_call (vm, rxvq); + else + next_timeout = rxvq->int_deadline - now; + } + + if ((next_timeout < timeout) && (next_timeout > 0.0)) + timeout = next_timeout; + } + }); + /* *INDENT-ON* */ + break; + + default: + clib_warning ("BUG: unhandled event type %d", event_type); + break; + } + /* No less than 1 millisecond */ + if (timeout < 1e-3) + timeout = 1e-3; + if (stop_timer) + timeout = 3153600000.0; + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = { + .function = vhost_user_send_interrupt_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "vhost-user-send-interrupt-process", +}; +/* *INDENT-ON* */ + +static clib_error_t * +vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, + u32 qid, vnet_hw_interface_rx_mode mode) +{ + vlib_main_t *vm = vnm->vlib_main; + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui = + pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance); + vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; + + if ((mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) || + (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) + { + if (txvq->kickfd_idx == ~0) + { + // We cannot support interrupt mode if the driver opts out + return clib_error_return (0, "Driver does not support interrupt"); + } + if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + { + vum->ifq_count++; + // Start the timer if this is the first encounter on interrupt + // interface/queue + if ((vum->ifq_count == 1) && + (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0)) + vlib_process_signal_event (vm, + vhost_user_send_interrupt_node.index, + VHOST_USER_EVENT_START_TIMER, 0); + } + } + else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + { + if (((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) || + (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) && + vum->ifq_count) + { + vum->ifq_count--; + // Stop the timer if there is no more interrupt interface/queue + if ((vum->ifq_count == 0) && + (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0)) + vlib_process_signal_event (vm, + vhost_user_send_interrupt_node.index, + VHOST_USER_EVENT_STOP_TIMER, 0); + } + } + + txvq->mode = mode; + if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + txvq->used->flags = VRING_USED_F_NO_NOTIFY; + else if ((mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE) || + (mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT)) + txvq->used->flags = 0; + else + { + clib_warning ("BUG: unhandled mode %d changed for if %d queue %d", mode, + hw_if_index, qid); + return clib_error_return (0, "unsupported"); + } + + return 0; +} + static clib_error_t * vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) @@ -2387,7 +2555,7 @@ vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, vui->admin_up = is_up; - if (is_up) + if (is_up && vui->is_up) vnet_hw_interface_set_flags (vnm, vui->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); @@ -2403,6 +2571,7 @@ VNET_DEVICE_CLASS (vhost_user_dev_class,static) = { .format_device_name = format_vhost_user_interface_name, .name_renumber = vhost_user_name_renumber, .admin_up_down_function = vhost_user_interface_admin_up_down, + .rx_mode_change_function = vhost_user_interface_rx_mode_change, .format_tx_trace = format_vhost_trace, }; @@ -2418,7 +2587,7 @@ vhost_user_process (vlib_main_t * vm, vhost_user_intf_t *vui; struct sockaddr_un sun; int sockfd; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; f64 timeout = 3153600000.0 /* 100 years */ ; uword *event_data = 0; @@ -2439,7 +2608,7 @@ vhost_user_process (vlib_main_t * vm, pool_foreach (vui, vum->vhost_user_interfaces, { if (vui->unix_server_index == ~0) { //Nothing to do for server sockets - if (vui->unix_file_index == ~0) + if (vui->clib_file_index == ~0) { if ((sockfd < 0) && ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)) @@ -2477,7 +2646,7 @@ vhost_user_process (vlib_main_t * vm, template.file_descriptor = sockfd; template.private_data = vui - vhost_user_main.vhost_user_interfaces; - vui->unix_file_index = unix_file_add (&unix_main, &template); + vui->clib_file_index = clib_file_add (&file_main, &template); /* This sockfd is considered consumed */ sockfd = -1; @@ -2492,7 +2661,7 @@ vhost_user_process (vlib_main_t * vm, /* check if socket is alive */ int error = 0; socklen_t len = sizeof (error); - int fd = UNIX_GET_FD(vui->unix_file_index); + int fd = UNIX_GET_FD(vui->clib_file_index); int retval = getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len); @@ -2525,9 +2694,8 @@ static void vhost_user_term_if (vhost_user_intf_t * vui) { int q; + vhost_user_main_t *vum = &vhost_user_main; - // Delete configured thread pinning - vec_reset_length (vui->workers); // disconnect interface sockets vhost_user_if_disconnect (vui); vhost_user_update_iface_state (vui); @@ -2540,12 +2708,15 @@ vhost_user_term_if (vhost_user_intf_t * vui) if (vui->unix_server_index != ~0) { //Close server socket - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vui->unix_server_index); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vui->unix_server_index = ~0; unlink (vui->sock_filename); } + + mhash_unset (&vum->if_index_by_sock_name, vui->sock_filename, + &vui->if_index); } int @@ -2555,6 +2726,7 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) vhost_user_intf_t *vui; int rv = 0; vnet_hw_interface_t *hwif; + u16 *queue; if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) || hwif->dev_class_index != vhost_user_dev_class.index) @@ -2565,6 +2737,28 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance); + vec_foreach (queue, vui->rx_queues) + { + vhost_user_vring_t *txvq; + + txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)]; + if ((vum->ifq_count > 0) && + ((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) || + (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))) + { + vum->ifq_count--; + // Stop the timer if there is no more interrupt interface/queue + if ((vum->ifq_count == 0) && + (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0)) + { + vlib_process_signal_event (vm, + vhost_user_send_interrupt_node.index, + VHOST_USER_EVENT_STOP_TIMER, 0); + break; + } + } + } + // Disable and reset interface vhost_user_term_if (vui); @@ -2589,11 +2783,13 @@ vhost_user_exit (vlib_main_t * vm) vhost_user_main_t *vum = &vhost_user_main; vhost_user_intf_t *vui; + vlib_worker_thread_barrier_sync (vlib_get_main ()); /* *INDENT-OFF* */ pool_foreach (vui, vum->vhost_user_interfaces, { vhost_user_delete_if (vnm, vm, vui->sw_if_index); }); /* *INDENT-ON* */ + vlib_worker_thread_barrier_release (vlib_get_main ()); return 0; } @@ -2685,19 +2881,22 @@ vhost_user_vui_init (vnet_main_t * vnm, vhost_user_intf_t * vui, int server_sock_fd, const char *sock_filename, - u64 feature_mask, u32 * sw_if_index, u8 operation_mode) + u64 feature_mask, u32 * sw_if_index) { vnet_sw_interface_t *sw; - sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); int q; + vhost_user_main_t *vum = &vhost_user_main; + vnet_hw_interface_t *hw; + hw = vnet_get_hw_interface (vnm, vui->hw_if_index); + sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); if (server_sock_fd != -1) { - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; template.read_function = vhost_user_socksvr_accept_ready; template.file_descriptor = server_sock_fd; - template.private_data = vui - vhost_user_main.vhost_user_interfaces; //hw index - vui->unix_server_index = unix_file_add (&unix_main, &template); + template.private_data = vui - vum->vhost_user_interfaces; //hw index + vui->unix_server_index = clib_file_add (&file_main, &template); } else { @@ -2710,13 +2909,16 @@ vhost_user_vui_init (vnet_main_t * vnm, vui->sock_errno = 0; vui->is_up = 0; vui->feature_mask = feature_mask; - vui->unix_file_index = ~0; + vui->clib_file_index = ~0; vui->log_base_addr = 0; - vui->operation_mode = operation_mode; + vui->if_index = vui - vum->vhost_user_interfaces; + mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename, + &vui->if_index, 0); for (q = 0; q < VHOST_VRING_MAX_N; q++) vhost_user_vring_init (vui, q); + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); if (sw_if_index) @@ -2734,122 +2936,37 @@ vhost_user_vui_init (vnet_main_t * vnm, vhost_user_tx_thread_placement (vui); } -static uword -vhost_user_send_interrupt_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - vhost_user_intf_t *vui; - f64 timeout = 3153600000.0 /* 100 years */ ; - uword event_type, *event_data = 0; - vhost_user_main_t *vum = &vhost_user_main; - vhost_iface_and_queue_t *vhiq; - vhost_cpu_t *vhc; - f64 now, poll_time_remaining; - - while (1) - { - poll_time_remaining = - vlib_process_wait_for_event_or_clock (vm, timeout); - event_type = vlib_process_get_events (vm, &event_data); - vec_reset_length (event_data); - - /* - * Use the remaining timeout if it is less than coalesce time to avoid - * resetting the existing timer in the middle of expiration - */ - timeout = poll_time_remaining; - if (vlib_process_suspend_time_is_zero (timeout) || - (timeout > vum->coalesce_time)) - timeout = vum->coalesce_time; - - now = vlib_time_now (vm); - switch (event_type) - { - case VHOST_USER_EVENT_START_TIMER: - if (!vlib_process_suspend_time_is_zero (poll_time_remaining)) - break; - /* fall through */ - - case ~0: - vec_foreach (vhc, vum->cpus) - { - u32 thread_index = vhc - vum->cpus; - f64 next_timeout; - - next_timeout = timeout; - vec_foreach (vhiq, vum->cpus[thread_index].rx_queues) - { - vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; - vhost_user_vring_t *rxvq = - &vui->vrings[VHOST_VRING_IDX_RX (vhiq->qid)]; - vhost_user_vring_t *txvq = - &vui->vrings[VHOST_VRING_IDX_TX (vhiq->qid)]; - - if (txvq->n_since_last_int) - { - if (now >= txvq->int_deadline) - vhost_user_send_call (vm, txvq); - else - next_timeout = txvq->int_deadline - now; - } - - if (rxvq->n_since_last_int) - { - if (now >= rxvq->int_deadline) - vhost_user_send_call (vm, rxvq); - else - next_timeout = rxvq->int_deadline - now; - } - - if ((next_timeout < timeout) && (next_timeout > 0.0)) - timeout = next_timeout; - } - } - break; - - default: - clib_warning ("BUG: unhandled event type %d", event_type); - break; - } - /* No less than 1 millisecond */ - if (timeout < 1e-3) - timeout = 1e-3; - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = { - .function = vhost_user_send_interrupt_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "vhost-user-send-interrupt-process", -}; -/* *INDENT-ON* */ - int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 * sw_if_index, u64 feature_mask, - u8 renumber, u32 custom_dev_instance, u8 * hwaddr, - u8 operation_mode) + u8 renumber, u32 custom_dev_instance, u8 * hwaddr) { vhost_user_intf_t *vui = NULL; u32 sw_if_idx = ~0; int rv = 0; int server_sock_fd = -1; vhost_user_main_t *vum = &vhost_user_main; - - if ((operation_mode != VHOST_USER_POLLING_MODE) && - (operation_mode != VHOST_USER_INTERRUPT_MODE)) - return VNET_API_ERROR_UNIMPLEMENTED; + uword *if_index; if (sock_filename == NULL || !(strlen (sock_filename) > 0)) { return VNET_API_ERROR_INVALID_ARGUMENT; } + if_index = mhash_get (&vum->if_index_by_sock_name, (void *) sock_filename); + if (if_index) + { + if (sw_if_index) + { + vui = &vum->vhost_user_interfaces[*if_index]; + *sw_if_index = vui->sw_if_index; + } + return VNET_API_ERROR_IF_ALREADY_EXISTS; + } + if (is_server) { if ((rv = @@ -2863,7 +2980,7 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_create_ethernet (vnm, vm, vui, hwaddr); vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename, - feature_mask, &sw_if_idx, operation_mode); + feature_mask, &sw_if_idx); if (renumber) vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); @@ -2874,14 +2991,6 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, // Process node must connect vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0); - if ((operation_mode == VHOST_USER_INTERRUPT_MODE) && - !vum->interrupt_mode && (vum->coalesce_time > 0.0) && - (vum->coalesce_frames > 0)) - { - vum->interrupt_mode = 1; - vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index, - VHOST_USER_EVENT_START_TIMER, 0); - } return rv; } @@ -2890,8 +2999,7 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 sw_if_index, - u64 feature_mask, u8 renumber, u32 custom_dev_instance, - u8 operation_mode) + u64 feature_mask, u8 renumber, u32 custom_dev_instance) { vhost_user_main_t *vum = &vhost_user_main; vhost_user_intf_t *vui = NULL; @@ -2899,16 +3007,25 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, int server_sock_fd = -1; int rv = 0; vnet_hw_interface_t *hwif; + uword *if_index; - if ((operation_mode != VHOST_USER_POLLING_MODE) && - (operation_mode != VHOST_USER_INTERRUPT_MODE)) - return VNET_API_ERROR_UNIMPLEMENTED; if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) || hwif->dev_class_index != vhost_user_dev_class.index) return VNET_API_ERROR_INVALID_SW_IF_INDEX; + if (sock_filename == NULL || !(strlen (sock_filename) > 0)) + return VNET_API_ERROR_INVALID_ARGUMENT; + vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance); + /* + * Disallow changing the interface to have the same path name + * as other interface + */ + if_index = mhash_get (&vum->if_index_by_sock_name, (void *) sock_filename); + if (if_index && (*if_index != vui->if_index)) + return VNET_API_ERROR_IF_ALREADY_EXISTS; + // First try to open server socket if (is_server) if ((rv = vhost_user_init_server_sock (sock_filename, @@ -2917,8 +3034,7 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_term_if (vui); vhost_user_vui_init (vnm, vui, server_sock_fd, - sock_filename, feature_mask, &sw_if_idx, - operation_mode); + sock_filename, feature_mask, &sw_if_idx); if (renumber) vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); @@ -2926,33 +3042,9 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, // Process node must connect vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0); - if ((operation_mode == VHOST_USER_INTERRUPT_MODE) && - !vum->interrupt_mode && (vum->coalesce_time > 0.0) && - (vum->coalesce_frames > 0)) - { - vum->interrupt_mode = 1; - vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index, - VHOST_USER_EVENT_START_TIMER, 0); - } return rv; } -static uword -unformat_vhost_user_operation_mode (unformat_input_t * input, va_list * args) -{ - u8 *operation_mode = va_arg (*args, u8 *); - uword rc = 1; - - if (unformat (input, "interrupt")) - *operation_mode = VHOST_USER_INTERRUPT_MODE; - else if (unformat (input, "polling")) - *operation_mode = VHOST_USER_POLLING_MODE; - else - rc = 0; - - return rc; -} - clib_error_t * vhost_user_connect_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -2968,7 +3060,6 @@ vhost_user_connect_command_fn (vlib_main_t * vm, u8 hwaddr[6]; u8 *hw = NULL; clib_error_t *error = NULL; - u8 operation_mode = VHOST_USER_POLLING_MODE; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -2990,9 +3081,6 @@ vhost_user_connect_command_fn (vlib_main_t * vm, { renumber = 1; } - else if (unformat (line_input, "mode %U", - unformat_vhost_user_operation_mode, &operation_mode)) - ; else { error = clib_error_return (0, "unknown input `%U'", @@ -3006,8 +3094,7 @@ vhost_user_connect_command_fn (vlib_main_t * vm, int rv; if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename, is_server, &sw_if_index, feature_mask, - renumber, custom_dev_instance, hw, - operation_mode))) + renumber, custom_dev_instance, hw))) { error = clib_error_return (0, "vhost_user_create_if returned %d", rv); goto done; @@ -3097,7 +3184,6 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance); vec_add2 (r_vuids, vuid, 1); - vuid->operation_mode = vui->operation_mode; vuid->sw_if_index = vui->sw_if_index; vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz; vuid->features = vui->features; @@ -3122,25 +3208,6 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, return rv; } -static u8 * -format_vhost_user_operation_mode (u8 * s, va_list * va) -{ - int operation_mode = va_arg (*va, int); - - switch (operation_mode) - { - case VHOST_USER_POLLING_MODE: - s = format (s, "%s", "polling"); - break; - case VHOST_USER_INTERRUPT_MODE: - s = format (s, "%s", "interrupt"); - break; - default: - s = format (s, "%s", "invalid"); - } - return s; -} - clib_error_t * show_vhost_user_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -3152,10 +3219,8 @@ show_vhost_user_command_fn (vlib_main_t * vm, vhost_user_intf_t *vui; u32 hw_if_index, *hw_if_indices = 0; vnet_hw_interface_t *hi; - vhost_cpu_t *vhc; - vhost_iface_and_queue_t *vhiq; + u16 *queue; u32 ci; - int i, j, q; int show_descr = 0; struct feat_struct @@ -3208,6 +3273,8 @@ show_vhost_user_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "Virtio vhost-user interfaces"); vlib_cli_output (vm, "Global:\n coalesce frames %d time %e", vum->coalesce_frames, vum->coalesce_time); + vlib_cli_output (vm, " number of rx virtqueues in interrupt mode: %d", + vum->ifq_count); for (i = 0; i < vec_len (hw_if_indices); i++) { @@ -3249,23 +3316,21 @@ show_vhost_user_command_fn (vlib_main_t * vm, (vui->unix_server_index != ~0) ? "server" : "client", strerror (vui->sock_errno)); - vlib_cli_output (vm, " configured mode: %U\n", - format_vhost_user_operation_mode, vui->operation_mode); vlib_cli_output (vm, " rx placement: "); - vec_foreach (vhc, vum->cpus) + + vec_foreach (queue, vui->rx_queues) { - vec_foreach (vhiq, vhc->rx_queues) - { - if (vhiq->vhost_iface_index == vui - vum->vhost_user_interfaces) - { - vlib_cli_output (vm, " thread %d on vring %d\n", - vhc - vum->cpus, - VHOST_VRING_IDX_TX (vhiq->qid)); - vlib_cli_output (vm, " mode: %U\n", - format_vhost_user_operation_mode, - vhc->operation_mode); - } - } + vnet_main_t *vnm = vnet_get_main (); + uword thread_index; + vnet_hw_interface_rx_mode mode; + + thread_index = vnet_get_device_input_thread_index (vnm, + vui->hw_if_index, + *queue); + vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, *queue, &mode); + vlib_cli_output (vm, " thread %d on vring %d, %U\n", + thread_index, VHOST_VRING_IDX_TX (*queue), + format_vnet_hw_interface_rx_mode, mode); } vlib_cli_output (vm, " tx placement: %s\n", @@ -3310,7 +3375,8 @@ show_vhost_user_command_fn (vlib_main_t * vm, vlib_cli_output (vm, " qsz %d last_avail_idx %d last_used_idx %d\n", - vui->vrings[q].qsz, vui->vrings[q].last_avail_idx, + vui->vrings[q].qsz_mask + 1, + vui->vrings[q].last_avail_idx, vui->vrings[q].last_used_idx); if (vui->vrings[q].avail && vui->vrings[q].used) @@ -3333,7 +3399,7 @@ show_vhost_user_command_fn (vlib_main_t * vm, " id addr len flags next user_addr\n"); vlib_cli_output (vm, " ===== ================== ===== ====== ===== ==================\n"); - for (j = 0; j < vui->vrings[q].qsz; j++) + for (j = 0; j < vui->vrings[q].qsz_mask + 1; j++) { u32 mem_hint = 0; vlib_cli_output (vm, @@ -3367,16 +3433,23 @@ done: * * There are several parameters associated with a vHost interface: * - * - socket - Name of the linux socket used by QEMU/VM and - * VPP to manage the vHost interface. If socket does not already exist, VPP will - * create the socket. + * - socket - Name of the linux socket used by hypervisor + * and VPP to manage the vHost interface. If in 'server' mode, VPP will + * create the socket if it does not already exist. If in 'client' mode, + * hypervisor will create the socket if it does not already exist. The VPP code + * is indifferent to the file location. However, if SELinux is enabled, then the + * socket needs to be created in '/var/run/vpp/'. * - * - server - Optional flag to indicate that VPP should be the server for the - * linux socket. If not provided, VPP will be the client. + * - server - Optional flag to indicate that VPP should be the server for + * the linux socket. If not provided, VPP will be the client. In 'server' + * mode, the VM can be reset without tearing down the vHost Interface. In + * 'client' mode, VPP can be reset without bringing down the VM and + * tearing down the vHost Interface. * * - feature-mask - Optional virtio/vhost feature set negotiated at - * startup. By default, all supported features will be advertised. Otherwise, - * provide the set of features desired. + * startup. This is intended for degugging only. It is recommended that this + * parameter not be used except by experienced users. By default, all supported + * features will be advertised. Otherwise, provide the set of features desired. * - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF * - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ * - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE @@ -3394,17 +3467,14 @@ done: * in the name to be specified. If instance already exists, name will be used * anyway and multiple instances will have the same name. Use with caution. * - * - mode [interrupt | polling] - Optional parameter specifying - * the input thread polling policy. - * * @cliexpar * Example of how to create a vhost interface with VPP as the client and all features enabled: - * @cliexstart{create vhost-user socket /tmp/vhost1.sock} + * @cliexstart{create vhost-user socket /var/run/vpp/vhost1.sock} * VirtualEthernet0/0/0 * @cliexend * Example of how to create a vhost interface with VPP as the server and with just * multiple queues enabled: - * @cliexstart{create vhost-user socket /tmp/vhost2.sock server feature-mask 0x40400000} + * @cliexstart{create vhost-user socket /var/run/vpp/vhost2.sock server feature-mask 0x40400000} * VirtualEthernet0/0/1 * @cliexend * Once the vHost interface is created, enable the interface using: @@ -3414,8 +3484,7 @@ done: VLIB_CLI_COMMAND (vhost_user_connect_command, static) = { .path = "create vhost-user", .short_help = "create vhost-user socket [server] " - "[feature-mask ] [hwaddr ] [renumber ] " - "[mode {interrupt | polling}]", + "[feature-mask ] [hwaddr ] [renumber ] ", .function = vhost_user_connect_command_fn, }; /* *INDENT-ON* */ @@ -3464,7 +3533,7 @@ VLIB_CLI_COMMAND (vhost_user_delete_command, static) = { * VHOST_USER_PROTOCOL_F_MQ (0) * VHOST_USER_PROTOCOL_F_LOG_SHMFD (1) * - * socket filename /tmp/vhost1.sock type client errno "Success" + * socket filename /var/run/vpp/vhost1.sock type client errno "Success" * * rx placement: * thread 1 on vring 1 @@ -3580,6 +3649,64 @@ VLIB_CLI_COMMAND (show_vhost_user_command, static) = { }; /* *INDENT-ON* */ +clib_error_t * +debug_vhost_user_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; + vhost_user_main_t *vum = &vhost_user_main; + u8 onoff = 0; + u8 input_found = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "missing argument"); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (input_found) + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + + if (unformat (line_input, "on")) + { + input_found = 1; + onoff = 1; + } + else if (unformat (line_input, "off")) + { + input_found = 1; + onoff = 0; + } + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + vum->debug = onoff; + +done: + unformat_free (line_input); + + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (debug_vhost_user_command, static) = { + .path = "debug vhost-user", + .short_help = "debug vhost-user ", + .function = debug_vhost_user_command_fn, +}; +/* *INDENT-ON* */ + static clib_error_t * vhost_user_config (vlib_main_t * vm, unformat_input_t * input) { @@ -3618,69 +3745,6 @@ vhost_user_unmap_all (void) } } -static clib_error_t * -vhost_thread_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u32 worker_thread_index; - u32 sw_if_index; - u8 del = 0; - int rv; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - if (!unformat - (line_input, "%U %d", unformat_vnet_sw_interface, vnet_get_main (), - &sw_if_index, &worker_thread_index)) - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - - if (unformat (line_input, "del")) - del = 1; - - if ((rv = - vhost_user_thread_placement (sw_if_index, worker_thread_index, del))) - { - error = clib_error_return (0, "vhost_user_thread_placement returned %d", - rv); - goto done; - } - -done: - unformat_free (line_input); - - return error; -} - - -/*? - * This command is used to move the RX processing for the given - * interfaces to the provided thread. If the 'del' option is used, - * the forced thread assignment is removed and the thread assigment is - * reassigned automatically. Use 'show vhost-user ' - * to see the thread assignment. - * - * @cliexpar - * Example of how to move the RX processing for a given interface to a given thread: - * @cliexcmd{vhost thread VirtualEthernet0/0/0 1} - * Example of how to remove the forced thread assignment for a given interface: - * @cliexcmd{vhost thread VirtualEthernet0/0/0 1 del} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (vhost_user_thread_command, static) = { - .path = "vhost thread", - .short_help = "vhost thread [del]", - .function = vhost_thread_command_fn, -}; -/* *INDENT-ON* */ - /* * fd.io coding-style-patch-verification: ON *