diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
index ca54f110948..5460f10b74e 100644
--- a/src/vnet/devices/virtio/vhost-user.c
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -86,11 +86,22 @@
  * The value 64 was obtained by testing (48 and 128 were not as good).
  */
 #define VHOST_USER_RX_COPY_THRESHOLD 64
+/*
+ * On the transmit side, we keep processing the buffers from vlib in the while
+ * loop and prepare the copy order to be executed later. However, the static
+ * array in which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N
+ * entries. To avoid corrupting memory, we have to execute the copies once
+ * the static array reaches the copy threshold. We subtract 40 because the
+ * inner loop, taken for frames of up to 64K bytes, may require up to 40
+ * more array entries.
+ */
+#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 40)
 
-#define UNIX_GET_FD(unixfd_idx) \
-  (unixfd_idx != ~0) ? \
+#define UNIX_GET_FD(unixfd_idx) ({ \
+    typeof(unixfd_idx) __unixfd_idx = (unixfd_idx); \
+    (__unixfd_idx != ~0) ? \
     pool_elt_at_index (file_main.file_pool, \
-                       unixfd_idx)->file_descriptor : -1;
+                       __unixfd_idx)->file_descriptor : -1; })
 
 #define foreach_virtio_trace_flags \
   _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \
@@ -246,7 +257,76 @@ map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint)
       return (void *) (vui->region_mmap_addr[i] + addr -
                        vui->regions[i].guest_phys_addr);
     }
+#elif __aarch64__ && __ARM_NEON
+  uint64x2_t al, ah, rl, rh, r;
+  uint32_t u32 = 0;
+
+  al = vdupq_n_u64 (addr + 1);
+  ah = vdupq_n_u64 (addr);
+
+  /* First iteration */
+  rl = vld1q_u64 (&vui->region_guest_addr_lo[0]);
+  rl = vcgtq_u64 (al, rl);
+  rh = vld1q_u64 (&vui->region_guest_addr_hi[0]);
+  rh = vcgtq_u64 (rh, ah);
+  r = vandq_u64 (rl, rh);
+  u32 |= (vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1);
+  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 1);
+
+  if (u32)
+    {
+      i = __builtin_ctzll (u32);
+      goto vhost_map_guest_mem_done;
+    }
+
+  /* Second iteration */
+  rl = vld1q_u64 (&vui->region_guest_addr_lo[2]);
+  rl = vcgtq_u64 (al, rl);
+  rh = vld1q_u64 (&vui->region_guest_addr_hi[2]);
+  rh = vcgtq_u64 (rh, ah);
+  r = vandq_u64 (rl, rh);
+  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 2);
+  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 3);
+
+  if (u32)
+    {
+      i = __builtin_ctzll (u32);
+      goto vhost_map_guest_mem_done;
+    }
+
+  /* Third iteration */
+  rl = vld1q_u64 (&vui->region_guest_addr_lo[4]);
+  rl = vcgtq_u64 (al, rl);
+  rh = vld1q_u64 (&vui->region_guest_addr_hi[4]);
+  rh = vcgtq_u64 (rh, ah);
+  r = vandq_u64 (rl, rh);
+  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 4);
+  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 5);
+
+  if (u32)
+    {
+      i = __builtin_ctzll (u32);
+      goto vhost_map_guest_mem_done;
+    }
+  /* Fourth iteration */
+  rl = vld1q_u64 (&vui->region_guest_addr_lo[6]);
+  rl = vcgtq_u64 (al, rl);
+  rh = vld1q_u64 (&vui->region_guest_addr_hi[6]);
+  rh = vcgtq_u64 (rh, ah);
+  r = vandq_u64 (rl, rh);
+  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 6);
+  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 7);
+
+  i = __builtin_ctzll (u32 | (1 << VHOST_MEMORY_MAX_NREGIONS));
+
+vhost_map_guest_mem_done:
+  if (i < vui->nregions)
+    {
+      *hint = i;
+      return (void *) (vui->region_mmap_addr[i] + addr -
+                       vui->regions[i].guest_phys_addr);
+    }
 #else
   for (i = 0; i < vui->nregions; i++)
     {
@@ -296,7 +376,7 @@ unmap_all_mem_regions (vhost_user_intf_t * vui)
   int i, r;
   for (i = 0; i < vui->nregions; i++)
     {
-      if (vui->region_mmap_addr[i] != (void *) -1)
+      if (vui->region_mmap_addr[i] != MAP_FAILED)
        {
          long page_sz = get_huge_page_size (vui->region_mmap_fd[i]);
 
@@ -313,7 +393,7 @@ unmap_all_mem_regions (vhost_user_intf_t * vui)
            ("unmap memory region %d addr 0x%lx len 0x%lx page_sz 0x%x", i,
             vui->region_mmap_addr[i], map_sz, page_sz);
 
-         vui->region_mmap_addr[i] = (void *) -1;
+         vui->region_mmap_addr[i] = MAP_FAILED;
 
          if (r == -1)
            {
@@ -819,7 +899,7 @@ vhost_user_socket_read (clib_file_t * uf)
 
              long page_sz = get_huge_page_size (fds[i]);
 
-             /* align size to 2M page */
+             /* align size to page */
              ssize_t map_sz = (vui->regions[i].memory_size +
                                vui->regions[i].mmap_offset +
                                page_sz - 1) & ~(page_sz - 1);
@@ -842,8 +922,9 @@ vhost_user_socket_read (clib_file_t * uf)
            }
          vui->region_mmap_addr[i] += vui->regions[i].mmap_offset;
          vui->region_mmap_fd[i] = fds[i];
+
+         vui->nregions++;
        }
-      vui->nregions = msg.memory.nregions;
       break;
 
     case VHOST_USER_SET_VRING_NUM:
@@ -1065,7 +1146,7 @@ vhost_user_socket_read (clib_file_t * uf)
        }
       fd = fds[0];
 
-      /* align size to 2M page */
+      /* align size to page */
      long page_sz = get_huge_page_size (fd);
      ssize_t map_sz = (msg.log.size + msg.log.offset + page_sz - 1) &
        ~(page_sz - 1);
@@ -1202,7 +1283,15 @@ vhost_user_socksvr_accept_ready (clib_file_t * uf)
   if (client_fd < 0)
     return clib_error_return_unix (0, "accept");
 
-  DBG_SOCK ("New client socket for vhost interface %d", vui->sw_if_index);
+  if (vui->clib_file_index != ~0)
+    {
+      DBG_SOCK ("Close client socket for vhost interface %d, fd %d",
+               vui->sw_if_index, UNIX_GET_FD (vui->clib_file_index));
+      clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
+    }
+
+  DBG_SOCK ("New client socket for vhost interface %d, fd %d",
+           vui->sw_if_index, client_fd);
   template.read_function = vhost_user_socket_read;
   template.error_function = vhost_user_socket_error;
   template.file_descriptor = client_fd;
@@ -1545,8 +1634,10 @@ vhost_user_if_input (vlib_main_t * vm,
    * per packet. In case packets are bigger, we will just yield at some point
    * in the loop and come back later. This is not an issue as, for big packets,
    * the processing cost really comes from the memory copy.
+   * The assumption is that big packets will fit in 40 buffers.
    */
-  if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1))
+  if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1 ||
+                    vum->cpus[thread_index].rx_buffers_len < 40))
     {
       u32 curr_len = vum->cpus[thread_index].rx_buffers_len;
       vum->cpus[thread_index].rx_buffers_len +=
@@ -1998,7 +2089,7 @@ vhost_user_tx (vlib_main_t * vm,
 
   qid =
     VHOST_VRING_IDX_RX (*vec_elt_at_index
-                       (vui->per_cpu_tx_qid, vlib_get_thread_index ()));
+                       (vui->per_cpu_tx_qid, thread_index));
   rxvq = &vui->vrings[qid];
   if (PREDICT_FALSE (vui->use_tx_spinlock))
     vhost_user_vring_lock (vui, qid);
@@ -2210,6 +2301,27 @@ retry:
        }
 
       n_left--;       // At the end for error counting when 'goto done' is invoked
+
+      /*
+       * Do the copy periodically to prevent
+       * vum->cpus[thread_index].copy array overflow and memory corruption
+       */
+      if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
+       {
+         if (PREDICT_FALSE
+             (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy,
+                                  copy_len, &map_hint)))
+           {
+             vlib_error_count (vm, node->node_index,
+                               VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
+           }
+         copy_len = 0;
+
+         /* give buffers back to driver */
+         CLIB_MEMORY_BARRIER ();
+         rxvq->used->idx = rxvq->last_used_idx;
+         vhost_user_log_dirty_ring (vui, rxvq, idx);
+       }
       buffers++;
     }
 
@@ -2264,7 +2376,7 @@ done3:
 
       vlib_increment_simple_counter
        (vnet_main.interface_main.sw_if_counters
        + VNET_INTERFACE_COUNTER_DROP,
-       vlib_get_thread_index (), vui->sw_if_index, n_left);
+       thread_index, vui->sw_if_index, n_left);
     }
 
   vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
@@ -3321,16 +3433,23 @@ done:
 *
 * There are several parameters associated with a vHost interface:
 *
- * - socket - Name of the linux socket used by QEMU/VM and
- * VPP to manage the vHost interface. If socket does not already exist, VPP will
- * create the socket.
+ * - socket - Name of the linux socket used by the hypervisor
+ * and VPP to manage the vHost interface. If in 'server' mode, VPP will
+ * create the socket if it does not already exist. If in 'client' mode, the
+ * hypervisor will create the socket if it does not already exist. The VPP
+ * code is indifferent to the file location. However, if SELinux is enabled,
+ * the socket needs to be created in '/var/run/vpp/'.
 *
- * - server - Optional flag to indicate that VPP should be the server for the
- * linux socket. If not provided, VPP will be the client.
+ * - server - Optional flag to indicate that VPP should be the server for
+ * the linux socket. If not provided, VPP will be the client. In 'server'
+ * mode, the VM can be reset without tearing down the vHost Interface. In
+ * 'client' mode, VPP can be reset without bringing down the VM and
+ * tearing down the vHost Interface.
 *
 * - feature-mask - Optional virtio/vhost feature set negotiated at
- * startup. By default, all supported features will be advertised. Otherwise,
- * provide the set of features desired.
+ * startup. This is intended for debugging only. It is recommended that this
+ * parameter not be used except by experienced users. By default, all supported
+ * features will be advertised. Otherwise, provide the set of features desired.
 *   - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF
 *   - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ
 *   - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE
@@ -3348,17 +3467,14 @@ done:
 * in the name to be specified. If instance already exists, name will be used
 * anyway and multiple instances will have the same name. Use with caution.
 *
- * - mode [interrupt | polling] - Optional parameter specifying
- * the input thread polling policy.
- *
 * @cliexpar
 * Example of how to create a vhost interface with VPP as the client and all features enabled:
- * @cliexstart{create vhost-user socket /tmp/vhost1.sock}
+ * @cliexstart{create vhost-user socket /var/run/vpp/vhost1.sock}
 * VirtualEthernet0/0/0
 * @cliexend
 * Example of how to create a vhost interface with VPP as the server and with just
 * multiple queues enabled:
- * @cliexstart{create vhost-user socket /tmp/vhost2.sock server feature-mask 0x40400000}
+ * @cliexstart{create vhost-user socket /var/run/vpp/vhost2.sock server feature-mask 0x40400000}
 * VirtualEthernet0/0/1
 * @cliexend
 * Once the vHost interface is created, enable the interface using:
@@ -3417,7 +3533,7 @@ VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
 *   VHOST_USER_PROTOCOL_F_MQ (0)
 *   VHOST_USER_PROTOCOL_F_LOG_SHMFD (1)
 *
- *  socket filename /tmp/vhost1.sock type client errno "Success"
+ *  socket filename /var/run/vpp/vhost1.sock type client errno "Success"
 *
 * rx placement:
 *   thread 1 on vring 1
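
A note on the UNIX_GET_FD rewrite above: the old macro body expanded to a
bare conditional with a trailing semicolon (unusable inside a larger
expression) and mentioned unixfd_idx twice, so an argument with side effects
was evaluated twice. The GCC statement-expression form fixes both. Below is a
minimal standalone sketch of the single-evaluation property; fake_pool and
next_idx are hypothetical stand-ins for file_main.file_pool and a
side-effecting index expression, not VPP code.

#include <stdio.h>

static int fake_pool[4] = { 10, 11, 12, 13 };
static unsigned calls;

static unsigned
next_idx (void)
{
  calls++;			/* counts evaluations of the macro argument */
  return 2;
}

#define GET_FD(unixfd_idx) ({ \
    typeof (unixfd_idx) __unixfd_idx = (unixfd_idx); \
    (__unixfd_idx != ~0) ? fake_pool[__unixfd_idx] : -1; })

int
main (void)
{
  int fd = GET_FD (next_idx ());
  /* Prints "fd=12 calls=1"; with the old macro body the argument
     would have been evaluated twice. */
  printf ("fd=%d calls=%u\n", fd, calls);
  return 0;
}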
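
The new __aarch64__/__ARM_NEON path in map_guest_mem is easier to check
against its scalar equivalent. The sketch below assumes, as the patch does,
that region_guest_addr_lo[i] and region_guest_addr_hi[i] hold each region's
[guest_phys_addr, guest_phys_addr + memory_size) bounds for all
VHOST_MEMORY_MAX_NREGIONS slots; the NEON code builds the same match mask two
regions per iteration and uses the same sentinel bit so that ctz stays
defined when nothing matches.

#include <stdint.h>

#define VHOST_MEMORY_MAX_NREGIONS 8

/* Bit i of mask is set when lo[i] <= addr < hi[i]. Note that
 * (addr + 1) > lo[i] is the vcgtq_u64-friendly spelling of addr >= lo[i].
 * Returns the first matching region index, or VHOST_MEMORY_MAX_NREGIONS
 * (the sentinel bit) when no region matches. */
static inline int
find_region (const uint64_t lo[VHOST_MEMORY_MAX_NREGIONS],
	     const uint64_t hi[VHOST_MEMORY_MAX_NREGIONS], uint64_t addr)
{
  uint32_t mask = 0;
  int i;

  for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++)
    mask |= (uint32_t) (addr + 1 > lo[i] && hi[i] > addr) << i;

  return __builtin_ctzll (mask | (1 << VHOST_MEMORY_MAX_NREGIONS));
}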
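
The transmit-side change pairs VHOST_USER_TX_COPY_THRESHOLD with the mid-loop
flush added in vhost_user_tx. Reduced to a self-contained sketch, the pattern
looks as follows; copy_op_t, flush_copies and the 4096-entry capacity are
illustrative stand-ins, not the patch's actual vhost_user_tx_copy or
VHOST_USER_COPY_ARRAY_N value.

#include <string.h>

#define COPY_ARRAY_N 4096	/* illustrative capacity of the copy array */
#define COPY_THRESHOLD (COPY_ARRAY_N - 40)	/* headroom for one big frame */

typedef struct
{
  void *dst, *src;
  unsigned len;
} copy_op_t;

static copy_op_t copy[COPY_ARRAY_N];

static void
flush_copies (copy_op_t * ops, unsigned n)
{
  unsigned i;
  for (i = 0; i < n; i++)
    memcpy (ops[i].dst, ops[i].src, ops[i].len);
}

/* Queue one copy order; flush early so the inner loop, which can append
 * up to 40 more entries for a 64K frame, can never run off the end of
 * the array. Returns the new queue length. */
static unsigned
queue_copy (unsigned copy_len, void *dst, void *src, unsigned len)
{
  copy[copy_len].dst = dst;
  copy[copy_len].src = src;
  copy[copy_len].len = len;
  copy_len++;

  if (copy_len >= COPY_THRESHOLD)
    {
      flush_copies (copy, copy_len);
      copy_len = 0;		/* the array can be reused from the start */
    }
  return copy_len;
}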