* The value 64 was obtained by testing (48 and 128 were not as good).
*/
#define VHOST_USER_RX_COPY_THRESHOLD 64
+/*
+ * On the transmit side, we keep processing the buffers from vlib in the while
+ * loop and prepare the copy order to be executed later. However, the static
+ * array which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N
+ * entries. In order to not corrupt memory, we have to do the copy when the
+ * static array reaches the copy threshold. We subtract 40 in case the code
+ * goes into the inner loop for a maximum of 64k frames which may require
+ * more array entries.
+ */
+#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 40)
-#define UNIX_GET_FD(unixfd_idx) \
- (unixfd_idx != ~0) ? \
- pool_elt_at_index (unix_main.file_pool, \
- unixfd_idx)->file_descriptor : -1;
+#define UNIX_GET_FD(unixfd_idx) ({ \
+ typeof(unixfd_idx) __unixfd_idx = (unixfd_idx); \
+ (__unixfd_idx != ~0) ? \
+ pool_elt_at_index (file_main.file_pool, \
+ __unixfd_idx)->file_descriptor : -1; })
#define foreach_virtio_trace_flags \
_ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \
r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88);
r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800));
- i = __builtin_ctzll (_mm_movemask_epi8 (r) |
- (1 << VHOST_MEMORY_MAX_NREGIONS));
+ i = count_trailing_zeros (_mm_movemask_epi8 (r) |
+ (1 << VHOST_MEMORY_MAX_NREGIONS));
if (i < vui->nregions)
{
return (void *) (vui->region_mmap_addr[i] + addr -
vui->regions[i].guest_phys_addr);
}
+#elif __aarch64__ && __ARM_NEON
+ uint64x2_t al, ah, rl, rh, r;
+ uint32_t u32 = 0;
+
+ al = vdupq_n_u64 (addr + 1);
+ ah = vdupq_n_u64 (addr);
+
+ /*First Iteration */
+ rl = vld1q_u64 (&vui->region_guest_addr_lo[0]);
+ rl = vcgtq_u64 (al, rl);
+ rh = vld1q_u64 (&vui->region_guest_addr_hi[0]);
+ rh = vcgtq_u64 (rh, ah);
+ r = vandq_u64 (rl, rh);
+ u32 |= (vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 1);
+
+ if (u32)
+ {
+ i = count_trailing_zeros (u32);
+ goto vhost_map_guest_mem_done;
+ }
+
+ /*Second Iteration */
+ rl = vld1q_u64 (&vui->region_guest_addr_lo[2]);
+ rl = vcgtq_u64 (al, rl);
+ rh = vld1q_u64 (&vui->region_guest_addr_hi[2]);
+ rh = vcgtq_u64 (rh, ah);
+ r = vandq_u64 (rl, rh);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 2);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 3);
+
+ if (u32)
+ {
+ i = count_trailing_zeros (u32);
+ goto vhost_map_guest_mem_done;
+ }
+ /*Third Iteration */
+ rl = vld1q_u64 (&vui->region_guest_addr_lo[4]);
+ rl = vcgtq_u64 (al, rl);
+ rh = vld1q_u64 (&vui->region_guest_addr_hi[4]);
+ rh = vcgtq_u64 (rh, ah);
+ r = vandq_u64 (rl, rh);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 4);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 5);
+
+ if (u32)
+ {
+ i = count_trailing_zeros (u32);
+ goto vhost_map_guest_mem_done;
+ }
+
+ /*Fourth Iteration */
+ rl = vld1q_u64 (&vui->region_guest_addr_lo[6]);
+ rl = vcgtq_u64 (al, rl);
+ rh = vld1q_u64 (&vui->region_guest_addr_hi[6]);
+ rh = vcgtq_u64 (rh, ah);
+ r = vandq_u64 (rl, rh);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 6);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 7);
+
+ i = count_trailing_zeros (u32 | (1 << VHOST_MEMORY_MAX_NREGIONS));
+
+vhost_map_guest_mem_done:
+ if (i < vui->nregions)
+ {
+ *hint = i;
+ return (void *) (vui->region_mmap_addr[i] + addr -
+ vui->regions[i].guest_phys_addr);
+ }
#else
for (i = 0; i < vui->nregions; i++)
{
int i, r;
for (i = 0; i < vui->nregions; i++)
{
- if (vui->region_mmap_addr[i] != (void *) -1)
+ if (vui->region_mmap_addr[i] != MAP_FAILED)
{
long page_sz = get_huge_page_size (vui->region_mmap_fd[i]);
("unmap memory region %d addr 0x%lx len 0x%lx page_sz 0x%x", i,
vui->region_mmap_addr[i], map_sz, page_sz);
- vui->region_mmap_addr[i] = (void *) -1;
+ vui->region_mmap_addr[i] = MAP_FAILED;
if (r == -1)
{
}
static clib_error_t *
-vhost_user_callfd_read_ready (unix_file_t * uf)
+vhost_user_callfd_read_ready (clib_file_t * uf)
{
__attribute__ ((unused)) int n;
u8 buff[8];
}
static clib_error_t *
-vhost_user_kickfd_read_ready (unix_file_t * uf)
+vhost_user_kickfd_read_ready (clib_file_t * uf)
{
__attribute__ ((unused)) int n;
u8 buff[8];
vhost_user_vring_t *vring = &vui->vrings[qid];
if (vring->kickfd_idx != ~0)
{
- unix_file_t *uf = pool_elt_at_index (unix_main.file_pool,
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
vring->kickfd_idx);
- unix_file_del (&unix_main, uf);
+ clib_file_del (&file_main, uf);
vring->kickfd_idx = ~0;
}
if (vring->callfd_idx != ~0)
{
- unix_file_t *uf = pool_elt_at_index (unix_main.file_pool,
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
vring->callfd_idx);
- unix_file_del (&unix_main, uf);
+ clib_file_del (&file_main, uf);
vring->callfd_idx = ~0;
}
if (vring->errfd != -1)
vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
- if (vui->unix_file_index != ~0)
+ if (vui->clib_file_index != ~0)
{
- unix_file_del (&unix_main, unix_main.file_pool + vui->unix_file_index);
- vui->unix_file_index = ~0;
+ clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
+ vui->clib_file_index = ~0;
}
vui->is_up = 0;
}
static clib_error_t *
-vhost_user_socket_read (unix_file_t * uf)
+vhost_user_socket_read (clib_file_t * uf)
{
int n, i;
int fd, number_of_fds = 0;
vhost_user_intf_t *vui;
struct cmsghdr *cmsg;
u8 q;
- unix_file_t template = { 0 };
+ clib_file_t template = { 0 };
vnet_main_t *vnm = vnet_get_main ();
vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
long page_sz = get_huge_page_size (fds[i]);
- /* align size to 2M page */
+ /* align size to page */
ssize_t map_sz = (vui->regions[i].memory_size +
vui->regions[i].mmap_offset +
page_sz - 1) & ~(page_sz - 1);
}
vui->region_mmap_addr[i] += vui->regions[i].mmap_offset;
vui->region_mmap_fd[i] = fds[i];
+
+ vui->nregions++;
}
- vui->nregions = msg.memory.nregions;
break;
case VHOST_USER_SET_VRING_NUM:
/* if there is old fd, delete and close it */
if (vui->vrings[q].callfd_idx != ~0)
{
- unix_file_t *uf = pool_elt_at_index (unix_main.file_pool,
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
vui->vrings[q].callfd_idx);
- unix_file_del (&unix_main, uf);
+ clib_file_del (&file_main, uf);
vui->vrings[q].callfd_idx = ~0;
}
template.file_descriptor = fds[0];
template.private_data =
((vui - vhost_user_main.vhost_user_interfaces) << 8) + q;
- vui->vrings[q].callfd_idx = unix_file_add (&unix_main, &template);
+ vui->vrings[q].callfd_idx = clib_file_add (&file_main, &template);
}
else
vui->vrings[q].callfd_idx = ~0;
if (vui->vrings[q].kickfd_idx != ~0)
{
- unix_file_t *uf = pool_elt_at_index (unix_main.file_pool,
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
vui->vrings[q].kickfd_idx);
- unix_file_del (&unix_main, uf);
+ clib_file_del (&file_main, uf);
vui->vrings[q].kickfd_idx = ~0;
}
template.private_data =
(((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) +
q;
- vui->vrings[q].kickfd_idx = unix_file_add (&unix_main, &template);
+ vui->vrings[q].kickfd_idx = clib_file_add (&file_main, &template);
}
else
{
}
fd = fds[0];
- /* align size to 2M page */
+ /* align size to page */
long page_sz = get_huge_page_size (fd);
ssize_t map_sz =
(msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1);
}
static clib_error_t *
-vhost_user_socket_error (unix_file_t * uf)
+vhost_user_socket_error (clib_file_t * uf)
{
vlib_main_t *vm = vlib_get_main ();
vhost_user_main_t *vum = &vhost_user_main;
}
static clib_error_t *
-vhost_user_socksvr_accept_ready (unix_file_t * uf)
+vhost_user_socksvr_accept_ready (clib_file_t * uf)
{
int client_fd, client_len;
struct sockaddr_un client;
- unix_file_t template = { 0 };
+ clib_file_t template = { 0 };
vhost_user_main_t *vum = &vhost_user_main;
vhost_user_intf_t *vui;
if (client_fd < 0)
return clib_error_return_unix (0, "accept");
- DBG_SOCK ("New client socket for vhost interface %d", vui->sw_if_index);
+ if (vui->clib_file_index != ~0)
+ {
+ DBG_SOCK ("Close client socket for vhost interface %d, fd %d",
+ vui->sw_if_index, UNIX_GET_FD (vui->clib_file_index));
+ clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
+ }
+
+ DBG_SOCK ("New client socket for vhost interface %d, fd %d",
+ vui->sw_if_index, client_fd);
template.read_function = vhost_user_socket_read;
template.error_function = vhost_user_socket_error;
template.file_descriptor = client_fd;
template.private_data = vui - vhost_user_main.vhost_user_interfaces;
- vui->unix_file_index = unix_file_add (&unix_main, &template);
+ vui->clib_file_index = clib_file_add (&file_main, &template);
return 0;
}
vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, vui->sw_if_index);
- uword indent = format_get_indent (s);
+ u32 indent = format_get_indent (s);
s = format (s, "%U %U queue %d\n", format_white_space, indent,
format_vnet_sw_interface_name, vnm, sw, t->qid);
* per packet. In case packets are bigger, we will just yeld at some point
* in the loop and come back later. This is not an issue as for big packet,
* processing cost really comes from the memory copy.
+ * The assumption is that big packets will fit in 40 buffers.
*/
- if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1))
+ if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1 ||
+ vum->cpus[thread_index].rx_buffers_len < 40))
{
u32 curr_len = vum->cpus[thread_index].rx_buffers_len;
vum->cpus[thread_index].rx_buffers_len +=
qid =
VHOST_VRING_IDX_RX (*vec_elt_at_index
- (vui->per_cpu_tx_qid, vlib_get_thread_index ()));
+ (vui->per_cpu_tx_qid, thread_index));
rxvq = &vui->vrings[qid];
if (PREDICT_FALSE (vui->use_tx_spinlock))
vhost_user_vring_lock (vui, qid);
}
n_left--; //At the end for error counting when 'goto done' is invoked
+
+ /*
+ * Do the copy periodically to prevent
+ * vum->cpus[thread_index].copy array overflow and corrupt memory
+ */
+ if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
+ {
+ if (PREDICT_FALSE
+ (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy,
+ copy_len, &map_hint)))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
+ }
+ copy_len = 0;
+
+ /* give buffers back to driver */
+ CLIB_MEMORY_BARRIER ();
+ rxvq->used->idx = rxvq->last_used_idx;
+ vhost_user_log_dirty_ring (vui, rxvq, idx);
+ }
buffers++;
}
vlib_increment_simple_counter
(vnet_main.interface_main.sw_if_counters
+ VNET_INTERFACE_COUNTER_DROP,
- vlib_get_thread_index (), vui->sw_if_index, n_left);
+ thread_index, vui->sw_if_index, n_left);
}
vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
u32 flags)
{
vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
- uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
vhost_user_main_t *vum = &vhost_user_main;
vhost_user_intf_t *vui =
pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
+ u32 hw_flags = 0;
+ vui->admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+ hw_flags = vui->admin_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
- vui->admin_up = is_up;
-
- if (is_up)
- vnet_hw_interface_set_flags (vnm, vui->hw_if_index,
- VNET_HW_INTERFACE_FLAG_LINK_UP);
+ vnet_hw_interface_set_flags (vnm, vui->hw_if_index, hw_flags);
return /* no error */ 0;
}
vhost_user_intf_t *vui;
struct sockaddr_un sun;
int sockfd;
- unix_file_t template = { 0 };
+ clib_file_t template = { 0 };
f64 timeout = 3153600000.0 /* 100 years */ ;
uword *event_data = 0;
pool_foreach (vui, vum->vhost_user_interfaces, {
if (vui->unix_server_index == ~0) { //Nothing to do for server sockets
- if (vui->unix_file_index == ~0)
+ if (vui->clib_file_index == ~0)
{
if ((sockfd < 0) &&
((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0))
template.file_descriptor = sockfd;
template.private_data =
vui - vhost_user_main.vhost_user_interfaces;
- vui->unix_file_index = unix_file_add (&unix_main, &template);
+ vui->clib_file_index = clib_file_add (&file_main, &template);
/* This sockfd is considered consumed */
sockfd = -1;
/* check if socket is alive */
int error = 0;
socklen_t len = sizeof (error);
- int fd = UNIX_GET_FD(vui->unix_file_index);
+ int fd = UNIX_GET_FD(vui->clib_file_index);
int retval =
getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len);
if (vui->unix_server_index != ~0)
{
//Close server socket
- unix_file_t *uf = pool_elt_at_index (unix_main.file_pool,
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
vui->unix_server_index);
- unix_file_del (&unix_main, uf);
+ clib_file_del (&file_main, uf);
vui->unix_server_index = ~0;
unlink (vui->sock_filename);
}
if (error)
clib_error_report (error);
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, vui->hw_if_index);
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+ vnet_sw_interface_set_mtu (vnm, vui->sw_if_index, 9000);
}
/*
sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
if (server_sock_fd != -1)
{
- unix_file_t template = { 0 };
+ clib_file_t template = { 0 };
template.read_function = vhost_user_socksvr_accept_ready;
template.file_descriptor = server_sock_fd;
template.private_data = vui - vum->vhost_user_interfaces; //hw index
- vui->unix_server_index = unix_file_add (&unix_main, &template);
+ vui->unix_server_index = clib_file_add (&file_main, &template);
}
else
{
vui->sock_errno = 0;
vui->is_up = 0;
vui->feature_mask = feature_mask;
- vui->unix_file_index = ~0;
+ vui->clib_file_index = ~0;
vui->log_base_addr = 0;
vui->if_index = vui - vum->vhost_user_interfaces;
mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename,
if (unformat
(input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
{
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ if (vhost_user_dev_class.index != hi->dev_class_index)
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
vec_add1 (hw_if_indices, hw_if_index);
}
else if (unformat (input, "descriptors") || unformat (input, "desc"))
*
* There are several parameters associated with a vHost interface:
*
- * - <b>socket <socket-filename></b> - Name of the linux socket used by QEMU/VM and
- * VPP to manage the vHost interface. If socket does not already exist, VPP will
- * create the socket.
+ * - <b>socket <socket-filename></b> - Name of the linux socket used by hypervisor
+ * and VPP to manage the vHost interface. If in '<em>server</em>' mode, VPP will
+ * create the socket if it does not already exist. If in '<em>client</em>' mode,
+ * hypervisor will create the socket if it does not already exist. The VPP code
+ * is indifferent to the file location. However, if SELinux is enabled, then the
+ * socket needs to be created in '<em>/var/run/vpp/</em>'.
*
- * - <b>server</b> - Optional flag to indicate that VPP should be the server for the
- * linux socket. If not provided, VPP will be the client.
+ * - <b>server</b> - Optional flag to indicate that VPP should be the server for
+ * the linux socket. If not provided, VPP will be the client. In '<em>server</em>'
+ * mode, the VM can be reset without tearing down the vHost Interface. In
+ * '<em>client</em>' mode, VPP can be reset without bringing down the VM and
+ * tearing down the vHost Interface.
*
* - <b>feature-mask <hex></b> - Optional virtio/vhost feature set negotiated at
- * startup. By default, all supported features will be advertised. Otherwise,
- * provide the set of features desired.
+ * startup. <b>This is intended for degugging only.</b> It is recommended that this
+ * parameter not be used except by experienced users. By default, all supported
+ * features will be advertised. Otherwise, provide the set of features desired.
* - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF
* - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ
* - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE
* in the name to be specified. If instance already exists, name will be used
* anyway and multiple instances will have the same name. Use with caution.
*
- * - <b>mode [interrupt | polling]</b> - Optional parameter specifying
- * the input thread polling policy.
- *
* @cliexpar
* Example of how to create a vhost interface with VPP as the client and all features enabled:
- * @cliexstart{create vhost-user socket /tmp/vhost1.sock}
+ * @cliexstart{create vhost-user socket /var/run/vpp/vhost1.sock}
* VirtualEthernet0/0/0
* @cliexend
* Example of how to create a vhost interface with VPP as the server and with just
* multiple queues enabled:
- * @cliexstart{create vhost-user socket /tmp/vhost2.sock server feature-mask 0x40400000}
+ * @cliexstart{create vhost-user socket /var/run/vpp/vhost2.sock server feature-mask 0x40400000}
* VirtualEthernet0/0/1
* @cliexend
* Once the vHost interface is created, enable the interface using:
* VHOST_USER_PROTOCOL_F_MQ (0)
* VHOST_USER_PROTOCOL_F_LOG_SHMFD (1)
*
- * socket filename /tmp/vhost1.sock type client errno "Success"
+ * socket filename /var/run/vpp/vhost1.sock type client errno "Success"
*
* rx placement:
* thread 1 on vring 1