ssize_t map_sz = (vui->regions[i].memory_size +
vui->regions[i].mmap_offset +
- page_sz) & ~(page_sz - 1);
+ page_sz - 1) & ~(page_sz - 1);
r =
munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset,
{
//Let's try to assign one queue to each thread
u32 qid = 0;
- u32 cpu_index = 0;
+ u32 thread_index = 0;
vui->use_tx_spinlock = 0;
while (1)
{
if (!rxvq->started || !rxvq->enabled)
continue;
- vui->per_cpu_tx_qid[cpu_index] = qid;
- cpu_index++;
- if (cpu_index == vlib_get_thread_main ()->n_vlib_mains)
+ vui->per_cpu_tx_qid[thread_index] = qid;
+ thread_index++;
+ if (thread_index == vlib_get_thread_main ()->n_vlib_mains)
return;
}
//We need to loop, meaning the spinlock has to be used
vui->use_tx_spinlock = 1;
- if (cpu_index == 0)
+ if (thread_index == 0)
{
//Could not find a single valid one
- for (cpu_index = 0;
- cpu_index < vlib_get_thread_main ()->n_vlib_mains; cpu_index++)
+ for (thread_index = 0;
+ thread_index < vlib_get_thread_main ()->n_vlib_mains;
+ thread_index++)
{
- vui->per_cpu_tx_qid[cpu_index] = 0;
+ vui->per_cpu_tx_qid[thread_index] = 0;
}
return;
}
vhost_user_intf_t *vui;
vhost_cpu_t *vhc;
u32 *workers = 0;
- u32 cpu_index;
+ u32 thread_index;
vlib_main_t *vm;
//Let's list all workers cpu indexes
continue;
i %= vec_len (vui_workers);
- cpu_index = vui_workers[i];
+ thread_index = vui_workers[i];
i++;
- vhc = &vum->cpus[cpu_index];
+ vhc = &vum->cpus[thread_index];
+ txvq->interrupt_thread_index = thread_index;
iaq.qid = qid;
iaq.vhost_iface_index = vui - vum->vhost_user_interfaces;
vhc->operation_mode = mode;
}
- for (cpu_index = vum->input_cpu_first_index;
- cpu_index < vum->input_cpu_first_index + vum->input_cpu_count;
- cpu_index++)
+ for (thread_index = vum->input_cpu_first_index;
+ thread_index < vum->input_cpu_first_index + vum->input_cpu_count;
+ thread_index++)
{
vlib_node_state_t state = VLIB_NODE_STATE_POLLING;
- vhc = &vum->cpus[cpu_index];
- vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+ vhc = &vum->cpus[thread_index];
+ vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
switch (vhc->operation_mode)
{
case VHOST_USER_INTERRUPT_MODE:
{
vhost_user_main_t *vum = &vhost_user_main;
vhost_cpu_t *vhc;
- u32 cpu_index;
- vhost_iface_and_queue_t *vhiq;
+ u32 thread_index;
vlib_main_t *vm;
- u32 ifq2;
- u8 done = 0;
+ u32 ifq2, qid;
+ vhost_user_vring_t *txvq;
+
+ qid = ifq & 0xff;
+ if ((qid % 2) == 0)
+    /* Only care about odd-numbered virtqueues, which are TX */
+ return;
if (vhost_user_intf_ready (vui))
{
- vec_foreach (vhc, vum->cpus)
- {
- if (vhc->operation_mode == VHOST_USER_POLLING_MODE)
- continue;
-
- vec_foreach (vhiq, vhc->rx_queues)
+ txvq = &vui->vrings[qid];
+ thread_index = txvq->interrupt_thread_index;
+ vhc = &vum->cpus[thread_index];
+ if (vhc->operation_mode == VHOST_USER_INTERRUPT_MODE)
{
+ vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
/*
- * Match the interface and the virtqueue number
+ * Convert virtqueue number in the lower byte to vring
+ * queue index for the input node process. Top bytes contain
+ * the interface, lower byte contains the queue index.
*/
- if ((vhiq->vhost_iface_index == (ifq >> 8)) &&
- (VHOST_VRING_IDX_TX (vhiq->qid) == (ifq & 0xff)))
- {
- cpu_index = vhc - vum->cpus;
- vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
- /*
- * Convert RX virtqueue number in the lower byte to vring
- * queue index for the input node process. Top bytes contain
- * the interface, lower byte contains the queue index.
- */
- ifq2 = ((ifq >> 8) << 8) | vhiq->qid;
- vhc->pending_input_bitmap =
- clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1);
- vlib_node_set_interrupt_pending (vm,
- vhost_user_input_node.index);
- done = 1;
- break;
- }
+ ifq2 = ((ifq >> 8) << 8) | qid / 2;
+ vhc->pending_input_bitmap =
+ clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1);
+ vlib_node_set_interrupt_pending (vm, vhost_user_input_node.index);
}
- if (done)
- break;
- }
}
}
n = read (uf->file_descriptor, ((char *) &buff), 8);
DBG_SOCK ("if %d KICK queue %d", uf->private_data >> 8, qid);
-
- vlib_worker_thread_barrier_sync (vlib_get_main ());
if (!vui->vrings[qid].started ||
(vhost_user_intf_ready (vui) != vui->is_up))
{
+ vlib_worker_thread_barrier_sync (vlib_get_main ());
vui->vrings[qid].started = 1;
vhost_user_update_iface_state (vui);
+ vlib_worker_thread_barrier_release (vlib_get_main ());
}
- vlib_worker_thread_barrier_release (vlib_get_main ());
vhost_user_set_interrupt_pending (vui, uf->private_data);
return 0;
/* align size to 2M page */
ssize_t map_sz = (vui->regions[i].memory_size +
vui->regions[i].mmap_offset +
- page_sz) & ~(page_sz - 1);
+ page_sz - 1) & ~(page_sz - 1);
vui->region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE,
MAP_SHARED, fds[i], 0);
/* align size to 2M page */
long page_sz = get_huge_page_size (fd);
ssize_t map_sz =
- (msg.log.size + msg.log.offset + page_sz) & ~(page_sz - 1);
+ (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1);
vui->log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0);
vum->random = random_default_seed ();
- return 0;
-}
-
-VLIB_INIT_FUNCTION (vhost_user_init);
+ mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword));
-static clib_error_t *
-vhost_user_exit (vlib_main_t * vm)
-{
- /* TODO cleanup */
return 0;
}
-VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit);
+VLIB_INIT_FUNCTION (vhost_user_init);
static u8 *
format_vhost_trace (u8 * s, va_list * va)
u32 n_trace = vlib_get_trace_count (vm, node);
u16 qsz_mask;
u32 map_hint = 0;
- u16 cpu_index = os_get_cpu_number ();
+ u16 thread_index = vlib_get_thread_index ();
u16 copy_len = 0;
{
* in the loop and come back later. This is not an issue as for big packet,
* processing cost really comes from the memory copy.
*/
- if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len < n_left + 1))
+ if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1))
{
- u32 curr_len = vum->cpus[cpu_index].rx_buffers_len;
- vum->cpus[cpu_index].rx_buffers_len +=
+ u32 curr_len = vum->cpus[thread_index].rx_buffers_len;
+ vum->cpus[thread_index].rx_buffers_len +=
vlib_buffer_alloc_from_free_list (vm,
- vum->cpus[cpu_index].rx_buffers +
+ vum->cpus[thread_index].rx_buffers +
curr_len,
VHOST_USER_RX_BUFFERS_N - curr_len,
VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
if (PREDICT_FALSE
- (vum->cpus[cpu_index].rx_buffers_len <
+ (vum->cpus[thread_index].rx_buffers_len <
VHOST_USER_RX_BUFFER_STARVATION))
{
/* In case of buffer starvation, discard some packets from the queue
* and log the event.
* We keep doing best effort for the remaining packets. */
- u32 flush = (n_left + 1 > vum->cpus[cpu_index].rx_buffers_len) ?
- n_left + 1 - vum->cpus[cpu_index].rx_buffers_len : 1;
+ u32 flush = (n_left + 1 > vum->cpus[thread_index].rx_buffers_len) ?
+ n_left + 1 - vum->cpus[thread_index].rx_buffers_len : 1;
flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);
n_left -= flush;
vlib_increment_simple_counter (vnet_main.
interface_main.sw_if_counters +
VNET_INTERFACE_COUNTER_DROP,
- os_get_cpu_number (),
+ vlib_get_thread_index (),
vui->sw_if_index, flush);
vlib_error_count (vm, vhost_user_input_node.index,
u32 desc_data_offset;
vring_desc_t *desc_table = txvq->desc;
- if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len <= 1))
+ if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len <= 1))
{
/* Not enough rx_buffers
* Note: We yeld on 1 so we don't need to do an additional
}
desc_current = txvq->avail->ring[txvq->last_avail_idx & qsz_mask];
- vum->cpus[cpu_index].rx_buffers_len--;
- bi_current = (vum->cpus[cpu_index].rx_buffers)
- [vum->cpus[cpu_index].rx_buffers_len];
+ vum->cpus[thread_index].rx_buffers_len--;
+ bi_current = (vum->cpus[thread_index].rx_buffers)
+ [vum->cpus[thread_index].rx_buffers_len];
b_head = b_current = vlib_get_buffer (vm, bi_current);
to_next[0] = bi_current; //We do that now so we can forget about bi_current
to_next++;
n_left_to_next--;
vlib_prefetch_buffer_with_index (vm,
- (vum->cpus[cpu_index].rx_buffers)
- [vum->cpus[cpu_index].
+ (vum->
+ cpus[thread_index].rx_buffers)
+ [vum->cpus[thread_index].
rx_buffers_len - 1], LOAD);
/* Just preset the used descriptor id and length for later */
(b_current->current_length == VLIB_BUFFER_DATA_SIZE))
{
if (PREDICT_FALSE
- (vum->cpus[cpu_index].rx_buffers_len == 0))
+ (vum->cpus[thread_index].rx_buffers_len == 0))
{
/* Cancel speculation */
to_next--;
* but valid.
*/
vhost_user_input_rewind_buffers (vm,
- &vum->cpus[cpu_index],
+ &vum->cpus
+ [thread_index],
b_head);
n_left = 0;
goto stop;
}
/* Get next output */
- vum->cpus[cpu_index].rx_buffers_len--;
+ vum->cpus[thread_index].rx_buffers_len--;
u32 bi_next =
- (vum->cpus[cpu_index].rx_buffers)[vum->cpus
- [cpu_index].rx_buffers_len];
+ (vum->cpus[thread_index].rx_buffers)[vum->cpus
+ [thread_index].rx_buffers_len];
b_current->next_buffer = bi_next;
b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
bi_current = bi_next;
}
/* Prepare a copy order executed later for the data */
- vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len];
+ vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
copy_len++;
u32 desc_data_l =
desc_table[desc_current].len - desc_data_offset;
if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
{
if (PREDICT_FALSE
- (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy,
+ (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
copy_len, &map_hint)))
{
clib_warning
/* Do the memory copies */
if (PREDICT_FALSE
- (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy,
+ (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
copy_len, &map_hint)))
{
clib_warning ("Memory mapping error on interface hw_if_index=%d "
vlib_increment_combined_counter
(vnet_main.interface_main.combined_sw_if_counters
+ VNET_INTERFACE_COUNTER_RX,
- os_get_cpu_number (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
+ vlib_get_thread_index (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
- vnet_device_increment_rx_packets (cpu_index, n_rx_packets);
+ vnet_device_increment_rx_packets (thread_index, n_rx_packets);
return n_rx_packets;
}
{
vhost_user_main_t *vum = &vhost_user_main;
uword n_rx_packets = 0;
- u32 cpu_index = os_get_cpu_number ();
+ u32 thread_index = vlib_get_thread_index ();
vhost_iface_and_queue_t *vhiq;
vhost_user_intf_t *vui;
vhost_cpu_t *vhc;
- vhc = &vum->cpus[cpu_index];
+ vhc = &vum->cpus[thread_index];
if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE))
{
- vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
+ vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
{
vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node);
vhost_user_vring_t *rxvq;
u16 qsz_mask;
u8 error;
- u32 cpu_index = os_get_cpu_number ();
+ u32 thread_index = vlib_get_thread_index ();
u32 map_hint = 0;
u8 retry = 8;
u16 copy_len;
qid =
VHOST_VRING_IDX_RX (*vec_elt_at_index
- (vui->per_cpu_tx_qid, os_get_cpu_number ()));
+ (vui->per_cpu_tx_qid, vlib_get_thread_index ()));
rxvq = &vui->vrings[qid];
if (PREDICT_FALSE (vui->use_tx_spinlock))
vhost_user_vring_lock (vui, qid);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- vum->cpus[cpu_index].current_trace =
+ vum->cpus[thread_index].current_trace =
vlib_add_trace (vm, node, b0,
- sizeof (*vum->cpus[cpu_index].current_trace));
- vhost_user_tx_trace (vum->cpus[cpu_index].current_trace,
+ sizeof (*vum->cpus[thread_index].current_trace));
+ vhost_user_tx_trace (vum->cpus[thread_index].current_trace,
vui, qid / 2, b0, rxvq);
}
{
// Get a header from the header array
virtio_net_hdr_mrg_rxbuf_t *hdr =
- &vum->cpus[cpu_index].tx_headers[tx_headers_len];
+ &vum->cpus[thread_index].tx_headers[tx_headers_len];
tx_headers_len++;
hdr->hdr.flags = 0;
hdr->hdr.gso_type = 0;
hdr->num_buffers = 1; //This is local, no need to check
// Prepare a copy order executed later for the header
- vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len];
+ vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
copy_len++;
cpy->len = vui->virtio_net_hdr_sz;
cpy->dst = buffer_map_addr;
else if (vui->virtio_net_hdr_sz == 12) //MRG is available
{
virtio_net_hdr_mrg_rxbuf_t *hdr =
- &vum->cpus[cpu_index].tx_headers[tx_headers_len - 1];
+ &vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
//Move from available to used buffer
rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id =
}
{
- vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len];
+ vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
copy_len++;
cpy->len = bytes_left;
cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- vum->cpus[cpu_index].current_trace->hdr =
- vum->cpus[cpu_index].tx_headers[tx_headers_len - 1];
+ vum->cpus[thread_index].current_trace->hdr =
+ vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
}
n_left--; //At the end for error counting when 'goto done' is invoked
done:
//Do the memory copies
if (PREDICT_FALSE
- (vhost_user_tx_copy (vui, vum->cpus[cpu_index].copy,
+ (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy,
copy_len, &map_hint)))
{
clib_warning ("Memory mapping error on interface hw_if_index=%d "
vlib_increment_simple_counter
(vnet_main.interface_main.sw_if_counters
+ VNET_INTERFACE_COUNTER_DROP,
- os_get_cpu_number (), vui->sw_if_index, n_left);
+ vlib_get_thread_index (), vui->sw_if_index, n_left);
}
vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
f64 timeout = 3153600000.0 /* 100 years */ ;
uword *event_data = 0;
- sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
+ sockfd = -1;
sun.sun_family = AF_UNIX;
template.read_function = vhost_user_socket_read;
template.error_function = vhost_user_socket_error;
- if (sockfd < 0)
- return 0;
-
while (1)
{
vlib_process_wait_for_event_or_clock (vm, timeout);
if (vui->unix_server_index == ~0) { //Nothing to do for server sockets
if (vui->unix_file_index == ~0)
{
+ if ((sockfd < 0) &&
+ ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0))
+ {
+		  /*
+		   * First-time error, or a different errno than last
+		   * recorded for this interface: log the message and
+		   * record the errno.
+		   */
+ if (!vui->sock_errno || (vui->sock_errno != errno))
+ {
+ clib_unix_warning
+ ("Error: Could not open unix socket for %s",
+ vui->sock_filename);
+ vui->sock_errno = errno;
+ }
+ continue;
+ }
+
/* try to connect */
strncpy (sun.sun_path, (char *) vui->sock_filename,
sizeof (sun.sun_path) - 1);
vui - vhost_user_main.vhost_user_interfaces;
vui->unix_file_index = unix_file_add (&unix_main, &template);
- //Re-open for next connect
- if ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) {
- clib_warning("Critical: Could not open unix socket");
- return 0;
- }
+ /* This sockfd is considered consumed */
+ sockfd = -1;
}
else
{
vhost_user_term_if (vhost_user_intf_t * vui)
{
int q;
+ vhost_user_main_t *vum = &vhost_user_main;
// Delete configured thread pinning
vec_reset_length (vui->workers);
vui->unix_server_index);
unix_file_del (&unix_main, uf);
vui->unix_server_index = ~0;
+ unlink (vui->sock_filename);
}
+
+ mhash_unset (&vum->if_index_by_sock_name, vui->sock_filename,
+ &vui->if_index);
}
int
return rv;
}
+/*
+ * Main-loop exit hook: walk the pool of vhost-user interfaces and delete
+ * each one, so per-interface resources are released on shutdown.
+ * vhost_user_delete_if is called inside a worker-thread barrier since it
+ * modifies state shared with the worker threads.
+ */
+static clib_error_t *
+vhost_user_exit (vlib_main_t * vm)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vhost_user_main_t *vum = &vhost_user_main;
+  vhost_user_intf_t *vui;
+
+  vlib_worker_thread_barrier_sync (vlib_get_main ());
+  /* *INDENT-OFF* */
+  pool_foreach (vui, vum->vhost_user_interfaces, {
+      vhost_user_delete_if (vnm, vm, vui->sw_if_index);
+  });
+  /* *INDENT-ON* */
+  vlib_worker_thread_barrier_release (vlib_get_main ());
+  return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit);
+
/**
* Open server unix socket on specified sock_filename.
*/
vnet_sw_interface_t *sw;
sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
int q;
+ vhost_user_main_t *vum = &vhost_user_main;
if (server_sock_fd != -1)
{
unix_file_t template = { 0 };
template.read_function = vhost_user_socksvr_accept_ready;
template.file_descriptor = server_sock_fd;
- template.private_data = vui - vhost_user_main.vhost_user_interfaces; //hw index
+ template.private_data = vui - vum->vhost_user_interfaces; //hw index
vui->unix_server_index = unix_file_add (&unix_main, &template);
}
else
vui->unix_file_index = ~0;
vui->log_base_addr = 0;
vui->operation_mode = operation_mode;
+ vui->if_index = vui - vum->vhost_user_interfaces;
+ mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename,
+ &vui->if_index, 0);
for (q = 0; q < VHOST_VRING_MAX_N; q++)
vhost_user_vring_init (vui, q);
case ~0:
vec_foreach (vhc, vum->cpus)
{
- u32 cpu_index = vhc - vum->cpus;
+ u32 thread_index = vhc - vum->cpus;
f64 next_timeout;
next_timeout = timeout;
- vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
+ vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
{
vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
vhost_user_vring_t *rxvq =
clib_warning ("BUG: unhandled event type %d", event_type);
break;
}
+ /* No less than 1 millisecond */
+ if (timeout < 1e-3)
+ timeout = 1e-3;
}
return 0;
}
int rv = 0;
int server_sock_fd = -1;
vhost_user_main_t *vum = &vhost_user_main;
+ uword *if_index;
if ((operation_mode != VHOST_USER_POLLING_MODE) &&
(operation_mode != VHOST_USER_INTERRUPT_MODE))
return VNET_API_ERROR_INVALID_ARGUMENT;
}
+ if_index = mhash_get (&vum->if_index_by_sock_name, (void *) sock_filename);
+ if (if_index)
+ {
+ if (sw_if_index)
+ {
+ vui = &vum->vhost_user_interfaces[*if_index];
+ *sw_if_index = vui->sw_if_index;
+ }
+ return VNET_API_ERROR_IF_ALREADY_EXISTS;
+ }
+
if (is_server)
{
if ((rv =
int server_sock_fd = -1;
int rv = 0;
vnet_hw_interface_t *hwif;
+ uword *if_index;
if ((operation_mode != VHOST_USER_POLLING_MODE) &&
(operation_mode != VHOST_USER_INTERRUPT_MODE))
hwif->dev_class_index != vhost_user_dev_class.index)
return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ if (sock_filename == NULL || !(strlen (sock_filename) > 0))
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+
vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
+ /*
+ * Disallow changing the interface to have the same path name
+ * as other interface
+ */
+ if_index = mhash_get (&vum->if_index_by_sock_name, (void *) sock_filename);
+ if (if_index && (*if_index != vui->if_index))
+ return VNET_API_ERROR_IF_ALREADY_EXISTS;
+
// First try to open server socket
if (is_server)
if ((rv = vhost_user_init_server_sock (sock_filename,