}
-static inline void * map_guest_mem(vhost_user_intf_t * vui, u64 addr)
+static inline void * map_guest_mem(vhost_user_intf_t * vui, uword addr)
{
int i;
for (i=0; i<vui->nregions; i++) {
return 0;
}
-static inline void * map_user_mem(vhost_user_intf_t * vui, u64 addr)
+static inline void * map_user_mem(vhost_user_intf_t * vui, uword addr)
{
int i;
for (i=0; i<vui->nregions; i++) {
vui->vrings[q].desc = NULL;
vui->vrings[q].avail = NULL;
vui->vrings[q].used = NULL;
+ vui->vrings[q].log_guest_addr = 0;
+ vui->vrings[q].log_used = 0;
}
unmap_all_mem_regions(vui);
DBG_SOCK("interface ifindex %d disconnected", vui->sw_if_index);
}
+/* Dirty-log granularity: each bit in the shared log bitmap covers
+ * VHOST_LOG_PAGE bytes of guest physical memory (4 KiB, per the
+ * vhost-user dirty-page logging scheme). */
+#define VHOST_LOG_PAGE 0x1000
+/* Mark the guest-physical range [addr, addr+len) dirty in the shared
+ * vhost-user log bitmap so the front-end can track pages written by
+ * this back-end (used during live migration).
+ *
+ * No-op unless a log region has been mapped (vui->log_base_addr != 0)
+ * and the VHOST_F_LOG_ALL feature was negotiated — the common case,
+ * hence PREDICT_TRUE on the early return.
+ *
+ * NOTE(review): vui->log_size is compared against a byte offset into
+ * the bitmap here, so it is presumably the log size in bytes — confirm
+ * against VHOST_USER_SET_LOG_BASE handling. A len of 0 is not special-
+ * cased; (addr + len - 1) would wrap for addr == 0, len == 0.
+ */
+always_inline void vhost_user_log_dirty_pages(vhost_user_intf_t * vui,
+ u64 addr, u64 len)
+{
+ if (PREDICT_TRUE(vui->log_base_addr == 0
+ || !(vui->features & (1 << FEAT_VHOST_F_LOG_ALL)))) {
+ return;
+ }
+ /* Drop (rather than write out of bounds) ranges whose last page's
+ * bitmap byte would fall beyond the mapped log region. */
+ if (PREDICT_FALSE((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size)) {
+ DBG_SOCK("vhost_user_log_dirty_pages(): out of range\n");
+ return;
+ }
+
+ /* Ensure the data stores being logged are visible before the
+ * corresponding dirty bits are published to the front-end. */
+ CLIB_MEMORY_BARRIER();
+ /* Set one bit per touched page: bit (page % 8) of byte (page / 8). */
+ u64 page = addr / VHOST_LOG_PAGE;
+ while (page * VHOST_LOG_PAGE < addr + len) {
+ ((u8*)vui->log_base_addr)[page / 8] |= 1 << page % 8;
+ page++;
+ }
+}
+
+/* Log as dirty the guest pages backing one member of a vring's used
+ * ring (e.g. `idx` or `ring[i]`). Only done when the front-end asked
+ * for used-ring logging on this vring via VHOST_VRING_F_LOG (recorded
+ * in vq->log_used), so the fast path is a single predicted-false test.
+ * vq->log_guest_addr is the guest-physical base of the used ring;
+ * offsetof locates the member within vring_used_t. */
+#define vhost_user_log_dirty_ring(vui, vq, member) \
+  if (PREDICT_FALSE(vq->log_used)) { \
+    vhost_user_log_dirty_pages(vui, vq->log_guest_addr + offsetof(vring_used_t, member), \
+                             sizeof(vq->used->member)); \
+  }
+
+
static clib_error_t * vhost_user_socket_read (unix_file_t * uf)
{
int n, i;
(cmsg->cmsg_type == SCM_RIGHTS) &&
(cmsg->cmsg_len - CMSG_LEN(0) <= VHOST_MEMORY_MAX_NREGIONS * sizeof(int))) {
number_of_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
- memcpy(fds, CMSG_DATA(cmsg), number_of_fds * sizeof(int));
+ clib_memcpy(fds, CMSG_DATA(cmsg), number_of_fds * sizeof(int));
}
/* version 1, no reply bit set*/
msg.flags |= 4;
msg.u64 = (1 << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
- (1 << FEAT_VIRTIO_F_ANY_LAYOUT);
+ (1 << FEAT_VIRTIO_F_ANY_LAYOUT) |
+ (1 << FEAT_VHOST_F_LOG_ALL) |
+ (1 << FEAT_VIRTIO_NET_F_GUEST_ANNOUNCE) |
+ (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES);
msg.u64 &= vui->feature_mask;
msg.size = sizeof(msg.u64);
vui->hw_if_index, msg.u64);
vui->features = msg.u64;
+
if (vui->features & (1 << FEAT_VIRTIO_NET_F_MRG_RXBUF))
vui->virtio_net_hdr_sz = 12;
else
vui->vrings[q].desc = 0;
vui->vrings[q].avail = 0;
vui->vrings[q].used = 0;
+ vui->vrings[q].log_guest_addr = 0;
+ vui->vrings[q].log_used = 0;
}
DBG_SOCK("interface %d disconnected", vui->sw_if_index);
}
unmap_all_mem_regions(vui);
for(i=0; i < msg.memory.nregions; i++) {
- memcpy(&(vui->regions[i]), &msg.memory.regions[i],
+ clib_memcpy(&(vui->regions[i]), &msg.memory.regions[i],
sizeof(vhost_user_memory_region_t));
long page_sz = get_huge_page_size(fds[i]);
goto close_socket;
}
+ vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr;
+ vui->vrings[msg.state.index].log_used =
+ (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0;
+
+ /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated,
+ the ring is initialized in an enabled state. */
+
+ if (!(vui->features & (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES))) {
+ vui->vrings[msg.state.index].enabled = 1;
+ }
+
vui->vrings[msg.state.index].last_used_idx =
vui->vrings[msg.state.index].used->idx;
DBG_SOCK("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d",
vui->hw_if_index, msg.state.index, msg.state.num);
- msg.state.num = vui->vrings[msg.state.index].last_used_idx;
+ /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. */
+ vui->vrings[msg.state.index].enabled = 0;
+
+ msg.state.num = vui->vrings[msg.state.index].last_avail_idx;
msg.flags |= 4;
msg.size = sizeof(msg.state);
break;
break;
case VHOST_USER_SET_LOG_BASE:
+ {
DBG_SOCK("if %d msg VHOST_USER_SET_LOG_BASE",
vui->hw_if_index);
+ if (msg.size != sizeof(msg.log)) {
+ DBG_SOCK("invalid msg size for VHOST_USER_SET_LOG_BASE: %d instead of %d",
+ msg.size, sizeof(msg.log));
+ goto close_socket;
+ }
+
+ if (!(vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD))) {
+ DBG_SOCK("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received");
+ goto close_socket;
+ }
+
+ fd = fds[0];
+ /* align size to 2M page */
+ long page_sz = get_huge_page_size(fd);
+ ssize_t map_sz = (msg.log.size + msg.log.offset + page_sz) & ~(page_sz - 1);
+
+ vui->log_base_addr = mmap(0, map_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+
+ DBG_SOCK("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped 0x%lx",
+ map_sz, msg.log.offset, fd, vui->log_base_addr);
+
+ if (vui->log_base_addr == MAP_FAILED) {
+ clib_warning("failed to map memory. errno is %d", errno);
+ goto close_socket;
+ }
+
+ vui->log_base_addr += msg.log.offset;
+ vui->log_size = msg.log.size;
+
+ msg.flags |= 4;
+ msg.size = sizeof(msg.u64);
+
break;
+ }
case VHOST_USER_SET_LOG_FD:
DBG_SOCK("if %d msg VHOST_USER_SET_LOG_FD",
break;
+ case VHOST_USER_GET_PROTOCOL_FEATURES:
+ DBG_SOCK("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES", vui->hw_if_index);
+
+ msg.flags |= 4;
+ msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+ msg.size = sizeof(msg.u64);
+ break;
+
+ case VHOST_USER_SET_PROTOCOL_FEATURES:
+ DBG_SOCK("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%lx",
+ vui->hw_if_index, msg.u64);
+
+ vui->protocol_features = msg.u64;
+
+ break;
+
+ case VHOST_USER_SET_VRING_ENABLE:
+ DBG_SOCK("if %d VHOST_USER_SET_VRING_ENABLE, enable: %d",
+ vui->hw_if_index, msg.state.num);
+ vui->vrings[msg.state.index].enabled = msg.state.num;
+ break;
+
default:
DBG_SOCK("unknown vhost-user message %d received. closing socket",
msg.request);
t0->virtqueue = virtqueue;
t0->device_index = vui - vum->vhost_user_interfaces;
#if VHOST_USER_COPY_TX_HDR == 1
- rte_memcpy(&t0->hdr, b0->pre_data, sizeof(virtio_net_hdr_t));
+ clib_memcpy(&t0->hdr, b0->pre_data, sizeof(virtio_net_hdr_t));
#endif
b+=1;
vq->int_deadline = vlib_time_now(vm) + vum->coalesce_time;
}
+
static u32 vhost_user_if_input ( vlib_main_t * vm,
vhost_user_main_t * vum,
vhost_user_intf_t * vui,
vec_reset_length (vui->d_trace_buffers);
/* no descriptor ptr - bail out */
- if (PREDICT_FALSE(!txvq->desc || !txvq->avail))
+ if (PREDICT_FALSE(!txvq->desc || !txvq->avail || !txvq->enabled))
return 0;
/* do we have pending intterupts ? */
txvq->last_avail_idx = txvq->last_used_idx = txvq->avail->idx;
CLIB_MEMORY_BARRIER();
txvq->used->idx = txvq->last_used_idx;
+ vhost_user_log_dirty_ring(vui, txvq, idx);
vhost_user_send_call(vm, txvq);
return 0;
}
txvq->last_avail_idx++;
txvq->used->ring[txvq->last_used_idx & qsz_mask].id = desc_chain_head;
txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0;
+ vhost_user_log_dirty_ring(vui, txvq, ring[txvq->last_used_idx & qsz_mask]);
txvq->last_used_idx++;
flush--;
}
#if VHOST_USER_COPY_TX_HDR == 1
if (PREDICT_TRUE(offset))
- rte_memcpy(b->pre_data, buffer_addr, sizeof(virtio_net_hdr_t)); /* 12 byte hdr is not used on tx */
+ clib_memcpy(b->pre_data, buffer_addr, sizeof(virtio_net_hdr_t)); /* 12 byte hdr is not used on tx */
#endif
if (txvq->desc[desc_current].len > offset) {
txvq->last_avail_idx++;
txvq->used->ring[txvq->last_used_idx & qsz_mask].id = desc_chain_head;
txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0;
+ vhost_user_log_dirty_ring(vui, txvq, ring[txvq->last_used_idx & qsz_mask]);
txvq->last_used_idx++;
if(PREDICT_FALSE(b_head->current_length < 14 &&
error = VHOST_USER_INPUT_FUNC_ERROR_UNDERSIZED_FRAME;
}
- VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b_head);
vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32)~0;
/* give buffers back to driver */
CLIB_MEMORY_BARRIER();
txvq->used->idx = txvq->last_used_idx;
+ vhost_user_log_dirty_ring(vui, txvq, idx);
if (PREDICT_FALSE (vec_len (vui->d_trace_buffers) > 0))
{
},
};
+VLIB_NODE_FUNCTION_MULTIARCH (vhost_user_input_node, vhost_user_input)
+
static uword
vhost_user_intfc_tx (vlib_main_t * vm,
vlib_node_runtime_t * node,
if (PREDICT_FALSE(!vui->is_up))
goto done2;
- if (PREDICT_FALSE(!rxvq->desc || !rxvq->avail || vui->sock_errno != 0)) {
+ if (PREDICT_FALSE(!rxvq->desc || !rxvq->avail || vui->sock_errno != 0 || !rxvq->enabled)) {
error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
goto done2;
}
error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
goto done;
}
- CLIB_PREFETCH(buffer_addr, clib_min(rxvq->desc[desc_current].len, 500), STORE);
+ CLIB_PREFETCH(buffer_addr, clib_min(rxvq->desc[desc_current].len,
+ 4*CLIB_CACHE_LINE_BYTES), STORE);
virtio_net_hdr_mrg_rxbuf_t * hdr = (virtio_net_hdr_mrg_rxbuf_t *) buffer_addr;
hdr->hdr.flags = 0;
hdr->hdr.gso_type = 0;
+ vhost_user_log_dirty_pages(vui, rxvq->desc[desc_current].addr, vui->virtio_net_hdr_sz);
+
if (vui->virtio_net_hdr_sz == 12)
hdr->num_buffers = 1;
//Move from available to used buffer
rxvq->used->ring[used_index & qsz_mask].id = desc_chain_head;
rxvq->used->ring[used_index & qsz_mask].len = desc_len;
+ vhost_user_log_dirty_ring(vui, rxvq, ring[used_index & qsz_mask]);
rxvq->last_avail_idx++;
used_index++;
hdr->num_buffers++;
}
u16 bytes_to_copy = bytes_left > (rxvq->desc[desc_current].len - offset) ? (rxvq->desc[desc_current].len - offset) : bytes_left;
- rte_memcpy(buffer_addr, vlib_buffer_get_current (current_b0) + current_b0->current_length - bytes_left, bytes_to_copy);
+ clib_memcpy(buffer_addr, vlib_buffer_get_current (current_b0) + current_b0->current_length - bytes_left, bytes_to_copy);
+ vhost_user_log_dirty_pages(vui, rxvq->desc[desc_current].addr + offset, bytes_to_copy);
bytes_left -= bytes_to_copy;
offset += bytes_to_copy;
buffer_addr += bytes_to_copy;
//Move from available to used ring
rxvq->used->ring[used_index & qsz_mask].id = desc_chain_head;
rxvq->used->ring[used_index & qsz_mask].len = desc_len;
+ vhost_user_log_dirty_ring(vui, rxvq, ring[used_index & qsz_mask]);
+
rxvq->last_avail_idx++;
used_index++;
}
done:
CLIB_MEMORY_BARRIER();
rxvq->used->idx = used_index;
+ vhost_user_log_dirty_ring(vui, rxvq, idx);
/* interrupt (call) handling */
if((rxvq->callfd > 0) && !(rxvq->avail->flags & 1)) {
.no_flatten_output_chains = 1,
};
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (vhost_user_dev_class,
+ vhost_user_intfc_tx)
+
static uword
vhost_user_process (vlib_main_t * vm,
vlib_node_runtime_t * rt,
// init server socket on specified sock_filename
static int vhost_user_init_server_sock(const char * sock_filename, int *sockfd)
{
- int rv = 0, len;
- struct sockaddr_un un;
+ int rv = 0;
+ struct sockaddr_un un = {};
int fd;
/* create listening socket */
fd = socket(AF_UNIX, SOCK_STREAM, 0);
/* remove if exists */
unlink( (char *) sock_filename);
- len = strlen((char *) un.sun_path) + strlen((char *) sock_filename);
-
- if (bind(fd, (struct sockaddr *) &un, len) == -1) {
+ if (bind(fd, (struct sockaddr *) &un, sizeof(un)) == -1) {
rv = VNET_API_ERROR_SYSCALL_ERROR_2;
goto error;
}
/* create hw and sw interface */
if (hwaddress) {
- memcpy(hwaddr, hwaddress, 6);
+ clib_memcpy(hwaddr, hwaddress, 6);
} else {
f64 now = vlib_time_now(vm);
u32 rnd;
rnd = (u32) (now * 1e6);
rnd = random_u32 (&rnd);
- memcpy (hwaddr+2, &rnd, sizeof(rnd));
+ clib_memcpy (hwaddr+2, &rnd, sizeof(rnd));
hwaddr[0] = 2;
hwaddr[1] = 0xfe;
}
vnet_sw_interface_t * sw;
sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
vlib_thread_main_t * tm = vlib_get_thread_main();
+ int q;
vui->unix_fd = sockfd;
vui->sw_if_index = sw->sw_if_index;
vui->feature_mask = feature_mask;
vui->active = 1;
vui->unix_file_index = ~0;
+ vui->log_base_addr = 0;
+
+ for (q = 0; q < 2; q++) {
+ vui->vrings[q].enabled = 0;
+ }
vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
if (tm->n_vlib_mains == 1)
vlib_node_set_state (vm, vhost_user_input_node.index,
VLIB_NODE_STATE_POLLING);
- else if (!dm->have_io_threads)
+ else
vlib_node_set_state (vlib_mains[cpu_index], vhost_user_input_node.index,
VLIB_NODE_STATE_POLLING);
u8 *hwaddr)
{
vhost_user_intf_t * vui = NULL;
- dpdk_main_t * dm = &dpdk_main;
- vlib_thread_main_t * tm = vlib_get_thread_main();
u32 sw_if_idx = ~0;
int sockfd = -1;
int rv = 0;
- if (tm->n_vlib_mains > 1 && dm->have_io_threads)
- {
- clib_warning("vhost-user interfaces are not supported with multiple io threads");
- return -1;
- }
-
if (is_server) {
if ((rv = vhost_user_init_server_sock (sock_filename, &sockfd)) != 0) {
return rv;
vnet_main_t *vnm = vnet_get_main();
- vhost_user_create_if(vnm, vm, (char *)sock_filename,
+ int rv;
+ if ((rv = vhost_user_create_if(vnm, vm, (char *)sock_filename,
is_server, &sw_if_index, feature_mask,
- renumber, custom_dev_instance, hw);
+ renumber, custom_dev_instance, hw))) {
+ vec_free(sock_filename);
+ return clib_error_return (0, "vhost_user_create_if returned %d", rv);
+ }
vec_free(sock_filename);
-
+ vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index);
return 0;
}
vui->regions[j].memory_size,
vui->regions[j].userspace_addr,
vui->regions[j].mmap_offset,
- (u64) vui->region_mmap_addr[j]);
+ pointer_to_uword( vui->region_mmap_addr[j]) );
}
for (q = 0; q < vui->num_vrings; q++) {
vlib_cli_output(vm, "\n Virtqueue %d\n", q);
vui->vrings[q].desc[j].len,
vui->vrings[q].desc[j].flags,
vui->vrings[q].desc[j].next,
- (u64) map_guest_mem(vui, vui->vrings[q].desc[j].addr));}
+ pointer_to_uword(map_guest_mem(vui, vui->vrings[q].desc[j].addr)));}
}
}
vlib_cli_output (vm, "\n");