[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
[VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
[VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
[VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
[VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
[VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
-#endif
};
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
static int dpdk_vhost_user_set_vring_enable(u32 hw_if_index,
u8 idx, int enable);
-#endif
/*
* DPDK vhost-user functions
*/
-static uint64_t
-qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
+static uword
+qva_to_vva(struct virtio_net *dev, uword qemu_va)
{
struct virtio_memory_regions *region;
- uint64_t vhost_va = 0;
+ uword vhost_va = 0;
uint32_t regionidx = 0;
/* Find the region where the address lives. */
dpdk_device_t *xd =
dpdk_vhost_user_device_from_hw_if_index(hw_if_index);
assert(xd);
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
xd->vu_vhost_dev.virtqueue[idx]->enabled = 0;
-#else
- xd->vu_is_running = 0;
-#endif
}
/*
 * Take a vhost-user device out of service: clear the enabled flag on
 * every virtqueue of the device and mark the device as not running.
 */
static void disable_interface(dpdk_device_t * xd)
{
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
u8 idx;
/* Total number of rings: virt_qp_nb multiplied by VIRTIO_QNUM. */
int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
for (idx = 0; idx < numqs; idx++)
xd->vu_vhost_dev.virtqueue[idx]->enabled = 0;
-#endif
/* Device-level flag, cleared after all per-queue flags. */
xd->vu_is_running = 0;
}
-static inline void * map_guest_mem(dpdk_device_t * xd, u64 addr)
+static inline void * map_guest_mem(dpdk_device_t * xd, uword addr)
{
dpdk_vu_intf_t * vui = xd->vu_intf;
struct virtio_memory * mem = xd->vu_vhost_dev.mem;
for (i=0; i<mem->nregions; i++) {
if ((mem->regions[i].guest_phys_address <= addr) &&
((mem->regions[i].guest_phys_address + mem->regions[i].memory_size) > addr)) {
- return (void *) (vui->region_addr[i] + addr - mem->regions[i].guest_phys_address);
+ return (void *) ((uword)vui->region_addr[i] + addr - (uword)mem->regions[i].guest_phys_address);
}
}
DBG_SOCK("failed to map guest mem addr %lx", addr);
int num_qpairs = 1;
dpdk_vu_intf_t *vui = NULL;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
num_qpairs = dm->use_rss < 1 ? 1 : tm->n_vlib_mains;
-#endif
dpdk_device_t * xd = NULL;
u8 addr[6];
xd->dev_type = VNET_DPDK_DEV_VHOST_USER;
xd->rx_q_used = num_qpairs;
xd->tx_q_used = num_qpairs;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
xd->vu_vhost_dev.virt_qp_nb = num_qpairs;
-#endif
vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, CLIB_CACHE_LINE_BYTES);
if (tm->n_vlib_mains == 1 && dpdk_input_node.state != VLIB_NODE_STATE_POLLING)
vlib_node_set_state (vm, dpdk_input_node.index, VLIB_NODE_STATE_POLLING);
- if (tm->n_vlib_mains > 1 && tm->main_thread_is_io_node)
- vlib_node_set_state (vm, dpdk_io_input_node.index, VLIB_NODE_STATE_POLLING);
-
- if (tm->n_vlib_mains > 1 && !tm->main_thread_is_io_node)
+ if (tm->n_vlib_mains > 1)
vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index,
VLIB_NODE_STATE_POLLING);
next_cpu++;
return 0;
}
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
+#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
+/*
+ * Return the block size (f_bsize) that fstatfs() reports for the
+ * filesystem backing fd. For a descriptor on hugetlbfs this is the
+ * huge page size.
+ *
+ * Returns 0 if fstatfs() fails, so that callers never consume an
+ * uninitialized value. With a result of 0 the callers' length
+ * computation "(size + page_sz) & ~(page_sz - 1)" evaluates to 0,
+ * which mmap()/munmap() reject with EINVAL.
+ */
+static long get_huge_page_size(int fd)
+{
+  struct statfs s;
+
+  if (fstatfs(fd, &s) == -1) {
+    clib_unix_warning("fstatfs failed on fd %d", fd);
+    return 0;
+  }
+
+  return s.f_bsize;
+}
+#endif
+
static clib_error_t *
dpdk_vhost_user_set_protocol_features(u32 hw_if_index, u64 prot_features)
{
xd->vu_vhost_dev.protocol_features = prot_features;
return 0;
}
-#endif
/*
 * Report the virtio feature bits offered on a vhost-user interface.
 *
 * The value starts from rte_vhost_feature_get(); on DPDK >= 16.04 the
 * checksum and TSO offload bits are masked out before it is stored.
 *
 * hw_if_index: not used by the body.
 * features:    output, receives the resulting feature bit mask.
 * Returns 0.
 */
static clib_error_t *
dpdk_vhost_user_get_features(u32 hw_if_index, u64 * features)
{
*features = rte_vhost_feature_get();
+#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
+#define OFFLOAD_FEATURES ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \
+ (1ULL << VIRTIO_NET_F_HOST_TSO6) | \
+ (1ULL << VIRTIO_NET_F_CSUM) | \
+ (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
+ (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+ (1ULL << VIRTIO_NET_F_GUEST_TSO6))
+
+ /* These are not supported: bridging/tunneling VHOST
+ * interfaces with hardware interfaces/drivers that do
+ * not support offloading breaks L4 traffic.
+ */
+ *features &= (~OFFLOAD_FEATURES);
+#endif
+
DBG_SOCK("supported features: 0x%lx", *features);
return 0;
}
int numqs = VIRTIO_QNUM;
u8 idx;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
int prot_feature = features &
(1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
-#endif
for (idx = 0; idx < numqs; idx++) {
xd->vu_vhost_dev.virtqueue[idx]->vhost_hlen = hdr_len;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
/*
* Spec says, if F_PROTOCOL_FEATURE is not set by the
* slave, then all the vrings should start off as
*/
if (! prot_feature)
dpdk_vhost_user_set_vring_enable(hw_if_index, idx, 1);
-#endif
}
return 0;
mapped_address += vum->regions[i].mmap_offset;
vui->region_addr[i] = mapped_address;
vui->region_fd[i] = fd[i];
+ vui->region_offset[i] = vum->regions[i].mmap_offset;
mem->regions[i].address_offset = mapped_address - mem->regions[i].guest_phys_address;
+ DBG_SOCK("map memory region %d addr 0x%lx off 0x%lx len 0x%lx",
+ i, vui->region_addr[i], vui->region_offset[i], mapped_size);
+
if (vum->regions[i].guest_phys_addr == 0) {
mem->base_address = vum->regions[i].userspace_addr;
mem->mapped_address = mem->regions[i].address_offset;
}
static clib_error_t *
-dpdk_vhost_user_set_vring_addr(u32 hw_if_index, u8 idx, u64 desc, u64 used, u64 avail)
+dpdk_vhost_user_set_vring_addr(u32 hw_if_index, u8 idx, uword desc, \
+ uword used, uword avail, uword log)
{
dpdk_device_t * xd;
struct vhost_virtqueue *vq;
- DBG_SOCK("idx %u desc 0x%lx used 0x%lx avail 0x%lx",
- idx, desc, used, avail);
+ DBG_SOCK("idx %u desc 0x%lx used 0x%lx avail 0x%lx log 0x%lx",
+ idx, desc, used, avail, log);
if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) {
clib_warning("not a vhost-user interface");
vq->desc = (struct vring_desc *) qva_to_vva(&xd->vu_vhost_dev, desc);
vq->used = (struct vring_used *) qva_to_vva(&xd->vu_vhost_dev, used);
vq->avail = (struct vring_avail *) qva_to_vva(&xd->vu_vhost_dev, avail);
+#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
+ vq->log_guest_addr = log;
+#endif
if (!(vq->desc && vq->used && vq->avail)) {
clib_warning("falied to set vring addr");
}
+ if (vq->last_used_idx != vq->used->idx) {
+ clib_warning("last_used_idx (%u) and vq->used->idx (%u) mismatches; "
+ "some packets maybe resent for Tx and dropped for Rx",
+ vq->last_used_idx, vq->used->idx);
+ vq->last_used_idx = vq->used->idx;
+ vq->last_used_idx_res = vq->used->idx;
+ }
+
+ /*
+ * Inform the guest that there is no need to inform (kick) the
+ * host when it adds buffers. kick results in vmexit and will
+ * incur performance degradation.
+ *
+ * The below function sets a flag in used table. Therefore,
+ * should be initialized after initializing vq->used.
+ */
+ rte_vhost_enable_guest_notification(&xd->vu_vhost_dev, idx, 0);
stop_processing_packets(hw_if_index, idx);
return 0;
* and stop ring upon receiving VHOST_USER_GET_VRING_BASE.
*/
DBG_SOCK("Stopping vring Q %u of device %d", idx, hw_if_index);
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
dpdk_vu_intf_t *vui = xd->vu_intf;
vui->vrings[idx].enabled = 0; /* Reset local copy */
vui->vrings[idx].callfd = -1; /* Reset FD */
vq->desc = NULL;
vq->used = NULL;
vq->avail = NULL;
+#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
+ vq->log_guest_addr = 0;
+#endif
/* Check if all Qs are disabled */
int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
DBG_SOCK("Device %d disabled", hw_if_index);
xd->vu_is_running = 0;
}
-#else
- vq->desc = NULL;
- vq->used = NULL;
- vq->avail = NULL;
- xd->vu_is_running = 0;
-#endif
return 0;
}
{
dpdk_main_t * dm = &dpdk_main;
dpdk_device_t * xd;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
dpdk_vu_vring *vring;
-#endif
struct vhost_virtqueue *vq0, *vq1, *vq;
int index, vu_is_running = 0;
vq = xd->vu_vhost_dev.virtqueue[idx];
vq->kickfd = fd;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
vring = &xd->vu_intf->vrings[idx];
vq->enabled = (vq->desc && vq->avail && vq->used && vring->enabled) ? 1 : 0;
-#endif
/*
* Set xd->vu_is_running if at least one pair of
* RX/TX queues are enabled.
*/
int numqs = VIRTIO_QNUM;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
-#endif
for (index = 0; index < numqs; index += 2) {
vq0 = xd->vu_vhost_dev.virtqueue[index]; /* RX */
vq1 = xd->vu_vhost_dev.virtqueue[index + 1]; /* TX */
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
if (vq0->enabled && vq1->enabled)
-#else
- if (vq0->desc && vq0->avail && vq0->used &&
- vq1->desc && vq1->avail && vq1->used)
-#endif
{
vu_is_running = 1;
break;
return 0;
}
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
static int
dpdk_vhost_user_set_vring_enable(u32 hw_if_index, u8 idx, int enable)
{
return 0;
}
-#endif
static clib_error_t * dpdk_vhost_user_callfd_read_ready (unix_file_t * uf)
{
if((vring->callfd > 0) && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
eventfd_write(vring->callfd, (eventfd_t)1);
vring->n_since_last_int = 0;
- vring->int_deadline = vlib_time_now(vm) + dm->vhost_coalesce_time;
+ vring->int_deadline = vlib_time_now(vm) + dm->conf->vhost_coalesce_time;
}
}
memset(vui, 0, sizeof(*vui));
vui->unix_fd = sockfd;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
vui->num_vrings = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
-#else
- vui->num_vrings = VIRTIO_QNUM;
-#endif
DBG_SOCK("dpdk_vhost_user_vui_init VRINGS: %d", vui->num_vrings);
vui->sock_is_server = is_server;
strncpy(vui->sock_filename, sock_filename, ARRAY_LEN(vui->sock_filename)-1);
xd->vlib_sw_if_index);
}
+/*
+ * Unmap every guest memory region recorded for a vhost-user device.
+ *
+ * For each region whose recorded address is not -1, the mapping is
+ * released with munmap(), the address slot is reset to -1 and the
+ * region's file descriptor is closed. A failed munmap() is reported
+ * with a warning but does not stop the loop. The region count is
+ * reset to zero at the end.
+ *
+ * NOTE(review): get_huge_page_size() is only defined when
+ * RTE_VERSION >= 16.04 in the visible code, while this function is
+ * not guarded - confirm this builds against older DPDK releases.
+ */
+static void dpdk_unmap_all_mem_regions(dpdk_device_t * xd)
+{
+  int i, r;
+  dpdk_vu_intf_t *vui = xd->vu_intf;
+  struct virtio_memory * mem = xd->vu_vhost_dev.mem;
+
+  for (i=0; i<mem->nregions; i++) {
+    if (vui->region_addr[i] != -1) {
+
+      long page_sz = get_huge_page_size(vui->region_fd[i]);
+
+      /*
+       * Length covers mmap offset + region size, rounded with the
+       * page size; presumably identical to the length used when the
+       * region was mapped - TODO confirm against the mapping code.
+       */
+      ssize_t map_sz = (mem->regions[i].memory_size +
+        vui->region_offset[i] + page_sz) & ~(page_sz - 1);
+
+      /*
+       * region_addr[] stores the mapping address advanced by the mmap
+       * offset, so step back by that offset to get the mapping base.
+       */
+      r = munmap((void *)(vui->region_addr[i] - vui->region_offset[i]), map_sz);
+
+      /* page_sz is a long: print it with %lx, not %x. */
+      DBG_SOCK("unmap memory region %d addr 0x%lx off 0x%lx len 0x%lx page_sz 0x%lx",
+        i, vui->region_addr[i], vui->region_offset[i], map_sz, page_sz);
+
+      /* Mark the slot as unmapped so it is skipped on a later call. */
+      vui->region_addr[i]= -1;
+
+      if (r == -1) {
+        clib_unix_warning("failed to unmap memory region");
+      }
+      close(vui->region_fd[i]);
+    }
+  }
+  mem->nregions = 0;
+}
+
static inline void
dpdk_vhost_user_if_disconnect(dpdk_device_t * xd)
{
dpdk_vu_intf_t *vui = xd->vu_intf;
vnet_main_t * vnm = vnet_get_main();
dpdk_main_t * dm = &dpdk_main;
+ struct vhost_virtqueue *vq;
+ int q;
xd->admin_up = 0;
vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0);
vui->unix_fd = -1;
vui->is_up = 0;
+ for (q = 0; q < vui->num_vrings; q++) {
+ vq = xd->vu_vhost_dev.virtqueue[q];
+ vui->vrings[q].enabled = 0; /* Reset local copy */
+ vui->vrings[q].callfd = -1; /* Reset FD */
+ vq->enabled = 0;
+#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
+ vq->log_guest_addr = 0;
+#endif
+ vq->desc = NULL;
+ vq->used = NULL;
+ vq->avail = NULL;
+ }
+ xd->vu_is_running = 0;
+
+ dpdk_unmap_all_mem_regions(xd);
DBG_SOCK("interface ifindex %d disconnected", xd->vlib_sw_if_index);
}
dpdk_vhost_user_set_vring_addr(xd->vlib_hw_if_index, msg.state.index,
msg.addr.desc_user_addr,
msg.addr.used_user_addr,
- msg.addr.avail_user_addr);
+ msg.addr.avail_user_addr,
+ msg.addr.log_guest_addr);
break;
case VHOST_USER_SET_OWNER:
break;
case VHOST_USER_SET_LOG_BASE:
+#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
DBG_SOCK("if %d msg VHOST_USER_SET_LOG_BASE",
xd->vlib_hw_if_index);
+
+ if (msg.size != sizeof(msg.log)) {
+ DBG_SOCK("invalid msg size for VHOST_USER_SET_LOG_BASE: %u instead of %lu",
+ msg.size, sizeof(msg.log));
+ goto close_socket;
+ }
+
+ if (!(xd->vu_vhost_dev.protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD))) {
+ DBG_SOCK("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received");
+ goto close_socket;
+ }
+
+ fd = fds[0];
+ /* round the mapping length using the page size reported for fd */
+ long page_sz = get_huge_page_size(fd);
+ ssize_t map_sz = (msg.log.size + msg.log.offset + page_sz) & ~(page_sz - 1);
+
+ void *addr = mmap(0, map_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+
+ DBG_SOCK("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped %p",
+ map_sz, msg.log.offset, fd, addr);
+
+ if (addr == MAP_FAILED) {
+ clib_warning("failed to map memory. errno is %d", errno);
+ goto close_socket;
+ }
+
+ xd->vu_vhost_dev.log_base += pointer_to_uword(addr) + msg.log.offset;
+ xd->vu_vhost_dev.log_size = msg.log.size;
+ msg.flags |= VHOST_USER_REPLY_MASK;
+ msg.size = sizeof(msg.u64);
+#else
+ DBG_SOCK("if %d msg VHOST_USER_SET_LOG_BASE Not-Implemented",
+ xd->vlib_hw_if_index);
+#endif
break;
case VHOST_USER_SET_LOG_FD:
xd->vlib_hw_if_index);
break;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
case VHOST_USER_GET_PROTOCOL_FEATURES:
DBG_SOCK("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES",
xd->vlib_hw_if_index);
msg.u64 = xd->vu_vhost_dev.virt_qp_nb;
msg.size = sizeof(msg.u64);
break;
-#endif
default:
DBG_SOCK("unknown vhost-user message %d received. closing socket",
int rv = 0;
// using virtio vhost user?
- if (dm->use_virtio_vhost) {
+ if (dm->conf->use_virtio_vhost) {
return vhost_user_create_if(vnm, vm, sock_filename, is_server,
sw_if_index, feature_mask, renumber, custom_dev_instance, hwaddr);
}
int rv = 0;
// using virtio vhost user?
- if (dm->use_virtio_vhost) {
+ if (dm->conf->use_virtio_vhost) {
return vhost_user_modify_if(vnm, vm, sock_filename, is_server,
sw_if_index, feature_mask, renumber, custom_dev_instance);
}
int rv = 0;
// using virtio vhost user?
- if (dm->use_virtio_vhost) {
+ if (dm->conf->use_virtio_vhost) {
return vhost_user_delete_if(vnm, vm, sw_if_index);
}
return -1;
// using virtio vhost user?
- if (dm->use_virtio_vhost) {
+ if (dm->conf->use_virtio_vhost) {
return vhost_user_dump_ifs(vnm, vm, out_vuids);
}
u8 hwaddr[6];
u8 *hw = NULL;
- if (dm->use_virtio_vhost) {
+ if (dm->conf->use_virtio_vhost) {
return vhost_user_connect_command_fn(vm, input, cmd);
}
renumber, custom_dev_instance, hw);
vec_free(sock_filename);
+ vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index);
return 0;
}
unformat_input_t _line_input, * line_input = &_line_input;
u32 sw_if_index = ~0;
- if (dm->use_virtio_vhost) {
+ if (dm->conf->use_virtio_vhost) {
return vhost_user_delete_command_fn(vm, input, cmd);
}
{ .str = NULL }
};
- if (dm->use_virtio_vhost) {
+ if (dm->conf->use_virtio_vhost) {
return show_vhost_user_command_fn(vm, input, cmd);
}
vlib_cli_output (vm, "DPDK vhost-user interfaces");
vlib_cli_output (vm, "Global:\n coalesce frames %d time %e\n\n",
- dm->vhost_coalesce_frames, dm->vhost_coalesce_time);
+ dm->conf->vhost_coalesce_frames, dm->conf->vhost_coalesce_time);
for (i = 0; i < vec_len (hw_if_indices); i++) {
hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
vlib_cli_output(vm, " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
vq->avail->flags, vq->avail->idx, vq->used->flags, vq->used->idx);
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
vlib_cli_output(vm, " kickfd %d callfd %d errfd %d enabled %d\n",
vq->kickfd, vq->callfd, vui->vrings[q].errfd, vq->enabled);
if (show_descr && vq->enabled) {
-#else
- vlib_cli_output(vm, " kickfd %d callfd %d errfd\n",
- vq->kickfd, vq->callfd, vui->vrings[q].errfd);
-
- if (show_descr) {
-#endif
vlib_cli_output(vm, "\n descriptor table:\n");
vlib_cli_output(vm, " id addr len flags next user_addr\n");
vlib_cli_output(vm, " ===== ================== ===== ====== ===== ==================\n");
vq->desc[j].len,
vq->desc[j].flags,
vq->desc[j].next,
- (u64) map_guest_mem(xd, vq->desc[j].addr));}
+ pointer_to_uword(map_guest_mem(xd, vq->desc[j].addr)));}
}
}
vlib_cli_output (vm, "\n");