vhost: predict map_guest_mem returns similar results over time
[vpp.git] / vnet / vnet / devices / virtio / vhost-user.c
index 2ca5aa8..8ca8c17 100644 (file)
@@ -148,22 +148,92 @@ vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
   return 0;
 }
 
-
-static inline void *
-map_guest_mem (vhost_user_intf_t * vui, uword addr)
+/*
+ * Translate the guest physical address addr into a pointer inside the
+ * mapping of the vui memory region that contains it.  Returns 0 when no
+ * region contains addr.
+ *
+ * hint is the index of the region to try first.  It is rewritten with the
+ * index of the matching region after a full lookup, and reset to 0 when
+ * the lookup fails.
+ */
+static_always_inline void *
+map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint)
 {
-  int i;
+  int i = *hint;
+  /* fast path: retry the region recorded in the hint */
+  if (PREDICT_TRUE ((vui->regions[i].guest_phys_addr <= addr) &&
+                   ((vui->regions[i].guest_phys_addr +
+                     vui->regions[i].memory_size) > addr)))
+    {
+      return (void *) (vui->region_mmap_addr[i] + addr -
+                      vui->regions[i].guest_phys_addr);
+    }
+#if __SSE4_2__
+  /*
+   * Test addr against eight [lo, hi) ranges, two per 128-bit compare
+   * (assumes region_guest_addr_lo/hi hold at least 8 64-bit entries).
+   * NOTE(review): _mm_cmpgt_epi64 compares signed values; presumably
+   * guest addresses stay below 2^63 - confirm.
+   */
+  __m128i rl, rh, al, ah, r;
+  al = _mm_set1_epi64x (addr + 1);
+  ah = _mm_set1_epi64x (addr);
+
+  rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[0]);
+  rl = _mm_cmpgt_epi64 (al, rl);
+  rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[0]);
+  rh = _mm_cmpgt_epi64 (rh, ah);
+  r = _mm_and_si128 (rl, rh);
+
+  rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[2]);
+  rl = _mm_cmpgt_epi64 (al, rl);
+  rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[2]);
+  rh = _mm_cmpgt_epi64 (rh, ah);
+  r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x22);
+
+  rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[4]);
+  rl = _mm_cmpgt_epi64 (al, rl);
+  rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[4]);
+  rh = _mm_cmpgt_epi64 (rh, ah);
+  r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x44);
+
+  rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[6]);
+  rl = _mm_cmpgt_epi64 (al, rl);
+  rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[6]);
+  rh = _mm_cmpgt_epi64 (rh, ah);
+  r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88);
+
+  /* gather one byte per region so that mask bit k reports region k */
+  r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800));
+  /* bit 8 is a no-match sentinel: the result of ctz on 0 is undefined.
+     A miss now yields i == 8, which the check below rejects (assumes
+     nregions never exceeds the eight slots tested above). */
+  i = __builtin_ctzll (_mm_movemask_epi8 (r) | (1 << 8));
+
+  if (i < vui->nregions)
+    {
+      *hint = i;
+      return (void *) (vui->region_mmap_addr[i] + addr -
+                      vui->regions[i].guest_phys_addr);
+    }
+
+#else
+  /* without SSE4.2: linear scan over the configured regions */
   for (i = 0; i < vui->nregions; i++)
     {
       if ((vui->regions[i].guest_phys_addr <= addr) &&
          ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) >
           addr))
        {
+         *hint = i;
          return (void *) (vui->region_mmap_addr[i] + addr -
                           vui->regions[i].guest_phys_addr);
        }
     }
+#endif
   DBG_VQ ("failed to map guest mem addr %llx", addr);
+  *hint = 0;
   return 0;
 }
 
@@ -261,6 +310,18 @@ vhost_user_if_disconnect (vhost_user_intf_t * vui)
   vui->is_up = 0;
   for (q = 0; q < vui->num_vrings; q++)
     {
+      if (vui->vrings[q].callfd > -1)
+       {
+         unix_file_t *uf = pool_elt_at_index (unix_main.file_pool,
+                                              vui->vrings[q].callfd_idx);
+         unix_file_del (&unix_main, uf);
+       }
+
+      if (vui->vrings[q].kickfd > -1)
+       close (vui->vrings[q].kickfd);
+
+      vui->vrings[q].callfd = -1;
+      vui->vrings[q].kickfd = -1;
       vui->vrings[q].desc = NULL;
       vui->vrings[q].avail = NULL;
       vui->vrings[q].used = NULL;
@@ -463,6 +524,9 @@ vhost_user_socket_read (unix_file_t * uf)
 
          vui->region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE,
                                           MAP_SHARED, fds[i], 0);
+         vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr;
+         vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr +
+           vui->regions[i].memory_size;
 
          DBG_SOCK
            ("map memory region %d addr 0 len 0x%lx fd %d mapped 0x%lx "
@@ -550,7 +614,7 @@ vhost_user_socket_read (unix_file_t * uf)
            goto close_socket;
 
          /* if there is old fd, delete it */
-         if (vui->vrings[q].callfd)
+         if (vui->vrings[q].callfd > -1)
            {
              unix_file_t *uf = pool_elt_at_index (unix_main.file_pool,
                                                   vui->vrings[q].callfd_idx);
@@ -576,6 +640,9 @@ vhost_user_socket_read (unix_file_t * uf)
          if (number_of_fds != 1)
            goto close_socket;
 
+         if (vui->vrings[q].kickfd > -1)
+           close (vui->vrings[q].kickfd);
+
          vui->vrings[q].kickfd = fds[0];
        }
       else
@@ -804,6 +871,8 @@ vhost_user_init (vlib_main_t * vm)
   clib_error_t *error;
   vhost_user_main_t *vum = &vhost_user_main;
   vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_thread_registration_t *tr;
+  uword *p;
 
   error = vlib_call_init_function (vm, ip4_init);
   if (error)
@@ -821,6 +890,18 @@ vhost_user_init (vlib_main_t * vm)
   vec_validate_aligned (vum->rx_buffers, tm->n_vlib_mains - 1,
                        CLIB_CACHE_LINE_BYTES);
 
+  /* find out which cpus will be used for input */
+  vum->input_cpu_first_index = 0;
+  vum->input_cpu_count = 1;
+  p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+  tr = p ? (vlib_thread_registration_t *) p[0] : 0;
+
+  if (tr && tr->count > 0)
+    {
+      vum->input_cpu_first_index = tr->first_index;
+      vum->input_cpu_count = tr->count;
+    }
+
   return 0;
 }
 
@@ -945,6 +1026,9 @@ vhost_user_if_input (vlib_main_t * vm,
   u16 qsz_mask;
   u32 cpu_index, rx_len, drops, flush;
   f64 now = vlib_time_now (vm);
+  u32 map_guest_hint_desc = 0;
+  u32 map_guest_hint_indirect = 0;
+  u32 *map_guest_hint_p = &map_guest_hint_desc;
 
   vec_reset_length (vui->d_trace_buffers);
 
@@ -1064,7 +1148,8 @@ vhost_user_if_input (vlib_main_t * vm,
              u32 next_desc =
                txvq->avail->ring[(txvq->last_avail_idx + 1) & qsz_mask];
              void *buffer_addr =
-               map_guest_mem (vui, txvq->desc[next_desc].addr);
+               map_guest_mem (vui, txvq->desc[next_desc].addr,
+                              &map_guest_hint_desc);
              if (PREDICT_TRUE (buffer_addr != 0))
                CLIB_PREFETCH (buffer_addr, 64, STORE);
 
@@ -1095,11 +1180,14 @@ vhost_user_if_input (vlib_main_t * vm,
 
          vring_desc_t *desc_table = txvq->desc;
          u32 desc_index = desc_current;
+         map_guest_hint_p = &map_guest_hint_desc;
 
          if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
            {
-             desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr);
+             desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr,
+                                         &map_guest_hint_desc);
              desc_index = 0;
+             map_guest_hint_p = &map_guest_hint_indirect;
              if (PREDICT_FALSE (desc_table == 0))
                {
                  error = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
@@ -1110,7 +1198,8 @@ vhost_user_if_input (vlib_main_t * vm,
          while (1)
            {
              void *buffer_addr =
-               map_guest_mem (vui, desc_table[desc_index].addr);
+               map_guest_mem (vui, desc_table[desc_index].addr,
+                              map_guest_hint_p);
              if (PREDICT_FALSE (buffer_addr == 0))
                {
                  error = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
@@ -1226,7 +1315,7 @@ vhost_user_if_input (vlib_main_t * vm,
     }
 
   /* interrupt (call) handling */
-  if ((txvq->callfd > 0) && !(txvq->avail->flags & 1))
+  if ((txvq->callfd > -1) && !(txvq->avail->flags & 1))
     {
       txvq->n_since_last_int += n_rx_packets;
 
@@ -1256,10 +1345,7 @@ vhost_user_input (vlib_main_t * vm,
                  vlib_node_runtime_t * node, vlib_frame_t * f)
 {
   vhost_user_main_t *vum = &vhost_user_main;
-#if DPDK > 0
-  dpdk_main_t *dm = &dpdk_main;
   u32 cpu_index = os_get_cpu_number ();
-#endif
   vhost_user_intf_t *vui;
   uword n_rx_packets = 0;
   int i;
@@ -1269,10 +1355,8 @@ vhost_user_input (vlib_main_t * vm,
       vui = vec_elt_at_index (vum->vhost_user_interfaces, i);
       if (vui->is_up)
        {
-#if DPDK > 0
-         if ((i % dm->input_cpu_count) ==
-             (cpu_index - dm->input_cpu_first_index))
-#endif
+         if ((i % vum->input_cpu_count) ==
+             (cpu_index - vum->input_cpu_first_index))
            n_rx_packets += vhost_user_if_input (vm, vum, vui, node);
        }
     }
@@ -1320,6 +1404,9 @@ vhost_user_intfc_tx (vlib_main_t * vm,
   u8 error = VHOST_USER_TX_FUNC_ERROR_NONE;
 
   n_left = n_packets = frame->n_vectors;
+  u32 map_guest_hint_desc = 0;
+  u32 map_guest_hint_indirect = 0;
+  u32 *map_guest_hint_p = &map_guest_hint_desc;
 
   if (PREDICT_FALSE (!vui->is_up))
     goto done2;
@@ -1371,6 +1458,7 @@ vhost_user_intfc_tx (vlib_main_t * vm,
        }
 
       desc_table = rxvq->desc;
+      map_guest_hint_p = &map_guest_hint_desc;
       desc_head = desc_index =
        rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask];
       if (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT)
@@ -1383,18 +1471,22 @@ vhost_user_intfc_tx (vlib_main_t * vm,
            }
          if (PREDICT_FALSE
              (!(desc_table =
-                map_guest_mem (vui, rxvq->desc[desc_index].addr))))
+                map_guest_mem (vui, rxvq->desc[desc_index].addr,
+                               &map_guest_hint_desc))))
            {
              error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
              goto done;
            }
          desc_index = 0;
+         map_guest_hint_p = &map_guest_hint_indirect;
        }
 
       desc_len = vui->virtio_net_hdr_sz;
 
       if (PREDICT_FALSE
-         (!(buffer_addr = map_guest_mem (vui, desc_table[desc_index].addr))))
+         (!(buffer_addr =
+            map_guest_mem (vui, desc_table[desc_index].addr,
+                           map_guest_hint_p))))
        {
          error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
          goto done;
@@ -1442,7 +1534,8 @@ vhost_user_intfc_tx (vlib_main_t * vm,
                  desc_index = desc_table[desc_index].next;
                  if (PREDICT_FALSE
                      (!(buffer_addr =
-                        map_guest_mem (vui, desc_table[desc_index].addr))))
+                        map_guest_mem (vui, desc_table[desc_index].addr,
+                                       map_guest_hint_p))))
                    {
                      rxvq->last_used_idx -= hdr->num_buffers - 1;
                      rxvq->last_avail_idx -= hdr->num_buffers - 1;
@@ -1476,6 +1569,7 @@ vhost_user_intfc_tx (vlib_main_t * vm,
                    }
 
                  desc_table = rxvq->desc;
+                 map_guest_hint_p = &map_guest_hint_desc;
                  desc_head = desc_index =
                    rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask];
                  if (PREDICT_FALSE
@@ -1492,17 +1586,20 @@ vhost_user_intfc_tx (vlib_main_t * vm,
                      if (PREDICT_FALSE
                          (!(desc_table =
                             map_guest_mem (vui,
-                                           rxvq->desc[desc_index].addr))))
+                                           rxvq->desc[desc_index].addr,
+                                           &map_guest_hint_desc))))
                        {
                          error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
                          goto done;
                        }
                      desc_index = 0;
+                     map_guest_hint_p = &map_guest_hint_indirect;
                    }
 
                  if (PREDICT_FALSE
                      (!(buffer_addr =
-                        map_guest_mem (vui, desc_table[desc_index].addr))))
+                        map_guest_mem (vui, desc_table[desc_index].addr,
+                                       map_guest_hint_p))))
                    {
                      error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
                      goto done;
@@ -1563,7 +1660,7 @@ done:
   vhost_user_log_dirty_ring (vui, rxvq, idx);
 
   /* interrupt (call) handling */
-  if ((rxvq->callfd > 0) && !(rxvq->avail->flags & 1))
+  if ((rxvq->callfd > -1) && !(rxvq->avail->flags & 1))
     {
       rxvq->n_since_last_int += n_packets - n_left;
 
@@ -1887,6 +1984,8 @@ vhost_user_vui_init (vnet_main_t * vnm,
   for (q = 0; q < 2; q++)
     {
       vui->vrings[q].enabled = 0;
+      vui->vrings[q].callfd = -1;
+      vui->vrings[q].kickfd = -1;
     }
 
   vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
@@ -1907,11 +2006,8 @@ static void
 vhost_user_vui_register (vlib_main_t * vm, vhost_user_intf_t * vui)
 {
   vhost_user_main_t *vum = &vhost_user_main;
-#if DPDK > 0
-  dpdk_main_t *dm = &dpdk_main;
   int cpu_index;
   vlib_thread_main_t *tm = vlib_get_thread_main ();
-#endif
 
   hash_set (vum->vhost_user_interface_index_by_listener_fd, vui->unix_fd,
            vui - vum->vhost_user_interfaces);
@@ -1919,19 +2015,15 @@ vhost_user_vui_register (vlib_main_t * vm, vhost_user_intf_t * vui)
            vui - vum->vhost_user_interfaces);
 
   /* start polling */
-#if DPDK > 0
-  cpu_index = dm->input_cpu_first_index +
-    (vui - vum->vhost_user_interfaces) % dm->input_cpu_count;
+  cpu_index = vum->input_cpu_first_index +
+    (vui - vum->vhost_user_interfaces) % vum->input_cpu_count;
 
   if (tm->n_vlib_mains == 1)
-#endif
     vlib_node_set_state (vm, vhost_user_input_node.index,
                         VLIB_NODE_STATE_POLLING);
-#if DPDK > 0
   else
     vlib_node_set_state (vlib_mains[cpu_index], vhost_user_input_node.index,
                         VLIB_NODE_STATE_POLLING);
-#endif
 
   /* tell process to start polling for sockets */
   vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
@@ -2301,6 +2393,7 @@ show_vhost_user_command_fn (vlib_main_t * vm,
                               "  ===== ================== ===== ====== ===== ==================\n");
              for (j = 0; j < vui->vrings[q].qsz; j++)
                {
+                 u32 mem_hint = 0;
                  vlib_cli_output (vm,
                                   "  %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
                                   j, vui->vrings[q].desc[j].addr,
@@ -2310,7 +2403,7 @@ show_vhost_user_command_fn (vlib_main_t * vm,
                                   pointer_to_uword (map_guest_mem
                                                     (vui,
                                                      vui->vrings[q].desc[j].
-                                                     addr)));
+                                                     addr, &mem_hint)));
                }
            }
        }