vhost: support interrupt mode 64/5764/6
authorSteven <sluong@cisco.com>
Thu, 16 Mar 2017 04:18:55 +0000 (21:18 -0700)
committerDamjan Marion <dmarion.lists@gmail.com>
Wed, 22 Mar 2017 08:54:33 +0000 (08:54 +0000)
vhost currently supports only polling mode. This patch is to add
interrupt mode. When the interface is configured for interrupt
mode, our input node does not get called unless there is a packet
in the vring.

If a particular CPU has one interface configured for polling mode
and another in interrupt, the input node is set to polling for
that CPU.

This diffs also includes two crashes in vlib's dispatch_node. One is
included in https://gerrit.fd.io/r/#/c/5516. The other crash is in
the ASSERT. The ASSERT can become true when the caller of
dispatch_node is in a loop. The first call converted the node
to polling. The second call thereafter will hit the ASSERT.

Change-Id: If17b6d48b20d7d8605c6a161459828637173cd32
Signed-off-by: Steven <sluong@cisco.com>
src/vat/api_format.c
src/vlib/main.c
src/vnet/devices/virtio/vhost-user.c
src/vnet/devices/virtio/vhost-user.h

index 37b7f93..3b57ac6 100644 (file)
@@ -403,6 +403,46 @@ api_unformat_sw_if_index (unformat_input_t * input, va_list * args)
 }
 #endif /* VPP_API_TEST_BUILTIN */
 
+#define VHOST_USER_POLLING_MODE   0
+#define VHOST_USER_INTERRUPT_MODE 1
+#define VHOST_USER_ADAPTIVE_MODE  2
+
+static u8 *
+api_format_vhost_user_operation_mode (u8 * s, va_list * va)
+{
+  int operation_mode = va_arg (*va, int);
+
+  switch (operation_mode)
+    {
+    case VHOST_USER_POLLING_MODE:
+      s = format (s, "%-9s", "polling");
+      break;
+    case VHOST_USER_INTERRUPT_MODE:
+      s = format (s, "%-9s", "interrupt");
+      break;
+    default:
+      s = format (s, "%-9s", "invalid");
+    }
+  return s;
+}
+
+static uword
+api_unformat_vhost_user_operation_mode (unformat_input_t * input,
+                                       va_list * args)
+{
+  u8 *operation_mode = va_arg (*args, u8 *);
+  uword rc = 1;
+
+  if (unformat (input, "interrupt"))
+    *operation_mode = VHOST_USER_INTERRUPT_MODE;
+  else if (unformat (input, "polling"))
+    *operation_mode = VHOST_USER_POLLING_MODE;
+  else
+    rc = 0;
+
+  return rc;
+}
+
 static uword
 unformat_policer_rate_type (unformat_input_t * input, va_list * args)
 {
@@ -11174,6 +11214,7 @@ api_create_vhost_user_if (vat_main_t * vam)
   u8 use_custom_mac = 0;
   u8 *tag = 0;
   int ret;
+  u8 operation_mode = VHOST_USER_POLLING_MODE;
 
   /* Shut up coverity */
   memset (hwaddr, 0, sizeof (hwaddr));
@@ -11192,6 +11233,10 @@ api_create_vhost_user_if (vat_main_t * vam)
        is_server = 1;
       else if (unformat (i, "tag %s", &tag))
        ;
+      else if (unformat (i, "mode %U",
+                        api_unformat_vhost_user_operation_mode,
+                        &operation_mode))
+       ;
       else
        break;
     }
@@ -11211,6 +11256,7 @@ api_create_vhost_user_if (vat_main_t * vam)
 
   M (CREATE_VHOST_USER_IF, mp);
 
+  mp->operation_mode = operation_mode;
   mp->is_server = is_server;
   clib_memcpy (mp->sock_filename, file_name, vec_len (file_name));
   vec_free (file_name);
@@ -11242,6 +11288,7 @@ api_modify_vhost_user_if (vat_main_t * vam)
   u8 sw_if_index_set = 0;
   u32 sw_if_index = (u32) ~ 0;
   int ret;
+  u8 operation_mode = VHOST_USER_POLLING_MODE;
 
   while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
     {
@@ -11257,6 +11304,10 @@ api_modify_vhost_user_if (vat_main_t * vam)
        ;
       else if (unformat (i, "server"))
        is_server = 1;
+      else if (unformat (i, "mode %U",
+                        api_unformat_vhost_user_operation_mode,
+                        &operation_mode))
+       ;
       else
        break;
     }
@@ -11282,6 +11333,7 @@ api_modify_vhost_user_if (vat_main_t * vam)
 
   M (MODIFY_VHOST_USER_IF, mp);
 
+  mp->operation_mode = operation_mode;
   mp->sw_if_index = ntohl (sw_if_index);
   mp->is_server = is_server;
   clib_memcpy (mp->sock_filename, file_name, vec_len (file_name));
@@ -11337,11 +11389,12 @@ static void vl_api_sw_interface_vhost_user_details_t_handler
 {
   vat_main_t *vam = &vat_main;
 
-  print (vam->ofp, "%-25s %3" PRIu32 " %6" PRIu32 " %8x %6d %7d %s",
+  print (vam->ofp, "%-25s %3" PRIu32 " %6" PRIu32 " %8x %6d %7d %U %s",
         (char *) mp->interface_name,
         ntohl (mp->sw_if_index), ntohl (mp->virtio_net_hdr_sz),
         clib_net_to_host_u64 (mp->features), mp->is_server,
-        ntohl (mp->num_regions), (char *) mp->sock_filename);
+        ntohl (mp->num_regions), api_format_vhost_user_operation_mode,
+        mp->operation_mode, (char *) mp->sock_filename);
   print (vam->ofp, "    Status: '%s'", strerror (ntohl (mp->sock_errno)));
 }
 
@@ -11370,6 +11423,7 @@ static void vl_api_sw_interface_vhost_user_details_t_handler_json
   vat_json_object_add_string_copy (node, "sock_filename", mp->sock_filename);
   vat_json_object_add_uint (node, "num_regions", ntohl (mp->num_regions));
   vat_json_object_add_uint (node, "sock_errno", ntohl (mp->sock_errno));
+  vat_json_object_add_uint (node, "mode", mp->operation_mode);
 }
 
 static int
@@ -11379,7 +11433,8 @@ api_sw_interface_vhost_user_dump (vat_main_t * vam)
   vl_api_control_ping_t *mp_ping;
   int ret;
   print (vam->ofp,
-        "Interface name           idx hdr_sz features server regions filename");
+        "Interface name            idx hdr_sz features server regions mode"
+        "      filename");
 
   /* Get list of vhost-user interfaces */
   M (SW_INTERFACE_VHOST_USER_DUMP, mp);
@@ -18492,10 +18547,12 @@ _(l2_interface_vlan_tag_rewrite,                                        \
   "[translate-2-[1|2]] [push_dot1q 0] tag1 <nn> tag2 <nn>")             \
 _(create_vhost_user_if,                                                 \
         "socket <filename> [server] [renumber <dev_instance>] "         \
-        "[mac <mac_address>]")                                          \
+        "[mac <mac_address>] "                                          \
+        "[mode <interrupt | polling>]")                                 \
 _(modify_vhost_user_if,                                                 \
         "<intfc> | sw_if_index <nn> socket <filename>\n"                \
-        "[server] [renumber <dev_instance>]")                           \
+        "[server] [renumber <dev_instance>] "                           \
+        "[mode <interrupt | polling>]")                                 \
 _(delete_vhost_user_if, "<intfc> | sw_if_index <nn>")                   \
 _(sw_interface_vhost_user_dump, "")                                     \
 _(show_version, "")                                                     \
index 605771c..50f0b16 100644 (file)
@@ -1032,15 +1032,14 @@ dispatch_node (vlib_main_t * vm,
          vlib_worker_thread_t *w = vlib_worker_threads + vm->cpu_index;
 #endif
 
-         if (dispatch_state == VLIB_NODE_STATE_INTERRUPT
-             && v >= nm->polling_threshold_vector_length)
+         if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT
+              && v >= nm->polling_threshold_vector_length) &&
+             !(node->flags &
+               VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
            {
              vlib_node_t *n = vlib_get_node (vm, node->node_index);
              n->state = VLIB_NODE_STATE_POLLING;
              node->state = VLIB_NODE_STATE_POLLING;
-             ASSERT (!
-                     (node->flags &
-                      VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE));
              node->flags &=
                ~VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE;
              node->flags |=
@@ -1445,6 +1444,10 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
 
   /* Pre-allocate expired nodes. */
   vec_alloc (nm->pending_interrupt_node_runtime_indices, 32);
+  if (!nm->polling_threshold_vector_length)
+    nm->polling_threshold_vector_length = 10;
+  if (!nm->interrupt_threshold_vector_length)
+    nm->interrupt_threshold_vector_length = 5;
 
   if (is_main)
     {
index 3cbeca9..5a5beb1 100644 (file)
@@ -368,6 +368,8 @@ vhost_user_rx_thread_placement ()
   vhost_user_intf_t *vui;
   vhost_cpu_t *vhc;
   u32 *workers = 0;
+  u32 cpu_index;
+  vlib_main_t *vm;
 
   //Let's list all workers cpu indexes
   u32 i;
@@ -398,19 +400,59 @@ vhost_user_rx_thread_placement ()
            continue;
 
          i %= vec_len (vui_workers);
-         u32 cpu_index = vui_workers[i];
+         cpu_index = vui_workers[i];
          i++;
          vhc = &vum->cpus[cpu_index];
 
          iaq.qid = qid;
          iaq.vhost_iface_index = vui - vum->vhost_user_interfaces;
          vec_add1 (vhc->rx_queues, iaq);
-         vlib_node_set_state (vlib_mains[cpu_index],
-                               vhost_user_input_node.index,
-                               VLIB_NODE_STATE_POLLING);
        }
   });
   /* *INDENT-ON* */
+
+  vec_foreach (vhc, vum->cpus)
+  {
+    vhost_iface_and_queue_t *vhiq;
+    u8 mode = VHOST_USER_INTERRUPT_MODE;
+
+    vec_foreach (vhiq, vhc->rx_queues)
+    {
+      vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
+      if (vui->operation_mode == VHOST_USER_POLLING_MODE)
+       {
+         /* At least one interface is polling, cpu is set to polling */
+         mode = VHOST_USER_POLLING_MODE;
+         break;
+       }
+    }
+    vhc->operation_mode = mode;
+  }
+
+  for (cpu_index = vum->input_cpu_first_index;
+       cpu_index < vum->input_cpu_first_index + vum->input_cpu_count;
+       cpu_index++)
+    {
+      vlib_node_state_t state = VLIB_NODE_STATE_POLLING;
+
+      vhc = &vum->cpus[cpu_index];
+      vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+      switch (vhc->operation_mode)
+       {
+       case VHOST_USER_INTERRUPT_MODE:
+         state = VLIB_NODE_STATE_INTERRUPT;
+         break;
+       case VHOST_USER_POLLING_MODE:
+         state = VLIB_NODE_STATE_POLLING;
+         break;
+       default:
+         clib_warning ("BUG: bad operation mode %d", vhc->operation_mode);
+         break;
+       }
+      vlib_node_set_state (vm, vhost_user_input_node.index, state);
+    }
+
+  vec_free (workers);
 }
 
 static int
@@ -485,12 +527,68 @@ vhost_user_update_iface_state (vhost_user_intf_t * vui)
   vhost_user_tx_thread_placement (vui);
 }
 
+static void
+vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
+{
+  vhost_user_main_t *vum = &vhost_user_main;
+  vhost_cpu_t *vhc;
+  u32 cpu_index;
+  vhost_iface_and_queue_t *vhiq;
+  vlib_main_t *vm;
+  u32 ifq2;
+  u8 done = 0;
+
+  if (vhost_user_intf_ready (vui))
+    {
+      vec_foreach (vhc, vum->cpus)
+      {
+       if (vhc->operation_mode == VHOST_USER_POLLING_MODE)
+         continue;
+
+       vec_foreach (vhiq, vhc->rx_queues)
+       {
+         /*
+          * Match the interface and the virtqueue number
+          */
+         if ((vhiq->vhost_iface_index == (ifq >> 8)) &&
+             (VHOST_VRING_IDX_TX (vhiq->qid) == (ifq & 0xff)))
+           {
+             cpu_index = vhc - vum->cpus;
+             vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+             /*
+              * Convert RX virtqueue number in the lower byte to vring
+              * queue index for the input node process. Top bytes contain
+              * the interface, lower byte contains the queue index.
+              */
+             ifq2 = ((ifq >> 8) << 8) | vhiq->qid;
+             vhc->pending_input_bitmap =
+               clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1);
+             vlib_node_set_interrupt_pending (vm,
+                                              vhost_user_input_node.index);
+             done = 1;
+             break;
+           }
+       }
+       if (done)
+         break;
+      }
+    }
+}
+
 static clib_error_t *
 vhost_user_callfd_read_ready (unix_file_t * uf)
 {
   __attribute__ ((unused)) int n;
   u8 buff[8];
+  vhost_user_intf_t *vui =
+    pool_elt_at_index (vhost_user_main.vhost_user_interfaces,
+                      uf->private_data >> 8);
+
   n = read (uf->file_descriptor, ((char *) &buff), 8);
+  DBG_SOCK ("if %d CALL queue %d", uf->private_data >> 8,
+           uf->private_data & 0xff);
+  vhost_user_set_interrupt_pending (vui, uf->private_data);
+
   return 0;
 }
 
@@ -503,13 +601,20 @@ vhost_user_kickfd_read_ready (unix_file_t * uf)
     pool_elt_at_index (vhost_user_main.vhost_user_interfaces,
                       uf->private_data >> 8);
   u32 qid = uf->private_data & 0xff;
+
   n = read (uf->file_descriptor, ((char *) &buff), 8);
   DBG_SOCK ("if %d KICK queue %d", uf->private_data >> 8, qid);
 
   vlib_worker_thread_barrier_sync (vlib_get_main ());
-  vui->vrings[qid].started = 1;
-  vhost_user_update_iface_state (vui);
+  if (!vui->vrings[qid].started ||
+      (vhost_user_intf_ready (vui) != vui->is_up))
+    {
+      vui->vrings[qid].started = 1;
+      vhost_user_update_iface_state (vui);
+    }
   vlib_worker_thread_barrier_release (vlib_get_main ());
+
+  vhost_user_set_interrupt_pending (vui, uf->private_data);
   return 0;
 }
 
@@ -907,8 +1012,12 @@ vhost_user_socket_read (unix_file_t * uf)
        vui->vrings[msg.state.index].last_avail_idx =
        vui->vrings[msg.state.index].used->idx;
 
-      /* tell driver that we don't want interrupts */
-      vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
+      if (vui->operation_mode == VHOST_USER_POLLING_MODE)
+       /* tell driver that we don't want interrupts */
+       vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
+      else
+       /* tell driver that we want interrupts */
+       vui->vrings[msg.state.index].used->flags = 0;
       break;
 
     case VHOST_USER_SET_OWNER:
@@ -1811,7 +1920,8 @@ vhost_user_if_input (vlib_main_t * vm,
   vhost_user_log_dirty_ring (vui, txvq, idx);
 
   /* interrupt (call) handling */
-  if ((txvq->callfd_idx != ~0) && !(txvq->avail->flags & 1))
+  if ((txvq->callfd_idx != ~0) &&
+      !(txvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
     {
       txvq->n_since_last_int += n_rx_packets;
 
@@ -1837,16 +1947,33 @@ vhost_user_input (vlib_main_t * vm,
   vhost_user_main_t *vum = &vhost_user_main;
   uword n_rx_packets = 0;
   u32 cpu_index = os_get_cpu_number ();
+  vhost_iface_and_queue_t *vhiq;
+  vhost_user_intf_t *vui;
+  vhost_cpu_t *vhc;
 
+  vhc = &vum->cpus[cpu_index];
+  if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE))
+    {
+      vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
+      {
+       vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
+       n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node);
+      }
+    }
+  else
+    {
+      int i;
 
-  vhost_iface_and_queue_t *vhiq;
-  vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
-  {
-    vhost_user_intf_t *vui =
-      &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
-    n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node);
-  }
+      /* *INDENT-OFF* */
+      clib_bitmap_foreach (i, vhc->pending_input_bitmap, ({
+       int qid = i & 0xff;
 
+       clib_bitmap_set (vhc->pending_input_bitmap, i, 0);
+       vui = pool_elt_at_index (vum->vhost_user_interfaces, i >> 8);
+       n_rx_packets += vhost_user_if_input (vm, vum, vui, qid, node);
+      }));
+      /* *INDENT-ON* */
+    }
   return n_rx_packets;
 }
 
@@ -2241,7 +2368,8 @@ done:
     }
 
   /* interrupt (call) handling */
-  if ((rxvq->callfd_idx != ~0) && !(rxvq->avail->flags & 1))
+  if ((rxvq->callfd_idx != ~0) &&
+      !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
     {
       rxvq->n_since_last_int += frame->n_vectors - n_left;
 
@@ -2595,6 +2723,95 @@ vhost_user_vui_init (vnet_main_t * vnm,
   vhost_user_tx_thread_placement (vui);
 }
 
+static uword
+vhost_user_send_interrupt_process (vlib_main_t * vm,
+                                  vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  vhost_user_intf_t *vui;
+  f64 timeout = 3153600000.0 /* 100 years */ ;
+  uword event_type, *event_data = 0;
+  vhost_user_main_t *vum = &vhost_user_main;
+  vhost_iface_and_queue_t *vhiq;
+  vhost_cpu_t *vhc;
+  f64 now, poll_time_remaining;
+
+  while (1)
+    {
+      poll_time_remaining =
+       vlib_process_wait_for_event_or_clock (vm, timeout);
+      event_type = vlib_process_get_events (vm, &event_data);
+      vec_reset_length (event_data);
+
+      /*
+       * Use the remaining timeout if it is less than coalesce time to avoid
+       * resetting the existing timer in the middle of expiration
+       */
+      timeout = poll_time_remaining;
+      if (vlib_process_suspend_time_is_zero (timeout) ||
+         (timeout > vum->coalesce_time))
+       timeout = vum->coalesce_time;
+
+      now = vlib_time_now (vm);
+      switch (event_type)
+       {
+       case VHOST_USER_EVENT_START_TIMER:
+         if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
+           break;
+         /* fall through */
+
+       case ~0:
+         vec_foreach (vhc, vum->cpus)
+         {
+           u32 cpu_index = vhc - vum->cpus;
+           f64 next_timeout;
+
+           next_timeout = timeout;
+           vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
+           {
+             vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
+             vhost_user_vring_t *rxvq =
+               &vui->vrings[VHOST_VRING_IDX_RX (vhiq->qid)];
+             vhost_user_vring_t *txvq =
+               &vui->vrings[VHOST_VRING_IDX_TX (vhiq->qid)];
+
+             if (txvq->n_since_last_int)
+               {
+                 if (now >= txvq->int_deadline)
+                   vhost_user_send_call (vm, txvq);
+                 else
+                   next_timeout = txvq->int_deadline - now;
+               }
+
+             if (rxvq->n_since_last_int)
+               {
+                 if (now >= rxvq->int_deadline)
+                   vhost_user_send_call (vm, rxvq);
+                 else
+                   next_timeout = rxvq->int_deadline - now;
+               }
+
+             if ((next_timeout < timeout) && (next_timeout > 0.0))
+               timeout = next_timeout;
+           }
+         }
+         break;
+
+       default:
+         clib_warning ("BUG: unhandled event type %d", event_type);
+         break;
+       }
+    }
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = {
+    .function = vhost_user_send_interrupt_process,
+    .type = VLIB_NODE_TYPE_PROCESS,
+    .name = "vhost-user-send-interrupt-process",
+};
+/* *INDENT-ON* */
+
 int
 vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
                      const char *sock_filename,
@@ -2608,8 +2825,10 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
   u32 sw_if_idx = ~0;
   int rv = 0;
   int server_sock_fd = -1;
+  vhost_user_main_t *vum = &vhost_user_main;
 
-  if (operation_mode != VHOST_USER_POLLING_MODE)
+  if ((operation_mode != VHOST_USER_POLLING_MODE) &&
+      (operation_mode != VHOST_USER_INTERRUPT_MODE))
     return VNET_API_ERROR_UNIMPLEMENTED;
 
   if (sock_filename == NULL || !(strlen (sock_filename) > 0))
@@ -2640,6 +2859,15 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
 
   // Process node must connect
   vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
+
+  if ((operation_mode == VHOST_USER_INTERRUPT_MODE) &&
+      !vum->interrupt_mode && (vum->coalesce_time > 0.0) &&
+      (vum->coalesce_frames > 0))
+    {
+      vum->interrupt_mode = 1;
+      vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index,
+                                VHOST_USER_EVENT_START_TIMER, 0);
+    }
   return rv;
 }
 
@@ -2658,7 +2886,8 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
   int rv = 0;
   vnet_hw_interface_t *hwif;
 
-  if (operation_mode != VHOST_USER_POLLING_MODE)
+  if ((operation_mode != VHOST_USER_POLLING_MODE) &&
+      (operation_mode != VHOST_USER_INTERRUPT_MODE))
     return VNET_API_ERROR_UNIMPLEMENTED;
   if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
       hwif->dev_class_index != vhost_user_dev_class.index)
@@ -2682,9 +2911,34 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
 
   // Process node must connect
   vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
+
+  if ((operation_mode == VHOST_USER_INTERRUPT_MODE) &&
+      !vum->interrupt_mode && (vum->coalesce_time > 0.0) &&
+      (vum->coalesce_frames > 0))
+    {
+      vum->interrupt_mode = 1;
+      vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index,
+                                VHOST_USER_EVENT_START_TIMER, 0);
+    }
   return rv;
 }
 
+static uword
+unformat_vhost_user_operation_mode (unformat_input_t * input, va_list * args)
+{
+  u8 *operation_mode = va_arg (*args, u8 *);
+  uword rc = 1;
+
+  if (unformat (input, "interrupt"))
+    *operation_mode = VHOST_USER_INTERRUPT_MODE;
+  else if (unformat (input, "polling"))
+    *operation_mode = VHOST_USER_POLLING_MODE;
+  else
+    rc = 0;
+
+  return rc;
+}
+
 clib_error_t *
 vhost_user_connect_command_fn (vlib_main_t * vm,
                               unformat_input_t * input,
@@ -2722,6 +2976,9 @@ vhost_user_connect_command_fn (vlib_main_t * vm,
        {
          renumber = 1;
        }
+      else if (unformat (line_input, "mode %U",
+                        unformat_vhost_user_operation_mode, &operation_mode))
+       ;
       else
        {
          error = clib_error_return (0, "unknown input `%U'",
@@ -2851,6 +3108,25 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
   return rv;
 }
 
+static u8 *
+format_vhost_user_operation_mode (u8 * s, va_list * va)
+{
+  int operation_mode = va_arg (*va, int);
+
+  switch (operation_mode)
+    {
+    case VHOST_USER_POLLING_MODE:
+      s = format (s, "%s", "polling");
+      break;
+    case VHOST_USER_INTERRUPT_MODE:
+      s = format (s, "%s", "interrupt");
+      break;
+    default:
+      s = format (s, "%s", "invalid");
+    }
+  return s;
+}
+
 clib_error_t *
 show_vhost_user_command_fn (vlib_main_t * vm,
                            unformat_input_t * input,
@@ -2959,14 +3235,22 @@ show_vhost_user_command_fn (vlib_main_t * vm,
                       (vui->unix_server_index != ~0) ? "server" : "client",
                       strerror (vui->sock_errno));
 
+      vlib_cli_output (vm, " configured mode: %U\n",
+                      format_vhost_user_operation_mode, vui->operation_mode);
       vlib_cli_output (vm, " rx placement: ");
       vec_foreach (vhc, vum->cpus)
       {
        vec_foreach (vhiq, vhc->rx_queues)
        {
          if (vhiq->vhost_iface_index == vui - vum->vhost_user_interfaces)
-           vlib_cli_output (vm, "   thread %d on vring %d\n",
-                            vhc - vum->cpus, VHOST_VRING_IDX_TX (vhiq->qid));
+           {
+             vlib_cli_output (vm, "   thread %d on vring %d\n",
+                              vhc - vum->cpus,
+                              VHOST_VRING_IDX_TX (vhiq->qid));
+             vlib_cli_output (vm, "   mode: %U\n",
+                              format_vhost_user_operation_mode,
+                              vhc->operation_mode);
+           }
        }
       }
 
@@ -3096,6 +3380,9 @@ done:
  * in the name to be specified. If instance already exists, name will be used
  * anyway and multiple instances will have the same name. Use with caution.
  *
+ * - <b>mode [interrupt | polling]</b> - Optional parameter specifying
+ * the input thread polling policy.
+ *
  * @cliexpar
  * Example of how to create a vhost interface with VPP as the client and all features enabled:
  * @cliexstart{create vhost-user socket /tmp/vhost1.sock}
@@ -3112,7 +3399,9 @@ done:
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
     .path = "create vhost-user",
-    .short_help = "create vhost-user socket <socket-filename> [server] [feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>]",
+    .short_help = "create vhost-user socket <socket-filename> [server] "
+    "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] "
+    "[mode {interrupt | polling}]",
     .function = vhost_user_connect_command_fn,
 };
 /* *INDENT-ON* */
index 6b928f0..67f18b8 100644 (file)
@@ -216,6 +216,8 @@ typedef struct
 #define VHOST_USER_INTERRUPT_MODE 1
 #define VHOST_USER_ADAPTIVE_MODE  2
 
+#define VHOST_USER_EVENT_START_TIMER 1
+
 typedef struct
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -298,6 +300,12 @@ typedef struct
   /* This is here so it doesn't end-up
    * using stack or registers. */
   vhost_trace_t *current_trace;
+
+  /* bitmap of pending rx interfaces */
+  uword *pending_input_bitmap;
+
+  /* The operation mode computed per cpu based on interface setting */
+  u8 operation_mode;
 } vhost_cpu_t;
 
 typedef struct
@@ -320,6 +328,9 @@ typedef struct
 
   /** Pseudo random iterator */
   u32 random;
+
+  /* Node is in interrupt mode */
+  u8 interrupt_mode;
 } vhost_user_main_t;
 
 typedef struct