interface: rx queue infra rework, part one 08/29808/30
author Damjan Marion <damarion@cisco.com>
Fri, 6 Nov 2020 22:25:57 +0000 (23:25 +0100)
committer Damjan Marion <dmarion@me.com>
Thu, 21 Jan 2021 13:20:10 +0000 (13:20 +0000)
Type: improvement
Change-Id: I4008cadfd5141f921afbdc09a3ebcd1dcf88eb29
Signed-off-by: Damjan Marion <damarion@cisco.com>
37 files changed:
src/plugins/avf/avf.h
src/plugins/avf/device.c
src/plugins/avf/input.c
src/plugins/dpdk/device/common.c
src/plugins/dpdk/device/dpdk.h
src/plugins/dpdk/device/init.c
src/plugins/dpdk/device/node.c
src/plugins/vmxnet3/input.c
src/plugins/vmxnet3/vmxnet3.c
src/plugins/vmxnet3/vmxnet3.h
src/vlib/main.c
src/vlib/node.c
src/vlib/node.h
src/vlib/node_funcs.h
src/vlib/threads.c
src/vlib/unix/input.c
src/vnet/CMakeLists.txt
src/vnet/devices/devices.h
src/vnet/devices/tap/tap.c
src/vnet/devices/virtio/node.c
src/vnet/devices/virtio/pci.c
src/vnet/devices/virtio/vhost_user.c
src/vnet/devices/virtio/vhost_user.h
src/vnet/devices/virtio/vhost_user_input.c
src/vnet/devices/virtio/virtio.c
src/vnet/devices/virtio/virtio.h
src/vnet/devices/virtio/virtio_process.c
src/vnet/interface.c
src/vnet/interface.h
src/vnet/interface/runtime.c [new file with mode: 0644]
src/vnet/interface/rx_queue.c [new file with mode: 0644]
src/vnet/interface/rx_queue_funcs.h [new file with mode: 0644]
src/vnet/interface_cli.c
src/vnet/interface_format.c
src/vppinfra/CMakeLists.txt
src/vppinfra/interrupt.c [new file with mode: 0644]
src/vppinfra/interrupt.h [new file with mode: 0644]

index 025fa6e..b3fcc25 100644 (file)
@@ -164,6 +164,7 @@ typedef struct
   u16 n_enqueued;
   u8 int_mode;
   u8 buffer_pool_index;
+  u32 queue_index;
 } avf_rxq_t;
 
 typedef struct
index ffd372d..139f1c9 100644 (file)
@@ -20,6 +20,7 @@
 #include <vlib/unix/unix.h>
 #include <vlib/pci/pci.h>
 #include <vnet/ethernet/ethernet.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
 #include <avf/avf.h>
 
@@ -1373,6 +1374,7 @@ avf_irq_n_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
   vnet_main_t *vnm = vnet_get_main ();
   uword pd = vlib_pci_get_private_data (vm, h);
   avf_device_t *ad = avf_get_device (pd);
+  avf_rxq_t *rxq = vec_elt_at_index (ad->rxqs, line - 1);
 
   if (ad->flags & AVF_DEVICE_F_ELOG)
     {
@@ -1396,8 +1398,8 @@ avf_irq_n_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
 
   line--;
 
-  if (ad->flags & AVF_DEVICE_F_RX_INT && ad->rxqs[line].int_mode)
-    vnet_device_input_set_interrupt_pending (vnm, ad->hw_if_index, line);
+  if (ad->flags & AVF_DEVICE_F_RX_INT && rxq->int_mode)
+    vnet_hw_if_rx_queue_set_int_pending (vnm, rxq->queue_index);
   avf_irq_n_set_state (ad, line, AVF_IRQ_STATE_ENABLED);
 }
 
@@ -1415,7 +1417,6 @@ avf_delete_if (vlib_main_t * vm, avf_device_t * ad, int with_barrier)
       if (with_barrier)
        vlib_worker_thread_barrier_sync (vm);
       vnet_hw_interface_set_flags (vnm, ad->hw_if_index, 0);
-      vnet_hw_interface_unassign_rx_thread (vnm, ad->hw_if_index, 0);
       ethernet_delete_interface (vnm, ad->hw_if_index);
       if (with_barrier)
        vlib_worker_thread_barrier_release (vm);
@@ -1660,11 +1661,22 @@ avf_create_if (vlib_main_t * vm, avf_create_if_args_t * args)
 
   vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, ad->hw_if_index);
   hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
-  vnet_hw_interface_set_input_node (vnm, ad->hw_if_index,
-                                   avf_input_node.index);
+  vnet_hw_if_set_input_node (vnm, ad->hw_if_index, avf_input_node.index);
 
   for (i = 0; i < ad->n_rx_queues; i++)
-    vnet_hw_interface_assign_rx_thread (vnm, ad->hw_if_index, i, ~0);
+    {
+      u32 qi, fi;
+      qi = vnet_hw_if_register_rx_queue (vnm, ad->hw_if_index, i,
+                                        VNET_HW_IF_RXQ_THREAD_ANY);
+
+      if (ad->flags & AVF_DEVICE_F_RX_INT)
+       {
+         fi = vlib_pci_get_msix_file_index (vm, ad->pci_dev_handle, i + 1);
+         vnet_hw_if_set_rx_queue_file_index (vnm, qi, fi);
+       }
+      ad->rxqs[i].queue_index = qi;
+    }
+  vnet_hw_if_update_runtime_data (vnm, ad->hw_if_index);
 
   if (pool_elts (am->devices) == 1)
     vlib_process_signal_event (vm, avf_process_node.index,
index 85f97ca..5041f6e 100644 (file)
@@ -19,7 +19,7 @@
 #include <vlib/unix/unix.h>
 #include <vlib/pci/pci.h>
 #include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/devices.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
 #include <avf/avf.h>
 
@@ -473,17 +473,18 @@ VLIB_NODE_FN (avf_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                               vlib_frame_t * frame)
 {
   u32 n_rx = 0;
-  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
-  vnet_device_and_queue_t *dq;
-
-  foreach_device_and_queue (dq, rt->devices_and_queues)
-  {
-    avf_device_t *ad;
-    ad = avf_get_device (dq->dev_instance);
-    if ((ad->flags & AVF_DEVICE_F_ADMIN_UP) == 0)
-      continue;
-    n_rx += avf_device_input_inline (vm, node, frame, ad, dq->queue_id);
-  }
+  vnet_hw_if_rxq_poll_vector_t *pv;
+
+  pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
+
+  for (int i = 0; i < vec_len (pv); i++)
+    {
+      avf_device_t *ad = avf_get_device (pv[i].dev_instance);
+      if ((ad->flags & AVF_DEVICE_F_ADMIN_UP) == 0)
+       continue;
+      n_rx += avf_device_input_inline (vm, node, frame, ad, pv[i].queue_id);
+    }
+
   return n_rx;
 }
 
index 2521abd..e9b033e 100644 (file)
@@ -21,6 +21,7 @@
 #include <vnet/ip/ip.h>
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/ethernet/arp_packet.h>
+#include <vnet/interface/rx_queue_funcs.h>
 #include <dpdk/buffer.h>
 #include <dpdk/device/dpdk.h>
 #include <dpdk/device/dpdk_priv.h>
@@ -122,11 +123,8 @@ dpdk_device_setup (dpdk_device_t * xd)
   for (j = 0; j < xd->rx_q_used; j++)
     {
       dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, j);
-      uword tidx = vnet_get_device_input_thread_index (dm->vnet_main,
-                                                      xd->hw_if_index, j);
-      unsigned lcore = vlib_worker_threads[tidx].cpu_id;
-      u16 socket_id = rte_lcore_to_socket_id (lcore);
-      u8 bpidx = vlib_buffer_pool_get_default_for_numa (vm, socket_id);
+      u8 bpidx = vlib_buffer_pool_get_default_for_numa (
+       vm, vnet_hw_if_get_rx_queue_numa_node (vnm, rxq->queue_index));
       vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, bpidx);
       struct rte_mempool *mp = dpdk_mempool_by_buffer_pool_index[bpidx];
 
index 504bac5..3712015 100644 (file)
@@ -165,6 +165,7 @@ typedef struct
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
   u8 buffer_pool_index;
+  u32 queue_index;
 } dpdk_rx_queue_t;
 
 typedef struct
index a1eead2..8e4bf55 100644 (file)
@@ -22,6 +22,7 @@
 #include <vlib/log.h>
 
 #include <vnet/ethernet/ethernet.h>
+#include <vnet/interface/rx_queue_funcs.h>
 #include <dpdk/buffer.h>
 #include <dpdk/device/dpdk.h>
 #include <dpdk/cryptodev/cryptodev.h>
@@ -414,6 +415,9 @@ dpdk_lib_init (dpdk_main_t * dm)
       else
        xd->rx_q_used = 1;
 
+      vec_validate_aligned (xd->rx_queues, xd->rx_q_used - 1,
+                           CLIB_CACHE_LINE_BYTES);
+
       xd->flags |= DPDK_DEVICE_FLAG_PMD;
 
       /* workaround for drivers not setting driver_name */
@@ -724,25 +728,30 @@ dpdk_lib_init (dpdk_main_t * dm)
 
       sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->hw_if_index);
       xd->sw_if_index = sw->sw_if_index;
-      vnet_hw_interface_set_input_node (dm->vnet_main, xd->hw_if_index,
-                                       dpdk_input_node.index);
+      vnet_hw_if_set_input_node (dm->vnet_main, xd->hw_if_index,
+                                dpdk_input_node.index);
 
       if (devconf->workers)
        {
          int i;
          q = 0;
          clib_bitmap_foreach (i, devconf->workers)  {
-           vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q++,
-                                            vdm->first_worker_thread_index + i);
+             dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
+             rxq->queue_index = vnet_hw_if_register_rx_queue (
+               dm->vnet_main, xd->hw_if_index, q++,
+               vdm->first_worker_thread_index + i);
          }
        }
       else
        for (q = 0; q < xd->rx_q_used; q++)
          {
-           vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q,      /* any */
-                                               ~1);
+           dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
+           rxq->queue_index = vnet_hw_if_register_rx_queue (
+             dm->vnet_main, xd->hw_if_index, q, VNET_HW_IF_RXQ_THREAD_ANY);
          }
 
+      vnet_hw_if_update_runtime_data (dm->vnet_main, xd->hw_if_index);
+
       /*Get vnet hardware interface */
       hi = vnet_get_hw_interface (dm->vnet_main, xd->hw_if_index);
 
index e7d836f..2eec74e 100644 (file)
@@ -25,6 +25,7 @@
 #include <vnet/mpls/packet.h>
 #include <vnet/handoff.h>
 #include <vnet/devices/devices.h>
+#include <vnet/interface/rx_queue_funcs.h>
 #include <vnet/feature/feature.h>
 
 #include <dpdk/device/dpdk_priv.h>
@@ -463,21 +464,21 @@ VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
   dpdk_main_t *dm = &dpdk_main;
   dpdk_device_t *xd;
   uword n_rx_packets = 0;
-  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
-  vnet_device_and_queue_t *dq;
+  vnet_hw_if_rxq_poll_vector_t *pv;
   u32 thread_index = node->thread_index;
 
   /*
    * Poll all devices on this cpu for input/interrupts.
    */
-  /* *INDENT-OFF* */
-  foreach_device_and_queue (dq, rt->devices_and_queues)
+
+  pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
+
+  for (int i = 0; i < vec_len (pv); i++)
     {
-      xd = vec_elt_at_index(dm->devices, dq->dev_instance);
-      n_rx_packets += dpdk_device_input (vm, dm, xd, node, thread_index,
-                                        dq->queue_id);
+      xd = vec_elt_at_index (dm->devices, pv[i].dev_instance);
+      n_rx_packets +=
+       dpdk_device_input (vm, dm, xd, node, thread_index, pv[i].queue_id);
     }
-  /* *INDENT-ON* */
   return n_rx_packets;
 }
 
index bed4c1e..affc369 100644 (file)
@@ -23,7 +23,7 @@
 #include <vnet/ip/ip6_packet.h>
 #include <vnet/ip/ip4_packet.h>
 #include <vnet/udp/udp_packet.h>
-
+#include <vnet/interface/rx_queue_funcs.h>
 #include <vmxnet3/vmxnet3.h>
 
 #define foreach_vmxnet3_input_error \
@@ -469,17 +469,17 @@ VLIB_NODE_FN (vmxnet3_input_node) (vlib_main_t * vm,
 {
   u32 n_rx = 0;
   vmxnet3_main_t *vmxm = &vmxnet3_main;
-  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
-  vnet_device_and_queue_t *dq;
-
-  foreach_device_and_queue (dq, rt->devices_and_queues)
-  {
-    vmxnet3_device_t *vd;
-    vd = vec_elt_at_index (vmxm->devices, dq->dev_instance);
-    if ((vd->flags & VMXNET3_DEVICE_F_ADMIN_UP) == 0)
-      continue;
-    n_rx += vmxnet3_device_input_inline (vm, node, frame, vd, dq->queue_id);
-  }
+  vnet_hw_if_rxq_poll_vector_t *pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
+  vnet_hw_if_rxq_poll_vector_t *pve;
+
+  vec_foreach (pve, pv)
+    {
+      vmxnet3_device_t *vd;
+      vd = vec_elt_at_index (vmxm->devices, pve->dev_instance);
+      if ((vd->flags & VMXNET3_DEVICE_F_ADMIN_UP) == 0)
+       continue;
+      n_rx += vmxnet3_device_input_inline (vm, node, frame, vd, pve->queue_id);
+    }
   return n_rx;
 }
 
index 8ec10cd..2c7b970 100644 (file)
@@ -19,7 +19,7 @@
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/plugin/plugin.h>
 #include <vpp/app/version.h>
-
+#include <vnet/interface/rx_queue_funcs.h>
 #include <vmxnet3/vmxnet3.h>
 
 #define PCI_VENDOR_ID_VMWARE                           0x15ad
@@ -540,9 +540,10 @@ vmxnet3_rxq_irq_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
   uword pd = vlib_pci_get_private_data (vm, h);
   vmxnet3_device_t *vd = pool_elt_at_index (vmxm->devices, pd);
   u16 qid = line;
+  vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid);
 
   if (vec_len (vd->rxqs) > qid && vd->rxqs[qid].int_mode != 0)
-    vnet_device_input_set_interrupt_pending (vnm, vd->hw_if_index, qid);
+    vnet_hw_if_rx_queue_set_int_pending (vnm, rxq->queue_index);
 }
 
 static void
@@ -812,25 +813,26 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
     hw->flags |= (VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO |
                  VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD);
 
-  vnet_hw_interface_set_input_node (vnm, vd->hw_if_index,
-                                   vmxnet3_input_node.index);
+  vnet_hw_if_set_input_node (vnm, vd->hw_if_index, vmxnet3_input_node.index);
   /* Disable interrupts */
   vmxnet3_disable_interrupt (vd);
   vec_foreach_index (qid, vd->rxqs)
   {
     vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid);
-    u32 thread_index;
-    u32 numa_node;
+    u32 qi, fi;
 
-    vnet_hw_interface_assign_rx_thread (vnm, vd->hw_if_index, qid, ~0);
-    thread_index = vnet_get_device_input_thread_index (vnm, vd->hw_if_index,
-                                                      qid);
-    numa_node = vlib_mains[thread_index]->numa_node;
+    qi = vnet_hw_if_register_rx_queue (vnm, vd->hw_if_index, qid,
+                                      VNET_HW_IF_RXQ_THREAD_ANY);
+    fi = vlib_pci_get_msix_file_index (vm, vd->pci_dev_handle, qid);
+    vnet_hw_if_set_rx_queue_file_index (vnm, qi, fi);
+    rxq->queue_index = qi;
     rxq->buffer_pool_index =
-      vlib_buffer_pool_get_default_for_numa (vm, numa_node);
+      vnet_hw_if_get_rx_queue_numa_node (vnm, rxq->queue_index);
     vmxnet3_rxq_refill_ring0 (vm, vd, rxq);
     vmxnet3_rxq_refill_ring1 (vm, vd, rxq);
   }
+  vnet_hw_if_update_runtime_data (vnm, vd->hw_if_index);
+
   vd->flags |= VMXNET3_DEVICE_F_INITIALIZED;
   vmxnet3_enable_interrupt (vd);
 
@@ -855,7 +857,7 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd)
   vnet_main_t *vnm = vnet_get_main ();
   vmxnet3_main_t *vmxm = &vmxnet3_main;
   u32 i, bi;
-  u16 desc_idx, qid;
+  u16 desc_idx;
 
   /* Quiesce the device */
   vmxnet3_reg_write (vd, 1, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV);
@@ -866,8 +868,6 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd)
   if (vd->hw_if_index)
     {
       vnet_hw_interface_set_flags (vnm, vd->hw_if_index, 0);
-      vec_foreach_index (qid, vd->rxqs)
-       vnet_hw_interface_unassign_rx_thread (vnm, vd->hw_if_index, qid);
       ethernet_delete_interface (vnm, vd->hw_if_index);
     }
 
index 8790e0f..e8d2be0 100644 (file)
@@ -516,6 +516,7 @@ typedef struct
   u16 size;
   u8 int_mode;
   u8 buffer_pool_index;
+  u32 queue_index;
   vmxnet3_rx_ring rx_ring[VMXNET3_RX_RING_SIZE];
   vmxnet3_rx_desc *rx_desc[VMXNET3_RX_RING_SIZE];
   vmxnet3_rx_comp *rx_comp;
index 6369f39..c76d874 100644 (file)
@@ -1708,27 +1708,6 @@ vl_api_send_pending_rpc_requests (vlib_main_t * vm)
 {
 }
 
-static_always_inline u64
-dispatch_pending_interrupts (vlib_main_t * vm, vlib_node_main_t * nm,
-                            u64 cpu_time_now,
-                            vlib_node_interrupt_t * interrupts)
-{
-  vlib_node_runtime_t *n;
-
-  for (int i = 0; i < _vec_len (interrupts); i++)
-    {
-      vlib_node_interrupt_t *in;
-      in = vec_elt_at_index (interrupts, i);
-      n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
-                           in->node_runtime_index);
-      n->interrupt_data = in->data;
-      cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT,
-                                   VLIB_NODE_STATE_INTERRUPT, /* frame */ 0,
-                                   cpu_time_now);
-    }
-  return cpu_time_now;
-}
-
 static inline void
 pcap_postmortem_reset (vlib_main_t * vm)
 {
@@ -1752,7 +1731,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
   f64 now;
   vlib_frame_queue_main_t *fqm;
   u32 frame_queue_check_counter = 0;
-  vlib_node_interrupt_t *empty_int_list = 0;
 
   /* Initialize pending node vector. */
   if (is_main)
@@ -1771,12 +1749,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
     cpu_time_now = clib_cpu_time_now ();
 
   /* Pre-allocate interupt runtime indices and lock. */
-  vec_alloc (nm->pending_local_interrupts, 32);
-  vec_alloc (nm->pending_remote_interrupts, 32);
-  vec_alloc (empty_int_list, 32);
-  vec_alloc_aligned (nm->pending_remote_interrupts_notify, 1,
-                    CLIB_CACHE_LINE_BYTES);
-  clib_spinlock_init (&nm->pending_interrupt_lock);
+  vec_alloc_aligned (nm->pending_interrupts, 1, CLIB_CACHE_LINE_BYTES);
 
   /* Pre-allocate expired nodes. */
   if (!nm->polling_threshold_vector_length)
@@ -1874,35 +1847,22 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
       if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0))
        vm->queue_signal_callback (vm);
 
-      /* handle local interruots */
-      if (_vec_len (nm->pending_local_interrupts))
+      if (__atomic_load_n (nm->pending_interrupts, __ATOMIC_ACQUIRE))
        {
-         vlib_node_interrupt_t *interrupts = nm->pending_local_interrupts;
-         nm->pending_local_interrupts = empty_int_list;
-         cpu_time_now = dispatch_pending_interrupts (vm, nm, cpu_time_now,
-                                                     interrupts);
-         empty_int_list = interrupts;
-         vec_reset_length (empty_int_list);
-       }
+         int int_num = -1;
+         *nm->pending_interrupts = 0;
 
-      /* handle remote interruots */
-      if (PREDICT_FALSE (_vec_len (nm->pending_remote_interrupts)))
-       {
-         vlib_node_interrupt_t *interrupts;
-
-         /* at this point it is known that
-          * vec_len (nm->pending_local_interrupts) is zero so we quickly swap
-          * local and remote vector under the spinlock */
-         clib_spinlock_lock (&nm->pending_interrupt_lock);
-         interrupts = nm->pending_remote_interrupts;
-         nm->pending_remote_interrupts = empty_int_list;
-         *nm->pending_remote_interrupts_notify = 0;
-         clib_spinlock_unlock (&nm->pending_interrupt_lock);
-
-         cpu_time_now = dispatch_pending_interrupts (vm, nm, cpu_time_now,
-                                                     interrupts);
-         empty_int_list = interrupts;
-         vec_reset_length (empty_int_list);
+         while ((int_num =
+                   clib_interrupt_get_next (nm->interrupts, int_num)) != -1)
+           {
+             vlib_node_runtime_t *n;
+             clib_interrupt_clear (nm->interrupts, int_num);
+             n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
+                                   int_num);
+             cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT,
+                                           VLIB_NODE_STATE_INTERRUPT,
+                                           /* frame */ 0, cpu_time_now);
+           }
        }
 
       /* Input nodes may have added work to the pending vector.
index cf65f29..13889d1 100644 (file)
@@ -447,6 +447,9 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r)
       {
        vec_add2_aligned (nm->nodes_by_type[n->type], rt, 1,
                          /* align */ CLIB_CACHE_LINE_BYTES);
+       if (n->type == VLIB_NODE_TYPE_INPUT)
+         clib_interrupt_resize (&nm->interrupts,
+                                vec_len (nm->nodes_by_type[n->type]));
        n->runtime_index = rt - nm->nodes_by_type[n->type];
       }
 
index 6b9a2df..1ec5a7a 100644 (file)
@@ -502,10 +502,6 @@ typedef struct vlib_node_runtime_t
 
   u16 state;                           /**< Input node state. */
 
-  u32 interrupt_data;                  /**< Data passed together with interrupt.
-                                         Valid only when state is
-                                         VLIB_NODE_STATE_INTERRUPT */
-
   u16 n_next_nodes;
 
   u16 cached_next_index;               /**< Next frame index that vector
@@ -668,12 +664,6 @@ vlib_timing_wheel_data_get_index (u32 d)
   return d / 2;
 }
 
-typedef struct
-{
-  u32 node_runtime_index;
-  u32 data;
-} vlib_node_interrupt_t;
-
 typedef struct
 {
   /* Public nodes. */
@@ -690,10 +680,8 @@ typedef struct
   vlib_node_runtime_t *nodes_by_type[VLIB_N_NODE_TYPE];
 
   /* Node runtime indices for input nodes with pending interrupts. */
-  vlib_node_interrupt_t *pending_local_interrupts;
-  vlib_node_interrupt_t *pending_remote_interrupts;
-  volatile u32 *pending_remote_interrupts_notify;
-  clib_spinlock_t pending_interrupt_lock;
+  void *interrupts;
+  volatile u32 *pending_interrupts;
 
   /* Input nodes are switched from/to interrupt to/from polling mode
      when average vector length goes above/below polling/interrupt
index b33f496..a12aea4 100644 (file)
@@ -47,6 +47,7 @@
 
 #include <vppinfra/fifo.h>
 #include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
+#include <vppinfra/interrupt.h>
 
 #ifdef CLIB_SANITIZE_ADDR
 #include <sanitizer/asan_interface.h>
@@ -224,37 +225,19 @@ vlib_node_get_state (vlib_main_t * vm, u32 node_index)
 }
 
 always_inline void
-vlib_node_set_interrupt_pending_with_data (vlib_main_t * vm, u32 node_index,
-                                          u32 data)
+vlib_node_set_interrupt_pending (vlib_main_t *vm, u32 node_index)
 {
   vlib_node_main_t *nm = &vm->node_main;
   vlib_node_t *n = vec_elt (nm->nodes, node_index);
-  vlib_node_interrupt_t *i;
+
   ASSERT (n->type == VLIB_NODE_TYPE_INPUT);
 
-  if (vm == vlib_get_main ())
-    {
-      /* local thread */
-      vec_add2 (nm->pending_local_interrupts, i, 1);
-      i->node_runtime_index = n->runtime_index;
-      i->data = data;
-    }
+  if (vm != vlib_get_main ())
+    clib_interrupt_set_atomic (nm->interrupts, n->runtime_index);
   else
-    {
-      /* remote thread */
-      clib_spinlock_lock (&nm->pending_interrupt_lock);
-      vec_add2 (nm->pending_remote_interrupts, i, 1);
-      i->node_runtime_index = n->runtime_index;
-      i->data = data;
-      *nm->pending_remote_interrupts_notify = 1;
-      clib_spinlock_unlock (&nm->pending_interrupt_lock);
-    }
-}
+    clib_interrupt_set (nm->interrupts, n->runtime_index);
 
-always_inline void
-vlib_node_set_interrupt_pending (vlib_main_t * vm, u32 node_index)
-{
-  vlib_node_set_interrupt_pending_with_data (vm, node_index, 0);
+  __atomic_store_n (nm->pending_interrupts, 1, __ATOMIC_RELEASE);
 }
 
 always_inline vlib_process_t *
index 7efddff..ea63653 100644 (file)
@@ -18,6 +18,7 @@
 #include <math.h>
 #include <vppinfra/format.h>
 #include <vppinfra/time_range.h>
+#include <vppinfra/interrupt.h>
 #include <vppinfra/linux/sysfs.h>
 #include <vlib/vlib.h>
 
@@ -863,6 +864,9 @@ start_workers (vlib_main_t * vm)
              nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
                vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
                                 CLIB_CACHE_LINE_BYTES);
+             clib_interrupt_init (
+               &nm_clone->interrupts,
+               vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]));
              vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
              {
                vlib_node_t *n = vlib_get_node (vm, rt->node_index);
@@ -1178,6 +1182,9 @@ vlib_worker_thread_node_refork (void)
   nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
     vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
                     CLIB_CACHE_LINE_BYTES);
+  clib_interrupt_resize (
+    &nm_clone->interrupts,
+    vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]));
 
   vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
   {
index 7531dd1..6398148 100644 (file)
@@ -249,8 +249,8 @@ linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 
                while (nanosleep (&ts, &tsrem) < 0)
                  ts = tsrem;
-               if (*vlib_worker_threads->wait_at_barrier
-                   || *nm->pending_remote_interrupts_notify)
+               if (*vlib_worker_threads->wait_at_barrier ||
+                   *nm->pending_interrupts)
                  goto done;
              }
          }
index c6c2b2e..0e1d9c4 100644 (file)
@@ -36,6 +36,8 @@ list(APPEND VNET_SOURCES
   interface_cli.c
   interface_format.c
   interface_output.c
+  interface/rx_queue.c
+  interface/runtime.c
   interface_stats.c
   misc.c
 )
@@ -55,6 +57,7 @@ list(APPEND VNET_HEADERS
   flow/flow.h
   global_funcs.h
   handoff.h
+  interface/rx_queue_funcs.h
   interface.h
   interface_funcs.h
   interface_output.h
index 917c872..02eb5cf 100644 (file)
@@ -68,6 +68,7 @@ typedef struct
 {
   vnet_device_and_queue_t *devices_and_queues;
   vlib_node_state_t enabled_node_state;
+  u32 pad;
 } vnet_device_input_runtime_t;
 
 extern vnet_device_main_t vnet_device_main;
index 11c2480..7e7d3d5 100644 (file)
@@ -739,16 +739,8 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
     {
       virtio_set_packet_coalesce (vif);
     }
-  vnet_hw_interface_set_input_node (vnm, vif->hw_if_index,
-                                   virtio_input_node.index);
 
-  for (i = 0; i < vif->num_rxqs; i++)
-    {
-      vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, i, ~0);
-      vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, i,
-                                    VNET_HW_IF_RX_MODE_DEFAULT);
-      virtio_vring_set_numa_node (vm, vif, RX_QUEUE (i));
-    }
+  virtio_vring_set_rx_queues (vm, vif);
 
   vif->per_interface_next_index = ~0;
   vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
@@ -788,7 +780,6 @@ tap_delete_if (vlib_main_t * vm, u32 sw_if_index)
 {
   vnet_main_t *vnm = vnet_get_main ();
   virtio_main_t *mm = &virtio_main;
-  int i;
   virtio_if_t *vif;
   vnet_hw_interface_t *hw;
 
@@ -804,8 +795,6 @@ tap_delete_if (vlib_main_t * vm, u32 sw_if_index)
   /* bring down the interface */
   vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
   vnet_sw_interface_set_flags (vnm, vif->sw_if_index, 0);
-  for (i = 0; i < vif->num_rxqs; i++)
-    vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, i);
 
   if (vif->type == VIRTIO_IF_TYPE_TAP)
     ethernet_delete_interface (vnm, vif->hw_if_index);
index 1e75ca4..788cc61 100644 (file)
 #include <vlib/vlib.h>
 #include <vlib/unix/unix.h>
 #include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/devices.h>
 #include <vnet/feature/feature.h>
 #include <vnet/gso/gro_func.h>
+#include <vnet/interface/rx_queue_funcs.h>
 #include <vnet/ip/ip4_packet.h>
 #include <vnet/ip/ip6_packet.h>
 #include <vnet/udp/udp_packet.h>
 #include <vnet/devices/virtio/virtio.h>
 
-
 #define foreach_virtio_input_error \
   _(BUFFER_ALLOC, "buffer alloc error") \
   _(UNKNOWN, "unknown")
@@ -638,30 +637,27 @@ VLIB_NODE_FN (virtio_input_node) (vlib_main_t * vm,
                                  vlib_frame_t * frame)
 {
   u32 n_rx = 0;
-  virtio_main_t *nm = &virtio_main;
-  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
-  vnet_device_and_queue_t *dq;
-
-  foreach_device_and_queue (dq, rt->devices_and_queues)
-  {
-    virtio_if_t *vif;
-    vif = vec_elt_at_index (nm->interfaces, dq->dev_instance);
-    if (vif->flags & VIRTIO_IF_FLAG_ADMIN_UP)
-      {
-       if (vif->type == VIRTIO_IF_TYPE_TAP)
-         n_rx += virtio_device_input_inline (vm, node, frame, vif,
-                                             dq->queue_id,
-                                             VIRTIO_IF_TYPE_TAP);
-       else if (vif->type == VIRTIO_IF_TYPE_PCI)
-         n_rx += virtio_device_input_inline (vm, node, frame, vif,
-                                             dq->queue_id,
-                                             VIRTIO_IF_TYPE_PCI);
-       else if (vif->type == VIRTIO_IF_TYPE_TUN)
-         n_rx += virtio_device_input_inline (vm, node, frame, vif,
-                                             dq->queue_id,
-                                             VIRTIO_IF_TYPE_TUN);
-      }
-  }
+  virtio_main_t *vim = &virtio_main;
+  vnet_hw_if_rxq_poll_vector_t *p,
+    *pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
+
+  vec_foreach (p, pv)
+    {
+      virtio_if_t *vif;
+      vif = vec_elt_at_index (vim->interfaces, p->dev_instance);
+      if (vif->flags & VIRTIO_IF_FLAG_ADMIN_UP)
+       {
+         if (vif->type == VIRTIO_IF_TYPE_TAP)
+           n_rx += virtio_device_input_inline (
+             vm, node, frame, vif, p->queue_id, VIRTIO_IF_TYPE_TAP);
+         else if (vif->type == VIRTIO_IF_TYPE_PCI)
+           n_rx += virtio_device_input_inline (
+             vm, node, frame, vif, p->queue_id, VIRTIO_IF_TYPE_PCI);
+         else if (vif->type == VIRTIO_IF_TYPE_TUN)
+           n_rx += virtio_device_input_inline (
+             vm, node, frame, vif, p->queue_id, VIRTIO_IF_TYPE_TUN);
+       }
+    }
 
   return n_rx;
 }
index 93ea70b..908aba7 100644 (file)
@@ -24,6 +24,7 @@
 #include <vnet/ip/ip6_packet.h>
 #include <vnet/devices/virtio/virtio.h>
 #include <vnet/devices/virtio/pci.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
 #define PCI_VENDOR_ID_VIRTIO                           0x1af4
 #define PCI_DEVICE_ID_VIRTIO_NIC                       0x1000
@@ -115,7 +116,8 @@ virtio_pci_irq_queue_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
   line--;
   u16 qid = line;
 
-  vnet_device_input_set_interrupt_pending (vnm, vif->hw_if_index, qid);
+  virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
+  vnet_hw_if_rx_queue_set_int_pending (vnm, vring->queue_index);
 }
 
 static void
@@ -1519,17 +1521,8 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
        }
     }
 
-  vnet_hw_interface_set_input_node (vnm, vif->hw_if_index,
-                                   virtio_input_node.index);
-  u32 i = 0;
-  vec_foreach_index (i, vif->rxq_vrings)
-  {
-    vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, i, ~0);
-    virtio_vring_set_numa_node (vm, vif, RX_QUEUE (i));
-    /* Set default rx mode to POLLING */
-    vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, i,
-                                  VNET_HW_IF_RX_MODE_POLLING);
-  }
+  virtio_vring_set_rx_queues (vm, vif);
+
   if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP)
     {
       vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
@@ -1584,10 +1577,6 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
   if (vif->hw_if_index)
     {
       vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
-      vec_foreach_index (i, vif->rxq_vrings)
-      {
-       vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, i);
-      }
       ethernet_delete_interface (vnm, vif->hw_if_index);
     }
 
index daa1260..b45b18b 100644 (file)
@@ -36,6 +36,7 @@
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/devices/devices.h>
 #include <vnet/feature/feature.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
 #include <vnet/devices/virtio/vhost_user.h>
 #include <vnet/devices/virtio/vhost_user_inline.h>
@@ -165,17 +166,19 @@ vhost_user_rx_thread_placement (vhost_user_intf_t * vui, u32 qid)
 
   ASSERT ((qid & 1) == 1);     // should be odd
   // Assign new queue mappings for the interface
-  vnet_hw_interface_set_input_node (vnm, vui->hw_if_index,
-                                   vhost_user_input_node.index);
-  vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, q, ~0);
+  vnet_hw_if_set_input_node (vnm, vui->hw_if_index,
+                            vhost_user_input_node.index);
+  txvq->queue_index = vnet_hw_if_register_rx_queue (vnm, vui->hw_if_index, q,
+                                                   VNET_HW_IF_RXQ_THREAD_ANY);
   if (txvq->mode == VNET_HW_IF_RX_MODE_UNKNOWN)
     /* Set polling as the default */
     txvq->mode = VNET_HW_IF_RX_MODE_POLLING;
   txvq->qid = q;
-  rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, q, txvq->mode);
+  rv = vnet_hw_if_set_rx_queue_mode (vnm, txvq->queue_index, txvq->mode);
   if (rv)
     vu_log_warn (vui, "unable to set rx mode for interface %d, "
                 "queue %d: rc=%d", vui->hw_if_index, q, rv);
+  vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index);
 }
 
 /** @brief Returns whether at least one TX and one RX vring are enabled */
@@ -213,15 +216,20 @@ vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
 {
   u32 qid;
   vnet_main_t *vnm = vnet_get_main ();
+  vhost_user_vring_t *txvq;
 
   qid = ifq & 0xff;
   if ((qid & 1) == 0)
     /* Only care about the odd number, or TX, virtqueue */
     return;
 
-  if (vhost_user_intf_ready (vui))
-    // qid >> 1 is to convert virtqueue number to vring queue index
-    vnet_device_input_set_interrupt_pending (vnm, vui->hw_if_index, qid >> 1);
+  // qid >> 1 is to convert virtqueue number to vring queue index
+  qid >>= 1;
+  txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+  if (vhost_user_intf_ready (vui) &&
+      ((txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE) ||
+       (txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT)))
+    vnet_hw_if_rx_queue_set_int_pending (vnm, txvq->queue_index);
 }
 
 static clib_error_t *
@@ -1370,24 +1378,6 @@ vhost_user_term_if (vhost_user_intf_t * vui)
 
   for (q = 0; q < vui->num_qid; q++)
     {
-      // Remove existing queue mapping for the interface
-      if (q & 1)
-       {
-         int rv;
-         vnet_main_t *vnm = vnet_get_main ();
-         vhost_user_vring_t *txvq = &vui->vrings[q];
-
-         if (txvq->qid != -1)
-           {
-             rv = vnet_hw_interface_unassign_rx_thread (vnm,
-                                                        vui->hw_if_index,
-                                                        q >> 1);
-             if (rv)
-               vu_log_warn (vui, "unable to unassign interface %d, "
-                            "queue %d: rc=%d", vui->hw_if_index, q >> 1, rv);
-           }
-       }
-
       clib_spinlock_free (&vui->vrings[q].vring_lock);
     }
 
@@ -2224,19 +2214,14 @@ show_vhost_user_command_fn (vlib_main_t * vm,
        {
          vnet_main_t *vnm = vnet_get_main ();
          uword thread_index;
-         vnet_hw_if_rx_mode mode;
          vhost_user_vring_t *txvq = &vui->vrings[qid];
 
          if (txvq->qid == -1)
            continue;
          thread_index =
-           vnet_get_device_input_thread_index (vnm, vui->hw_if_index,
-                                               qid >> 1);
-         vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, qid >> 1,
-                                        &mode);
-         vlib_cli_output (vm, "   thread %d on vring %d, %U\n",
-                          thread_index, qid,
-                          format_vnet_hw_if_rx_mode, mode);
+           vnet_hw_if_get_rx_queue_thread_index (vnm, txvq->queue_index);
+         vlib_cli_output (vm, "   thread %d on vring %d, %U\n", thread_index,
+                          qid, format_vnet_hw_if_rx_mode, txvq->mode);
        }
 
       vlib_cli_output (vm, " tx placement: %s\n",
index 604e557..06c78bc 100644 (file)
@@ -227,9 +227,9 @@ typedef struct
 
   u16 used_wrap_counter;
   u16 avail_wrap_counter;
-
   u16 last_kick;
   u8 first_kick;
+  u32 queue_index;
 } vhost_user_vring_t;
 
 #define VHOST_USER_EVENT_START_TIMER 1
index 7ea70c6..62b59f6 100644 (file)
@@ -37,6 +37,7 @@
 #include <vnet/devices/devices.h>
 #include <vnet/feature/feature.h>
 #include <vnet/udp/udp_packet.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
 #include <vnet/devices/virtio/vhost_user.h>
 #include <vnet/devices/virtio/vhost_user_inline.h>
@@ -372,11 +373,9 @@ vhost_user_input_setup_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
 }
 
 static_always_inline u32
-vhost_user_if_input (vlib_main_t * vm,
-                    vhost_user_main_t * vum,
-                    vhost_user_intf_t * vui,
-                    u16 qid, vlib_node_runtime_t * node,
-                    vnet_hw_if_rx_mode mode, u8 enable_csum)
+vhost_user_if_input (vlib_main_t *vm, vhost_user_main_t *vum,
+                    vhost_user_intf_t *vui, u16 qid,
+                    vlib_node_runtime_t *node, u8 enable_csum)
 {
   vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
   vnet_feature_main_t *fm = &feature_main;
@@ -411,7 +410,7 @@ vhost_user_if_input (vlib_main_t * vm,
    * When the traffic subsides, the scheduler switches the node back to
    * interrupt mode. We must tell the driver we want interrupt.
    */
-  if (PREDICT_FALSE (mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
+  if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
     {
       if ((node->flags &
           VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
@@ -1081,10 +1080,9 @@ vhost_user_assemble_packet (vring_packed_desc_t * desc_table,
 }
 
 static_always_inline u32
-vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
-                           vhost_user_intf_t * vui, u16 qid,
-                           vlib_node_runtime_t * node,
-                           vnet_hw_if_rx_mode mode, u8 enable_csum)
+vhost_user_if_input_packed (vlib_main_t *vm, vhost_user_main_t *vum,
+                           vhost_user_intf_t *vui, u16 qid,
+                           vlib_node_runtime_t *node, u8 enable_csum)
 {
   vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
   vnet_feature_main_t *fm = &feature_main;
@@ -1126,7 +1124,7 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
    * When the traffic subsides, the scheduler switches the node back to
    * interrupt mode. We must tell the driver we want interrupt.
    */
-  if (PREDICT_FALSE (mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
+  if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
     {
       if ((node->flags &
           VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
@@ -1415,39 +1413,31 @@ VLIB_NODE_FN (vhost_user_input_node) (vlib_main_t * vm,
   vhost_user_main_t *vum = &vhost_user_main;
   uword n_rx_packets = 0;
   vhost_user_intf_t *vui;
-  vnet_device_input_runtime_t *rt =
-    (vnet_device_input_runtime_t *) node->runtime_data;
-  vnet_device_and_queue_t *dq;
+  vnet_hw_if_rxq_poll_vector_t *pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
+  vnet_hw_if_rxq_poll_vector_t *pve;
 
-  vec_foreach (dq, rt->devices_and_queues)
-  {
-    if ((node->state == VLIB_NODE_STATE_POLLING) ||
-       clib_atomic_swap_acq_n (&dq->interrupt_pending, 0))
-      {
-       vui =
-         pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance);
-       if (vhost_user_is_packed_ring_supported (vui))
-         {
-           if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
-             n_rx_packets += vhost_user_if_input_packed (vm, vum, vui,
-                                                         dq->queue_id, node,
-                                                         dq->mode, 1);
-           else
-             n_rx_packets += vhost_user_if_input_packed (vm, vum, vui,
-                                                         dq->queue_id, node,
-                                                         dq->mode, 0);
-         }
-       else
-         {
-           if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
-             n_rx_packets += vhost_user_if_input (vm, vum, vui, dq->queue_id,
-                                                  node, dq->mode, 1);
-           else
-             n_rx_packets += vhost_user_if_input (vm, vum, vui, dq->queue_id,
-                                                  node, dq->mode, 0);
-         }
-      }
-  }
+  vec_foreach (pve, pv)
+    {
+      vui = pool_elt_at_index (vum->vhost_user_interfaces, pve->dev_instance);
+      if (vhost_user_is_packed_ring_supported (vui))
+       {
+         if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
+           n_rx_packets += vhost_user_if_input_packed (
+             vm, vum, vui, pve->queue_id, node, 1);
+         else
+           n_rx_packets += vhost_user_if_input_packed (
+             vm, vum, vui, pve->queue_id, node, 0);
+       }
+      else
+       {
+         if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
+           n_rx_packets +=
+             vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 1);
+         else
+           n_rx_packets +=
+             vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 0);
+       }
+    }
 
   return n_rx_packets;
 }
index 99f581a..e84490b 100644 (file)
@@ -31,6 +31,7 @@
 #include <vnet/ip/ip6_packet.h>
 #include <vnet/devices/virtio/virtio.h>
 #include <vnet/devices/virtio/pci.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
 virtio_main_t virtio_main;
 
@@ -44,17 +45,11 @@ virtio_main_t virtio_main;
 static clib_error_t *
 call_read_ready (clib_file_t * uf)
 {
-  virtio_main_t *nm = &virtio_main;
   vnet_main_t *vnm = vnet_get_main ();
-  u16 qid = uf->private_data & 0xFFFF;
-  virtio_if_t *vif =
-    vec_elt_at_index (nm->interfaces, uf->private_data >> 16);
   u64 b;
 
   CLIB_UNUSED (ssize_t size) = read (uf->file_descriptor, &b, sizeof (b));
-  if ((qid & 1) == 0)
-    vnet_device_input_set_interrupt_pending (vnm, vif->hw_if_index,
-                                            RX_QUEUE_ACCESS (qid));
+  vnet_hw_if_rx_queue_set_int_pending (vnm, uf->private_data);
 
   return 0;
 }
@@ -64,7 +59,6 @@ clib_error_t *
 virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
 {
   virtio_vring_t *vring;
-  clib_file_t t = { 0 };
   int i;
 
   if (!is_pow2 (sz))
@@ -123,13 +117,6 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
   virtio_log_debug (vif, "vring %u size %u call_fd %d kick_fd %d", idx,
                    vring->size, vring->call_fd, vring->kick_fd);
 
-  t.read_function = call_read_ready;
-  t.file_descriptor = vring->call_fd;
-  t.private_data = vif->dev_instance << 16 | idx;
-  t.description = format (0, "%U vring %u", format_virtio_device_name,
-                         vif->dev_instance, idx);
-  vring->call_file_index = clib_file_add (&file_main, &t);
-
   return 0;
 }
 
@@ -233,19 +220,38 @@ virtio_set_packet_buffering (virtio_if_t * vif, u16 buffering_size)
 }
 
 void
-virtio_vring_set_numa_node (vlib_main_t * vm, virtio_if_t * vif, u32 idx)
+virtio_vring_set_rx_queues (vlib_main_t *vm, virtio_if_t *vif)
 {
   vnet_main_t *vnm = vnet_get_main ();
-  u32 thread_index;
-  virtio_vring_t *vring =
-    vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (idx));
-  thread_index =
-    vnet_get_device_input_thread_index (vnm, vif->hw_if_index,
-                                       RX_QUEUE_ACCESS (idx));
-  vring->buffer_pool_index =
-    vlib_buffer_pool_get_default_for_numa (vm,
-                                          vlib_mains
-                                          [thread_index]->numa_node);
+  virtio_vring_t *vring;
+
+  vnet_hw_if_set_input_node (vnm, vif->hw_if_index, virtio_input_node.index);
+
+  vec_foreach (vring, vif->rxq_vrings)
+    {
+      vring->queue_index = vnet_hw_if_register_rx_queue (
+       vnm, vif->hw_if_index, RX_QUEUE_ACCESS (vring->queue_id),
+       VNET_HW_IF_RXQ_THREAD_ANY);
+      vring->buffer_pool_index = vlib_buffer_pool_get_default_for_numa (
+       vm, vnet_hw_if_get_rx_queue_numa_node (vnm, vring->queue_index));
+      if (vif->type == VIRTIO_IF_TYPE_TAP || vif->type == VIRTIO_IF_TYPE_TUN)
+       {
+
+         clib_file_t f = {
+           .read_function = call_read_ready,
+           .flags = UNIX_FILE_EVENT_EDGE_TRIGGERED,
+           .file_descriptor = vring->call_fd,
+           .private_data = vring->queue_index,
+           .description = format (0, "%U vring %u", format_virtio_device_name,
+                                  vif->dev_instance, vring->queue_id),
+         };
+
+         vring->call_file_index = clib_file_add (&file_main, &f);
+         vnet_hw_if_set_rx_queue_file_index (vnm, vring->queue_index,
+                                             vring->call_file_index);
+       }
+    }
+  vnet_hw_if_update_runtime_data (vnm, vif->hw_if_index);
 }
 
 inline void
index 035dc9c..c149ce4 100644 (file)
@@ -82,6 +82,7 @@ typedef struct
   u32 *buffers;
   u16 size;
   u16 queue_id;
+  u32 queue_index;
   u16 desc_in_use;
   u16 desc_next;
   u16 last_used_idx;
@@ -230,8 +231,7 @@ clib_error_t *virtio_vring_free_rx (vlib_main_t * vm, virtio_if_t * vif,
                                    u32 idx);
 clib_error_t *virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif,
                                    u32 idx);
-void virtio_vring_set_numa_node (vlib_main_t * vm, virtio_if_t * vif,
-                                u32 idx);
+void virtio_vring_set_rx_queues (vlib_main_t *vm, virtio_if_t *vif);
 extern void virtio_free_buffers (vlib_main_t * vm, virtio_vring_t * vring);
 extern void virtio_set_net_hdr_size (virtio_if_t * vif);
 extern void virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr,
index 1689459..f347ef2 100644 (file)
@@ -18,6 +18,7 @@
 #include <vlib/vlib.h>
 #include <vnet/devices/virtio/virtio.h>
 #include <vnet/gso/gro_func.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
 static uword
 virtio_send_interrupt_process (vlib_main_t * vm,
@@ -45,22 +46,20 @@ virtio_send_interrupt_process (vlib_main_t * vm,
          break;
 
        case ~0:
-          /* *INDENT-OFF* */
-          pool_foreach (vif, vim->interfaces) {
-              if (vif->packet_coalesce || vif->packet_buffering)
-                {
-                  virtio_vring_t *vring;
-                  vec_foreach (vring, vif->rxq_vrings)
-                  {
-                    if (vring->mode == VNET_HW_IF_RX_MODE_INTERRUPT ||
-                        vring->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)
-                      vnet_device_input_set_interrupt_pending (
-                                             vnet_get_main (), vif->hw_if_index,
-                                             RX_QUEUE_ACCESS (vring->queue_id));
-                  }
-                }
-          }
-          /* *INDENT-ON* */
+         pool_foreach (vif, vim->interfaces)
+           {
+             if (vif->packet_coalesce || vif->packet_buffering)
+               {
+                 virtio_vring_t *vring;
+                 vec_foreach (vring, vif->rxq_vrings)
+                   {
+                     if (vring->mode == VNET_HW_IF_RX_MODE_INTERRUPT ||
+                         vring->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)
+                       vnet_hw_if_rx_queue_set_int_pending (
+                         vnet_get_main (), vring->queue_index);
+                   }
+               }
+           }
          break;
 
        default:
index 82dc29b..a03ea28 100644 (file)
 #include <vnet/adj/adj.h>
 #include <vnet/adj/adj_mcast.h>
 #include <vnet/ip/ip.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
 /* *INDENT-OFF* */
 VLIB_REGISTER_LOG_CLASS (if_default_log, static) = {
   .class_name = "interface",
+  .default_syslog_level = VLIB_LOG_LEVEL_DEBUG,
 };
 /* *INDENT-ON* */
 
 #define log_debug(fmt,...) vlib_log_debug(if_default_log.class, fmt, __VA_ARGS__)
 #define log_err(fmt,...) vlib_log_err(if_default_log.class, fmt, __VA_ARGS__)
+
 typedef enum vnet_interface_helper_flags_t_
 {
   VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE = (1 << 0),
@@ -493,6 +496,7 @@ vnet_sw_interface_set_flags_helper (vnet_main_t * vnm, u32 sw_if_index,
                                                hi->flags &
                                                ~VNET_HW_INTERFACE_FLAG_LINK_UP,
                                                helper_flags);
+         vnet_hw_if_update_runtime_data (vnm, si->hw_if_index);
        }
     }
 
@@ -1022,6 +1026,10 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
   /* Call delete callbacks. */
   call_hw_interface_add_del_callbacks (vnm, hw_if_index, /* is_create */ 0);
 
+  /* delete rx queues */
+  vnet_hw_if_unregister_all_rx_queues (vnm, hw_if_index);
+  vnet_hw_if_update_runtime_data (vnm, hw_if_index);
+
   /* Delete any sub-interfaces. */
   {
     u32 id, sw_if_index;
@@ -1072,7 +1080,7 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
   vec_free (hw->hw_address);
   vec_free (hw->input_node_thread_index_by_queue);
   vec_free (hw->dq_runtime_index_by_queue);
-
+  vec_free (hw->rx_queue_indices);
   pool_put (im->hw_interfaces, hw);
 }
 
@@ -1376,6 +1384,8 @@ vnet_interface_init (vlib_main_t * vm)
   im->hw_interface_class_by_name = hash_create_string ( /* size */ 0,
                                                       sizeof (uword));
 
+  im->rxq_index_by_hw_if_index_and_queue_id =
+    hash_create_mem (0, sizeof (u64), sizeof (u32));
   im->sw_if_index_by_sup_and_sub = hash_create_mem (0, sizeof (u64),
                                                    sizeof (uword));
   {
index 71d6a77..03c65d7 100644 (file)
@@ -525,6 +525,29 @@ typedef enum vnet_hw_interface_flags_t_
   (VNET_HW_INTERFACE_FLAG_HALF_DUPLEX |                \
    VNET_HW_INTERFACE_FLAG_FULL_DUPLEX)
 
+/* Per-queue state of one registered hardware interface RX queue. */
+typedef struct
+{
+  /* hw interface index */
+  u32 hw_if_index;
+
+  /* device instance */
+  u32 dev_instance;
+
+  /* index of thread polling this queue */
+  u32 thread_index;
+
+  /* file index of queue interrupt line */
+  u32 file_index;
+
+  /* hardware queue identifier */
+  u32 queue_id;
+
+  /* mode */
+  vnet_hw_if_rx_mode mode : 8;
+  /* sentinel thread index: drivers pass it to vnet_hw_if_register_rx_queue
+   * when they have no preference for the polling thread */
+#define VNET_HW_IF_RXQ_THREAD_ANY      ~0
+  /* NOTE(review): presumably the file_index value of a queue without an
+   * interrupt file - not referenced in the code visible here, confirm */
+#define VNET_HW_IF_RXQ_NO_RX_INTERRUPT ~0
+} vnet_hw_if_rx_queue_t;
+
 /* Hardware-interface.  This corresponds to a physical wire
    that packets flow over. */
 typedef struct vnet_hw_interface_t
@@ -609,6 +632,9 @@ typedef struct vnet_hw_interface_t
   /* device input device_and_queue runtime index */
   uword *dq_runtime_index_by_queue;
 
+  /* rx queues */
+  u32 *rx_queue_indices;
+
   /* numa node that hardware device connects to */
   u8 numa_node;
 
@@ -621,6 +647,18 @@ typedef struct vnet_hw_interface_t
   u32 trace_classify_table_index;
 } vnet_hw_interface_t;
 
+/* One entry of an input node poll vector: identifies a single queue
+ * (device instance + queue id) the node has to service. */
+typedef struct
+{
+  u32 dev_instance;
+  u32 queue_id;
+} vnet_hw_if_rxq_poll_vector_t;
+
+/* Layout of the runtime data of an RX input node instance. */
+typedef struct
+{
+  /* vector of queues to be serviced by this node instance */
+  vnet_hw_if_rxq_poll_vector_t *rxq_poll_vector;
+  /* opaque clib_interrupt_* state holding pending queue interrupts,
+   * indexed by rx queue index */
+  void *rxq_interrupts;
+} vnet_hw_if_rx_node_runtime_t;
+
 extern vnet_device_class_t vnet_local_interface_device_class;
 
 typedef enum
@@ -857,6 +895,10 @@ typedef struct
   /* Hardware interfaces. */
   vnet_hw_interface_t *hw_interfaces;
 
+  /* Hardware interface RX queues */
+  vnet_hw_if_rx_queue_t *hw_if_rx_queues;
+  uword *rxq_index_by_hw_if_index_and_queue_id;
+
   /* Hash table mapping HW interface name to index. */
   uword *hw_interface_by_name;
 
diff --git a/src/vnet/interface/runtime.c b/src/vnet/interface/runtime.c
new file mode 100644 (file)
index 0000000..c1b096f
--- /dev/null
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/interface/rx_queue_funcs.h>
+#include <vlib/unix/unix.h>
+
+/* Log class "interface" / subclass "runtime" used by the macros below. */
+VLIB_REGISTER_LOG_CLASS (if_rxq_log, static) = {
+  .class_name = "interface",
+  .subclass_name = "runtime",
+};
+
+/* Both wrappers expand __VA_ARGS__ unconditionally, so every call has to
+ * pass at least one argument after the format string. */
+#define log_debug(fmt, ...) vlib_log_debug (if_rxq_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...)   vlib_log_err (if_rxq_log.class, fmt, __VA_ARGS__)
+
+/* Printable names of node states, indexed by vlib_node_state_t; only used
+ * for debug logging of state transitions. */
+static char *node_state_str[] = {
+  [VLIB_NODE_STATE_DISABLED] = "disabled",
+  [VLIB_NODE_STATE_POLLING] = "polling",
+  [VLIB_NODE_STATE_INTERRUPT] = "interrupt",
+};
+
+/* Comparison callback for sorting poll vectors: orders entries by
+ * dev_instance first and by queue_id second (both ascending). */
+static int
+poll_data_sort (void *a1, void *a2)
+{
+  vnet_hw_if_rxq_poll_vector_t *lhs = a1, *rhs = a2;
+
+  if (lhs->dev_instance != rhs->dev_instance)
+    return lhs->dev_instance > rhs->dev_instance ? 1 : -1;
+
+  if (lhs->queue_id != rhs->queue_id)
+    return lhs->queue_id > rhs->queue_id ? 1 : -1;
+
+  return 0;
+}
+
+/*
+ * Recompute and install the per-thread runtime data of the input node
+ * which serves interface hw_if_index.
+ *
+ * All registered rx queues whose interface uses the same input node are
+ * taken into account. For every thread the desired node state and the
+ * poll vector are computed; if they differ from what is currently
+ * installed, the new data is swapped in under the worker barrier (the
+ * barrier is taken here unless the caller already holds it). Interrupts
+ * found pending while swapping are cleared and raised again at the end.
+ */
+void
+vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_interface_main_t *im = &vnm->interface_main;
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+  u32 node_index = hi->input_node_index;
+  vnet_hw_if_rx_queue_t *rxq;
+  vnet_hw_if_rxq_poll_vector_t *pv, **d = 0;
+  vlib_node_state_t *per_thread_node_state = 0;
+  u32 n_threads = vec_len (vlib_mains);
+  int something_changed = 0;
+  clib_bitmap_t *pending_int = 0;
+  int last_int = -1;
+
+  log_debug ("update node '%U' triggered by interface %v",
+            format_vlib_node_name, vm, node_index, hi->name);
+
+  vec_validate (d, n_threads - 1);
+  vec_validate_init_empty (per_thread_node_state, n_threads - 1,
+                          VLIB_NODE_STATE_DISABLED);
+
+  /* find out desired node state on each thread; a single polling queue
+   * makes the whole thread poll, otherwise interrupt/adaptive queues put
+   * the node into interrupt state */
+  pool_foreach (rxq, im->hw_if_rx_queues)
+    {
+      u32 ti = rxq->thread_index;
+
+      ASSERT (rxq->mode != VNET_HW_IF_RX_MODE_UNKNOWN);
+      ASSERT (rxq->mode != VNET_HW_IF_RX_MODE_DEFAULT);
+
+      hi = vnet_get_hw_interface (vnm, rxq->hw_if_index);
+
+      if (hi->input_node_index != node_index)
+       continue;
+
+      if (rxq->mode == VNET_HW_IF_RX_MODE_POLLING)
+       per_thread_node_state[ti] = VLIB_NODE_STATE_POLLING;
+
+      if (per_thread_node_state[ti] == VLIB_NODE_STATE_POLLING)
+       continue;
+
+      if (rxq->mode == VNET_HW_IF_RX_MODE_INTERRUPT ||
+         rxq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)
+       per_thread_node_state[ti] = VLIB_NODE_STATE_INTERRUPT;
+    }
+
+  /* construct per-thread polling vectors */
+  pool_foreach (rxq, im->hw_if_rx_queues)
+    {
+      u32 ti = rxq->thread_index;
+      uword flags;
+
+      hi = vnet_get_hw_interface (vnm, rxq->hw_if_index);
+
+      if (hi->input_node_index != node_index)
+       continue;
+
+      flags = vnet_sw_interface_get_flags (vnm, hi->sw_if_index);
+      if ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0)
+       {
+         log_debug ("skip interface %v (admin down)", hi->name);
+         continue;
+       }
+
+      /* remember the highest queue index able to raise an interrupt, it
+       * determines the size of the per-thread interrupt state */
+      if (rxq->mode == VNET_HW_IF_RX_MODE_INTERRUPT ||
+         rxq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)
+       last_int = clib_max (last_int, rxq - im->hw_if_rx_queues);
+
+      /* only threads in polling state get a pre-built poll vector */
+      if (per_thread_node_state[ti] != VLIB_NODE_STATE_POLLING)
+       continue;
+
+      vec_add2_aligned (d[ti], pv, 1, CLIB_CACHE_LINE_BYTES);
+      pv->dev_instance = rxq->dev_instance;
+      pv->queue_id = rxq->queue_id;
+    }
+
+  /* sort poll vectors and compare them with active ones to avoid
+   * unnecessary barrier */
+  for (int i = 0; i < n_threads; i++)
+    {
+      vlib_node_state_t old_state;
+      vec_sort_with_function (d[i], poll_data_sort);
+
+      old_state = vlib_node_get_state (vlib_mains[i], node_index);
+      if (per_thread_node_state[i] != old_state)
+       {
+         something_changed = 1;
+         log_debug ("state changed for node %U on thread %u from %s to %s",
+                    format_vlib_node_name, vm, node_index, i,
+                    node_state_str[old_state],
+                    node_state_str[per_thread_node_state[i]]);
+       }
+
+      /* check if something changed */
+      if (something_changed == 0)
+       {
+         vnet_hw_if_rx_node_runtime_t *rt;
+         rt = vlib_node_get_runtime_data (vlib_mains[i], node_index);
+         if (vec_len (rt->rxq_poll_vector) != vec_len (d[i]))
+           something_changed = 1;
+         /* compare whole elements: d is a vector of pointers, so the
+          * element size is sizeof (**d), not sizeof (*d); empty vectors
+          * are skipped so memcmp never sees a null pointer */
+         else if (vec_len (d[i]) &&
+                  memcmp (d[i], rt->rxq_poll_vector,
+                          vec_len (d[i]) * sizeof (**d)))
+           something_changed = 1;
+         if (clib_interrupt_get_n_int (rt->rxq_interrupts) != last_int + 1)
+           something_changed = 1;
+       }
+    }
+
+  if (something_changed)
+    {
+      int with_barrier;
+
+      if (vlib_worker_thread_barrier_held ())
+       {
+         with_barrier = 0;
+         log_debug ("%s", "already running under the barrier");
+       }
+      else
+       with_barrier = 1;
+
+      if (with_barrier)
+       vlib_worker_thread_barrier_sync (vm);
+
+      for (int i = 0; i < n_threads; i++)
+       {
+         vlib_main_t *tvm = vlib_mains[i];
+         vnet_hw_if_rx_node_runtime_t *rt;
+         rt = vlib_node_get_runtime_data (tvm, node_index);
+
+         /* swap vectors: the old one ends up in d[i] and is freed below */
+         pv = rt->rxq_poll_vector;
+         rt->rxq_poll_vector = d[i];
+         d[i] = pv;
+
+         /* collect and clear interrupts which are pending right now, they
+          * are raised again once the new runtime data is in place */
+         if (rt->rxq_interrupts)
+           {
+             void *in = rt->rxq_interrupts;
+             int int_num = -1;
+             while ((int_num = clib_interrupt_get_next (in, int_num)) != -1)
+               {
+                 clib_interrupt_clear (in, int_num);
+                 pending_int = clib_bitmap_set (pending_int, int_num, 1);
+               }
+           }
+
+         vlib_node_set_state (tvm, node_index, per_thread_node_state[i]);
+
+         if (last_int >= 0)
+           clib_interrupt_resize (&rt->rxq_interrupts, last_int + 1);
+         else
+           clib_interrupt_free (&rt->rxq_interrupts);
+       }
+
+      if (with_barrier)
+       vlib_worker_thread_barrier_release (vm);
+    }
+  else
+    log_debug ("skipping update of node '%U', no changes detected",
+              format_vlib_node_name, vm, node_index);
+
+  /* re-raise interrupts collected while swapping runtime data */
+  if (pending_int)
+    {
+      int i;
+      clib_bitmap_foreach (i, pending_int)
+       {
+         vnet_hw_if_rx_queue_set_int_pending (vnm, i);
+       }
+      clib_bitmap_free (pending_int);
+    }
+
+  for (int i = 0; i < n_threads; i++)
+    vec_free (d[i]);
+
+  vec_free (d);
+  vec_free (per_thread_node_state);
+}
diff --git a/src/vnet/interface/rx_queue.c b/src/vnet/interface/rx_queue.c
new file mode 100644 (file)
index 0000000..c0492dd
--- /dev/null
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/interface/rx_queue_funcs.h>
+#include <vlib/unix/unix.h>
+
+/* Log class "interface" / subclass "rx-queue" used by the macros below. */
+VLIB_REGISTER_LOG_CLASS (if_rxq_log, static) = {
+  .class_name = "interface",
+  .subclass_name = "rx-queue",
+};
+
+/* Both wrappers expand __VA_ARGS__ unconditionally, so every call has to
+ * pass at least one argument after the format string. */
+#define log_debug(fmt, ...) vlib_log_debug (if_rxq_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...)   vlib_log_err (if_rxq_log.class, fmt, __VA_ARGS__)
+
+/* Pick the thread which is going to service a queue.
+ *
+ * Returns 0 when first_worker_thread_index is 0. A request for thread 0
+ * or for a thread inside the worker range is returned unchanged; any
+ * other value is replaced by the next worker in round-robin order. */
+static u32
+next_thread_index (vnet_main_t *vnm, u32 thread_index)
+{
+  vnet_device_main_t *dm = &vnet_device_main;
+
+  if (dm->first_worker_thread_index == 0)
+    return 0;
+
+  /* explicit and valid request: keep it */
+  if (thread_index == 0 ||
+      (thread_index >= dm->first_worker_thread_index &&
+       thread_index <= dm->last_worker_thread_index))
+    return thread_index;
+
+  /* hand out workers one after another, wrapping after the last one */
+  thread_index = dm->next_worker_thread_index++;
+  if (dm->next_worker_thread_index > dm->last_worker_thread_index)
+    dm->next_worker_thread_index = dm->first_worker_thread_index;
+
+  return thread_index;
+}
+
+/* Build the 64-bit hash key of a queue: hw_if_index in the upper 32 bits,
+ * queue_id in the lower 32 bits. */
+static u64
+rx_queue_key (u32 hw_if_index, u32 queue_id)
+{
+  u64 key = hw_if_index;
+
+  key <<= 32;
+  key |= queue_id;
+  return key;
+}
+
+/* Look up the rx queue index registered for (hw_if_index, queue_id).
+ * Returns ~0 when no such queue is registered. */
+u32
+vnet_hw_if_get_rx_queue_index_by_id (vnet_main_t *vnm, u32 hw_if_index,
+                                    u32 queue_id)
+{
+  vnet_interface_main_t *im = &vnm->interface_main;
+  u64 key = rx_queue_key (hw_if_index, queue_id);
+  uword *entry;
+
+  entry = hash_get_mem (im->rxq_index_by_hw_if_index_and_queue_id, &key);
+  if (entry == 0)
+    return ~0;
+
+  return entry[0];
+}
+
+/* Register rx queue queue_id of interface hw_if_index.
+ *
+ * The polling thread is chosen by next_thread_index () from the requested
+ * thread_index. The new queue starts in polling mode and without an
+ * interrupt file (file_index is ~0). Registering the same
+ * (hw_if_index, queue_id) pair twice panics.
+ *
+ * Returns the index of the new queue, to be used with the other
+ * vnet_hw_if_*rx_queue* functions. */
+u32
+vnet_hw_if_register_rx_queue (vnet_main_t *vnm, u32 hw_if_index, u32 queue_id,
+                             u32 thread_index)
+{
+  vnet_interface_main_t *im = &vnm->interface_main;
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+  vnet_hw_if_rx_queue_t *rxq;
+  u64 key = rx_queue_key (hw_if_index, queue_id);
+  u32 queue_index;
+
+  if (hash_get_mem (im->rxq_index_by_hw_if_index_and_queue_id, &key))
+    clib_panic ("Trying to register already registered queue id (%u) in the "
+               "interface %v\n",
+               queue_id, hi->name);
+
+  thread_index = next_thread_index (vnm, thread_index);
+
+  pool_get_zero (im->hw_if_rx_queues, rxq);
+  queue_index = rxq - im->hw_if_rx_queues;
+  vec_add1 (hi->rx_queue_indices, queue_index);
+  hash_set_mem_alloc (&im->rxq_index_by_hw_if_index_and_queue_id, &key,
+                     queue_index);
+  rxq->hw_if_index = hw_if_index;
+  rxq->dev_instance = hi->dev_instance;
+  rxq->queue_id = queue_id;
+  rxq->thread_index = thread_index;
+  rxq->mode = VNET_HW_IF_RX_MODE_POLLING;
+  rxq->file_index = ~0;
+
+  /* hi->name is formatted as a vector (%v) like everywhere else, %s would
+   * expect a NUL-terminated C string */
+  log_debug ("register: interface %v queue-id %u thread %u", hi->name,
+            queue_id, thread_index);
+
+  return queue_index;
+}
+
+/* Unregister a single rx queue: drop its hash entry, remove it from the
+ * rx_queue_indices vector of the owning interface and return the queue
+ * to the pool. */
+void
+vnet_hw_if_unregister_rx_queue (vnet_main_t *vnm, u32 queue_index)
+{
+  vnet_interface_main_t *im = &vnm->interface_main;
+  vnet_hw_if_rx_queue_t *rxq;
+  rxq = vnet_hw_if_get_rx_queue (vnm, queue_index);
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rxq->hw_if_index);
+  u64 key;
+
+  /* same key construction as used at registration time */
+  key = rx_queue_key (rxq->hw_if_index, rxq->queue_id);
+  hash_unset_mem_free (&im->rxq_index_by_hw_if_index_and_queue_id, &key);
+
+  for (int i = 0; i < vec_len (hi->rx_queue_indices); i++)
+    if (hi->rx_queue_indices[i] == queue_index)
+      {
+       vec_del1 (hi->rx_queue_indices, i);
+       break;
+      }
+
+  /* log before pool_put_index, rxq is not valid afterwards; hi->name is
+   * formatted as a vector (%v), not as a C string */
+  log_debug ("unregister: interface %v queue-id %u", hi->name, rxq->queue_id);
+  pool_put_index (im->hw_if_rx_queues, queue_index);
+}
+
+/* Unregister every rx queue of interface hw_if_index and free the
+ * interface's rx_queue_indices vector. */
+void
+vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index)
+{
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+  log_debug ("unregister_all: interface %v", hi->name);
+
+  /* vnet_hw_if_unregister_rx_queue () removes the entry from this very
+   * vector, so a forward walk would skip queues and leave them
+   * registered. Walk from the tail instead: the entry being removed is
+   * always the last one and the remaining entries keep their position. */
+  for (int i = (int) vec_len (hi->rx_queue_indices) - 1; i >= 0; i--)
+    vnet_hw_if_unregister_rx_queue (vnm, hi->rx_queue_indices[i]);
+
+  vec_free (hi->rx_queue_indices);
+}
+
+/* Attach the interrupt file file_index to a queue and have that file
+ * polled by the thread which services the queue. */
+void
+vnet_hw_if_set_rx_queue_file_index (vnet_main_t *vnm, u32 queue_index,
+                                   u32 file_index)
+{
+  vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, queue_index);
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rxq->hw_if_index);
+
+  rxq->file_index = file_index;
+  clib_file_set_polling_thread (&file_main, file_index, rxq->thread_index);
+  /* hi->name is formatted as a vector (%v), not as a C string */
+  log_debug ("set_file_index: interface %v queue-id %u file-index %u",
+            hi->name, rxq->queue_id, file_index);
+}
+
+/* Record node_index as the input node of interface hw_if_index. Only the
+ * interface is updated here; node runtime data is not touched. */
+void
+vnet_hw_if_set_input_node (vnet_main_t *vnm, u32 hw_if_index, u32 node_index)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+  hi->input_node_index = node_index;
+  /* hi->name is formatted as a vector (%v), not as a C string */
+  log_debug ("set_input_node: node %U for interface %v", format_vlib_node_name,
+            vm, node_index, hi->name);
+}
+
+/* Change the rx mode of a queue.
+ *
+ * VNET_HW_IF_RX_MODE_DEFAULT is translated to the default rx mode of the
+ * owning interface. If the device class provides rx_mode_change_function
+ * it is called first and may veto the change.
+ *
+ * Returns 0 on success (including "mode unchanged") and
+ * VNET_API_ERROR_UNSUPPORTED when the device class callback fails. */
+int
+vnet_hw_if_set_rx_queue_mode (vnet_main_t *vnm, u32 queue_index,
+                             vnet_hw_if_rx_mode mode)
+{
+  vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, queue_index);
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rxq->hw_if_index);
+  vnet_device_class_t *dc = vnet_get_device_class (vnm, hi->dev_class_index);
+
+  ASSERT (mode != VNET_HW_IF_RX_MODE_UNKNOWN);
+
+  if (mode == VNET_HW_IF_RX_MODE_DEFAULT)
+    mode = hi->default_rx_mode;
+
+  /* hi->name is formatted as a vector (%v) in all messages below, %s
+   * would expect a NUL-terminated C string */
+  if (rxq->mode == mode)
+    {
+      log_debug ("set_rx_queue_mode: interface %v queue-id %u mode "
+                "unchanged (%U)",
+                hi->name, rxq->queue_id, format_vnet_hw_if_rx_mode, mode);
+      return 0;
+    }
+
+  if (dc->rx_mode_change_function)
+    {
+      clib_error_t *err = dc->rx_mode_change_function (vnm, rxq->hw_if_index,
+                                                      rxq->queue_id, mode);
+      if (err)
+       {
+         log_err ("setting rx mode on the interface %v queue-id %u failed.\n"
+                  "   %U",
+                  hi->name, rxq->queue_id, format_clib_error, err);
+         clib_error_free (err);
+         return VNET_API_ERROR_UNSUPPORTED;
+       }
+    }
+
+  rxq->mode = mode;
+  log_debug ("set_rx_queue_mode: interface %v queue-id %u mode set to %U",
+            hi->name, rxq->queue_id, format_vnet_hw_if_rx_mode, mode);
+  return 0;
+}
+
+/* Return the rx mode currently stored for a queue. */
+vnet_hw_if_rx_mode
+vnet_hw_if_get_rx_queue_mode (vnet_main_t *vnm, u32 queue_index)
+{
+  return vnet_hw_if_get_rx_queue (vnm, queue_index)->mode;
+}
+
+/* Assign a queue to thread thread_index. If the queue has an interrupt
+ * file, polling of that file is moved to the same thread. */
+void
+vnet_hw_if_set_rx_queue_thread_index (vnet_main_t *vnm, u32 queue_index,
+                                     u32 thread_index)
+{
+  vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, queue_index);
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rxq->hw_if_index);
+
+  rxq->thread_index = thread_index;
+
+  /* ~0 means no interrupt file is attached to this queue */
+  if (rxq->file_index != ~0)
+    clib_file_set_polling_thread (&file_main, rxq->file_index, thread_index);
+
+  /* hi->name is formatted as a vector (%v), not as a C string */
+  log_debug ("set_rx_queue_thread_index: interface %v queue-id %u "
+            "thread-index set to %u",
+            hi->name, rxq->queue_id, thread_index);
+}
+
+/* Interrupt-state helper: rebuild the poll vector of this node instance
+ * from the queue interrupts which are currently pending. Every pending
+ * interrupt is cleared and contributes one entry to the vector. */
+void
+vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm,
+                                        vlib_node_runtime_t *node)
+{
+  vnet_hw_if_rx_node_runtime_t *rt = (void *) node->runtime_data;
+  vnet_main_t *vnm = vnet_get_main ();
+  int q;
+
+  ASSERT (node->state == VLIB_NODE_STATE_INTERRUPT);
+
+  /* start from an empty vector */
+  vec_reset_length (rt->rxq_poll_vector);
+
+  /* the interrupt number is the rx queue index */
+  for (q = clib_interrupt_get_next (rt->rxq_interrupts, -1); q != -1;
+       q = clib_interrupt_get_next (rt->rxq_interrupts, q))
+    {
+      vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, q);
+      vnet_hw_if_rxq_poll_vector_t *entry;
+
+      clib_interrupt_clear (rt->rxq_interrupts, q);
+
+      vec_add2 (rt->rxq_poll_vector, entry, 1);
+      entry->dev_instance = rxq->dev_instance;
+      entry->queue_id = rxq->queue_id;
+    }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/interface/rx_queue_funcs.h b/src/vnet/interface/rx_queue_funcs.h
new file mode 100644 (file)
index 0000000..08d717b
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+/* function declarations */
+
+/* rx queue lookup, registration and removal */
+u32 vnet_hw_if_get_rx_queue_index_by_id (vnet_main_t *vnm, u32 hw_if_index,
+                                        u32 queue_id);
+u32 vnet_hw_if_register_rx_queue (vnet_main_t *vnm, u32 hw_if_index,
+                                 u32 queue_id, u32 thread_index);
+void vnet_hw_if_unregister_rx_queue (vnet_main_t *vnm, u32 queue_index);
+void vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index);
+
+/* per-queue and per-interface settings */
+void vnet_hw_if_set_rx_queue_file_index (vnet_main_t *vnm, u32 queue_index,
+                                        u32 file_index);
+void vnet_hw_if_set_input_node (vnet_main_t *vnm, u32 hw_if_index,
+                               u32 node_index);
+int vnet_hw_if_set_rx_queue_mode (vnet_main_t *vnm, u32 queue_index,
+                                 vnet_hw_if_rx_mode mode);
+vnet_hw_if_rx_mode vnet_hw_if_get_rx_queue_mode (vnet_main_t *vnm,
+                                                u32 queue_index);
+void vnet_hw_if_set_rx_queue_thread_index (vnet_main_t *vnm, u32 queue_index,
+                                          u32 thread_index);
+
+/* input node runtime data and interrupt-state poll vector */
+void vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index);
+void vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm,
+                                             vlib_node_runtime_t *node);
+
+/* inline functions */
+
+/*
+ * Look up an rx queue by its index in the hw_if_rx_queues pool.
+ * Returns 0 when that pool index is free.
+ */
+static_always_inline vnet_hw_if_rx_queue_t *
+vnet_hw_if_get_rx_queue (vnet_main_t *vnm, u32 queue_index)
+{
+  vnet_hw_if_rx_queue_t *queues = vnm->interface_main.hw_if_rx_queues;
+
+  return pool_is_free_index (queues, queue_index) ?
+          0 :
+          pool_elt_at_index (queues, queue_index);
+}
+
+/*
+ * Flag an rx queue interrupt as pending on the thread the queue is
+ * assigned to, then mark the interface input node on that thread as
+ * having a pending interrupt.
+ */
+static_always_inline void
+vnet_hw_if_rx_queue_set_int_pending (vnet_main_t *vnm, u32 queue_index)
+{
+  vnet_hw_if_rx_queue_t *q = vnet_hw_if_get_rx_queue (vnm, queue_index);
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, q->hw_if_index);
+  vlib_main_t *target_vm = vlib_mains[q->thread_index];
+  vnet_hw_if_rx_node_runtime_t *rt =
+    vlib_node_get_runtime_data (target_vm, hw->input_node_index);
+
+  /* the atomic variant is used whenever the caller is not running on the
+     thread that owns the queue */
+  if (target_vm != vlib_get_main ())
+    clib_interrupt_set_atomic (rt->rxq_interrupts, queue_index);
+  else
+    clib_interrupt_set (rt->rxq_interrupts, queue_index);
+
+  vlib_node_set_interrupt_pending (target_vm, hw->input_node_index);
+}
+
+/*
+ * Return the poll vector kept in the node runtime data. When the node is
+ * in interrupt state the vector is regenerated from the pending rx queue
+ * interrupts first.
+ */
+static_always_inline vnet_hw_if_rxq_poll_vector_t *
+vnet_hw_if_get_rxq_poll_vector (vlib_main_t *vm, vlib_node_runtime_t *node)
+{
+  vnet_hw_if_rx_node_runtime_t *rt;
+
+  if (PREDICT_FALSE (node->state == VLIB_NODE_STATE_INTERRUPT))
+    vnet_hw_if_generate_rxq_int_poll_vector (vm, node);
+
+  /* read the vector only after it may have been regenerated */
+  rt = (void *) node->runtime_data;
+  return rt->rxq_poll_vector;
+}
+
+/* Return the numa_node of the hw interface that owns the given rx queue. */
+static_always_inline u8
+vnet_hw_if_get_rx_queue_numa_node (vnet_main_t *vnm, u32 queue_index)
+{
+  vnet_hw_if_rx_queue_t *q = vnet_hw_if_get_rx_queue (vnm, queue_index);
+
+  return vnet_get_hw_interface (vnm, q->hw_if_index)->numa_node;
+}
+
+/* Return the thread index stored in the rx queue found at queue_index. */
+static_always_inline u32
+vnet_hw_if_get_rx_queue_thread_index (vnet_main_t *vnm, u32 queue_index)
+{
+  return vnet_hw_if_get_rx_queue (vnm, queue_index)->thread_index;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
index a5b9d63..03e7436 100644 (file)
@@ -53,6 +53,7 @@
 #include <vnet/l2/l2_output.h>
 #include <vnet/l2/l2_input.h>
 #include <vnet/classify/vnet_classify.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
 static int
 compare_interface_names (void *a1, void *a2)
@@ -1537,24 +1538,55 @@ set_hw_interface_change_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
 {
   clib_error_t *error = 0;
   vnet_hw_interface_t *hw;
+  u32 *queue_indices = 0;
   int i;
 
   hw = vnet_get_hw_interface (vnm, hw_if_index);
 
-  if (queue_id_valid == 0)
+  /* to be deprecated */
+  if (vec_len (hw->rx_queue_indices) == 0)
     {
-      for (i = 0; i < vec_len (hw->dq_runtime_index_by_queue); i++)
+      if (queue_id_valid == 0)
        {
-         error = set_hw_interface_rx_mode (vnm, hw_if_index, i, mode);
-         if (error)
-           break;
+         for (i = 0; i < vec_len (hw->dq_runtime_index_by_queue); i++)
+           {
+             error = set_hw_interface_rx_mode (vnm, hw_if_index, i, mode);
+             if (error)
+               break;
+           }
+         hw->default_rx_mode = mode;
        }
-      hw->default_rx_mode = mode;
+      else
+       error = set_hw_interface_rx_mode (vnm, hw_if_index, queue_id, mode);
+
+      return (error);
+    }
+
+  if (queue_id_valid)
+    {
+      u32 queue_index;
+      queue_index =
+       vnet_hw_if_get_rx_queue_index_by_id (vnm, hw_if_index, queue_id);
+      if (queue_index == ~0)
+       return clib_error_return (0, "unknown queue %u on interface %s",
+                                 queue_id, hw->name);
+      vec_add1 (queue_indices, queue_index);
     }
   else
-    error = set_hw_interface_rx_mode (vnm, hw_if_index, queue_id, mode);
+    queue_indices = hw->rx_queue_indices;
 
-  return (error);
+  for (int i = 0; i < vec_len (queue_indices); i++)
+    {
+      int rv = vnet_hw_if_set_rx_queue_mode (vnm, queue_indices[i], mode);
+      if (rv)
+       goto done;
+    }
+
+done:
+  if (queue_indices != hw->rx_queue_indices)
+    vec_free (queue_indices);
+  vnet_hw_if_update_runtime_data (vnm, hw_if_index);
+  return error;
 }
 
 static clib_error_t *
@@ -1733,8 +1765,8 @@ set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id,
 {
   vnet_main_t *vnm = vnet_get_main ();
   vnet_device_main_t *vdm = &vnet_device_main;
-  clib_error_t *error = 0;
-  vnet_hw_if_rx_mode mode = VNET_HW_IF_RX_MODE_UNKNOWN;
+  vnet_hw_interface_t *hw;
+  u32 queue_index;
   int rv;
 
   if (is_main)
@@ -1746,21 +1778,38 @@ set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id,
     return clib_error_return (0,
                              "please specify valid worker thread or main");
 
-  rv = vnet_hw_interface_get_rx_mode (vnm, hw_if_index, queue_id, &mode);
+  hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+  /* to be deprecated */
+  if (vec_len (hw->rx_queue_indices) == 0)
+    {
+      clib_error_t *error = 0;
+      vnet_hw_if_rx_mode mode = VNET_HW_IF_RX_MODE_UNKNOWN;
+      rv = vnet_hw_interface_get_rx_mode (vnm, hw_if_index, queue_id, &mode);
 
-  if (rv)
-    return clib_error_return (0, "not found");
+      if (rv)
+       return clib_error_return (0, "not found");
 
-  rv = vnet_hw_interface_unassign_rx_thread (vnm, hw_if_index, queue_id);
+      rv = vnet_hw_interface_unassign_rx_thread (vnm, hw_if_index, queue_id);
 
-  if (rv)
-    return clib_error_return (0, "not found");
+      if (rv)
+       return clib_error_return (0, "not found");
 
-  vnet_hw_interface_assign_rx_thread (vnm, hw_if_index, queue_id,
-                                     thread_index);
-  vnet_hw_interface_set_rx_mode (vnm, hw_if_index, queue_id, mode);
+      vnet_hw_interface_assign_rx_thread (vnm, hw_if_index, queue_id,
+                                         thread_index);
+      vnet_hw_interface_set_rx_mode (vnm, hw_if_index, queue_id, mode);
 
-  return (error);
+      return (error);
+    }
+
+  queue_index =
+    vnet_hw_if_get_rx_queue_index_by_id (vnm, hw_if_index, queue_id);
+  if (queue_index == ~0)
+    return clib_error_return (0, "unknown queue %u on interface %s", queue_id,
+                             hw->name);
+  vnet_hw_if_set_rx_queue_thread_index (vnm, queue_index, thread_index);
+  vnet_hw_if_update_runtime_data (vnm, hw_if_index);
+  return 0;
 }
 
 static clib_error_t *
index 9038e5d..dc2edff 100644 (file)
@@ -42,6 +42,7 @@
 #include <vnet/l2/l2_input.h>
 #include <vnet/l2/l2_output.h>
 #include <vnet/l2/l2_vtr.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
 u8 *
 format_vtr (u8 * s, va_list * args)
@@ -192,6 +193,21 @@ format_vnet_hw_interface (u8 * s, va_list * args)
   s = format (s, "\n%ULink speed: %U", format_white_space, indent + 2,
              format_vnet_hw_interface_link_speed, hi->link_speed);
 
+  if (vec_len (hi->rx_queue_indices))
+    {
+      s = format (s, "\n%URX Queues:", format_white_space, indent + 2);
+      s = format (s, "\n%U%-6s%-15s%-10s", format_white_space, indent + 4,
+                 "queue", "thread", "mode");
+      for (int i = 0; i < vec_len (hi->rx_queue_indices); i++)
+       {
+         vnet_hw_if_rx_queue_t *rxq;
+         rxq = vnet_hw_if_get_rx_queue (vnm, hi->rx_queue_indices[i]);
+         s = format (s, "\n%U%-6u%-15U%-10U", format_white_space, indent + 4,
+                     rxq->queue_id, format_vlib_thread_name_and_index,
+                     rxq->thread_index, format_vnet_hw_if_rx_mode, rxq->mode);
+       }
+    }
+
   if (hi->rss_queues)
     {
       s = format (s, "\n%URSS queues: %U", format_white_space, indent + 2,
index 200cfb1..3dc99cc 100644 (file)
@@ -59,6 +59,7 @@ set(VPPINFRA_SRCS
   graph.c
   hash.c
   heap.c
+  interrupt.c
   longjmp.S
   macros.c
   maplog.c
@@ -134,6 +135,7 @@ set(VPPINFRA_HEADERS
   graph.h
   hash.h
   heap.h
+  interrupt.h
   lb_hash_hash.h
   llist.h
   lock.h
diff --git a/src/vppinfra/interrupt.c b/src/vppinfra/interrupt.c
new file mode 100644 (file)
index 0000000..20b7450
--- /dev/null
@@ -0,0 +1,92 @@
+
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/clib.h>
+#include <vppinfra/bitops.h> /* for count_set_bits */
+#include <vppinfra/vec.h>
+#include <vppinfra/interrupt.h>
+#include <vppinfra/format.h>
+
+/*
+ * Allocate zeroed interrupt state able to hold n_int interrupts and store
+ * it in data[0]. The allocation holds the header followed by two bitmaps
+ * of equal size; each bitmap is sized by rounding n_int bits up to a whole
+ * number of cache lines.
+ */
+__clib_export void
+clib_interrupt_init (void **data, uword n_int)
+{
+  uword bmp_bytes = round_pow2 (n_int, CLIB_CACHE_LINE_BYTES * 8) / 8;
+  uword total = sizeof (clib_interrupt_header_t) + 2 * bmp_bytes;
+  clib_interrupt_header_t *h;
+
+  h = clib_mem_alloc_aligned (total, CLIB_CACHE_LINE_BYTES);
+  clib_memset (h, 0, total);
+
+  h->n_int = n_int;
+  h->n_uword_alloc = (bmp_bytes * 8) >> log2_uword_bits;
+
+  data[0] = h;
+}
+
+/*
+ * Change the number of interrupts tracked by the state at data[0].
+ *
+ * - data[0] == 0: the state is simply initialized for n_int interrupts.
+ * - n_int smaller than before: the atomic bitmap is folded into the plain
+ *   bitmap and every bit at or above n_int is dropped.
+ * - n_int beyond the allocated capacity: a new state is allocated, all
+ *   pending bits of both old bitmaps are carried over and the old state is
+ *   freed.
+ * - otherwise only the stored interrupt count changes.
+ */
+__clib_export void
+clib_interrupt_resize (void **data, uword n_int)
+{
+  clib_interrupt_header_t *h = data[0];
+
+  if (data[0] == 0)
+    {
+      clib_interrupt_init (data, n_int);
+      return;
+    }
+
+  if (n_int < h->n_int)
+    {
+      uword *old_bmp, *old_abp, v;
+      old_bmp = clib_interrupt_get_bitmap (data[0]);
+      old_abp = clib_interrupt_get_atomic_bitmap (data[0]);
+      for (uword i = 0; i < h->n_uword_alloc; i++)
+       {
+         v = old_abp[i];
+         old_abp[i] = 0;
+         /* Word lies entirely below n_int: keep all of it. The test is >=
+            so that a word ending exactly at n_int is handled here and
+            never reaches the mask below with a width of uword_bits, which
+            a shift-based mask cannot represent. */
+         if (n_int >= ((i + 1) * uword_bits))
+           old_bmp[i] |= v;
+         /* Word straddles n_int: keep only the bits below n_int. */
+         else if (n_int > (i * uword_bits))
+           old_bmp[i] = (old_bmp[i] | v) & pow2_mask (n_int - i * uword_bits);
+         /* Word lies entirely at or above n_int: drop it. */
+         else
+           old_bmp[i] = 0;
+       }
+    }
+  else if (n_int > h->n_uword_alloc * uword_bits)
+    {
+      void *old = data[0];
+      uword *old_bmp, *old_abp, *new_bmp;
+      /* number of words that can hold bits of the old state */
+      uword n_uwords = round_pow2 (h->n_int, uword_bits) / uword_bits;
+
+      clib_interrupt_init (data, n_int);
+      h = data[0];
+
+      new_bmp = clib_interrupt_get_bitmap (data[0]);
+      old_bmp = clib_interrupt_get_bitmap (old);
+      old_abp = clib_interrupt_get_atomic_bitmap (old);
+
+      /* pending bits from both old bitmaps land in the new plain bitmap */
+      for (uword i = 0; i < n_uwords; i++)
+       new_bmp[i] = old_bmp[i] | old_abp[i];
+
+      clib_mem_free (old);
+    }
+  h->n_int = n_int;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/interrupt.h b/src/vppinfra/interrupt.h
new file mode 100644 (file)
index 0000000..60c01fa
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_clib_interrupt_h
+#define included_clib_interrupt_h
+
+#include <vppinfra/clib.h>
+#include <vppinfra/bitops.h> /* for count_set_bits */
+#include <vppinfra/vec.h>
+
+typedef struct
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  int n_int;          /* number of interrupts currently tracked */
+  uword n_uword_alloc; /* uwords allocated for each of the two bitmaps */
+} clib_interrupt_header_t;
+
+void clib_interrupt_init (void **data, uword n_interrupts);
+void clib_interrupt_resize (void **data, uword n_interrupts);
+
+/* Free the interrupt state at data[0], if any, and reset data[0] to 0. */
+static_always_inline void
+clib_interrupt_free (void **data)
+{
+  if (data[0] == 0)
+    return;
+
+  clib_mem_free (data[0]);
+  data[0] = 0;
+}
+
+/* Return the number of interrupts tracked by d, or 0 when d is null. */
+static_always_inline int
+clib_interrupt_get_n_int (void *d)
+{
+  clib_interrupt_header_t *h = d;
+
+  return h ? h->n_int : 0;
+}
+
+/* Return the plain bitmap, which starts right after the header. */
+static_always_inline uword *
+clib_interrupt_get_bitmap (void *d)
+{
+  clib_interrupt_header_t *h = d;
+
+  /* h + 1 is the first byte past the header */
+  return (uword *) (h + 1);
+}
+
+/*
+ * Return the atomic bitmap, which follows the plain bitmap at a distance
+ * of n_uword_alloc uwords.
+ */
+static_always_inline uword *
+clib_interrupt_get_atomic_bitmap (void *d)
+{
+  uword *plain = clib_interrupt_get_bitmap (d);
+
+  return plain + ((clib_interrupt_header_t *) d)->n_uword_alloc;
+}
+
+/* Set interrupt int_num in the plain bitmap with an ordinary store. */
+static_always_inline void
+clib_interrupt_set (void *in, int int_num)
+{
+  uword *word = clib_interrupt_get_bitmap (in) + (int_num >> log2_uword_bits);
+
+  ASSERT (int_num < ((clib_interrupt_header_t *) in)->n_int);
+
+  *word |= 1ULL << (int_num & (uword_bits - 1));
+}
+
+/* Set interrupt int_num in the atomic bitmap with an atomic fetch-or. */
+static_always_inline void
+clib_interrupt_set_atomic (void *in, int int_num)
+{
+  uword *word =
+    clib_interrupt_get_atomic_bitmap (in) + (int_num >> log2_uword_bits);
+  uword bit = 1ULL << (int_num & (uword_bits - 1));
+
+  ASSERT (int_num < ((clib_interrupt_header_t *) in)->n_int);
+
+  __atomic_fetch_or (word, bit, __ATOMIC_RELAXED);
+}
+
+/*
+ * Clear interrupt int_num. The atomic word holding it is first drained
+ * into the plain word, so other bits set atomically in that word stay
+ * pending; only the requested bit is dropped.
+ */
+static_always_inline void
+clib_interrupt_clear (void *in, int int_num)
+{
+  uword idx = int_num >> log2_uword_bits;
+  uword bit = 1ULL << (int_num & (uword_bits - 1));
+  uword *plain = clib_interrupt_get_bitmap (in) + idx;
+  uword *atomic = clib_interrupt_get_atomic_bitmap (in) + idx;
+
+  ASSERT (int_num < ((clib_interrupt_header_t *) in)->n_int);
+
+  *plain = (*plain | __atomic_exchange_n (atomic, 0, __ATOMIC_SEQ_CST)) & ~bit;
+}
+
+/*
+ * Return the lowest pending interrupt number greater than last, or -1 when
+ * there is none. Pass last == -1 to start from the beginning.
+ *
+ * Each word visited has its atomic counterpart drained into the plain
+ * bitmap before it is inspected. Interrupts are not cleared here.
+ */
+static_always_inline int
+clib_interrupt_get_next (void *in, int last)
+{
+  uword *bmp = clib_interrupt_get_bitmap (in);
+  uword *abm = clib_interrupt_get_atomic_bitmap (in);
+  clib_interrupt_header_t *h = in;
+  uword bmp_uword, off;
+
+  ASSERT (last >= -1 && last < h->n_int);
+
+  off = (last + 1) >> log2_uword_bits;
+
+  /* never touch a word outside the allocated bitmaps: this is reachable
+     when n_int equals the allocated capacity or when nothing is allocated */
+  if (off > h->n_int >> log2_uword_bits || off >= h->n_uword_alloc)
+    return -1;
+
+  /* make last relative to the word at off, then hide bits up to last */
+  last -= off << log2_uword_bits;
+  bmp[off] |= __atomic_exchange_n (abm + off, 0, __ATOMIC_SEQ_CST);
+  bmp_uword = bmp[off] & ~pow2_mask (last + 1);
+
+next:
+  if (bmp_uword)
+    return (off << log2_uword_bits) + count_trailing_zeros (bmp_uword);
+
+  off++;
+
+  if (off > h->n_int >> log2_uword_bits || off >= h->n_uword_alloc)
+    return -1;
+
+  bmp[off] |= __atomic_exchange_n (abm + off, 0, __ATOMIC_SEQ_CST);
+  bmp_uword = bmp[off];
+
+  goto next;
+}
+
+#endif /* included_clib_interrupt_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */