From 941005336ee8cec614a856089f3d873f7d98135c Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 6 Nov 2020 23:25:57 +0100 Subject: [PATCH] interface: rx queue infra rework, part one Type: improvement Change-Id: I4008cadfd5141f921afbdc09a3ebcd1dcf88eb29 Signed-off-by: Damjan Marion --- src/plugins/avf/avf.h | 1 + src/plugins/avf/device.c | 24 ++- src/plugins/avf/input.c | 25 +-- src/plugins/dpdk/device/common.c | 8 +- src/plugins/dpdk/device/dpdk.h | 1 + src/plugins/dpdk/device/init.c | 21 ++- src/plugins/dpdk/device/node.c | 17 +- src/plugins/vmxnet3/input.c | 24 +-- src/plugins/vmxnet3/vmxnet3.c | 28 ++-- src/plugins/vmxnet3/vmxnet3.h | 1 + src/vlib/main.c | 70 ++------ src/vlib/node.c | 3 + src/vlib/node.h | 16 +- src/vlib/node_funcs.h | 31 +--- src/vlib/threads.c | 7 + src/vlib/unix/input.c | 4 +- src/vnet/CMakeLists.txt | 3 + src/vnet/devices/devices.h | 1 + src/vnet/devices/tap/tap.c | 13 +- src/vnet/devices/virtio/node.c | 48 +++--- src/vnet/devices/virtio/pci.c | 21 +-- src/vnet/devices/virtio/vhost_user.c | 51 ++---- src/vnet/devices/virtio/vhost_user.h | 2 +- src/vnet/devices/virtio/vhost_user_input.c | 76 ++++----- src/vnet/devices/virtio/virtio.c | 58 ++++--- src/vnet/devices/virtio/virtio.h | 4 +- src/vnet/devices/virtio/virtio_process.c | 31 ++-- src/vnet/interface.c | 12 +- src/vnet/interface.h | 42 +++++ src/vnet/interface/runtime.c | 229 ++++++++++++++++++++++++++ src/vnet/interface/rx_queue.c | 254 +++++++++++++++++++++++++++++ src/vnet/interface/rx_queue_funcs.h | 99 +++++++++++ src/vnet/interface_cli.c | 89 +++++++--- src/vnet/interface_format.c | 16 ++ src/vppinfra/CMakeLists.txt | 2 + src/vppinfra/interrupt.c | 92 +++++++++++ src/vppinfra/interrupt.h | 142 ++++++++++++++++ 37 files changed, 1212 insertions(+), 354 deletions(-) create mode 100644 src/vnet/interface/runtime.c create mode 100644 src/vnet/interface/rx_queue.c create mode 100644 src/vnet/interface/rx_queue_funcs.h create mode 100644 src/vppinfra/interrupt.c create mode 100644 src/vppinfra/interrupt.h diff --git a/src/plugins/avf/avf.h b/src/plugins/avf/avf.h index 025fa6ea4e9..b3fcc259206 100644 --- a/src/plugins/avf/avf.h +++ b/src/plugins/avf/avf.h @@ -164,6 +164,7 @@ typedef struct u16 n_enqueued; u8 int_mode; u8 buffer_pool_index; + u32 queue_index; } avf_rxq_t; typedef struct diff --git a/src/plugins/avf/device.c b/src/plugins/avf/device.c index ffd372d7a56..139f1c99ebb 100644 --- a/src/plugins/avf/device.c +++ b/src/plugins/avf/device.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -1373,6 +1374,7 @@ avf_irq_n_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) vnet_main_t *vnm = vnet_get_main (); uword pd = vlib_pci_get_private_data (vm, h); avf_device_t *ad = avf_get_device (pd); + avf_rxq_t *rxq = vec_elt_at_index (ad->rxqs, line - 1); if (ad->flags & AVF_DEVICE_F_ELOG) { @@ -1396,8 +1398,8 @@ avf_irq_n_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) line--; - if (ad->flags & AVF_DEVICE_F_RX_INT && ad->rxqs[line].int_mode) - vnet_device_input_set_interrupt_pending (vnm, ad->hw_if_index, line); + if (ad->flags & AVF_DEVICE_F_RX_INT && rxq->int_mode) + vnet_hw_if_rx_queue_set_int_pending (vnm, rxq->queue_index); avf_irq_n_set_state (ad, line, AVF_IRQ_STATE_ENABLED); } @@ -1415,7 +1417,6 @@ avf_delete_if (vlib_main_t * vm, avf_device_t * ad, int with_barrier) if (with_barrier) vlib_worker_thread_barrier_sync (vm); vnet_hw_interface_set_flags (vnm, ad->hw_if_index, 0); - vnet_hw_interface_unassign_rx_thread (vnm, ad->hw_if_index, 0); 
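The avf hunks above show the new driver-side interrupt path: each rx queue keeps the queue_index handed back by vnet_hw_if_register_rx_queue(), and the MSI-X handler flags that queue directly instead of passing an (hw_if_index, queue_id) pair. A minimal sketch of the same pattern, with hypothetical my_rxq_t / my_queue_irq_handler names standing in for the avf types:

#include <vnet/vnet.h>
#include <vnet/interface/rx_queue_funcs.h>

/* hypothetical per-queue driver state; mirrors the queue_index field this
   patch adds to avf_rxq_t, dpdk_rx_queue_t and vmxnet3_rxq_t */
typedef struct
{
  u8 int_mode;	   /* queue currently in interrupt mode */
  u32 queue_index; /* returned by vnet_hw_if_register_rx_queue () */
} my_rxq_t;

static void
my_queue_irq_handler (vnet_main_t *vnm, my_rxq_t *rxq)
{
  /* the queue is now identified globally by its rx queue index, so the
     handler no longer needs the interface/queue-id pair */
  if (rxq->int_mode)
    vnet_hw_if_rx_queue_set_int_pending (vnm, rxq->queue_index);
}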
ethernet_delete_interface (vnm, ad->hw_if_index); if (with_barrier) vlib_worker_thread_barrier_release (vm); @@ -1660,11 +1661,22 @@ avf_create_if (vlib_main_t * vm, avf_create_if_args_t * args) vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, ad->hw_if_index); hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; - vnet_hw_interface_set_input_node (vnm, ad->hw_if_index, - avf_input_node.index); + vnet_hw_if_set_input_node (vnm, ad->hw_if_index, avf_input_node.index); for (i = 0; i < ad->n_rx_queues; i++) - vnet_hw_interface_assign_rx_thread (vnm, ad->hw_if_index, i, ~0); + { + u32 qi, fi; + qi = vnet_hw_if_register_rx_queue (vnm, ad->hw_if_index, i, + VNET_HW_IF_RXQ_THREAD_ANY); + + if (ad->flags & AVF_DEVICE_F_RX_INT) + { + fi = vlib_pci_get_msix_file_index (vm, ad->pci_dev_handle, i + 1); + vnet_hw_if_set_rx_queue_file_index (vnm, qi, fi); + } + ad->rxqs[i].queue_index = qi; + } + vnet_hw_if_update_runtime_data (vnm, ad->hw_if_index); if (pool_elts (am->devices) == 1) vlib_process_signal_event (vm, avf_process_node.index, diff --git a/src/plugins/avf/input.c b/src/plugins/avf/input.c index 85f97ca3e49..5041f6ef4e7 100644 --- a/src/plugins/avf/input.c +++ b/src/plugins/avf/input.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include @@ -473,17 +473,18 @@ VLIB_NODE_FN (avf_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_rx = 0; - vnet_device_input_runtime_t *rt = (void *) node->runtime_data; - vnet_device_and_queue_t *dq; - - foreach_device_and_queue (dq, rt->devices_and_queues) - { - avf_device_t *ad; - ad = avf_get_device (dq->dev_instance); - if ((ad->flags & AVF_DEVICE_F_ADMIN_UP) == 0) - continue; - n_rx += avf_device_input_inline (vm, node, frame, ad, dq->queue_id); - } + vnet_hw_if_rxq_poll_vector_t *pv; + + pv = vnet_hw_if_get_rxq_poll_vector (vm, node); + + for (int i = 0; i < vec_len (pv); i++) + { + avf_device_t *ad = avf_get_device (pv[i].dev_instance); + if ((ad->flags & AVF_DEVICE_F_ADMIN_UP) == 0) + continue; + n_rx += avf_device_input_inline (vm, node, frame, ad, pv[i].queue_id); + } + return n_rx; } diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c index 2521abde97a..e9b033ec1af 100644 --- a/src/plugins/dpdk/device/common.c +++ b/src/plugins/dpdk/device/common.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -122,11 +123,8 @@ dpdk_device_setup (dpdk_device_t * xd) for (j = 0; j < xd->rx_q_used; j++) { dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, j); - uword tidx = vnet_get_device_input_thread_index (dm->vnet_main, - xd->hw_if_index, j); - unsigned lcore = vlib_worker_threads[tidx].cpu_id; - u16 socket_id = rte_lcore_to_socket_id (lcore); - u8 bpidx = vlib_buffer_pool_get_default_for_numa (vm, socket_id); + u8 bpidx = vlib_buffer_pool_get_default_for_numa ( + vm, vnet_hw_if_get_rx_queue_numa_node (vnm, rxq->queue_index)); vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, bpidx); struct rte_mempool *mp = dpdk_mempool_by_buffer_pool_index[bpidx]; diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h index 504bac5b6ee..3712015d438 100644 --- a/src/plugins/dpdk/device/dpdk.h +++ b/src/plugins/dpdk/device/dpdk.h @@ -165,6 +165,7 @@ typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); u8 buffer_pool_index; + u32 queue_index; } dpdk_rx_queue_t; typedef struct diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index a1eead20541..8e4bf558a19 100644 --- 
a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -414,6 +415,9 @@ dpdk_lib_init (dpdk_main_t * dm) else xd->rx_q_used = 1; + vec_validate_aligned (xd->rx_queues, xd->rx_q_used - 1, + CLIB_CACHE_LINE_BYTES); + xd->flags |= DPDK_DEVICE_FLAG_PMD; /* workaround for drivers not setting driver_name */ @@ -724,25 +728,30 @@ dpdk_lib_init (dpdk_main_t * dm) sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->hw_if_index); xd->sw_if_index = sw->sw_if_index; - vnet_hw_interface_set_input_node (dm->vnet_main, xd->hw_if_index, - dpdk_input_node.index); + vnet_hw_if_set_input_node (dm->vnet_main, xd->hw_if_index, + dpdk_input_node.index); if (devconf->workers) { int i; q = 0; clib_bitmap_foreach (i, devconf->workers) { - vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q++, - vdm->first_worker_thread_index + i); + dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q); + rxq->queue_index = vnet_hw_if_register_rx_queue ( + dm->vnet_main, xd->hw_if_index, q++, + vdm->first_worker_thread_index + i); } } else for (q = 0; q < xd->rx_q_used; q++) { - vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q, /* any */ - ~1); + dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q); + rxq->queue_index = vnet_hw_if_register_rx_queue ( + dm->vnet_main, xd->hw_if_index, q, VNET_HW_IF_RXQ_THREAD_ANY); } + vnet_hw_if_update_runtime_data (dm->vnet_main, xd->hw_if_index); + /*Get vnet hardware interface */ hi = vnet_get_hw_interface (dm->vnet_main, xd->hw_if_index); diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index e7d836fe0f8..2eec74eaab2 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -463,21 +464,21 @@ VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, dpdk_main_t *dm = &dpdk_main; dpdk_device_t *xd; uword n_rx_packets = 0; - vnet_device_input_runtime_t *rt = (void *) node->runtime_data; - vnet_device_and_queue_t *dq; + vnet_hw_if_rxq_poll_vector_t *pv; u32 thread_index = node->thread_index; /* * Poll all devices on this cpu for input/interrupts. 
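At this point both avf_create_if() and dpdk_lib_init() have been converted to the same registration sequence, which replaces vnet_hw_interface_set_input_node() and vnet_hw_interface_assign_rx_thread(). A condensed sketch of that sequence for a hypothetical driver (my_register_rx_queues() is illustrative, not part of the patch):

#include <vnet/vnet.h>
#include <vnet/interface/rx_queue_funcs.h>

static void
my_register_rx_queues (vnet_main_t *vnm, u32 hw_if_index, u32 n_rx_queues,
		       u32 input_node_index)
{
  /* replaces vnet_hw_interface_set_input_node () */
  vnet_hw_if_set_input_node (vnm, hw_if_index, input_node_index);

  for (u32 i = 0; i < n_rx_queues; i++)
    {
      /* replaces vnet_hw_interface_assign_rx_thread (); pass an explicit
	 thread index (as the dpdk 'devconf->workers' path does) or let the
	 infra round-robin over workers with VNET_HW_IF_RXQ_THREAD_ANY */
      u32 qi = vnet_hw_if_register_rx_queue (vnm, hw_if_index, i,
					     VNET_HW_IF_RXQ_THREAD_ANY);
      /* drivers keep qi in their per-queue state and, if they support rx
	 interrupts, bind the interrupt line's file index to it, e.g.
	 vnet_hw_if_set_rx_queue_file_index (vnm, qi, file_index); */
      (void) qi;
    }

  /* one call at the end rebuilds the per-thread runtime data */
  vnet_hw_if_update_runtime_data (vnm, hw_if_index);
}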
*/ - /* *INDENT-OFF* */ - foreach_device_and_queue (dq, rt->devices_and_queues) + + pv = vnet_hw_if_get_rxq_poll_vector (vm, node); + + for (int i = 0; i < vec_len (pv); i++) { - xd = vec_elt_at_index(dm->devices, dq->dev_instance); - n_rx_packets += dpdk_device_input (vm, dm, xd, node, thread_index, - dq->queue_id); + xd = vec_elt_at_index (dm->devices, pv[i].dev_instance); + n_rx_packets += + dpdk_device_input (vm, dm, xd, node, thread_index, pv[i].queue_id); } - /* *INDENT-ON* */ return n_rx_packets; } diff --git a/src/plugins/vmxnet3/input.c b/src/plugins/vmxnet3/input.c index bed4c1e3292..affc3691af1 100644 --- a/src/plugins/vmxnet3/input.c +++ b/src/plugins/vmxnet3/input.c @@ -23,7 +23,7 @@ #include #include #include - +#include #include #define foreach_vmxnet3_input_error \ @@ -469,17 +469,17 @@ VLIB_NODE_FN (vmxnet3_input_node) (vlib_main_t * vm, { u32 n_rx = 0; vmxnet3_main_t *vmxm = &vmxnet3_main; - vnet_device_input_runtime_t *rt = (void *) node->runtime_data; - vnet_device_and_queue_t *dq; - - foreach_device_and_queue (dq, rt->devices_and_queues) - { - vmxnet3_device_t *vd; - vd = vec_elt_at_index (vmxm->devices, dq->dev_instance); - if ((vd->flags & VMXNET3_DEVICE_F_ADMIN_UP) == 0) - continue; - n_rx += vmxnet3_device_input_inline (vm, node, frame, vd, dq->queue_id); - } + vnet_hw_if_rxq_poll_vector_t *pv = vnet_hw_if_get_rxq_poll_vector (vm, node); + vnet_hw_if_rxq_poll_vector_t *pve; + + vec_foreach (pve, pv) + { + vmxnet3_device_t *vd; + vd = vec_elt_at_index (vmxm->devices, pve->dev_instance); + if ((vd->flags & VMXNET3_DEVICE_F_ADMIN_UP) == 0) + continue; + n_rx += vmxnet3_device_input_inline (vm, node, frame, vd, pve->queue_id); + } return n_rx; } diff --git a/src/plugins/vmxnet3/vmxnet3.c b/src/plugins/vmxnet3/vmxnet3.c index 8ec10cd1a73..2c7b970d58f 100644 --- a/src/plugins/vmxnet3/vmxnet3.c +++ b/src/plugins/vmxnet3/vmxnet3.c @@ -19,7 +19,7 @@ #include #include #include - +#include #include #define PCI_VENDOR_ID_VMWARE 0x15ad @@ -540,9 +540,10 @@ vmxnet3_rxq_irq_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) uword pd = vlib_pci_get_private_data (vm, h); vmxnet3_device_t *vd = pool_elt_at_index (vmxm->devices, pd); u16 qid = line; + vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid); if (vec_len (vd->rxqs) > qid && vd->rxqs[qid].int_mode != 0) - vnet_device_input_set_interrupt_pending (vnm, vd->hw_if_index, qid); + vnet_hw_if_rx_queue_set_int_pending (vnm, rxq->queue_index); } static void @@ -812,25 +813,26 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) hw->flags |= (VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO | VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD); - vnet_hw_interface_set_input_node (vnm, vd->hw_if_index, - vmxnet3_input_node.index); + vnet_hw_if_set_input_node (vnm, vd->hw_if_index, vmxnet3_input_node.index); /* Disable interrupts */ vmxnet3_disable_interrupt (vd); vec_foreach_index (qid, vd->rxqs) { vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid); - u32 thread_index; - u32 numa_node; + u32 qi, fi; - vnet_hw_interface_assign_rx_thread (vnm, vd->hw_if_index, qid, ~0); - thread_index = vnet_get_device_input_thread_index (vnm, vd->hw_if_index, - qid); - numa_node = vlib_mains[thread_index]->numa_node; + qi = vnet_hw_if_register_rx_queue (vnm, vd->hw_if_index, qid, + VNET_HW_IF_RXQ_THREAD_ANY); + fi = vlib_pci_get_msix_file_index (vm, vd->pci_dev_handle, qid); + vnet_hw_if_set_rx_queue_file_index (vnm, qi, fi); + rxq->queue_index = qi; rxq->buffer_pool_index = - vlib_buffer_pool_get_default_for_numa (vm, 
numa_node); + vnet_hw_if_get_rx_queue_numa_node (vnm, rxq->queue_index); vmxnet3_rxq_refill_ring0 (vm, vd, rxq); vmxnet3_rxq_refill_ring1 (vm, vd, rxq); } + vnet_hw_if_update_runtime_data (vnm, vd->hw_if_index); + vd->flags |= VMXNET3_DEVICE_F_INITIALIZED; vmxnet3_enable_interrupt (vd); @@ -855,7 +857,7 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd) vnet_main_t *vnm = vnet_get_main (); vmxnet3_main_t *vmxm = &vmxnet3_main; u32 i, bi; - u16 desc_idx, qid; + u16 desc_idx; /* Quiesce the device */ vmxnet3_reg_write (vd, 1, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV); @@ -866,8 +868,6 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd) if (vd->hw_if_index) { vnet_hw_interface_set_flags (vnm, vd->hw_if_index, 0); - vec_foreach_index (qid, vd->rxqs) - vnet_hw_interface_unassign_rx_thread (vnm, vd->hw_if_index, qid); ethernet_delete_interface (vnm, vd->hw_if_index); } diff --git a/src/plugins/vmxnet3/vmxnet3.h b/src/plugins/vmxnet3/vmxnet3.h index 8790e0f136e..e8d2be0e552 100644 --- a/src/plugins/vmxnet3/vmxnet3.h +++ b/src/plugins/vmxnet3/vmxnet3.h @@ -516,6 +516,7 @@ typedef struct u16 size; u8 int_mode; u8 buffer_pool_index; + u32 queue_index; vmxnet3_rx_ring rx_ring[VMXNET3_RX_RING_SIZE]; vmxnet3_rx_desc *rx_desc[VMXNET3_RX_RING_SIZE]; vmxnet3_rx_comp *rx_comp; diff --git a/src/vlib/main.c b/src/vlib/main.c index 6369f39b09a..c76d874e991 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -1708,27 +1708,6 @@ vl_api_send_pending_rpc_requests (vlib_main_t * vm) { } -static_always_inline u64 -dispatch_pending_interrupts (vlib_main_t * vm, vlib_node_main_t * nm, - u64 cpu_time_now, - vlib_node_interrupt_t * interrupts) -{ - vlib_node_runtime_t *n; - - for (int i = 0; i < _vec_len (interrupts); i++) - { - vlib_node_interrupt_t *in; - in = vec_elt_at_index (interrupts, i); - n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], - in->node_runtime_index); - n->interrupt_data = in->data; - cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT, - VLIB_NODE_STATE_INTERRUPT, /* frame */ 0, - cpu_time_now); - } - return cpu_time_now; -} - static inline void pcap_postmortem_reset (vlib_main_t * vm) { @@ -1752,7 +1731,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) f64 now; vlib_frame_queue_main_t *fqm; u32 frame_queue_check_counter = 0; - vlib_node_interrupt_t *empty_int_list = 0; /* Initialize pending node vector. */ if (is_main) @@ -1771,12 +1749,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) cpu_time_now = clib_cpu_time_now (); /* Pre-allocate interupt runtime indices and lock. */ - vec_alloc (nm->pending_local_interrupts, 32); - vec_alloc (nm->pending_remote_interrupts, 32); - vec_alloc (empty_int_list, 32); - vec_alloc_aligned (nm->pending_remote_interrupts_notify, 1, - CLIB_CACHE_LINE_BYTES); - clib_spinlock_init (&nm->pending_interrupt_lock); + vec_alloc_aligned (nm->pending_interrupts, 1, CLIB_CACHE_LINE_BYTES); /* Pre-allocate expired nodes. 
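The avf, dpdk and vmxnet3 input nodes converted above all consume the same per-thread poll vector instead of walking vnet_device_input_runtime_t. Reduced to its skeleton, a converted input node looks like the sketch below; my_device_input() is a placeholder for the driver's per-queue receive routine:

#include <vnet/vnet.h>
#include <vnet/interface/rx_queue_funcs.h>

/* placeholder for the driver-specific per-queue receive routine */
static uword my_device_input (vlib_main_t *vm, vlib_node_runtime_t *node,
			      vlib_frame_t *frame, u32 dev_instance,
			      u32 queue_id);

static uword
my_input_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
		  vlib_frame_t *frame)
{
  uword n_rx = 0;
  vnet_hw_if_rxq_poll_vector_t *pv;

  /* polling mode: precomputed per-thread vector; interrupt mode: the
     vector is regenerated from the queues whose interrupts are pending */
  pv = vnet_hw_if_get_rxq_poll_vector (vm, node);

  for (int i = 0; i < vec_len (pv); i++)
    n_rx += my_device_input (vm, node, frame, pv[i].dev_instance,
			     pv[i].queue_id);

  return n_rx;
}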
*/ if (!nm->polling_threshold_vector_length) @@ -1874,35 +1847,22 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0)) vm->queue_signal_callback (vm); - /* handle local interruots */ - if (_vec_len (nm->pending_local_interrupts)) + if (__atomic_load_n (nm->pending_interrupts, __ATOMIC_ACQUIRE)) { - vlib_node_interrupt_t *interrupts = nm->pending_local_interrupts; - nm->pending_local_interrupts = empty_int_list; - cpu_time_now = dispatch_pending_interrupts (vm, nm, cpu_time_now, - interrupts); - empty_int_list = interrupts; - vec_reset_length (empty_int_list); - } + int int_num = -1; + *nm->pending_interrupts = 0; - /* handle remote interruots */ - if (PREDICT_FALSE (_vec_len (nm->pending_remote_interrupts))) - { - vlib_node_interrupt_t *interrupts; - - /* at this point it is known that - * vec_len (nm->pending_local_interrupts) is zero so we quickly swap - * local and remote vector under the spinlock */ - clib_spinlock_lock (&nm->pending_interrupt_lock); - interrupts = nm->pending_remote_interrupts; - nm->pending_remote_interrupts = empty_int_list; - *nm->pending_remote_interrupts_notify = 0; - clib_spinlock_unlock (&nm->pending_interrupt_lock); - - cpu_time_now = dispatch_pending_interrupts (vm, nm, cpu_time_now, - interrupts); - empty_int_list = interrupts; - vec_reset_length (empty_int_list); + while ((int_num = + clib_interrupt_get_next (nm->interrupts, int_num)) != -1) + { + vlib_node_runtime_t *n; + clib_interrupt_clear (nm->interrupts, int_num); + n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], + int_num); + cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT, + VLIB_NODE_STATE_INTERRUPT, + /* frame */ 0, cpu_time_now); + } } /* Input nodes may have added work to the pending vector. diff --git a/src/vlib/node.c b/src/vlib/node.c index cf65f297d6a..13889d13ca0 100644 --- a/src/vlib/node.c +++ b/src/vlib/node.c @@ -447,6 +447,9 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r) { vec_add2_aligned (nm->nodes_by_type[n->type], rt, 1, /* align */ CLIB_CACHE_LINE_BYTES); + if (n->type == VLIB_NODE_TYPE_INPUT) + clib_interrupt_resize (&nm->interrupts, + vec_len (nm->nodes_by_type[n->type])); n->runtime_index = rt - nm->nodes_by_type[n->type]; } diff --git a/src/vlib/node.h b/src/vlib/node.h index 6b9a2df95d3..1ec5a7a041d 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -502,10 +502,6 @@ typedef struct vlib_node_runtime_t u16 state; /**< Input node state. */ - u32 interrupt_data; /**< Data passed together with interrupt. - Valid only when state is - VLIB_NODE_STATE_INTERRUPT */ - u16 n_next_nodes; u16 cached_next_index; /**< Next frame index that vector @@ -668,12 +664,6 @@ vlib_timing_wheel_data_get_index (u32 d) return d / 2; } -typedef struct -{ - u32 node_runtime_index; - u32 data; -} vlib_node_interrupt_t; - typedef struct { /* Public nodes. */ @@ -690,10 +680,8 @@ typedef struct vlib_node_runtime_t *nodes_by_type[VLIB_N_NODE_TYPE]; /* Node runtime indices for input nodes with pending interrupts. 
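The main-loop change above replaces the per-thread local/remote interrupt vectors and their spinlock with a single pending_interrupts flag plus the new vppinfra interrupt bitmap. A small self-contained sketch of that producer/consumer pattern, using only the clib_interrupt calls introduced by this patch (the slot numbers are arbitrary, for illustration only):

#include <vppinfra/interrupt.h>

static void
interrupt_bitmap_example (void)
{
  void *ints = 0;
  int i = -1;

  clib_interrupt_init (&ints, 64); /* one slot per input node runtime */

  clib_interrupt_set (ints, 3);	       /* same-thread producer */
  clib_interrupt_set_atomic (ints, 17); /* cross-thread producer */

  /* consumer: scan set slots and clear them as they are handled */
  while ((i = clib_interrupt_get_next (ints, i)) != -1)
    {
      clib_interrupt_clear (ints, i);
      /* dispatch the input node whose runtime index is i ... */
    }

  clib_interrupt_free (&ints);
}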
*/ - vlib_node_interrupt_t *pending_local_interrupts; - vlib_node_interrupt_t *pending_remote_interrupts; - volatile u32 *pending_remote_interrupts_notify; - clib_spinlock_t pending_interrupt_lock; + void *interrupts; + volatile u32 *pending_interrupts; /* Input nodes are switched from/to interrupt to/from polling mode when average vector length goes above/below polling/interrupt diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index b33f4960a90..a12aea4e462 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -47,6 +47,7 @@ #include #include +#include #ifdef CLIB_SANITIZE_ADDR #include @@ -224,37 +225,19 @@ vlib_node_get_state (vlib_main_t * vm, u32 node_index) } always_inline void -vlib_node_set_interrupt_pending_with_data (vlib_main_t * vm, u32 node_index, - u32 data) +vlib_node_set_interrupt_pending (vlib_main_t *vm, u32 node_index) { vlib_node_main_t *nm = &vm->node_main; vlib_node_t *n = vec_elt (nm->nodes, node_index); - vlib_node_interrupt_t *i; + ASSERT (n->type == VLIB_NODE_TYPE_INPUT); - if (vm == vlib_get_main ()) - { - /* local thread */ - vec_add2 (nm->pending_local_interrupts, i, 1); - i->node_runtime_index = n->runtime_index; - i->data = data; - } + if (vm != vlib_get_main ()) + clib_interrupt_set_atomic (nm->interrupts, n->runtime_index); else - { - /* remote thread */ - clib_spinlock_lock (&nm->pending_interrupt_lock); - vec_add2 (nm->pending_remote_interrupts, i, 1); - i->node_runtime_index = n->runtime_index; - i->data = data; - *nm->pending_remote_interrupts_notify = 1; - clib_spinlock_unlock (&nm->pending_interrupt_lock); - } -} + clib_interrupt_set (nm->interrupts, n->runtime_index); -always_inline void -vlib_node_set_interrupt_pending (vlib_main_t * vm, u32 node_index) -{ - vlib_node_set_interrupt_pending_with_data (vm, node_index, 0); + __atomic_store_n (nm->pending_interrupts, 1, __ATOMIC_RELEASE); } always_inline vlib_process_t * diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 7efddff54e8..ea63653c53c 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -863,6 +864,9 @@ start_workers (vlib_main_t * vm) nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], CLIB_CACHE_LINE_BYTES); + clib_interrupt_init ( + &nm_clone->interrupts, + vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])); vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) { vlib_node_t *n = vlib_get_node (vm, rt->node_index); @@ -1178,6 +1182,9 @@ vlib_worker_thread_node_refork (void) nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], CLIB_CACHE_LINE_BYTES); + clib_interrupt_resize ( + &nm_clone->interrupts, + vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])); vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) { diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index 7531dd19749..63981487049 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -249,8 +249,8 @@ linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (nanosleep (&ts, &tsrem) < 0) ts = tsrem; - if (*vlib_worker_threads->wait_at_barrier - || *nm->pending_remote_interrupts_notify) + if (*vlib_worker_threads->wait_at_barrier || + *nm->pending_interrupts) goto done; } } diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index c6c2b2e70e7..0e1d9c44b05 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -36,6 +36,8 @@ 
list(APPEND VNET_SOURCES interface_cli.c interface_format.c interface_output.c + interface/rx_queue.c + interface/runtime.c interface_stats.c misc.c ) @@ -55,6 +57,7 @@ list(APPEND VNET_HEADERS flow/flow.h global_funcs.h handoff.h + interface/rx_queue_funcs.h interface.h interface_funcs.h interface_output.h diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h index 917c872b3b8..02eb5cf050d 100644 --- a/src/vnet/devices/devices.h +++ b/src/vnet/devices/devices.h @@ -68,6 +68,7 @@ typedef struct { vnet_device_and_queue_t *devices_and_queues; vlib_node_state_t enabled_node_state; + u32 pad; } vnet_device_input_runtime_t; extern vnet_device_main_t vnet_device_main; diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c index 11c24808f7b..7e7d3d5e8c0 100644 --- a/src/vnet/devices/tap/tap.c +++ b/src/vnet/devices/tap/tap.c @@ -739,16 +739,8 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) { virtio_set_packet_coalesce (vif); } - vnet_hw_interface_set_input_node (vnm, vif->hw_if_index, - virtio_input_node.index); - for (i = 0; i < vif->num_rxqs; i++) - { - vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, i, ~0); - vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, i, - VNET_HW_IF_RX_MODE_DEFAULT); - virtio_vring_set_numa_node (vm, vif, RX_QUEUE (i)); - } + virtio_vring_set_rx_queues (vm, vif); vif->per_interface_next_index = ~0; vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; @@ -788,7 +780,6 @@ tap_delete_if (vlib_main_t * vm, u32 sw_if_index) { vnet_main_t *vnm = vnet_get_main (); virtio_main_t *mm = &virtio_main; - int i; virtio_if_t *vif; vnet_hw_interface_t *hw; @@ -804,8 +795,6 @@ tap_delete_if (vlib_main_t * vm, u32 sw_if_index) /* bring down the interface */ vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); vnet_sw_interface_set_flags (vnm, vif->sw_if_index, 0); - for (i = 0; i < vif->num_rxqs; i++) - vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, i); if (vif->type == VIRTIO_IF_TYPE_TAP) ethernet_delete_interface (vnm, vif->hw_if_index); diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c index 1e75ca47586..788cc617f61 100644 --- a/src/vnet/devices/virtio/node.c +++ b/src/vnet/devices/virtio/node.c @@ -26,15 +26,14 @@ #include #include #include -#include #include #include +#include #include #include #include #include - #define foreach_virtio_input_error \ _(BUFFER_ALLOC, "buffer alloc error") \ _(UNKNOWN, "unknown") @@ -638,30 +637,27 @@ VLIB_NODE_FN (virtio_input_node) (vlib_main_t * vm, vlib_frame_t * frame) { u32 n_rx = 0; - virtio_main_t *nm = &virtio_main; - vnet_device_input_runtime_t *rt = (void *) node->runtime_data; - vnet_device_and_queue_t *dq; - - foreach_device_and_queue (dq, rt->devices_and_queues) - { - virtio_if_t *vif; - vif = vec_elt_at_index (nm->interfaces, dq->dev_instance); - if (vif->flags & VIRTIO_IF_FLAG_ADMIN_UP) - { - if (vif->type == VIRTIO_IF_TYPE_TAP) - n_rx += virtio_device_input_inline (vm, node, frame, vif, - dq->queue_id, - VIRTIO_IF_TYPE_TAP); - else if (vif->type == VIRTIO_IF_TYPE_PCI) - n_rx += virtio_device_input_inline (vm, node, frame, vif, - dq->queue_id, - VIRTIO_IF_TYPE_PCI); - else if (vif->type == VIRTIO_IF_TYPE_TUN) - n_rx += virtio_device_input_inline (vm, node, frame, vif, - dq->queue_id, - VIRTIO_IF_TYPE_TUN); - } - } + virtio_main_t *vim = &virtio_main; + vnet_hw_if_rxq_poll_vector_t *p, + *pv = vnet_hw_if_get_rxq_poll_vector (vm, node); + + vec_foreach (p, pv) + { + virtio_if_t *vif; + vif = vec_elt_at_index (vim->interfaces, p->dev_instance); + 
if (vif->flags & VIRTIO_IF_FLAG_ADMIN_UP) + { + if (vif->type == VIRTIO_IF_TYPE_TAP) + n_rx += virtio_device_input_inline ( + vm, node, frame, vif, p->queue_id, VIRTIO_IF_TYPE_TAP); + else if (vif->type == VIRTIO_IF_TYPE_PCI) + n_rx += virtio_device_input_inline ( + vm, node, frame, vif, p->queue_id, VIRTIO_IF_TYPE_PCI); + else if (vif->type == VIRTIO_IF_TYPE_TUN) + n_rx += virtio_device_input_inline ( + vm, node, frame, vif, p->queue_id, VIRTIO_IF_TYPE_TUN); + } + } return n_rx; } diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c index 93ea70b3b53..908aba75962 100644 --- a/src/vnet/devices/virtio/pci.c +++ b/src/vnet/devices/virtio/pci.c @@ -24,6 +24,7 @@ #include #include #include +#include #define PCI_VENDOR_ID_VIRTIO 0x1af4 #define PCI_DEVICE_ID_VIRTIO_NIC 0x1000 @@ -115,7 +116,8 @@ virtio_pci_irq_queue_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, line--; u16 qid = line; - vnet_device_input_set_interrupt_pending (vnm, vif->hw_if_index, qid); + virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid); + vnet_hw_if_rx_queue_set_int_pending (vnm, vring->queue_index); } static void @@ -1519,17 +1521,8 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) } } - vnet_hw_interface_set_input_node (vnm, vif->hw_if_index, - virtio_input_node.index); - u32 i = 0; - vec_foreach_index (i, vif->rxq_vrings) - { - vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, i, ~0); - virtio_vring_set_numa_node (vm, vif, RX_QUEUE (i)); - /* Set default rx mode to POLLING */ - vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, i, - VNET_HW_IF_RX_MODE_POLLING); - } + virtio_vring_set_rx_queues (vm, vif); + if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP) { vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; @@ -1584,10 +1577,6 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif) if (vif->hw_if_index) { vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); - vec_foreach_index (i, vif->rxq_vrings) - { - vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, i); - } ethernet_delete_interface (vnm, vif->hw_if_index); } diff --git a/src/vnet/devices/virtio/vhost_user.c b/src/vnet/devices/virtio/vhost_user.c index daa126064c5..b45b18b8433 100644 --- a/src/vnet/devices/virtio/vhost_user.c +++ b/src/vnet/devices/virtio/vhost_user.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -165,17 +166,19 @@ vhost_user_rx_thread_placement (vhost_user_intf_t * vui, u32 qid) ASSERT ((qid & 1) == 1); // should be odd // Assign new queue mappings for the interface - vnet_hw_interface_set_input_node (vnm, vui->hw_if_index, - vhost_user_input_node.index); - vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, q, ~0); + vnet_hw_if_set_input_node (vnm, vui->hw_if_index, + vhost_user_input_node.index); + txvq->queue_index = vnet_hw_if_register_rx_queue (vnm, vui->hw_if_index, q, + VNET_HW_IF_RXQ_THREAD_ANY); if (txvq->mode == VNET_HW_IF_RX_MODE_UNKNOWN) /* Set polling as the default */ txvq->mode = VNET_HW_IF_RX_MODE_POLLING; txvq->qid = q; - rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, q, txvq->mode); + rv = vnet_hw_if_set_rx_queue_mode (vnm, txvq->queue_index, txvq->mode); if (rv) vu_log_warn (vui, "unable to set rx mode for interface %d, " "queue %d: rc=%d", vui->hw_if_index, q, rv); + vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index); } /** @brief Returns whether at least one TX and one RX vring are enabled */ @@ -213,15 +216,20 @@ vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 
ifq) { u32 qid; vnet_main_t *vnm = vnet_get_main (); + vhost_user_vring_t *txvq; qid = ifq & 0xff; if ((qid & 1) == 0) /* Only care about the odd number, or TX, virtqueue */ return; - if (vhost_user_intf_ready (vui)) - // qid >> 1 is to convert virtqueue number to vring queue index - vnet_device_input_set_interrupt_pending (vnm, vui->hw_if_index, qid >> 1); + // qid >> 1 is to convert virtqueue number to vring queue index + qid >>= 1; + txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; + if (vhost_user_intf_ready (vui) && + ((txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE) || + (txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT))) + vnet_hw_if_rx_queue_set_int_pending (vnm, txvq->queue_index); } static clib_error_t * @@ -1370,24 +1378,6 @@ vhost_user_term_if (vhost_user_intf_t * vui) for (q = 0; q < vui->num_qid; q++) { - // Remove existing queue mapping for the interface - if (q & 1) - { - int rv; - vnet_main_t *vnm = vnet_get_main (); - vhost_user_vring_t *txvq = &vui->vrings[q]; - - if (txvq->qid != -1) - { - rv = vnet_hw_interface_unassign_rx_thread (vnm, - vui->hw_if_index, - q >> 1); - if (rv) - vu_log_warn (vui, "unable to unassign interface %d, " - "queue %d: rc=%d", vui->hw_if_index, q >> 1, rv); - } - } - clib_spinlock_free (&vui->vrings[q].vring_lock); } @@ -2224,19 +2214,14 @@ show_vhost_user_command_fn (vlib_main_t * vm, { vnet_main_t *vnm = vnet_get_main (); uword thread_index; - vnet_hw_if_rx_mode mode; vhost_user_vring_t *txvq = &vui->vrings[qid]; if (txvq->qid == -1) continue; thread_index = - vnet_get_device_input_thread_index (vnm, vui->hw_if_index, - qid >> 1); - vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, qid >> 1, - &mode); - vlib_cli_output (vm, " thread %d on vring %d, %U\n", - thread_index, qid, - format_vnet_hw_if_rx_mode, mode); + vnet_hw_if_get_rx_queue_thread_index (vnm, txvq->queue_index); + vlib_cli_output (vm, " thread %d on vring %d, %U\n", thread_index, + qid, format_vnet_hw_if_rx_mode, txvq->mode); } vlib_cli_output (vm, " tx placement: %s\n", diff --git a/src/vnet/devices/virtio/vhost_user.h b/src/vnet/devices/virtio/vhost_user.h index 604e5571141..06c78bce857 100644 --- a/src/vnet/devices/virtio/vhost_user.h +++ b/src/vnet/devices/virtio/vhost_user.h @@ -227,9 +227,9 @@ typedef struct u16 used_wrap_counter; u16 avail_wrap_counter; - u16 last_kick; u8 first_kick; + u32 queue_index; } vhost_user_vring_t; #define VHOST_USER_EVENT_START_TIMER 1 diff --git a/src/vnet/devices/virtio/vhost_user_input.c b/src/vnet/devices/virtio/vhost_user_input.c index 7ea70c629f8..62b59f69ba9 100644 --- a/src/vnet/devices/virtio/vhost_user_input.c +++ b/src/vnet/devices/virtio/vhost_user_input.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -372,11 +373,9 @@ vhost_user_input_setup_frame (vlib_main_t * vm, vlib_node_runtime_t * node, } static_always_inline u32 -vhost_user_if_input (vlib_main_t * vm, - vhost_user_main_t * vum, - vhost_user_intf_t * vui, - u16 qid, vlib_node_runtime_t * node, - vnet_hw_if_rx_mode mode, u8 enable_csum) +vhost_user_if_input (vlib_main_t *vm, vhost_user_main_t *vum, + vhost_user_intf_t *vui, u16 qid, + vlib_node_runtime_t *node, u8 enable_csum) { vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; vnet_feature_main_t *fm = &feature_main; @@ -411,7 +410,7 @@ vhost_user_if_input (vlib_main_t * vm, * When the traffic subsides, the scheduler switches the node back to * interrupt mode. We must tell the driver we want interrupt. 
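vhost_user_rx_thread_placement() above also shows the control-plane half of the new API: the per-queue rx mode is applied with vnet_hw_if_set_rx_queue_mode() and the per-thread node states and poll vectors are then refreshed with vnet_hw_if_update_runtime_data(). A minimal sketch, with my_set_queue_mode() as an illustrative wrapper rather than a function from the patch:

#include <vnet/vnet.h>
#include <vnet/interface/rx_queue_funcs.h>

static int
my_set_queue_mode (vnet_main_t *vnm, u32 hw_if_index, u32 queue_index,
		   vnet_hw_if_rx_mode mode)
{
  /* queue_index is the value returned by vnet_hw_if_register_rx_queue () */
  int rv = vnet_hw_if_set_rx_queue_mode (vnm, queue_index, mode);

  /* rebuild the per-thread poll vectors and input node states */
  vnet_hw_if_update_runtime_data (vnm, hw_if_index);
  return rv;
}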
*/ - if (PREDICT_FALSE (mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) + if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) { if ((node->flags & VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) || @@ -1081,10 +1080,9 @@ vhost_user_assemble_packet (vring_packed_desc_t * desc_table, } static_always_inline u32 -vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum, - vhost_user_intf_t * vui, u16 qid, - vlib_node_runtime_t * node, - vnet_hw_if_rx_mode mode, u8 enable_csum) +vhost_user_if_input_packed (vlib_main_t *vm, vhost_user_main_t *vum, + vhost_user_intf_t *vui, u16 qid, + vlib_node_runtime_t *node, u8 enable_csum) { vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; vnet_feature_main_t *fm = &feature_main; @@ -1126,7 +1124,7 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum, * When the traffic subsides, the scheduler switches the node back to * interrupt mode. We must tell the driver we want interrupt. */ - if (PREDICT_FALSE (mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) + if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) { if ((node->flags & VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) || @@ -1415,39 +1413,31 @@ VLIB_NODE_FN (vhost_user_input_node) (vlib_main_t * vm, vhost_user_main_t *vum = &vhost_user_main; uword n_rx_packets = 0; vhost_user_intf_t *vui; - vnet_device_input_runtime_t *rt = - (vnet_device_input_runtime_t *) node->runtime_data; - vnet_device_and_queue_t *dq; + vnet_hw_if_rxq_poll_vector_t *pv = vnet_hw_if_get_rxq_poll_vector (vm, node); + vnet_hw_if_rxq_poll_vector_t *pve; - vec_foreach (dq, rt->devices_and_queues) - { - if ((node->state == VLIB_NODE_STATE_POLLING) || - clib_atomic_swap_acq_n (&dq->interrupt_pending, 0)) - { - vui = - pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance); - if (vhost_user_is_packed_ring_supported (vui)) - { - if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM)) - n_rx_packets += vhost_user_if_input_packed (vm, vum, vui, - dq->queue_id, node, - dq->mode, 1); - else - n_rx_packets += vhost_user_if_input_packed (vm, vum, vui, - dq->queue_id, node, - dq->mode, 0); - } - else - { - if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM)) - n_rx_packets += vhost_user_if_input (vm, vum, vui, dq->queue_id, - node, dq->mode, 1); - else - n_rx_packets += vhost_user_if_input (vm, vum, vui, dq->queue_id, - node, dq->mode, 0); - } - } - } + vec_foreach (pve, pv) + { + vui = pool_elt_at_index (vum->vhost_user_interfaces, pve->dev_instance); + if (vhost_user_is_packed_ring_supported (vui)) + { + if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM)) + n_rx_packets += vhost_user_if_input_packed ( + vm, vum, vui, pve->queue_id, node, 1); + else + n_rx_packets += vhost_user_if_input_packed ( + vm, vum, vui, pve->queue_id, node, 0); + } + else + { + if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM)) + n_rx_packets += + vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 1); + else + n_rx_packets += + vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 0); + } + } return n_rx_packets; } diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c index 99f581a1f1b..e84490ba575 100644 --- a/src/vnet/devices/virtio/virtio.c +++ b/src/vnet/devices/virtio/virtio.c @@ -31,6 +31,7 @@ #include #include #include +#include virtio_main_t virtio_main; @@ -44,17 +45,11 @@ virtio_main_t virtio_main; static clib_error_t * call_read_ready (clib_file_t * uf) { - virtio_main_t *nm = &virtio_main; vnet_main_t *vnm = vnet_get_main (); - u16 qid = 
uf->private_data & 0xFFFF; - virtio_if_t *vif = - vec_elt_at_index (nm->interfaces, uf->private_data >> 16); u64 b; CLIB_UNUSED (ssize_t size) = read (uf->file_descriptor, &b, sizeof (b)); - if ((qid & 1) == 0) - vnet_device_input_set_interrupt_pending (vnm, vif->hw_if_index, - RX_QUEUE_ACCESS (qid)); + vnet_hw_if_rx_queue_set_int_pending (vnm, uf->private_data); return 0; } @@ -64,7 +59,6 @@ clib_error_t * virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz) { virtio_vring_t *vring; - clib_file_t t = { 0 }; int i; if (!is_pow2 (sz)) @@ -123,13 +117,6 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz) virtio_log_debug (vif, "vring %u size %u call_fd %d kick_fd %d", idx, vring->size, vring->call_fd, vring->kick_fd); - t.read_function = call_read_ready; - t.file_descriptor = vring->call_fd; - t.private_data = vif->dev_instance << 16 | idx; - t.description = format (0, "%U vring %u", format_virtio_device_name, - vif->dev_instance, idx); - vring->call_file_index = clib_file_add (&file_main, &t); - return 0; } @@ -233,19 +220,38 @@ virtio_set_packet_buffering (virtio_if_t * vif, u16 buffering_size) } void -virtio_vring_set_numa_node (vlib_main_t * vm, virtio_if_t * vif, u32 idx) +virtio_vring_set_rx_queues (vlib_main_t *vm, virtio_if_t *vif) { vnet_main_t *vnm = vnet_get_main (); - u32 thread_index; - virtio_vring_t *vring = - vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (idx)); - thread_index = - vnet_get_device_input_thread_index (vnm, vif->hw_if_index, - RX_QUEUE_ACCESS (idx)); - vring->buffer_pool_index = - vlib_buffer_pool_get_default_for_numa (vm, - vlib_mains - [thread_index]->numa_node); + virtio_vring_t *vring; + + vnet_hw_if_set_input_node (vnm, vif->hw_if_index, virtio_input_node.index); + + vec_foreach (vring, vif->rxq_vrings) + { + vring->queue_index = vnet_hw_if_register_rx_queue ( + vnm, vif->hw_if_index, RX_QUEUE_ACCESS (vring->queue_id), + VNET_HW_IF_RXQ_THREAD_ANY); + vring->buffer_pool_index = vlib_buffer_pool_get_default_for_numa ( + vm, vnet_hw_if_get_rx_queue_numa_node (vnm, vring->queue_index)); + if (vif->type == VIRTIO_IF_TYPE_TAP || vif->type == VIRTIO_IF_TYPE_TUN) + { + + clib_file_t f = { + .read_function = call_read_ready, + .flags = UNIX_FILE_EVENT_EDGE_TRIGGERED, + .file_descriptor = vring->call_fd, + .private_data = vring->queue_index, + .description = format (0, "%U vring %u", format_virtio_device_name, + vif->dev_instance, vring->queue_id), + }; + + vring->call_file_index = clib_file_add (&file_main, &f); + vnet_hw_if_set_rx_queue_file_index (vnm, vring->queue_index, + vring->call_file_index); + } + } + vnet_hw_if_update_runtime_data (vnm, vif->hw_if_index); } inline void diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h index 035dc9ca40d..c149ce482e6 100644 --- a/src/vnet/devices/virtio/virtio.h +++ b/src/vnet/devices/virtio/virtio.h @@ -82,6 +82,7 @@ typedef struct u32 *buffers; u16 size; u16 queue_id; + u32 queue_index; u16 desc_in_use; u16 desc_next; u16 last_used_idx; @@ -230,8 +231,7 @@ clib_error_t *virtio_vring_free_rx (vlib_main_t * vm, virtio_if_t * vif, u32 idx); clib_error_t *virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif, u32 idx); -void virtio_vring_set_numa_node (vlib_main_t * vm, virtio_if_t * vif, - u32 idx); +void virtio_vring_set_rx_queues (vlib_main_t *vm, virtio_if_t *vif); extern void virtio_free_buffers (vlib_main_t * vm, virtio_vring_t * vring); extern void virtio_set_net_hdr_size (virtio_if_t * vif); extern void virtio_show (vlib_main_t * vm, 
u32 * hw_if_indices, u8 show_descr, diff --git a/src/vnet/devices/virtio/virtio_process.c b/src/vnet/devices/virtio/virtio_process.c index 1689459f1cc..f347ef2ab57 100644 --- a/src/vnet/devices/virtio/virtio_process.c +++ b/src/vnet/devices/virtio/virtio_process.c @@ -18,6 +18,7 @@ #include #include #include +#include static uword virtio_send_interrupt_process (vlib_main_t * vm, @@ -45,22 +46,20 @@ virtio_send_interrupt_process (vlib_main_t * vm, break; case ~0: - /* *INDENT-OFF* */ - pool_foreach (vif, vim->interfaces) { - if (vif->packet_coalesce || vif->packet_buffering) - { - virtio_vring_t *vring; - vec_foreach (vring, vif->rxq_vrings) - { - if (vring->mode == VNET_HW_IF_RX_MODE_INTERRUPT || - vring->mode == VNET_HW_IF_RX_MODE_ADAPTIVE) - vnet_device_input_set_interrupt_pending ( - vnet_get_main (), vif->hw_if_index, - RX_QUEUE_ACCESS (vring->queue_id)); - } - } - } - /* *INDENT-ON* */ + pool_foreach (vif, vim->interfaces) + { + if (vif->packet_coalesce || vif->packet_buffering) + { + virtio_vring_t *vring; + vec_foreach (vring, vif->rxq_vrings) + { + if (vring->mode == VNET_HW_IF_RX_MODE_INTERRUPT || + vring->mode == VNET_HW_IF_RX_MODE_ADAPTIVE) + vnet_hw_if_rx_queue_set_int_pending ( + vnet_get_main (), vring->queue_index); + } + } + } break; default: diff --git a/src/vnet/interface.c b/src/vnet/interface.c index 82dc29b3744..a03ea28b3af 100644 --- a/src/vnet/interface.c +++ b/src/vnet/interface.c @@ -42,15 +42,18 @@ #include #include #include +#include /* *INDENT-OFF* */ VLIB_REGISTER_LOG_CLASS (if_default_log, static) = { .class_name = "interface", + .default_syslog_level = VLIB_LOG_LEVEL_DEBUG, }; /* *INDENT-ON* */ #define log_debug(fmt,...) vlib_log_debug(if_default_log.class, fmt, __VA_ARGS__) #define log_err(fmt,...) vlib_log_err(if_default_log.class, fmt, __VA_ARGS__) + typedef enum vnet_interface_helper_flags_t_ { VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE = (1 << 0), @@ -493,6 +496,7 @@ vnet_sw_interface_set_flags_helper (vnet_main_t * vnm, u32 sw_if_index, hi->flags & ~VNET_HW_INTERFACE_FLAG_LINK_UP, helper_flags); + vnet_hw_if_update_runtime_data (vnm, si->hw_if_index); } } @@ -1022,6 +1026,10 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index) /* Call delete callbacks. */ call_hw_interface_add_del_callbacks (vnm, hw_if_index, /* is_create */ 0); + /* delete rx queues */ + vnet_hw_if_unregister_all_rx_queues (vnm, hw_if_index); + vnet_hw_if_update_runtime_data (vnm, hw_if_index); + /* Delete any sub-interfaces. 
*/ { u32 id, sw_if_index; @@ -1072,7 +1080,7 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index) vec_free (hw->hw_address); vec_free (hw->input_node_thread_index_by_queue); vec_free (hw->dq_runtime_index_by_queue); - + vec_free (hw->rx_queue_indices); pool_put (im->hw_interfaces, hw); } @@ -1376,6 +1384,8 @@ vnet_interface_init (vlib_main_t * vm) im->hw_interface_class_by_name = hash_create_string ( /* size */ 0, sizeof (uword)); + im->rxq_index_by_hw_if_index_and_queue_id = + hash_create_mem (0, sizeof (u64), sizeof (u32)); im->sw_if_index_by_sup_and_sub = hash_create_mem (0, sizeof (u64), sizeof (uword)); { diff --git a/src/vnet/interface.h b/src/vnet/interface.h index 71d6a775708..03c65d769ee 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -525,6 +525,29 @@ typedef enum vnet_hw_interface_flags_t_ (VNET_HW_INTERFACE_FLAG_HALF_DUPLEX | \ VNET_HW_INTERFACE_FLAG_FULL_DUPLEX) +typedef struct +{ + /* hw interface index */ + u32 hw_if_index; + + /* device instance */ + u32 dev_instance; + + /* index of thread pollling this queue */ + u32 thread_index; + + /* file index of queue interrupt line */ + u32 file_index; + + /* hardware queue identifier */ + u32 queue_id; + + /* mode */ + vnet_hw_if_rx_mode mode : 8; +#define VNET_HW_IF_RXQ_THREAD_ANY ~0 +#define VNET_HW_IF_RXQ_NO_RX_INTERRUPT ~0 +} vnet_hw_if_rx_queue_t; + /* Hardware-interface. This corresponds to a physical wire that packets flow over. */ typedef struct vnet_hw_interface_t @@ -609,6 +632,9 @@ typedef struct vnet_hw_interface_t /* device input device_and_queue runtime index */ uword *dq_runtime_index_by_queue; + /* rx queues */ + u32 *rx_queue_indices; + /* numa node that hardware device connects to */ u8 numa_node; @@ -621,6 +647,18 @@ typedef struct vnet_hw_interface_t u32 trace_classify_table_index; } vnet_hw_interface_t; +typedef struct +{ + u32 dev_instance; + u32 queue_id; +} vnet_hw_if_rxq_poll_vector_t; + +typedef struct +{ + vnet_hw_if_rxq_poll_vector_t *rxq_poll_vector; + void *rxq_interrupts; +} vnet_hw_if_rx_node_runtime_t; + extern vnet_device_class_t vnet_local_interface_device_class; typedef enum @@ -857,6 +895,10 @@ typedef struct /* Hardware interfaces. */ vnet_hw_interface_t *hw_interfaces; + /* Hardware interface RX queues */ + vnet_hw_if_rx_queue_t *hw_if_rx_queues; + uword *rxq_index_by_hw_if_index_and_queue_id; + /* Hash table mapping HW interface name to index. */ uword *hw_interface_by_name; diff --git a/src/vnet/interface/runtime.c b/src/vnet/interface/runtime.c new file mode 100644 index 00000000000..c1b096f3c86 --- /dev/null +++ b/src/vnet/interface/runtime.c @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +VLIB_REGISTER_LOG_CLASS (if_rxq_log, static) = { + .class_name = "interface", + .subclass_name = "runtime", +}; + +#define log_debug(fmt, ...) 
vlib_log_debug (if_rxq_log.class, fmt, __VA_ARGS__) +#define log_err(fmt, ...) vlib_log_err (if_rxq_log.class, fmt, __VA_ARGS__) + +static char *node_state_str[] = { + [VLIB_NODE_STATE_DISABLED] = "disabled", + [VLIB_NODE_STATE_POLLING] = "polling", + [VLIB_NODE_STATE_INTERRUPT] = "interrupt", +}; + +static int +poll_data_sort (void *a1, void *a2) +{ + vnet_hw_if_rxq_poll_vector_t *pv1 = a1; + vnet_hw_if_rxq_poll_vector_t *pv2 = a2; + + if (pv1->dev_instance > pv2->dev_instance) + return 1; + else if (pv1->dev_instance < pv2->dev_instance) + return -1; + else if (pv1->queue_id > pv2->queue_id) + return 1; + else if (pv1->queue_id < pv2->queue_id) + return -1; + else + return 0; +} + +void +vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); + u32 node_index = hi->input_node_index; + vnet_hw_if_rx_queue_t *rxq; + vnet_hw_if_rxq_poll_vector_t *pv, **d = 0; + vlib_node_state_t *per_thread_node_state = 0; + u32 n_threads = vec_len (vlib_mains); + int something_changed = 0; + clib_bitmap_t *pending_int = 0; + int last_int = -1; + + log_debug ("update node '%U' triggered by interface %v", + format_vlib_node_name, vm, node_index, hi->name); + + vec_validate (d, n_threads - 1); + vec_validate_init_empty (per_thread_node_state, n_threads - 1, + VLIB_NODE_STATE_DISABLED); + + /* find out desired node state on each thread */ + pool_foreach (rxq, im->hw_if_rx_queues) + { + u32 ti = rxq->thread_index; + + ASSERT (rxq->mode != VNET_HW_IF_RX_MODE_UNKNOWN); + ASSERT (rxq->mode != VNET_HW_IF_RX_MODE_DEFAULT); + + hi = vnet_get_hw_interface (vnm, rxq->hw_if_index); + + if (hi->input_node_index != node_index) + continue; + + if (rxq->mode == VNET_HW_IF_RX_MODE_POLLING) + per_thread_node_state[ti] = VLIB_NODE_STATE_POLLING; + + if (per_thread_node_state[ti] == VLIB_NODE_STATE_POLLING) + continue; + + if (rxq->mode == VNET_HW_IF_RX_MODE_INTERRUPT || + rxq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE) + per_thread_node_state[ti] = VLIB_NODE_STATE_INTERRUPT; + } + + /* construct per-thread polling vectors */ + pool_foreach (rxq, im->hw_if_rx_queues) + { + u32 ti = rxq->thread_index; + uword flags; + + hi = vnet_get_hw_interface (vnm, rxq->hw_if_index); + + if (hi->input_node_index != node_index) + continue; + + flags = vnet_sw_interface_get_flags (vnm, hi->sw_if_index); + if ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0) + { + log_debug ("skip interface %v (admin down)", hi->name); + continue; + } + + if (rxq->mode == VNET_HW_IF_RX_MODE_INTERRUPT || + rxq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE) + last_int = clib_max (last_int, rxq - im->hw_if_rx_queues); + + if (per_thread_node_state[ti] != VLIB_NODE_STATE_POLLING) + continue; + + vec_add2_aligned (d[ti], pv, 1, CLIB_CACHE_LINE_BYTES); + pv->dev_instance = rxq->dev_instance; + pv->queue_id = rxq->queue_id; + } + + /* sort poll vectors and compare them with active ones to avoid + * unnecesary barrier */ + for (int i = 0; i < n_threads; i++) + { + vlib_node_state_t old_state; + vec_sort_with_function (d[i], poll_data_sort); + + old_state = vlib_node_get_state (vlib_mains[i], node_index); + if (per_thread_node_state[i] != old_state) + { + something_changed = 1; + log_debug ("state changed for node %U on thread %u from %s to %s", + format_vlib_node_name, vm, node_index, i, + node_state_str[old_state], + node_state_str[per_thread_node_state[i]]); + } + + /* check if something changed */ + if 
(something_changed == 0) + { + vnet_hw_if_rx_node_runtime_t *rt; + rt = vlib_node_get_runtime_data (vlib_mains[i], node_index); + if (vec_len (rt->rxq_poll_vector) != vec_len (d[i])) + something_changed = 1; + else if (memcmp (d[i], rt->rxq_poll_vector, + vec_len (d[i]) * sizeof (*d))) + something_changed = 1; + if (clib_interrupt_get_n_int (rt->rxq_interrupts) != last_int + 1) + something_changed = 1; + } + } + + if (something_changed) + { + int with_barrier; + + if (vlib_worker_thread_barrier_held ()) + { + with_barrier = 0; + log_debug ("%s", "already running under the barrier"); + } + else + with_barrier = 1; + + if (with_barrier) + vlib_worker_thread_barrier_sync (vm); + + for (int i = 0; i < n_threads; i++) + { + vlib_main_t *vm = vlib_mains[i]; + vnet_hw_if_rx_node_runtime_t *rt; + rt = vlib_node_get_runtime_data (vm, node_index); + pv = rt->rxq_poll_vector; + rt->rxq_poll_vector = d[i]; + d[i] = pv; + + if (rt->rxq_interrupts) + { + void *in = rt->rxq_interrupts; + int int_num = -1; + while ((int_num = clib_interrupt_get_next (in, int_num)) != -1) + { + clib_interrupt_clear (in, int_num); + pending_int = clib_bitmap_set (pending_int, int_num, 1); + } + } + + vlib_node_set_state (vm, node_index, per_thread_node_state[i]); + + if (last_int >= 0) + clib_interrupt_resize (&rt->rxq_interrupts, last_int + 1); + else + clib_interrupt_free (&rt->rxq_interrupts); + } + + if (with_barrier) + vlib_worker_thread_barrier_release (vm); + } + else + log_debug ("skipping update of node '%U', no changes detected", + format_vlib_node_name, vm, node_index); + + if (pending_int) + { + int i; + clib_bitmap_foreach (i, pending_int) + { + vnet_hw_if_rx_queue_set_int_pending (vnm, i); + } + clib_bitmap_free (pending_int); + } + + for (int i = 0; i < n_threads; i++) + vec_free (d[i]); + + vec_free (d); + vec_free (per_thread_node_state); +} diff --git a/src/vnet/interface/rx_queue.c b/src/vnet/interface/rx_queue.c new file mode 100644 index 00000000000..c0492dd555e --- /dev/null +++ b/src/vnet/interface/rx_queue.c @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +VLIB_REGISTER_LOG_CLASS (if_rxq_log, static) = { + .class_name = "interface", + .subclass_name = "rx-queue", +}; + +#define log_debug(fmt, ...) vlib_log_debug (if_rxq_log.class, fmt, __VA_ARGS__) +#define log_err(fmt, ...) 
vlib_log_err (if_rxq_log.class, fmt, __VA_ARGS__) + +static u32 +next_thread_index (vnet_main_t *vnm, u32 thread_index) +{ + vnet_device_main_t *vdm = &vnet_device_main; + if (vdm->first_worker_thread_index == 0) + return 0; + + if (thread_index != 0 && (thread_index < vdm->first_worker_thread_index || + thread_index > vdm->last_worker_thread_index)) + { + thread_index = vdm->next_worker_thread_index++; + if (vdm->next_worker_thread_index > vdm->last_worker_thread_index) + vdm->next_worker_thread_index = vdm->first_worker_thread_index; + } + + return thread_index; +} + +static u64 +rx_queue_key (u32 hw_if_index, u32 queue_id) +{ + return ((u64) hw_if_index << 32) | queue_id; +} + +u32 +vnet_hw_if_get_rx_queue_index_by_id (vnet_main_t *vnm, u32 hw_if_index, + u32 queue_id) +{ + vnet_interface_main_t *im = &vnm->interface_main; + u64 key = rx_queue_key (hw_if_index, queue_id); + uword *p = hash_get_mem (im->rxq_index_by_hw_if_index_and_queue_id, &key); + return p ? p[0] : ~0; +} + +u32 +vnet_hw_if_register_rx_queue (vnet_main_t *vnm, u32 hw_if_index, u32 queue_id, + u32 thread_index) +{ + vnet_interface_main_t *im = &vnm->interface_main; + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); + vnet_hw_if_rx_queue_t *rxq; + u64 key = rx_queue_key (hw_if_index, queue_id); + u32 queue_index; + + if (hash_get_mem (im->rxq_index_by_hw_if_index_and_queue_id, &key)) + clib_panic ("Trying to register already registered queue id (%u) in the " + "interface %v\n", + queue_id, hi->name); + + thread_index = next_thread_index (vnm, thread_index); + + pool_get_zero (im->hw_if_rx_queues, rxq); + queue_index = rxq - im->hw_if_rx_queues; + vec_add1 (hi->rx_queue_indices, queue_index); + hash_set_mem_alloc (&im->rxq_index_by_hw_if_index_and_queue_id, &key, + queue_index); + rxq->hw_if_index = hw_if_index; + rxq->dev_instance = hi->dev_instance; + rxq->queue_id = queue_id; + rxq->thread_index = thread_index; + rxq->mode = VNET_HW_IF_RX_MODE_POLLING; + rxq->file_index = ~0; + + log_debug ("register: interface %s queue-id %u thread %u", hi->name, + queue_id, thread_index); + + return queue_index; +} + +void +vnet_hw_if_unregister_rx_queue (vnet_main_t *vnm, u32 queue_index) +{ + vnet_interface_main_t *im = &vnm->interface_main; + vnet_hw_if_rx_queue_t *rxq; + rxq = vnet_hw_if_get_rx_queue (vnm, queue_index); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rxq->hw_if_index); + u64 key; + + key = ((u64) rxq->hw_if_index << 32) | rxq->queue_id; + hash_unset_mem_free (&im->rxq_index_by_hw_if_index_and_queue_id, &key); + + for (int i = 0; i < vec_len (hi->rx_queue_indices); i++) + if (hi->rx_queue_indices[i] == queue_index) + { + vec_del1 (hi->rx_queue_indices, i); + break; + } + + log_debug ("unregister: interface %s queue-id %u", hi->name, rxq->queue_id); + pool_put_index (im->hw_if_rx_queues, queue_index); +} + +void +vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index) +{ + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); + + log_debug ("unregister_all: interface %s", hi->name); + + for (int i = 0; i < vec_len (hi->rx_queue_indices); i++) + vnet_hw_if_unregister_rx_queue (vnm, hi->rx_queue_indices[i]); + + vec_free (hi->rx_queue_indices); +} + +void +vnet_hw_if_set_rx_queue_file_index (vnet_main_t *vnm, u32 queue_index, + u32 file_index) +{ + vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, queue_index); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rxq->hw_if_index); + + rxq->file_index = file_index; + clib_file_set_polling_thread 
(&file_main, file_index, rxq->thread_index); + log_debug ("set_file_index: interface %s queue-id %u file-index %u", + hi->name, rxq->queue_id, file_index); +} + +void +vnet_hw_if_set_input_node (vnet_main_t *vnm, u32 hw_if_index, u32 node_index) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->input_node_index = node_index; + log_debug ("set_input_node: node %U for interface %s", format_vlib_node_name, + vm, node_index, hi->name); +} + +int +vnet_hw_if_set_rx_queue_mode (vnet_main_t *vnm, u32 queue_index, + vnet_hw_if_rx_mode mode) +{ + vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, queue_index); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rxq->hw_if_index); + vnet_device_class_t *dc = vnet_get_device_class (vnm, hi->dev_class_index); + + ASSERT (mode != VNET_HW_IF_RX_MODE_UNKNOWN); + + if (mode == VNET_HW_IF_RX_MODE_DEFAULT) + mode = hi->default_rx_mode; + + if (rxq->mode == mode) + { + log_debug ("set_rx_queue_mode: interface %s queue-id %u mode " + "unchanged (%U)", + hi->name, rxq->queue_id, format_vnet_hw_if_rx_mode, mode); + return 0; + } + + if (dc->rx_mode_change_function) + { + clib_error_t *err = dc->rx_mode_change_function (vnm, rxq->hw_if_index, + rxq->queue_id, mode); + if (err) + { + log_err ("setting rx mode on the interface %s queue-id %u failed.\n" + " %U", + hi->name, rxq->queue_id, format_clib_error, err); + clib_error_free (err); + return VNET_API_ERROR_UNSUPPORTED; + } + } + + rxq->mode = mode; + log_debug ("set_rx_queue_mode: interface %s queue-id %u mode set to %U", + hi->name, rxq->queue_id, format_vnet_hw_if_rx_mode, mode); + return 0; +} + +vnet_hw_if_rx_mode +vnet_hw_if_get_rx_queue_mode (vnet_main_t *vnm, u32 queue_index) +{ + vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, queue_index); + return rxq->mode; +} + +void +vnet_hw_if_set_rx_queue_thread_index (vnet_main_t *vnm, u32 queue_index, + u32 thread_index) +{ + vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, queue_index); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rxq->hw_if_index); + + rxq->thread_index = thread_index; + + if (rxq->file_index != ~0) + clib_file_set_polling_thread (&file_main, rxq->file_index, thread_index); + + log_debug ("set_rx_queue_thread_index: interface %s queue-id %u " + "thread-index set to %u", + hi->name, rxq->queue_id, thread_index); +} + +void +vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm, + vlib_node_runtime_t *node) +{ + vnet_hw_if_rx_node_runtime_t *rt = (void *) node->runtime_data; + vnet_main_t *vnm = vnet_get_main (); + int int_num = -1; + + ASSERT (node->state == VLIB_NODE_STATE_INTERRUPT); + + vec_reset_length (rt->rxq_poll_vector); + + while ((int_num = clib_interrupt_get_next (rt->rxq_interrupts, int_num)) != + -1) + { + vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, int_num); + vnet_hw_if_rxq_poll_vector_t *pv; + + clib_interrupt_clear (rt->rxq_interrupts, int_num); + + vec_add2 (rt->rxq_poll_vector, pv, 1); + pv->dev_instance = rxq->dev_instance; + pv->queue_id = rxq->queue_id; + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/interface/rx_queue_funcs.h b/src/vnet/interface/rx_queue_funcs.h new file mode 100644 index 00000000000..08d717bb68c --- /dev/null +++ b/src/vnet/interface/rx_queue_funcs.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +/* function declarations */ + +u32 vnet_hw_if_get_rx_queue_index_by_id (vnet_main_t *vnm, u32 hw_if_index, + u32 queue_id); +u32 vnet_hw_if_register_rx_queue (vnet_main_t *vnm, u32 hw_if_index, + u32 queue_id, u32 thread_index); +void vnet_hw_if_unregister_rx_queue (vnet_main_t *vnm, u32 queue_index); +void vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index); +void vnet_hw_if_set_rx_queue_file_index (vnet_main_t *vnm, u32 queue_index, + u32 file_index); +void vnet_hw_if_set_input_node (vnet_main_t *vnm, u32 hw_if_index, + u32 node_index); +int vnet_hw_if_set_rx_queue_mode (vnet_main_t *vnm, u32 queue_index, + vnet_hw_if_rx_mode mode); +vnet_hw_if_rx_mode vnet_hw_if_get_rx_queue_mode (vnet_main_t *vnm, + u32 queue_index); +void vnet_hw_if_set_rx_queue_thread_index (vnet_main_t *vnm, u32 queue_index, + u32 thread_index); +void vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index); +void vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm, + vlib_node_runtime_t *node); + +/* inline functions */ + +static_always_inline vnet_hw_if_rx_queue_t * +vnet_hw_if_get_rx_queue (vnet_main_t *vnm, u32 queue_index) +{ + vnet_interface_main_t *im = &vnm->interface_main; + if (pool_is_free_index (im->hw_if_rx_queues, queue_index)) + return 0; + return pool_elt_at_index (im->hw_if_rx_queues, queue_index); +} + +static_always_inline void +vnet_hw_if_rx_queue_set_int_pending (vnet_main_t *vnm, u32 queue_index) +{ + vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, queue_index); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rxq->hw_if_index); + vlib_main_t *vm = vlib_mains[rxq->thread_index]; + + vnet_hw_if_rx_node_runtime_t *rt; + rt = vlib_node_get_runtime_data (vm, hi->input_node_index); + if (vm == vlib_get_main ()) + clib_interrupt_set (rt->rxq_interrupts, queue_index); + else + clib_interrupt_set_atomic (rt->rxq_interrupts, queue_index); + vlib_node_set_interrupt_pending (vm, hi->input_node_index); +} + +static_always_inline vnet_hw_if_rxq_poll_vector_t * +vnet_hw_if_get_rxq_poll_vector (vlib_main_t *vm, vlib_node_runtime_t *node) +{ + vnet_hw_if_rx_node_runtime_t *rt = (void *) node->runtime_data; + + if (PREDICT_FALSE (node->state == VLIB_NODE_STATE_INTERRUPT)) + vnet_hw_if_generate_rxq_int_poll_vector (vm, node); + + return rt->rxq_poll_vector; +} + +static_always_inline u8 +vnet_hw_if_get_rx_queue_numa_node (vnet_main_t *vnm, u32 queue_index) +{ + vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, queue_index); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rxq->hw_if_index); + return hi->numa_node; +} + +static_always_inline u32 +vnet_hw_if_get_rx_queue_thread_index (vnet_main_t *vnm, u32 queue_index) +{ + vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, queue_index); + return rxq->thread_index; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git
a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index a5b9d63205f..03e7436fead 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -53,6 +53,7 @@ #include #include #include +#include static int compare_interface_names (void *a1, void *a2) @@ -1537,24 +1538,55 @@ set_hw_interface_change_rx_mode (vnet_main_t * vnm, u32 hw_if_index, { clib_error_t *error = 0; vnet_hw_interface_t *hw; + u32 *queue_indices = 0; int i; hw = vnet_get_hw_interface (vnm, hw_if_index); - if (queue_id_valid == 0) + /* to be deprecated */ + if (vec_len (hw->rx_queue_indices) == 0) { - for (i = 0; i < vec_len (hw->dq_runtime_index_by_queue); i++) + if (queue_id_valid == 0) { - error = set_hw_interface_rx_mode (vnm, hw_if_index, i, mode); - if (error) - break; + for (i = 0; i < vec_len (hw->dq_runtime_index_by_queue); i++) + { + error = set_hw_interface_rx_mode (vnm, hw_if_index, i, mode); + if (error) + break; + } + hw->default_rx_mode = mode; } - hw->default_rx_mode = mode; + else + error = set_hw_interface_rx_mode (vnm, hw_if_index, queue_id, mode); + + return (error); + } + + if (queue_id_valid) + { + u32 queue_index; + queue_index = + vnet_hw_if_get_rx_queue_index_by_id (vnm, hw_if_index, queue_id); + if (queue_index == ~0) + return clib_error_return (0, "unknown queue %u on interface %s", + queue_id, hw->name); + vec_add1 (queue_indices, queue_index); } else - error = set_hw_interface_rx_mode (vnm, hw_if_index, queue_id, mode); + queue_indices = hw->rx_queue_indices; - return (error); + for (int i = 0; i < vec_len (queue_indices); i++) + { + int rv = vnet_hw_if_set_rx_queue_mode (vnm, queue_indices[i], mode); + if (rv) + goto done; + } + +done: + if (queue_indices != hw->rx_queue_indices) + vec_free (queue_indices); + vnet_hw_if_update_runtime_data (vnm, hw_if_index); + return error; } static clib_error_t * @@ -1733,8 +1765,8 @@ set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id, { vnet_main_t *vnm = vnet_get_main (); vnet_device_main_t *vdm = &vnet_device_main; - clib_error_t *error = 0; - vnet_hw_if_rx_mode mode = VNET_HW_IF_RX_MODE_UNKNOWN; + vnet_hw_interface_t *hw; + u32 queue_index; int rv; if (is_main) @@ -1746,21 +1778,38 @@ set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id, return clib_error_return (0, "please specify valid worker thread or main"); - rv = vnet_hw_interface_get_rx_mode (vnm, hw_if_index, queue_id, &mode); + hw = vnet_get_hw_interface (vnm, hw_if_index); + + /* to be deprecated */ + if (vec_len (hw->rx_queue_indices) == 0) + { + clib_error_t *error = 0; + vnet_hw_if_rx_mode mode = VNET_HW_IF_RX_MODE_UNKNOWN; + rv = vnet_hw_interface_get_rx_mode (vnm, hw_if_index, queue_id, &mode); - if (rv) - return clib_error_return (0, "not found"); + if (rv) + return clib_error_return (0, "not found"); - rv = vnet_hw_interface_unassign_rx_thread (vnm, hw_if_index, queue_id); + rv = vnet_hw_interface_unassign_rx_thread (vnm, hw_if_index, queue_id); - if (rv) - return clib_error_return (0, "not found"); + if (rv) + return clib_error_return (0, "not found"); - vnet_hw_interface_assign_rx_thread (vnm, hw_if_index, queue_id, - thread_index); - vnet_hw_interface_set_rx_mode (vnm, hw_if_index, queue_id, mode); + vnet_hw_interface_assign_rx_thread (vnm, hw_if_index, queue_id, + thread_index); + vnet_hw_interface_set_rx_mode (vnm, hw_if_index, queue_id, mode); - return (error); + return (error); + } + + queue_index = + vnet_hw_if_get_rx_queue_index_by_id (vnm, hw_if_index, queue_id); + if (queue_index == ~0) + return clib_error_return (0, "unknown queue %u on 
interface %s", queue_id, + hw->name); + vnet_hw_if_set_rx_queue_thread_index (vnm, queue_index, thread_index); + vnet_hw_if_update_runtime_data (vnm, hw_if_index); + return 0; } static clib_error_t * diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c index 9038e5d96e7..dc2edff7c67 100644 --- a/src/vnet/interface_format.c +++ b/src/vnet/interface_format.c @@ -42,6 +42,7 @@ #include #include #include +#include u8 * format_vtr (u8 * s, va_list * args) @@ -192,6 +193,21 @@ format_vnet_hw_interface (u8 * s, va_list * args) s = format (s, "\n%ULink speed: %U", format_white_space, indent + 2, format_vnet_hw_interface_link_speed, hi->link_speed); + if (vec_len (hi->rx_queue_indices)) + { + s = format (s, "\n%URX Queues:", format_white_space, indent + 2); + s = format (s, "\n%U%-6s%-15s%-10s", format_white_space, indent + 4, + "queue", "thread", "mode"); + for (int i = 0; i < vec_len (hi->rx_queue_indices); i++) + { + vnet_hw_if_rx_queue_t *rxq; + rxq = vnet_hw_if_get_rx_queue (vnm, hi->rx_queue_indices[i]); + s = format (s, "\n%U%-6u%-15U%-10U", format_white_space, indent + 4, + rxq->queue_id, format_vlib_thread_name_and_index, + rxq->thread_index, format_vnet_hw_if_rx_mode, rxq->mode); + } + } + if (hi->rss_queues) { s = format (s, "\n%URSS queues: %U", format_white_space, indent + 2, diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt index 200cfb1eded..3dc99cc9db6 100644 --- a/src/vppinfra/CMakeLists.txt +++ b/src/vppinfra/CMakeLists.txt @@ -59,6 +59,7 @@ set(VPPINFRA_SRCS graph.c hash.c heap.c + interrupt.c longjmp.S macros.c maplog.c @@ -134,6 +135,7 @@ set(VPPINFRA_HEADERS graph.h hash.h heap.h + interrupt.h lb_hash_hash.h llist.h lock.h diff --git a/src/vppinfra/interrupt.c b/src/vppinfra/interrupt.c new file mode 100644 index 00000000000..20b7450ceed --- /dev/null +++ b/src/vppinfra/interrupt.c @@ -0,0 +1,92 @@ + +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include /* for count_set_bits */ +#include +#include +#include + +__clib_export void +clib_interrupt_init (void **data, uword n_int) +{ + clib_interrupt_header_t *h; + uword sz = sizeof (clib_interrupt_header_t); + uword data_size = round_pow2 (n_int, CLIB_CACHE_LINE_BYTES * 8) / 8; + + sz += 2 * data_size; + h = data[0] = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES); + clib_memset (data[0], 0, sz); + h->n_int = n_int; + h->n_uword_alloc = (data_size * 8) >> log2_uword_bits; +} + +__clib_export void +clib_interrupt_resize (void **data, uword n_int) +{ + clib_interrupt_header_t *h = data[0]; + + if (data[0] == 0) + { + clib_interrupt_init (data, n_int); + return; + } + + if (n_int < h->n_int) + { + uword *old_bmp, *old_abp, v; + old_bmp = clib_interrupt_get_bitmap (data[0]); + old_abp = clib_interrupt_get_atomic_bitmap (data[0]); + for (uword i = 0; i < h->n_uword_alloc; i++) + { + v = old_abp[i]; + old_abp[i] = 0; + if (n_int > ((i + 1) * uword_bits)) + old_bmp[i] |= v; + else if (n_int > (i * uword_bits)) + old_bmp[i] = (old_bmp[i] | v) & pow2_mask (n_int - i * uword_bits); + else + old_bmp[i] = 0; + } + } + else if (n_int > h->n_uword_alloc * uword_bits) + { + void *old = data[0]; + uword *old_bmp, *old_abp, *new_bmp; + uword n_uwords = round_pow2 (h->n_int, uword_bits) / uword_bits; + + clib_interrupt_init (data, n_int); + h = data[0]; + + new_bmp = clib_interrupt_get_bitmap (data[0]); + old_bmp = clib_interrupt_get_bitmap (old); + old_abp = clib_interrupt_get_atomic_bitmap (old); + + for (uword i = 0; i < n_uwords; i++) + new_bmp[i] = old_bmp[i] | old_abp[i]; + + clib_mem_free (old); + } + h->n_int = n_int; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/interrupt.h b/src/vppinfra/interrupt.h new file mode 100644 index 00000000000..60c01fa0248 --- /dev/null +++ b/src/vppinfra/interrupt.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_clib_interrupt_h +#define included_clib_interrupt_h + +#include +#include /* for count_set_bits */ +#include + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + int n_int; + uword n_uword_alloc; +} clib_interrupt_header_t; + +void clib_interrupt_init (void **data, uword n_interrupts); +void clib_interrupt_resize (void **data, uword n_interrupts); + +static_always_inline void +clib_interrupt_free (void **data) +{ + if (data[0]) + { + clib_mem_free (data[0]); + data[0] = 0; + } +} + +static_always_inline int +clib_interrupt_get_n_int (void *d) +{ + clib_interrupt_header_t *h = d; + if (h) + return h->n_int; + return 0; +} + +static_always_inline uword * +clib_interrupt_get_bitmap (void *d) +{ + return d + sizeof (clib_interrupt_header_t); +} + +static_always_inline uword * +clib_interrupt_get_atomic_bitmap (void *d) +{ + clib_interrupt_header_t *h = d; + return clib_interrupt_get_bitmap (d) + h->n_uword_alloc; +} + +static_always_inline void +clib_interrupt_set (void *in, int int_num) +{ + uword *bmp = clib_interrupt_get_bitmap (in); + uword mask = 1ULL << (int_num & (uword_bits - 1)); + bmp += int_num >> log2_uword_bits; + + ASSERT (int_num < ((clib_interrupt_header_t *) in)->n_int); + + *bmp |= mask; +} + +static_always_inline void +clib_interrupt_set_atomic (void *in, int int_num) +{ + uword *bmp = clib_interrupt_get_atomic_bitmap (in); + uword mask = 1ULL << (int_num & (uword_bits - 1)); + bmp += int_num >> log2_uword_bits; + + ASSERT (int_num < ((clib_interrupt_header_t *) in)->n_int); + + __atomic_fetch_or (bmp, mask, __ATOMIC_RELAXED); +} + +static_always_inline void +clib_interrupt_clear (void *in, int int_num) +{ + uword *bmp = clib_interrupt_get_bitmap (in); + uword *abm = clib_interrupt_get_atomic_bitmap (in); + uword mask = 1ULL << (int_num & (uword_bits - 1)); + uword off = int_num >> log2_uword_bits; + + ASSERT (int_num < ((clib_interrupt_header_t *) in)->n_int); + + bmp[off] |= __atomic_exchange_n (abm + off, 0, __ATOMIC_SEQ_CST); + bmp[off] &= ~mask; +} + +static_always_inline int +clib_interrupt_get_next (void *in, int last) +{ + uword *bmp = clib_interrupt_get_bitmap (in); + uword *abm = clib_interrupt_get_atomic_bitmap (in); + clib_interrupt_header_t *h = in; + uword bmp_uword, off; + + ASSERT (last >= -1 && last < h->n_int); + + off = (last + 1) >> log2_uword_bits; + + last -= off << log2_uword_bits; + bmp[off] |= __atomic_exchange_n (abm + off, 0, __ATOMIC_SEQ_CST); + bmp_uword = bmp[off] & ~pow2_mask (last + 1); + +next: + if (bmp_uword) + return (off << log2_uword_bits) + count_trailing_zeros (bmp_uword); + + off++; + + if (off > h->n_int >> log2_uword_bits) + return -1; + + bmp[off] |= __atomic_exchange_n (abm + off, 0, __ATOMIC_SEQ_CST); + bmp_uword = bmp[off]; + + goto next; +} + +#endif /* included_clib_interrupt_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- 2.16.6
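
For driver authors, the API declared in src/vnet/interface/rx_queue_funcs.h is meant to be used as follows: set the input node once, register each rx queue (an out-of-range thread index lets next_thread_index () spread queues over workers), rebuild the runtime data, raise per-queue interrupts from the device ISR, and consume the poll vector in the input node. The sketch below only illustrates that call sequence; every my_* name is hypothetical and error handling is omitted, so treat it as a minimal example rather than code from this patch.

#include <vnet/vnet.h>
#include <vnet/interface/rx_queue_funcs.h>

/* hypothetical per-queue driver state */
typedef struct
{
  u32 queue_index; /* index returned by vnet_hw_if_register_rx_queue () */
} my_rxq_t;

extern vlib_node_registration_t my_input_node;
extern my_rxq_t *my_rxqs; /* vector, one element per rx queue */
extern u32 my_device_rx_one_queue (vlib_main_t *vm, vlib_node_runtime_t *node,
				   vlib_frame_t *frame, u32 dev_instance,
				   u32 queue_id);

static void
my_register_rx_queues (vnet_main_t *vnm, u32 hw_if_index, u32 n_rx_queues)
{
  /* one input node per interface; all queues are registered against it */
  vnet_hw_if_set_input_node (vnm, hw_if_index, my_input_node.index);

  for (u32 qid = 0; qid < n_rx_queues; qid++)
    /* ~0 is outside the worker range, so next_thread_index () assigns
       the queue to workers round-robin */
    my_rxqs[qid].queue_index =
      vnet_hw_if_register_rx_queue (vnm, hw_if_index, qid, ~0);

  /* rebuild per-thread runtime data after the queue set changes */
  vnet_hw_if_update_runtime_data (vnm, hw_if_index);
}

/* device interrupt handler path, for a queue running in interrupt mode */
static void
my_rx_irq (vnet_main_t *vnm, my_rxq_t *rxq)
{
  vnet_hw_if_rx_queue_set_int_pending (vnm, rxq->queue_index);
}

/* the input node asks the infra which (dev_instance, queue_id) pairs
   to service instead of keeping its own device/queue list */
static uword
my_input_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
		  vlib_frame_t *frame)
{
  u32 n_rx = 0;
  vnet_hw_if_rxq_poll_vector_t *pv =
    vnet_hw_if_get_rxq_poll_vector (vm, node);

  for (int i = 0; i < vec_len (pv); i++)
    n_rx += my_device_rx_one_queue (vm, node, frame, pv[i].dev_instance,
				    pv[i].queue_id);
  return n_rx;
}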
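
The clib_interrupt helpers added in src/vppinfra/interrupt.h keep two bitmaps per consumer: a plain one written with clib_interrupt_set by the owning thread and an atomic one written with clib_interrupt_set_atomic by other threads. clib_interrupt_get_next folds the atomic bitmap into the plain one while scanning, and clib_interrupt_clear acknowledges a single source. A small standalone sketch of that producer/consumer pattern, assuming a vppinfra build environment and an illustrative size of 64 sources:

#include <vppinfra/mem.h>
#include <vppinfra/interrupt.h>

int
main (int argc, char *argv[])
{
  void *ints = 0;
  int i;

  clib_mem_init (0, 16 << 20);	   /* clib heap, needed by clib_mem_alloc */
  clib_interrupt_init (&ints, 64); /* room for 64 interrupt sources */

  clib_interrupt_set (ints, 3);		/* producer on the owning thread */
  clib_interrupt_set_atomic (ints, 42); /* producer on another thread */

  /* consumer: -1 starts the scan, a return value of -1 ends it; clearing
     each source mirrors what vnet_hw_if_generate_rxq_int_poll_vector ()
     does for pending rx queues */
  for (i = clib_interrupt_get_next (ints, -1); i != -1;
       i = clib_interrupt_get_next (ints, i))
    clib_interrupt_clear (ints, i);

  clib_interrupt_free (&ints);
  return 0;
}

clib_interrupt_resize () can later grow the bitmaps (or shrink them, masking out-of-range bits) while carrying over still-pending state, so callers can adjust the number of sources as queues are added or removed.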