vhost-user: avoid per-packet feature arc lookups
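
This patch resolves the device-input feature arc once per frame instead of
once per packet. If any feature is enabled on the interface, the config
index and the first feature node are looked up before the packet loop, and
each buffer is then merely stamped with current_config_index and
feature_arc_index. It also caches the per-thread vhost_cpu_t pointer
rather than re-indexing vum->cpus[thread_index] on every access, moves to
clib_memset/clib_memcpy_fast, and makes the dispatch loop accumulate
n_rx_packets across queues while swapping interrupt_pending with acquire
semantics.

Condensed from the hunks below (a sketch only; vui, next_index and b_head
are locals of vhost_user_if_input in the surrounding code):

  vnet_feature_main_t *fm = &feature_main;
  u8 feature_arc_idx = fm->device_input_feature_arc_index;
  u32 current_config_index = ~(u32) 0;

  /* Once per frame: resolve config index and first feature node. */
  if (PREDICT_FALSE (vnet_have_features (feature_arc_idx, vui->sw_if_index)))
    {
      vnet_feature_config_main_t *cm;
      cm = &fm->feature_config_mains[feature_arc_idx];
      current_config_index = vec_elt (cm->config_index_by_sw_if_index,
                                      vui->sw_if_index);
      vnet_get_config_data (&cm->config_main, &current_config_index,
                            &next_index, 0);
    }

  /* Per packet: a test and two stores replace the full per-packet lookup. */
  if (current_config_index != ~(u32) 0)
    {
      b_head->current_config_index = current_config_index;
      vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
    }
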
diff --git a/src/vnet/devices/virtio/vhost_user_input.c b/src/vnet/devices/virtio/vhost_user_input.c
index 2614048..97f6792 100644
--- a/src/vnet/devices/virtio/vhost_user_input.c
+++ b/src/vnet/devices/virtio/vhost_user_input.c
@@ -101,7 +101,7 @@ vhost_user_rx_trace (vhost_trace_t * t,
   virtio_net_hdr_mrg_rxbuf_t *hdr;
   u32 hint = 0;
 
-  memset (t, 0, sizeof (*t));
+  clib_memset (t, 0, sizeof (*t));
   t->device_index = vui - vum->vhost_user_interfaces;
   t->qid = qid;
 
@@ -162,8 +162,8 @@ vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
          CLIB_PREFETCH (src2, 64, LOAD);
          CLIB_PREFETCH (src3, 64, LOAD);
 
-         clib_memcpy ((void *) cpy[0].dst, src0, cpy[0].len);
-         clib_memcpy ((void *) cpy[1].dst, src1, cpy[1].len);
+         clib_memcpy_fast ((void *) cpy[0].dst, src0, cpy[0].len);
+         clib_memcpy_fast ((void *) cpy[1].dst, src1, cpy[1].len);
          copy_len -= 2;
          cpy += 2;
        }
@@ -172,7 +172,7 @@ vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
     {
       if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint))))
        return 1;
-      clib_memcpy ((void *) cpy->dst, src0, cpy->len);
+      clib_memcpy_fast ((void *) cpy->dst, src0, cpy->len);
       copy_len -= 1;
       cpy += 1;
     }
@@ -249,6 +249,7 @@ vhost_user_if_input (vlib_main_t * vm,
                     vnet_hw_interface_rx_mode mode)
 {
   vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+  vnet_feature_main_t *fm = &feature_main;
   u16 n_rx_packets = 0;
   u32 n_rx_bytes = 0;
   u16 n_left;
@@ -256,8 +257,10 @@ vhost_user_if_input (vlib_main_t * vm,
   u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
   u32 n_trace = vlib_get_trace_count (vm, node);
   u32 map_hint = 0;
-  u16 thread_index = vm->thread_index;
+  vhost_cpu_t *cpu = &vum->cpus[vm->thread_index];
   u16 copy_len = 0;
+  u8 feature_arc_idx = fm->device_input_feature_arc_index;
+  u32 current_config_index = ~(u32) 0;
 
   /* The descriptor table is not ready yet */
   if (PREDICT_FALSE (txvq->avail == 0))
@@ -338,40 +341,47 @@ vhost_user_if_input (vlib_main_t * vm,
    * processing cost really comes from the memory copy.
    * The assumption is that big packets will fit in 40 buffers.
    */
-  if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1 ||
-                    vum->cpus[thread_index].rx_buffers_len < 40))
+  if (PREDICT_FALSE (cpu->rx_buffers_len < n_left + 1 ||
+                    cpu->rx_buffers_len < 40))
     {
-      u32 curr_len = vum->cpus[thread_index].rx_buffers_len;
-      vum->cpus[thread_index].rx_buffers_len +=
-       vlib_buffer_alloc_from_free_list (vm,
-                                         vum->cpus[thread_index].rx_buffers +
-                                         curr_len,
+      u32 curr_len = cpu->rx_buffers_len;
+      cpu->rx_buffers_len +=
+       vlib_buffer_alloc_from_free_list (vm, cpu->rx_buffers + curr_len,
                                          VHOST_USER_RX_BUFFERS_N - curr_len,
                                          VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
 
       if (PREDICT_FALSE
-         (vum->cpus[thread_index].rx_buffers_len <
-          VHOST_USER_RX_BUFFER_STARVATION))
+         (cpu->rx_buffers_len < VHOST_USER_RX_BUFFER_STARVATION))
        {
          /* In case of buffer starvation, discard some packets from the queue
           * and log the event.
           * We keep doing best effort for the remaining packets. */
-         u32 flush = (n_left + 1 > vum->cpus[thread_index].rx_buffers_len) ?
-           n_left + 1 - vum->cpus[thread_index].rx_buffers_len : 1;
+         u32 flush = (n_left + 1 > cpu->rx_buffers_len) ?
+           n_left + 1 - cpu->rx_buffers_len : 1;
          flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);
 
          n_left -= flush;
          vlib_increment_simple_counter (vnet_main.
                                         interface_main.sw_if_counters +
                                         VNET_INTERFACE_COUNTER_DROP,
-                                        vlib_get_thread_index (),
-                                        vui->sw_if_index, flush);
+                                        vm->thread_index, vui->sw_if_index,
+                                        flush);
 
          vlib_error_count (vm, vhost_user_input_node.index,
                            VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
        }
     }
 
+  if (PREDICT_FALSE (vnet_have_features (feature_arc_idx, vui->sw_if_index)))
+    {
+      vnet_feature_config_main_t *cm;
+      cm = &fm->feature_config_mains[feature_arc_idx];
+      current_config_index = vec_elt (cm->config_index_by_sw_if_index,
+                                     vui->sw_if_index);
+      vnet_get_config_data (&cm->config_main, &current_config_index,
+                           &next_index, 0);
+    }
+
   while (n_left > 0)
     {
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -384,7 +394,7 @@ vhost_user_if_input (vlib_main_t * vm,
          u32 desc_data_offset;
          vring_desc_t *desc_table = txvq->desc;
 
-         if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len <= 1))
+         if (PREDICT_FALSE (cpu->rx_buffers_len <= 1))
            {
              /* Not enough rx_buffers
               * Note: We yield on 1 so we don't need to do an additional
@@ -396,19 +406,15 @@ vhost_user_if_input (vlib_main_t * vm,
 
          desc_current =
            txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask];
-         vum->cpus[thread_index].rx_buffers_len--;
-         bi_current = (vum->cpus[thread_index].rx_buffers)
-           [vum->cpus[thread_index].rx_buffers_len];
+         cpu->rx_buffers_len--;
+         bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
          b_head = b_current = vlib_get_buffer (vm, bi_current);
          to_next[0] = bi_current;      //We do that now so we can forget about bi_current
          to_next++;
          n_left_to_next--;
 
-         vlib_prefetch_buffer_with_index (vm,
-                                          (vum->
-                                           cpus[thread_index].rx_buffers)
-                                          [vum->cpus[thread_index].
-                                           rx_buffers_len - 1], LOAD);
+         vlib_prefetch_buffer_with_index
+           (vm, cpu->rx_buffers[cpu->rx_buffers_len - 1], LOAD);
 
          /* Just preset the used descriptor id and length for later */
          txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id =
@@ -483,8 +489,7 @@ vhost_user_if_input (vlib_main_t * vm,
              if (PREDICT_FALSE
                  (b_current->current_length == VLIB_BUFFER_DATA_SIZE))
                {
-                 if (PREDICT_FALSE
-                     (vum->cpus[thread_index].rx_buffers_len == 0))
+                 if (PREDICT_FALSE (cpu->rx_buffers_len == 0))
                    {
                      /* Cancel speculation */
                      to_next--;
@@ -497,19 +502,14 @@ vhost_user_if_input (vlib_main_t * vm,
                       * not an issue as they would still be valid. Useless,
                       * but valid.
                       */
-                     vhost_user_input_rewind_buffers (vm,
-                                                      &vum->cpus
-                                                      [thread_index],
-                                                      b_head);
+                     vhost_user_input_rewind_buffers (vm, cpu, b_head);
                      n_left = 0;
                      goto stop;
                    }
 
                  /* Get next output */
-                 vum->cpus[thread_index].rx_buffers_len--;
-                 u32 bi_next =
-                   (vum->cpus[thread_index].rx_buffers)[vum->cpus
-                                                        [thread_index].rx_buffers_len];
+                 cpu->rx_buffers_len--;
+                 u32 bi_next = cpu->rx_buffers[cpu->rx_buffers_len];
                  b_current->next_buffer = bi_next;
                  b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
                  bi_current = bi_next;
@@ -517,7 +517,7 @@ vhost_user_if_input (vlib_main_t * vm,
                }
 
              /* Prepare a copy order executed later for the data */
-             vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
+             vhost_copy_t *cpy = &cpu->copy[copy_len];
              copy_len++;
              u32 desc_data_l =
                desc_table[desc_current].len - desc_data_offset;
@@ -552,18 +552,11 @@ vhost_user_if_input (vlib_main_t * vm,
          vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
          b_head->error = 0;
 
-         {
-           u32 next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
-
-           /* redirect if feature path enabled */
-           vnet_feature_start_device_input_x1 (vui->sw_if_index, &next0,
-                                               b_head);
-
-           u32 bi = to_next[-1];       //Cannot use to_next[-1] in the macro
-           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                            to_next, n_left_to_next,
-                                            bi, next0);
-         }
+         if (current_config_index != ~(u32) 0)
+           {
+             b_head->current_config_index = current_config_index;
+             vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
+           }
 
          n_left--;
 
@@ -574,9 +567,8 @@ vhost_user_if_input (vlib_main_t * vm,
           */
          if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
            {
-             if (PREDICT_FALSE
-                 (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
-                                         copy_len, &map_hint)))
+             if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy,
+                                                       copy_len, &map_hint)))
                {
                  vlib_error_count (vm, node->node_index,
                                    VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
@@ -594,9 +586,8 @@ vhost_user_if_input (vlib_main_t * vm,
     }
 
   /* Do the memory copies */
-  if (PREDICT_FALSE
-      (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
-                             copy_len, &map_hint)))
+  if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy, copy_len,
+                                           &map_hint)))
     {
       vlib_error_count (vm, node->node_index,
                        VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
@@ -620,10 +611,10 @@ vhost_user_if_input (vlib_main_t * vm,
   /* increase rx counters */
   vlib_increment_combined_counter
     (vnet_main.interface_main.combined_sw_if_counters
-     + VNET_INTERFACE_COUNTER_RX,
-     vlib_get_thread_index (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
+     + VNET_INTERFACE_COUNTER_RX, vm->thread_index, vui->sw_if_index,
+     n_rx_packets, n_rx_bytes);
 
-  vnet_device_increment_rx_packets (thread_index, n_rx_packets);
+  vnet_device_increment_rx_packets (vm->thread_index, n_rx_packets);
 
   return n_rx_packets;
 }
@@ -641,13 +632,13 @@ VLIB_NODE_FN (vhost_user_input_node) (vlib_main_t * vm,
 
   vec_foreach (dq, rt->devices_and_queues)
   {
-    if (clib_smp_swap (&dq->interrupt_pending, 0) ||
-       (node->state == VLIB_NODE_STATE_POLLING))
+    if ((node->state == VLIB_NODE_STATE_POLLING) ||
+       clib_atomic_swap_acq_n (&dq->interrupt_pending, 0))
       {
        vui =
          pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance);
-       n_rx_packets = vhost_user_if_input (vm, vum, vui, dq->queue_id, node,
-                                           dq->mode);
+       n_rx_packets += vhost_user_if_input (vm, vum, vui, dq->queue_id, node,
+                                            dq->mode);
       }
   }
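
Note: in the last hunk, the change from "n_rx_packets =" to "n_rx_packets +="
is a functional fix as well; with plain assignment the node returned only the
count of the last queue it polled in this dispatch.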