vlib: multiarch vlib_frame_queue_dequeue() 85/32185/3
author: Damjan Marion <damarion@cisco.com>
Sat, 1 May 2021 12:56:13 +0000 (14:56 +0200)
committer: Damjan Marion <dmarion@me.com>
Mon, 3 May 2021 11:09:38 +0000 (11:09 +0000)
The fact that it needs to copy buffer indices justifies this move.

Type: improvement
Change-Id: I5eb815ccc4cca0ef70b092eb83a49b713efdcbeb
Signed-off-by: Damjan Marion <damarion@cisco.com>
src/vlib/buffer_funcs.c
src/vlib/buffer_funcs.h
src/vlib/main.c
src/vlib/threads.c
src/vlib/threads.h

index a0edd7e..fcef2d8 100644 (file)
@@ -361,6 +361,127 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
 
 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_fn);
 
+/*
+ * Drain the frame queue selected by vm->thread_index: for each valid
+ * element, copy its buffer indices into a frame for fqm->node_index.
+ * Returns the number of queue elements processed (0 if node_index is ~0).
+ */
+u32 __clib_section (".vlib_frame_queue_dequeue_fn")
+CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
+(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+{
+  u32 thread_id = vm->thread_index;
+  vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
+  vlib_frame_queue_elt_t *elt;
+  u32 *from, *to;
+  vlib_frame_t *f;
+  int msg_type;
+  int processed = 0;
+  u32 vectors = 0;
+
+  ASSERT (fq);
+  ASSERT (vm == vlib_global_main.vlib_mains[thread_id]);
+
+  if (PREDICT_FALSE (fqm->node_index == ~0))
+    return 0;
+  /*
+   * Gather trace data for frame queues
+   */
+  if (PREDICT_FALSE (fq->trace))
+    {
+      frame_queue_trace_t *fqt;
+      frame_queue_nelt_counter_t *fqh;
+      u32 elix;
+
+      fqt = &fqm->frame_queue_traces[thread_id];
+
+      fqt->nelts = fq->nelts;
+      fqt->head = fq->head;
+      fqt->head_hint = fq->head_hint;
+      fqt->tail = fq->tail;
+      fqt->threshold = fq->vector_threshold;
+      fqt->n_in_use = fqt->tail - fqt->head;
+      if (fqt->n_in_use >= fqt->nelts)
+       {
+         // clamp to nelts - 1 so the histogram index below stays under nelts
+         fqt->n_in_use = fqt->nelts - 1;
+       }
+
+      /* Record the number of elements in use in the histogram */
+      fqh = &fqm->frame_queue_histogram[thread_id];
+      fqh->count[fqt->n_in_use]++;
+
+      /* Snapshot n_vectors of all nelts slots, starting just after head */
+      for (elix = 0; elix < fqt->nelts; elix++)
+       {
+         elt = fq->elts + ((fq->head + 1 + elix) & (fq->nelts - 1));
+         if (1 || elt->valid) /* the 1 || makes this test always true */
+           {
+             fqt->n_vectors[elix] = elt->n_vectors;
+           }
+       }
+      fqt->written = 1;
+    }
+
+  while (1)
+    {
+      vlib_buffer_t *b;
+      if (fq->head == fq->tail) /* head has caught up with tail: stop */
+       {
+         fq->head_hint = fq->head;
+         return processed;
+       }
+
+      elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
+
+      if (!elt->valid) /* next slot is not marked valid yet: stop */
+       {
+         fq->head_hint = fq->head;
+         return processed;
+       }
+
+      from = elt->buffer_index;
+      msg_type = elt->msg_type; /* only consumed by the ASSERT below */
+
+      ASSERT (msg_type == VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME);
+      ASSERT (elt->n_vectors <= VLIB_FRAME_SIZE);
+
+      f = vlib_get_frame_to_node (vm, fqm->node_index);
+
+      /* If the first buffer is traced, set the frame trace flag (from[0] is read unconditionally) */
+      b = vlib_get_buffer (vm, from[0]);
+      if (b->flags & VLIB_BUFFER_IS_TRACED)
+       f->frame_flags |= VLIB_NODE_FLAG_TRACE;
+
+      to = vlib_frame_vector_args (f);
+
+      vlib_buffer_copy_indices (to, from, elt->n_vectors);
+
+      vectors += elt->n_vectors;
+      f->n_vectors = elt->n_vectors;
+      vlib_put_frame_to_node (vm, fqm->node_index, f);
+
+      elt->valid = 0;
+      elt->n_vectors = 0;
+      elt->msg_type = 0xfefefefe;
+      CLIB_MEMORY_BARRIER (); /* sits between the element reset and the head advance */
+      fq->head++;
+      processed++;
+
+      /*
+       * Stop once the vectors handed off in this call reach vector_threshold
+       */
+      if (vectors >= fq->vector_threshold)
+       {
+         fq->head_hint = fq->head;
+         return processed;
+       }
+    }
+  ASSERT (0); /* not reached: every exit from the loop above is a return */
+  return processed;
+}
+CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_fn);
+
 #ifndef CLIB_MARCH_VARIANT
 vlib_buffer_func_main_t vlib_buffer_func_main;
 
@@ -374,6 +495,8 @@ vlib_buffer_funcs_init (vlib_main_t *vm)
     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn);
   bfm->buffer_enqueue_to_thread_fn =
     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn);
+  bfm->frame_queue_dequeue_fn =
+    CLIB_MARCH_FN_POINTER (vlib_frame_queue_dequeue_fn);
   return 0;
 }
 
index 7829986..d579e1b 100644 (file)
@@ -62,11 +62,16 @@ typedef void (vlib_buffer_enqueue_to_single_next_fn_t) (
 typedef u32 (vlib_buffer_enqueue_to_thread_fn_t) (
   vlib_main_t *vm, u32 frame_queue_index, u32 *buffer_indices,
   u16 *thread_indices, u32 n_packets, int drop_on_congestion);
+
+typedef u32 (vlib_frame_queue_dequeue_fn_t) (vlib_main_t *vm,
+                                            vlib_frame_queue_main_t *fqm);
+
 typedef struct
 {
   vlib_buffer_enqueue_to_next_fn_t *buffer_enqueue_to_next_fn;
   vlib_buffer_enqueue_to_single_next_fn_t *buffer_enqueue_to_single_next_fn;
   vlib_buffer_enqueue_to_thread_fn_t *buffer_enqueue_to_thread_fn;
+  vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
 } vlib_buffer_func_main_t;
 
 extern vlib_buffer_func_main_t vlib_buffer_func_main;
index f833aa2..c7c4aba 100644 (file)
@@ -1581,6 +1581,8 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
       if (PREDICT_FALSE (vm->check_frame_queues + frame_queue_check_counter))
        {
          u32 processed = 0;
+         vlib_frame_queue_dequeue_fn_t *fn =
+           vlib_buffer_func_main.frame_queue_dequeue_fn;
 
          if (vm->check_frame_queues)
            {
@@ -1589,7 +1591,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
            }
 
          vec_foreach (fqm, tm->frame_queue_mains)
-           processed += vlib_frame_queue_dequeue (vm, fqm);
+           processed += (fn) (vm, fqm);
 
          /* No handoff queue work found? */
          if (processed)
index 11d5a72..c65460e 100644 (file)
@@ -1644,145 +1644,6 @@ vlib_worker_wait_one_loop (void)
   return;
 }
 
-/*
- * Check the frame queue to see if any frames are available.
- * If so, pull the packets off the frames and put them to
- * the handoff node.
- */
-int
-vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm)
-{
-  u32 thread_id = vm->thread_index;
-  vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
-  vlib_frame_queue_elt_t *elt;
-  u32 *from, *to;
-  vlib_frame_t *f;
-  int msg_type;
-  int processed = 0;
-  u32 n_left_to_node;
-  u32 vectors = 0;
-
-  ASSERT (fq);
-  ASSERT (vm == vlib_global_main.vlib_mains[thread_id]);
-
-  if (PREDICT_FALSE (fqm->node_index == ~0))
-    return 0;
-  /*
-   * Gather trace data for frame queues
-   */
-  if (PREDICT_FALSE (fq->trace))
-    {
-      frame_queue_trace_t *fqt;
-      frame_queue_nelt_counter_t *fqh;
-      u32 elix;
-
-      fqt = &fqm->frame_queue_traces[thread_id];
-
-      fqt->nelts = fq->nelts;
-      fqt->head = fq->head;
-      fqt->head_hint = fq->head_hint;
-      fqt->tail = fq->tail;
-      fqt->threshold = fq->vector_threshold;
-      fqt->n_in_use = fqt->tail - fqt->head;
-      if (fqt->n_in_use >= fqt->nelts)
-       {
-         // if beyond max then use max
-         fqt->n_in_use = fqt->nelts - 1;
-       }
-
-      /* Record the number of elements in use in the histogram */
-      fqh = &fqm->frame_queue_histogram[thread_id];
-      fqh->count[fqt->n_in_use]++;
-
-      /* Record a snapshot of the elements in use */
-      for (elix = 0; elix < fqt->nelts; elix++)
-       {
-         elt = fq->elts + ((fq->head + 1 + elix) & (fq->nelts - 1));
-         if (1 || elt->valid)
-           {
-             fqt->n_vectors[elix] = elt->n_vectors;
-           }
-       }
-      fqt->written = 1;
-    }
-
-  while (1)
-    {
-      vlib_buffer_t *b;
-      if (fq->head == fq->tail)
-       {
-         fq->head_hint = fq->head;
-         return processed;
-       }
-
-      elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
-
-      if (!elt->valid)
-       {
-         fq->head_hint = fq->head;
-         return processed;
-       }
-
-      from = elt->buffer_index;
-      msg_type = elt->msg_type;
-
-      ASSERT (msg_type == VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME);
-      ASSERT (elt->n_vectors <= VLIB_FRAME_SIZE);
-
-      f = vlib_get_frame_to_node (vm, fqm->node_index);
-
-      /* If the first vector is traced, set the frame trace flag */
-      b = vlib_get_buffer (vm, from[0]);
-      if (b->flags & VLIB_BUFFER_IS_TRACED)
-       f->frame_flags |= VLIB_NODE_FLAG_TRACE;
-
-      to = vlib_frame_vector_args (f);
-
-      n_left_to_node = elt->n_vectors;
-
-      while (n_left_to_node >= 4)
-       {
-         to[0] = from[0];
-         to[1] = from[1];
-         to[2] = from[2];
-         to[3] = from[3];
-         to += 4;
-         from += 4;
-         n_left_to_node -= 4;
-       }
-
-      while (n_left_to_node > 0)
-       {
-         to[0] = from[0];
-         to++;
-         from++;
-         n_left_to_node--;
-       }
-
-      vectors += elt->n_vectors;
-      f->n_vectors = elt->n_vectors;
-      vlib_put_frame_to_node (vm, fqm->node_index, f);
-
-      elt->valid = 0;
-      elt->n_vectors = 0;
-      elt->msg_type = 0xfefefefe;
-      CLIB_MEMORY_BARRIER ();
-      fq->head++;
-      processed++;
-
-      /*
-       * Limit the number of packets pushed into the graph
-       */
-      if (vectors >= fq->vector_threshold)
-       {
-         fq->head_hint = fq->head;
-         return processed;
-       }
-    }
-  ASSERT (0);
-  return processed;
-}
-
 void
 vlib_worker_thread_fn (void *arg)
 {
index 1da9d22..9d9d387 100644 (file)
@@ -179,9 +179,6 @@ int vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
                              u32 frame_queue_index, vlib_frame_t * frame,
                              vlib_frame_queue_msg_type_t type);
 
-int
-vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm);
-
 void vlib_worker_thread_node_runtime_update (void);
 
 void vlib_create_worker_threads (vlib_main_t * vm, int n,