vlib: implement aux data handoff 66/36166/4
author Mohammed Hawari <mohammed@hawari.fr>
Wed, 18 May 2022 08:08:47 +0000 (10:08 +0200)
committer Damjan Marion <dmarion@me.com>
Tue, 24 May 2022 14:03:00 +0000 (14:03 +0000)
Type: improvement
Change-Id: I20b41537a249a55f01004e45392b34adaa8fd792
Signed-off-by: Mohammed Hawari <mohammed@hawari.fr>
src/vlib/buffer_funcs.c
src/vlib/buffer_funcs.h
src/vlib/buffer_node.h
src/vlib/main.c
src/vlib/threads.c
src/vlib/threads.h
src/vppinfra/cpu.h

index 32c2d1b..4ad652b 100644 (file)
@@ -202,7 +202,8 @@ vlib_buffer_enqueue_to_thread_inline (vlib_main_t *vm,
                                      vlib_node_runtime_t *node,
                                      vlib_frame_queue_main_t *fqm,
                                      u32 *buffer_indices, u16 *thread_indices,
-                                     u32 n_packets, int drop_on_congestion)
+                                     u32 n_packets, int drop_on_congestion,
+                                     int with_aux, u32 *aux_data)
 {
   u32 drop_list[VLIB_FRAME_SIZE], n_drop = 0;
   vlib_frame_bitmap_t mask, used_elts = {};
@@ -218,6 +219,9 @@ more:
 
   n_comp = clib_compress_u32 (hf ? hf->buffer_index : drop_list + n_drop,
                              buffer_indices, mask, n_packets);
+  /* no frame elt: drop_list already holds the buffer indices, keep them */
+  if (with_aux && hf)
+    clib_compress_u32 (hf->aux_data, aux_data, mask, n_packets);
 
   if (hf)
     {
@@ -269,7 +273,7 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
     {
       n_enq += vlib_buffer_enqueue_to_thread_inline (
        vm, node, fqm, buffer_indices, thread_indices, VLIB_FRAME_SIZE,
-       drop_on_congestion);
+       drop_on_congestion, 0 /* with_aux */, NULL);
       buffer_indices += VLIB_FRAME_SIZE;
       thread_indices += VLIB_FRAME_SIZE;
       n_packets -= VLIB_FRAME_SIZE;
@@ -278,24 +282,58 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
   if (n_packets == 0)
     return n_enq;
 
-  n_enq += vlib_buffer_enqueue_to_thread_inline (vm, node, fqm, buffer_indices,
-                                                thread_indices, n_packets,
-                                                drop_on_congestion);
+  n_enq += vlib_buffer_enqueue_to_thread_inline (
+    vm, node, fqm, buffer_indices, thread_indices, n_packets,
+    drop_on_congestion, 0 /* with_aux */, NULL);
+
+  return n_enq;
+}
+
+/* Hand off buffers along with their parallel aux[] words, frame by frame. */
+u32 __clib_section (".vlib_buffer_enqueue_to_thread_with_aux_fn")
+CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_with_aux_fn)
+(vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index,
+ u32 *buffer_indices, u32 *aux, u16 *thread_indices, u32 n_packets,
+ int drop_on_congestion)
+{
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_frame_queue_main_t *fqm;
+  u32 n_enq = 0;
+
+  fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
+
+  while (n_packets >= VLIB_FRAME_SIZE)
+    {
+      n_enq += vlib_buffer_enqueue_to_thread_inline (
+       vm, node, fqm, buffer_indices, thread_indices, VLIB_FRAME_SIZE,
+       drop_on_congestion, 1 /* with_aux */, aux);
+      buffer_indices += VLIB_FRAME_SIZE;
+      aux += VLIB_FRAME_SIZE; /* keep aux in step with buffer_indices */
+      thread_indices += VLIB_FRAME_SIZE;
+      n_packets -= VLIB_FRAME_SIZE;
+    }
+
+  if (n_packets)
+    n_enq += vlib_buffer_enqueue_to_thread_inline (
+      vm, node, fqm, buffer_indices, thread_indices, n_packets,
+      drop_on_congestion, 1 /* with_aux */, aux);
 
   return n_enq;
 }
 
 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_fn);
+CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_with_aux_fn);
 
-u32 __clib_section (".vlib_frame_queue_dequeue_fn")
-CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
-(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+static_always_inline u32
+vlib_frame_queue_dequeue_inline (vlib_main_t *vm, vlib_frame_queue_main_t *fqm,
+                                u8 with_aux)
 {
   u32 thread_id = vm->thread_index;
   vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
   u32 mask = fq->nelts - 1;
   vlib_frame_queue_elt_t *elt;
-  u32 n_free, n_copy, *from, *to = 0, processed = 0, vectors = 0;
+  u32 n_free, n_copy, *from, *from_aux, *to = 0, *to_aux = 0, processed = 0,
+                                       vectors = 0;
   vlib_frame_t *f = 0;
 
   ASSERT (fq);
@@ -352,13 +390,16 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
        break;
 
       from = elt->buffer_index + elt->offset;
-
+      if (with_aux)
+       from_aux = elt->aux_data + elt->offset;
       ASSERT (elt->offset + elt->n_vectors <= VLIB_FRAME_SIZE);
 
       if (f == 0)
        {
          f = vlib_get_frame_to_node (vm, fqm->node_index);
          to = vlib_frame_vector_args (f);
+         if (with_aux)
+           to_aux = vlib_frame_aux_args (f);
          n_free = VLIB_FRAME_SIZE;
        }
 
@@ -369,6 +410,12 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
 
       vlib_buffer_copy_indices (to, from, n_copy);
       to += n_copy;
+      if (with_aux)
+       {
+         vlib_buffer_copy_indices (to_aux, from_aux, n_copy);
+         to_aux += n_copy;
+       }
+
       n_free -= n_copy;
       vectors += n_copy;
 
@@ -408,8 +455,24 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
   return processed;
 }
 
+u32 __clib_section (".vlib_frame_queue_dequeue_fn")
+CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
+(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+{
+  return vlib_frame_queue_dequeue_inline (vm, fqm, 0 /* with_aux */);
+}
+
 CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_fn);
 
+u32 __clib_section (".vlib_frame_queue_dequeue_with_aux_fn")
+CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_with_aux_fn)
+(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+{
+  return vlib_frame_queue_dequeue_inline (vm, fqm, 1 /* with_aux */);
+}
+
+CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_with_aux_fn);
+
 #ifndef CLIB_MARCH_VARIANT
 vlib_buffer_func_main_t vlib_buffer_func_main;
 
@@ -423,8 +486,8 @@ vlib_buffer_funcs_init (vlib_main_t *vm)
     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn);
   bfm->buffer_enqueue_to_thread_fn =
     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn);
-  bfm->frame_queue_dequeue_fn =
-    CLIB_MARCH_FN_POINTER (vlib_frame_queue_dequeue_fn);
+  bfm->buffer_enqueue_to_thread_with_aux_fn =
+    CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_with_aux_fn);
   return 0;
 }
 
index 30fe234..00dce80 100644 (file)
@@ -65,15 +65,18 @@ typedef u32 (vlib_buffer_enqueue_to_thread_fn_t) (
   u32 *buffer_indices, u16 *thread_indices, u32 n_packets,
   int drop_on_congestion);
 
-typedef u32 (vlib_frame_queue_dequeue_fn_t) (vlib_main_t *vm,
-                                            vlib_frame_queue_main_t *fqm);
+typedef u32 (vlib_buffer_enqueue_to_thread_with_aux_fn_t) (
+  vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index,
+  u32 *buffer_indices, u32 *aux, u16 *thread_indices, u32 n_packets,
+  int drop_on_congestion);
 
 typedef struct
 {
   vlib_buffer_enqueue_to_next_fn_t *buffer_enqueue_to_next_fn;
   vlib_buffer_enqueue_to_single_next_fn_t *buffer_enqueue_to_single_next_fn;
   vlib_buffer_enqueue_to_thread_fn_t *buffer_enqueue_to_thread_fn;
-  vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
+  vlib_buffer_enqueue_to_thread_with_aux_fn_t
+    *buffer_enqueue_to_thread_with_aux_fn;
 } vlib_buffer_func_main_t;
 
 extern vlib_buffer_func_main_t vlib_buffer_func_main;
index 10ebd25..a4c259f 100644 (file)
@@ -391,6 +391,20 @@ vlib_buffer_enqueue_to_thread (vlib_main_t *vm, vlib_node_runtime_t *node,
               n_packets, drop_on_congestion);
 }
 
+static_always_inline u32
+vlib_buffer_enqueue_to_thread_with_aux (vlib_main_t *vm,
+                                       vlib_node_runtime_t *node,
+                                       u32 frame_queue_index,
+                                       u32 *buffer_indices, u32 *aux,
+                                       u16 *thread_indices, u32 n_packets,
+                                       int drop_on_congestion)
+{
+  vlib_buffer_enqueue_to_thread_with_aux_fn_t *fn;
+  fn = vlib_buffer_func_main.buffer_enqueue_to_thread_with_aux_fn;
+  return (fn) (vm, node, frame_queue_index, buffer_indices, aux,
+              thread_indices, n_packets, drop_on_congestion);
+}
+
 #endif /* included_vlib_buffer_node_h */
 
 /*
index 41d18e2..9c7d6f5 100644 (file)
@@ -1519,8 +1519,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
       if (PREDICT_FALSE (vm->check_frame_queues + frame_queue_check_counter))
        {
          u32 processed = 0;
-         vlib_frame_queue_dequeue_fn_t *fn =
-           vlib_buffer_func_main.frame_queue_dequeue_fn;
+         vlib_frame_queue_dequeue_fn_t *fn;
 
          if (vm->check_frame_queues)
            {
@@ -1529,7 +1528,10 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
            }
 
          vec_foreach (fqm, tm->frame_queue_mains)
-           processed += (fn) (vm, fqm);
+           {
+             fn = fqm->frame_queue_dequeue_fn;
+             processed += (fn) (vm, fqm);
+           }
 
          /* No handoff queue work found? */
          if (processed)
index 57ba39a..6c39e68 100644 (file)
@@ -1587,12 +1587,18 @@ VLIB_REGISTER_THREAD (worker_thread_reg, static) = {
 };
 /* *INDENT-ON* */
 
+extern clib_march_fn_registration
+  *vlib_frame_queue_dequeue_with_aux_fn_march_fn_registrations;
+extern clib_march_fn_registration
+  *vlib_frame_queue_dequeue_fn_march_fn_registrations;
 u32
 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
 {
   vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_main_t *vm = vlib_get_main ();
   vlib_frame_queue_main_t *fqm;
   vlib_frame_queue_t *fq;
+  vlib_node_t *node;
   int i;
   u32 num_threads;
 
@@ -1604,6 +1610,19 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
 
   vec_add2 (tm->frame_queue_mains, fqm, 1);
 
+  node = vlib_get_node (vm, node_index); /* fqm->node_index not set yet */
+  ASSERT (node);
+  if (node->aux_offset)
+    {
+      fqm->frame_queue_dequeue_fn =
+       CLIB_MARCH_FN_VOID_POINTER (vlib_frame_queue_dequeue_with_aux_fn);
+    }
+  else
+    {
+      fqm->frame_queue_dequeue_fn =
+       CLIB_MARCH_FN_VOID_POINTER (vlib_frame_queue_dequeue_fn);
+    }
+
   fqm->node_index = node_index;
   fqm->frame_queue_nelts = frame_queue_nelts;
 
index b25d476..97df3d2 100644 (file)
@@ -75,6 +75,7 @@ typedef struct
 
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
   u32 buffer_index[VLIB_FRAME_SIZE];
+  u32 aux_data[VLIB_FRAME_SIZE];
 }
 vlib_frame_queue_elt_t;
 
@@ -133,7 +134,10 @@ typedef struct
 }
 vlib_frame_queue_t;
 
-typedef struct
+struct vlib_frame_queue_main_t_;
+typedef u32 (vlib_frame_queue_dequeue_fn_t) (
+  vlib_main_t *vm, struct vlib_frame_queue_main_t_ *fqm);
+typedef struct vlib_frame_queue_main_t_
 {
   u32 node_index;
   u32 frame_queue_nelts;
@@ -143,6 +147,7 @@ typedef struct
   /* for frame queue tracing */
   frame_queue_trace_t *frame_queue_traces;
   frame_queue_nelt_counter_t *frame_queue_histogram;
+  vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
 } vlib_frame_queue_main_t;
 
 typedef struct
index 329e5cc..d123f39 100644 (file)
@@ -84,6 +84,9 @@ clib_march_select_fn_ptr (clib_march_fn_registration * r)
 #define CLIB_MARCH_FN_POINTER(fn)                                             \
   (__typeof__ (fn) *) clib_march_select_fn_ptr (fn##_march_fn_registrations);
 
+#define CLIB_MARCH_FN_VOID_POINTER(fn) /* untyped; caller adds the ';' */     \
+  clib_march_select_fn_ptr (fn##_march_fn_registrations)
+
 #define _CLIB_MARCH_FN_REGISTRATION(fn) \
 static clib_march_fn_registration \
 CLIB_MARCH_SFX(fn##_march_fn_registration) = \