From 78fd7e810c24b9d638ac7c7f08edabf692543743 Mon Sep 17 00:00:00 2001
From: Damjan Marion
Date: Fri, 20 Jul 2018 18:47:05 +0200
Subject: [PATCH] Add congestion drop in interface handoff

This prevents a deadlock in the case where worker A sends to worker B
and worker B sends to worker A.

Change-Id: Id9436960f932c58325fe4f5ef8ec67b50031aeda
Signed-off-by: Damjan Marion
---
 src/vlib/buffer_node.h | 53 +++++++++++++++++++++++++++++++-------------------
 src/vlib/threads.c     | 15 +++++++++++++-
 src/vlib/threads.h     | 10 ++++++++++
 src/vnet/handoff.c     | 30 +++++++++++++++++++++++++---
 4 files changed, 84 insertions(+), 24 deletions(-)

diff --git a/src/vlib/buffer_node.h b/src/vlib/buffer_node.h
index cfdb0567a0c..93ffb1e9dce 100644
--- a/src/vlib/buffer_node.h
+++ b/src/vlib/buffer_node.h
@@ -443,28 +443,23 @@ vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node,
   vlib_put_next_frame (vm, node, next_index, n_left_to_next);
 }
 
-static_always_inline void
+static_always_inline u32
 vlib_buffer_enqueue_to_thread (vlib_main_t * vm, u32 frame_queue_index,
                                u32 * buffer_indices, u16 * thread_indices,
-                               u32 n_left)
+                               u32 n_packets, int drop_on_congestion)
 {
   vlib_thread_main_t *tm = vlib_get_thread_main ();
-  static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_thread_index =
-    0;
-  static __thread vlib_frame_queue_t **congested_handoff_queue_by_thread_index
-    = 0;
+  vlib_frame_queue_main_t *fqm;
+  vlib_frame_queue_per_thread_data_t *ptd;
+  u32 n_left = n_packets;
+  u32 drop_list[VLIB_FRAME_SIZE], *dbi = drop_list, n_drop = 0;
   vlib_frame_queue_elt_t *hf = 0;
   u32 n_left_to_next_thread = 0, *to_next_thread = 0;
   u32 next_thread_index, current_thread_index = ~0;
   int i;
 
-  if (PREDICT_FALSE (handoff_queue_elt_by_thread_index == 0))
-    {
-      vec_validate (handoff_queue_elt_by_thread_index, tm->n_vlib_mains - 1);
-      vec_validate_init_empty (congested_handoff_queue_by_thread_index,
-                               tm->n_vlib_mains - 1,
-                               (vlib_frame_queue_t *) (~0));
-    }
+  fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
+  ptd = vec_elt_at_index (fqm->per_thread_data, vm->thread_index);
 
   while (n_left)
     {
@@ -472,12 +467,24 @@ vlib_buffer_enqueue_to_thread (vlib_main_t * vm, u32 frame_queue_index,
 
       if (next_thread_index != current_thread_index)
         {
+
+          if (drop_on_congestion &&
+              is_vlib_frame_queue_congested
+              (frame_queue_index, next_thread_index, fqm->queue_hi_thresh,
+               ptd->congested_handoff_queue_by_thread_index))
+            {
+              dbi[0] = buffer_indices[0];
+              dbi++;
+              n_drop++;
+              goto next;
+            }
+
           if (hf)
             hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;
 
           hf = vlib_get_worker_handoff_queue_elt (frame_queue_index,
                                                   next_thread_index,
-                                                  handoff_queue_elt_by_thread_index);
+                                                  ptd->handoff_queue_elt_by_thread_index);
 
           n_left_to_next_thread = VLIB_FRAME_SIZE - hf->n_vectors;
           to_next_thread = &hf->buffer_index[hf->n_vectors];
@@ -493,11 +500,12 @@ vlib_buffer_enqueue_to_thread (vlib_main_t * vm, u32 frame_queue_index,
           hf->n_vectors = VLIB_FRAME_SIZE;
           vlib_put_frame_queue_elt (hf);
           current_thread_index = ~0;
-          handoff_queue_elt_by_thread_index[next_thread_index] = 0;
+          ptd->handoff_queue_elt_by_thread_index[next_thread_index] = 0;
           hf = 0;
         }
 
       /* next */
+    next:
       thread_indices += 1;
       buffer_indices += 1;
       n_left -= 1;
@@ -507,11 +515,11 @@ vlib_buffer_enqueue_to_thread (vlib_main_t * vm, u32 frame_queue_index,
     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;
 
   /* Ship frames to the thread nodes */
-  for (i = 0; i < vec_len (handoff_queue_elt_by_thread_index); i++)
+  for (i = 0; i < vec_len (ptd->handoff_queue_elt_by_thread_index); i++)
     {
-      if (handoff_queue_elt_by_thread_index[i])
+      if (ptd->handoff_queue_elt_by_thread_index[i])
         {
-          hf = handoff_queue_elt_by_thread_index[i];
+          hf = ptd->handoff_queue_elt_by_thread_index[i];
           /*
            * It works better to let the handoff node
            * rate-adapt, always ship the handoff queue element.
@@ -519,14 +527,19 @@ vlib_buffer_enqueue_to_thread (vlib_main_t * vm, u32 frame_queue_index,
           if (1 || hf->n_vectors == hf->last_n_vectors)
             {
               vlib_put_frame_queue_elt (hf);
-              handoff_queue_elt_by_thread_index[i] = 0;
+              ptd->handoff_queue_elt_by_thread_index[i] = 0;
             }
           else
             hf->last_n_vectors = hf->n_vectors;
         }
-      congested_handoff_queue_by_thread_index[i] =
+      ptd->congested_handoff_queue_by_thread_index[i] =
         (vlib_frame_queue_t *) (~0);
     }
+
+  if (drop_on_congestion && n_drop)
+    vlib_buffer_free (vm, drop_list, n_drop);
+
+  return n_packets - n_drop;
 }
 
 #endif /* included_vlib_buffer_node_h */
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index c53c5d6c170..70f22b10fa1 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -24,7 +24,7 @@
 
 DECLARE_CJ_GLOBAL_LOG;
 
-#define FRAME_QUEUE_NELTS 32
+#define FRAME_QUEUE_NELTS 64
 
 u32
 vl (void *p)
@@ -1795,16 +1795,29 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
   if (frame_queue_nelts == 0)
     frame_queue_nelts = FRAME_QUEUE_NELTS;
 
+  ASSERT (frame_queue_nelts >= 8);
+
   vec_add2 (tm->frame_queue_mains, fqm, 1);
 
   fqm->node_index = node_index;
+  fqm->frame_queue_nelts = frame_queue_nelts;
+  fqm->queue_hi_thresh = frame_queue_nelts - 2;
 
   vec_validate (fqm->vlib_frame_queues, tm->n_vlib_mains - 1);
+  vec_validate (fqm->per_thread_data, tm->n_vlib_mains - 1);
   _vec_len (fqm->vlib_frame_queues) = 0;
   for (i = 0; i < tm->n_vlib_mains; i++)
     {
+      vlib_frame_queue_per_thread_data_t *ptd;
       fq = vlib_frame_queue_alloc (frame_queue_nelts);
       vec_add1 (fqm->vlib_frame_queues, fq);
+
+      ptd = vec_elt_at_index (fqm->per_thread_data, i);
+      vec_validate (ptd->handoff_queue_elt_by_thread_index,
+                    tm->n_vlib_mains - 1);
+      vec_validate_init_empty (ptd->congested_handoff_queue_by_thread_index,
+                               tm->n_vlib_mains - 1,
+                               (vlib_frame_queue_t *) (~0));
     }
 
   return (fqm - tm->frame_queue_mains);
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index f78ec1b9fa5..b47a633c2cc 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -161,10 +161,20 @@ typedef struct
 }
 vlib_frame_queue_t;
 
+typedef struct
+{
+  vlib_frame_queue_elt_t **handoff_queue_elt_by_thread_index;
+  vlib_frame_queue_t **congested_handoff_queue_by_thread_index;
+} vlib_frame_queue_per_thread_data_t;
+
 typedef struct
 {
   u32 node_index;
+  u32 frame_queue_nelts;
+  u32 queue_hi_thresh;
+
   vlib_frame_queue_t **vlib_frame_queues;
+  vlib_frame_queue_per_thread_data_t *per_thread_data;
 
   /* for frame queue tracing */
   frame_queue_trace_t *frame_queue_traces;
diff --git a/src/vnet/handoff.c b/src/vnet/handoff.c
index 11b877e500d..4e635e5d1b0 100644
--- a/src/vnet/handoff.c
+++ b/src/vnet/handoff.c
@@ -49,6 +49,23 @@ typedef struct
   u32 buffer_index;
 } worker_handoff_trace_t;
 
+#define foreach_worker_handoff_error \
+  _(CONGESTION_DROP, "congestion drop")
+
+typedef enum
+{
+#define _(sym,str) WORKER_HANDOFF_ERROR_##sym,
+  foreach_worker_handoff_error
+#undef _
+    WORKER_HANDOFF_N_ERROR,
+} worker_handoff_error_t;
+
+static char *worker_handoff_error_strings[] = {
+#define _(sym,string) string,
+  foreach_worker_handoff_error
+#undef _
+};
+
 /* packet trace format function */
 static u8 *
 format_worker_handoff_trace (u8 * s, va_list * args)
@@ -71,7 +88,7 @@ worker_handoff_node_fn (vlib_main_t * vm,
 {
   handoff_main_t *hm = &handoff_main;
   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
-  u32 n_left_from, *from;
+  u32 n_enq, n_left_from, *from;
   u16 thread_indices[VLIB_FRAME_SIZE], *ti;
 
   from = vlib_frame_vector_args (frame);
@@ -130,8 +147,13 @@ worker_handoff_node_fn (vlib_main_t * vm,
       b += 1;
     }
 
-  vlib_buffer_enqueue_to_thread (vm, hm->frame_queue_index, from,
-                                 thread_indices, frame->n_vectors);
+  n_enq = vlib_buffer_enqueue_to_thread (vm, hm->frame_queue_index, from,
+                                         thread_indices, frame->n_vectors, 1);
+
+  if (n_enq < frame->n_vectors)
+    vlib_node_increment_counter (vm, node->node_index,
+                                 WORKER_HANDOFF_ERROR_CONGESTION_DROP,
+                                 frame->n_vectors - n_enq);
 
   return frame->n_vectors;
 }
@@ -142,6 +164,8 @@ VLIB_REGISTER_NODE (worker_handoff_node) = {
   .vector_size = sizeof (u32),
   .format_trace = format_worker_handoff_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(worker_handoff_error_strings),
+  .error_strings = worker_handoff_error_strings,
   .n_next_nodes = 1,
 
   .next_nodes = {
-- 
2.16.6
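
Usage note (editor's sketch, not part of the patch): after this change,
vlib_buffer_enqueue_to_thread() returns the number of buffers it actually
enqueued, and with the new drop_on_congestion argument set it frees any
buffer aimed at a frame queue deeper than queue_hi_thresh instead of
blocking on it. The fragment below sketches a hypothetical out-of-tree
handoff node consuming that contract; my_fq_index, the round-robin thread
pick, and MY_HANDOFF_ERROR_CONGESTION_DROP are illustrative stand-ins,
not names introduced by this patch.

#include <vlib/vlib.h>
#include <vlib/threads.h>
#include <vlib/buffer_node.h>

/* Hypothetical error counter index; a real node registers an error
   string table, as worker_handoff does in the hunk above. */
#define MY_HANDOFF_ERROR_CONGESTION_DROP 0

/* Hypothetical frame queue index, set once at init time, e.g.
   my_fq_index = vlib_frame_queue_main_init (my_node.index, 0);
   (0 selects the default size, now FRAME_QUEUE_NELTS = 64). */
static u32 my_fq_index;

static uword
my_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                    vlib_frame_t * frame)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u32 *from = vlib_frame_vector_args (frame);
  u16 thread_indices[VLIB_FRAME_SIZE];
  u32 n_enq, i;

  /* Stand-in for a real flow hash: round-robin over workers
     1 .. n_vlib_mains - 1 (assumes at least one worker exists). */
  for (i = 0; i < frame->n_vectors; i++)
    thread_indices[i] = 1 + (i % (tm->n_vlib_mains - 1));

  /* drop_on_congestion = 1: buffers bound for a congested frame
     queue are freed by the callee, so two workers handing off to
     each other can no longer deadlock. */
  n_enq = vlib_buffer_enqueue_to_thread (vm, my_fq_index, from,
                                         thread_indices,
                                         frame->n_vectors, 1);

  /* Anything not enqueued was already freed; just count the drops. */
  if (n_enq < frame->n_vectors)
    vlib_node_increment_counter (vm, node->node_index,
                                 MY_HANDOFF_ERROR_CONGESTION_DROP,
                                 frame->n_vectors - n_enq);
  return frame->n_vectors;
}

Callers that cannot tolerate drops can pass drop_on_congestion = 0 to keep
the old blocking behaviour; n_drop then stays zero and the function returns
n_packets unchanged.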