X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fdevices%2Fvirtio%2Fvhost_user_input.c;h=97303ce379188621fd325ddb059671aa0355a5c3;hb=96e8cd0e1d6c21e5f47470c146958a9845ee29a6;hp=87a01cefd5bc41af2865a3ae69d4a346cdd8961f;hpb=6a8bfd43a057da68d43074d0abc3c598c5ccb55a;p=vpp.git

diff --git a/src/vnet/devices/virtio/vhost_user_input.c b/src/vnet/devices/virtio/vhost_user_input.c
index 87a01cefd5b..97303ce3791 100644
--- a/src/vnet/devices/virtio/vhost_user_input.c
+++ b/src/vnet/devices/virtio/vhost_user_input.c
@@ -92,10 +92,10 @@ static __clib_unused char *vhost_user_input_func_error_strings[] = {
 static_always_inline void
 vhost_user_rx_trace (vhost_trace_t * t,
 		     vhost_user_intf_t * vui, u16 qid,
-		     vlib_buffer_t * b, vhost_user_vring_t * txvq)
+		     vlib_buffer_t * b, vhost_user_vring_t * txvq,
+		     u16 last_avail_idx)
 {
   vhost_user_main_t *vum = &vhost_user_main;
-  u32 last_avail_idx = txvq->last_avail_idx;
   u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask];
   vring_desc_t *hdr_desc = 0;
   virtio_net_hdr_mrg_rxbuf_t *hdr;
@@ -195,25 +195,27 @@ vhost_user_rx_discard_packet (vlib_main_t * vm,
    */
   u32 discarded_packets = 0;
   u32 avail_idx = txvq->avail->idx;
+  u16 mask = txvq->qsz_mask;
+  u16 last_avail_idx = txvq->last_avail_idx;
+  u16 last_used_idx = txvq->last_used_idx;
   while (discarded_packets != discard_max)
     {
-      if (avail_idx == txvq->last_avail_idx)
+      if (avail_idx == last_avail_idx)	/* local cursor; txvq copy is stale */
 	goto out;
 
-      u16 desc_chain_head =
-	txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask];
-      txvq->last_avail_idx++;
-      txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id =
-	desc_chain_head;
-      txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0;
-      vhost_user_log_dirty_ring (vui, txvq,
-				 ring[txvq->last_used_idx & txvq->qsz_mask]);
-      txvq->last_used_idx++;
+      u16 desc_chain_head = txvq->avail->ring[last_avail_idx & mask];
+      last_avail_idx++;
+      txvq->used->ring[last_used_idx & mask].id = desc_chain_head;
+      txvq->used->ring[last_used_idx & mask].len = 0;
+      vhost_user_log_dirty_ring (vui, txvq, ring[last_used_idx & mask]);
+      last_used_idx++;
       discarded_packets++;
     }
 
 out:
-  CLIB_MEMORY_BARRIER ();
+  txvq->last_avail_idx = last_avail_idx;
+  txvq->last_used_idx = last_used_idx;
+  CLIB_MEMORY_STORE_BARRIER ();
   txvq->used->idx = txvq->last_used_idx;
   vhost_user_log_dirty_ring (vui, txvq, idx);
   return discarded_packets;
@@ -222,7 +224,7 @@ out:
 /*
  * In case of overflow, we need to rewind the array of allocated buffers.
  */
-static __clib_unused void
+static_always_inline void
 vhost_user_input_rewind_buffers (vlib_main_t * vm,
 				 vhost_cpu_t * cpu, vlib_buffer_t * b_head)
 {
@@ -241,7 +243,7 @@ vhost_user_input_rewind_buffers (vlib_main_t * vm,
   cpu->rx_buffers_len++;
 }
 
-static __clib_unused u32
+static_always_inline u32
 vhost_user_if_input (vlib_main_t * vm,
 		     vhost_user_main_t * vum,
 		     vhost_user_intf_t * vui,
@@ -261,10 +263,11 @@ vhost_user_if_input (vlib_main_t * vm,
   u16 copy_len = 0;
   u8 feature_arc_idx = fm->device_input_feature_arc_index;
   u32 current_config_index = ~(u32) 0;
+  u16 mask = txvq->qsz_mask;
 
   /* The descriptor table is not ready yet */
   if (PREDICT_FALSE (txvq->avail == 0))
-    return 0;
+    goto done;
 
   {
     /* do we have pending interrupts ? */
@@ -299,13 +302,13 @@ vhost_user_if_input (vlib_main_t * vm,
     }
 
   if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
-    return 0;
+    goto done;
 
   n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx);
 
   /* nothing to do */
   if (PREDICT_FALSE (n_left == 0))
-    return 0;
+    goto done;
 
   if (PREDICT_FALSE (!vui->admin_up || !(txvq->enabled)))
     {
@@ -318,10 +321,10 @@ vhost_user_if_input (vlib_main_t * vm,
        */
       vhost_user_rx_discard_packet (vm, vui, txvq,
 				    VHOST_USER_DOWN_DISCARD_COUNT);
-      return 0;
+      goto done;
     }
 
-  if (PREDICT_FALSE (n_left == (txvq->qsz_mask + 1)))
+  if (PREDICT_FALSE (n_left == (mask + 1)))
     {
       /*
        * Informational error logging when VPP is not
@@ -382,223 +385,220 @@ vhost_user_if_input (vlib_main_t * vm,
 			    &next_index, 0);
     }
 
+  u16 last_avail_idx = txvq->last_avail_idx;
+  u16 last_used_idx = txvq->last_used_idx;
+
+  vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+  if (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT)
+    {
+      /* give some hints to ethernet-input */
+      vlib_next_frame_t *nf;
+      vlib_frame_t *f;
+      ethernet_input_frame_t *ef;
+      nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
+      f = vlib_get_frame (vm, nf->frame_index);
+      f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+      ef = vlib_frame_scalar_args (f);
+      ef->sw_if_index = vui->sw_if_index;
+      ef->hw_if_index = vui->hw_if_index;
+    }
+
   while (n_left > 0)
     {
-      vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
+      vlib_buffer_t *b_head, *b_current;
+      u32 bi_current;
+      u16 desc_current;
+      u32 desc_data_offset;
+      vring_desc_t *desc_table = txvq->desc;
 
-      if (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT)
+      if (PREDICT_FALSE (cpu->rx_buffers_len <= 1))
 	{
-	  /* give some hints to ethernet-input */
-	  vlib_next_frame_t *nf;
-	  vlib_frame_t *f;
-	  ethernet_input_frame_t *ef;
-	  nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
-	  f = vlib_get_frame (vm, nf->frame_index);
-	  f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
-
-	  ef = vlib_frame_scalar_args (f);
-	  ef->sw_if_index = vui->sw_if_index;
-	  ef->hw_if_index = vui->hw_if_index;
+	  /* Not enough rx_buffers
+	   * Note: We yeld on 1 so we don't need to do an additional
+	   * check for the next buffer prefetch.
+	   */
+	  n_left = 0;
+	  break;
 	}
 
-      while (n_left > 0 && n_left_to_next > 0)
+      desc_current = txvq->avail->ring[last_avail_idx & mask];
+      cpu->rx_buffers_len--;
+      bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
+      b_head = b_current = vlib_get_buffer (vm, bi_current);
+      to_next[0] = bi_current;	//We do that now so we can forget about bi_current
+      to_next++;
+      n_left_to_next--;
+
+      vlib_prefetch_buffer_with_index
+	(vm, cpu->rx_buffers[cpu->rx_buffers_len - 1], LOAD);
+
+      /* Just preset the used descriptor id and length for later */
+      txvq->used->ring[last_used_idx & mask].id = desc_current;
+      txvq->used->ring[last_used_idx & mask].len = 0;
+      vhost_user_log_dirty_ring (vui, txvq, ring[last_used_idx & mask]);
+
+      /* The buffer should already be initialized */
+      b_head->total_length_not_including_first_buffer = 0;
+      b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+      if (PREDICT_FALSE (n_trace))
 	{
-	  vlib_buffer_t *b_head, *b_current;
-	  u32 bi_current;
-	  u16 desc_current;
-	  u32 desc_data_offset;
-	  vring_desc_t *desc_table = txvq->desc;
+	  //TODO: next_index is not exactly known at that point
+	  vlib_trace_buffer (vm, node, next_index, b_head,
+			     /* follow_chain */ 0);
+	  vhost_trace_t *t0 =
+	    vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
+	  vhost_user_rx_trace (t0, vui, qid, b_head, txvq, last_avail_idx);
+	  n_trace--;
+	  vlib_set_trace_count (vm, node, n_trace);
+	}
 
-	  if (PREDICT_FALSE (cpu->rx_buffers_len <= 1))
+      /* This depends on the setup but is very consistent
+       * So I think the CPU branch predictor will make a pretty good job
+       * at optimizing the decision. */
+      if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
+	{
+	  desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr,
+				      &map_hint);
+	  desc_current = 0;
+	  if (PREDICT_FALSE (desc_table == 0))
 	    {
-	      /* Not enough rx_buffers
-	       * Note: We yeld on 1 so we don't need to do an additional
-	       * check for the next buffer prefetch.
-	       */
-	      n_left = 0;
-	      break;
+	      vlib_error_count (vm, node->node_index,
+				VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
+	      goto out;
 	    }
+	}
 
-	  desc_current =
-	    txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask];
-	  cpu->rx_buffers_len--;
-	  bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
-	  b_head = b_current = vlib_get_buffer (vm, bi_current);
-	  to_next[0] = bi_current;	//We do that now so we can forget about bi_current
-	  to_next++;
-	  n_left_to_next--;
-
-	  vlib_prefetch_buffer_with_index
-	    (vm, cpu->rx_buffers[cpu->rx_buffers_len - 1], LOAD);
-
-	  /* Just preset the used descriptor id and length for later */
-	  txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id =
-	    desc_current;
-	  txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0;
-	  vhost_user_log_dirty_ring (vui, txvq,
-				     ring[txvq->last_used_idx &
-					  txvq->qsz_mask]);
-
-	  /* The buffer should already be initialized */
-	  b_head->total_length_not_including_first_buffer = 0;
-	  b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
-
-	  if (PREDICT_FALSE (n_trace))
-	    {
-	      //TODO: next_index is not exactly known at that point
-	      vlib_trace_buffer (vm, node, next_index, b_head,
-				 /* follow_chain */ 0);
-	      vhost_trace_t *t0 =
-		vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
-	      vhost_user_rx_trace (t0, vui, qid, b_head, txvq);
-	      n_trace--;
-	      vlib_set_trace_count (vm, node, n_trace);
-	    }
+      if (PREDICT_TRUE (vui->is_any_layout) ||
+	  (!(desc_table[desc_current].flags & VIRTQ_DESC_F_NEXT)))
+	{
+	  /* ANYLAYOUT or single buffer */
+	  desc_data_offset = vui->virtio_net_hdr_sz;
+	}
+      else
+	{
+	  /* CSR case without ANYLAYOUT, skip 1st buffer */
+	  desc_data_offset = desc_table[desc_current].len;
+	}
 
-	  /* This depends on the setup but is very consistent
-	   * So I think the CPU branch predictor will make a pretty good job
-	   * at optimizing the decision. */
-	  if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
+      while (1)
+	{
+	  /* Get more input if necessary. Or end of packet. */
+	  if (desc_data_offset == desc_table[desc_current].len)
 	    {
-	      desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr,
-					  &map_hint);
-	      desc_current = 0;
-	      if (PREDICT_FALSE (desc_table == 0))
+	      if (PREDICT_FALSE (desc_table[desc_current].flags &
+				 VIRTQ_DESC_F_NEXT))
+		{
+		  desc_current = desc_table[desc_current].next;
+		  desc_data_offset = 0;
+		}
+	      else
 		{
-		  vlib_error_count (vm, node->node_index,
-				    VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
 		  goto out;
 		}
 	    }
 
-	  if (PREDICT_TRUE (vui->is_any_layout) ||
-	      (!(desc_table[desc_current].flags & VIRTQ_DESC_F_NEXT)))
-	    {
-	      /* ANYLAYOUT or single buffer */
-	      desc_data_offset = vui->virtio_net_hdr_sz;
-	    }
-	  else
-	    {
-	      /* CSR case without ANYLAYOUT, skip 1st buffer */
-	      desc_data_offset = desc_table[desc_current].len;
-	    }
-
-	  while (1)
+	  /* Get more output if necessary. Or end of packet. */
+	  if (PREDICT_FALSE
+	      (b_current->current_length == VLIB_BUFFER_DATA_SIZE))
 	    {
-	      /* Get more input if necessary. Or end of packet. */
-	      if (desc_data_offset == desc_table[desc_current].len)
+	      if (PREDICT_FALSE (cpu->rx_buffers_len == 0))
 		{
-		  if (PREDICT_FALSE (desc_table[desc_current].flags &
-				     VIRTQ_DESC_F_NEXT))
-		    {
-		      desc_current = desc_table[desc_current].next;
-		      desc_data_offset = 0;
-		    }
-		  else
-		    {
-		      goto out;
-		    }
+		  /* Cancel speculation */
+		  to_next--;
+		  n_left_to_next++;
+
+		  /*
+		   * Checking if there are some left buffers.
+		   * If not, just rewind the used buffers and stop.
+		   * Note: Scheduled copies are not cancelled. This is
+		   * not an issue as they would still be valid. Useless,
+		   * but valid.
+		   */
+		  vhost_user_input_rewind_buffers (vm, cpu, b_head);
+		  n_left = 0;
+		  goto stop;
 		}
 
-	      /* Get more output if necessary. Or end of packet. */
-	      if (PREDICT_FALSE
-		  (b_current->current_length == VLIB_BUFFER_DATA_SIZE))
-		{
-		  if (PREDICT_FALSE (cpu->rx_buffers_len == 0))
-		    {
-		      /* Cancel speculation */
-		      to_next--;
-		      n_left_to_next++;
-
-		      /*
-		       * Checking if there are some left buffers.
-		       * If not, just rewind the used buffers and stop.
-		       * Note: Scheduled copies are not cancelled. This is
-		       * not an issue as they would still be valid. Useless,
-		       * but valid.
-		       */
-		      vhost_user_input_rewind_buffers (vm, cpu, b_head);
-		      n_left = 0;
-		      goto stop;
-		    }
-
-		  /* Get next output */
-		  cpu->rx_buffers_len--;
-		  u32 bi_next = cpu->rx_buffers[cpu->rx_buffers_len];
-		  b_current->next_buffer = bi_next;
-		  b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
-		  bi_current = bi_next;
-		  b_current = vlib_get_buffer (vm, bi_current);
-		}
-
-	      /* Prepare a copy order executed later for the data */
-	      vhost_copy_t *cpy = &cpu->copy[copy_len];
-	      copy_len++;
-	      u32 desc_data_l =
-		desc_table[desc_current].len - desc_data_offset;
-	      cpy->len = VLIB_BUFFER_DATA_SIZE - b_current->current_length;
-	      cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
-	      cpy->dst = (uword) (vlib_buffer_get_current (b_current) +
-				  b_current->current_length);
-	      cpy->src = desc_table[desc_current].addr + desc_data_offset;
-
-	      desc_data_offset += cpy->len;
-
-	      b_current->current_length += cpy->len;
-	      b_head->total_length_not_including_first_buffer += cpy->len;
+	      /* Get next output */
+	      cpu->rx_buffers_len--;
+	      u32 bi_next = cpu->rx_buffers[cpu->rx_buffers_len];
+	      b_current->next_buffer = bi_next;
+	      b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
+	      bi_current = bi_next;
+	      b_current = vlib_get_buffer (vm, bi_current);
 	    }
 
-	out:
-	  CLIB_PREFETCH (&n_left, sizeof (n_left), LOAD);
+	  /* Prepare a copy order executed later for the data */
+	  vhost_copy_t *cpy = &cpu->copy[copy_len];
+	  copy_len++;
+	  u32 desc_data_l = desc_table[desc_current].len - desc_data_offset;
+	  cpy->len = VLIB_BUFFER_DATA_SIZE - b_current->current_length;
+	  cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
+	  cpy->dst = (uword) (vlib_buffer_get_current (b_current) +
+			      b_current->current_length);
+	  cpy->src = desc_table[desc_current].addr + desc_data_offset;
 
-	  n_rx_bytes += b_head->total_length_not_including_first_buffer;
-	  n_rx_packets++;
+	  desc_data_offset += cpy->len;
 
-	  b_head->total_length_not_including_first_buffer -=
-	    b_head->current_length;
+	  b_current->current_length += cpy->len;
+	  b_head->total_length_not_including_first_buffer += cpy->len;
+	}
 
-	  /* consume the descriptor and return it as used */
-	  txvq->last_avail_idx++;
-	  txvq->last_used_idx++;
+    out:
 
-	  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head);
+      n_rx_bytes += b_head->total_length_not_including_first_buffer;
+      n_rx_packets++;
 
-	  vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
-	  vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
-	  b_head->error = 0;
+      b_head->total_length_not_including_first_buffer -=
+	b_head->current_length;
 
-	  if (current_config_index != ~(u32) 0)
-	    {
-	      b_head->current_config_index = current_config_index;
-	      vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
-	    }
+      /* consume the descriptor and return it as used */
+      last_avail_idx++;
+      last_used_idx++;
 
-	  n_left--;
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head);
 
-	  /*
-	   * Although separating memory copies from virtio ring parsing
-	   * is beneficial, we can offer to perform the copies from time
-	   * to time in order to free some space in the ring.
-	   */
-	  if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
-	    {
-	      if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy,
-							copy_len, &map_hint)))
-		{
-		  vlib_error_count (vm, node->node_index,
-				    VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
-		}
-	      copy_len = 0;
+      vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
+      vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+      b_head->error = 0;
+
+      if (current_config_index != ~(u32) 0)
+	{
+	  b_head->current_config_index = current_config_index;
+	  vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
+	}
+
+      n_left--;
 
-	      /* give buffers back to driver */
-	      CLIB_MEMORY_BARRIER ();
-	      txvq->used->idx = txvq->last_used_idx;
-	      vhost_user_log_dirty_ring (vui, txvq, idx);
+      /*
+       * Although separating memory copies from virtio ring parsing
+       * is beneficial, we can offer to perform the copies from time
+       * to time in order to free some space in the ring.
+       */
+      if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
+	{
+	  if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy,
+						    copy_len, &map_hint)))
+	    {
+	      vlib_error_count (vm, node->node_index,
+				VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
 	    }
+	  copy_len = 0;
+
+	  /* give buffers back to driver */
+	  CLIB_MEMORY_STORE_BARRIER ();
+	  txvq->used->idx = last_used_idx;
+	  vhost_user_log_dirty_ring (vui, txvq, idx);
 	}
-    stop:
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
+stop:
+  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+  txvq->last_used_idx = last_used_idx;
+  txvq->last_avail_idx = last_avail_idx;
 
   /* Do the memory copies */
   if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy, copy_len,
@@ -609,7 +609,7 @@ vhost_user_if_input (vlib_main_t * vm,
     }
 
   /* give buffers back to driver */
-  CLIB_MEMORY_BARRIER ();
+  CLIB_MEMORY_STORE_BARRIER ();
   txvq->used->idx = txvq->last_used_idx;
   vhost_user_log_dirty_ring (vui, txvq, idx);
 
@@ -631,6 +631,7 @@ vhost_user_if_input (vlib_main_t * vm,
 
   vnet_device_increment_rx_packets (vm->thread_index, n_rx_packets);
 
+done:
   return n_rx_packets;
 }