pg: Reduce the inclusion of pg.h
diff --git a/src/vnet/devices/virtio/vhost_user_input.c b/src/vnet/devices/virtio/vhost_user_input.c
index dd89909..6abc1fc 100644
 #include <vlib/vlib.h>
 #include <vlib/unix/unix.h>
 
-#include <vnet/ip/ip.h>
-
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/devices/devices.h>
 #include <vnet/feature/feature.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
-#include <vnet/devices/virtio/virtio.h>
 #include <vnet/devices/virtio/vhost_user.h>
 #include <vnet/devices/virtio/vhost_user_inline.h>
 
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
 /*
  * When an RX queue is down but active, received packets
  * must be discarded. This value controls up to how many
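
Note (annotation, not part of the patch): the broad <vnet/ip/ip.h> and
<vnet/devices/virtio/virtio.h> includes are dropped in favour of the specific
headers this file actually uses. What each new include supplies, to the best
of my reading:

  #include <vnet/udp/udp_packet.h>           /* udp_header_t */
  #include <vnet/interface/rx_queue_funcs.h> /* vnet_hw_if_get_rxq_poll_vector() */
  #include <vnet/ip/ip4_packet.h>            /* ip4_header_t */
  #include <vnet/ip/ip6_packet.h>            /* ip6_header_t */
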
@@ -108,18 +110,18 @@ vhost_user_rx_trace (vhost_trace_t * t,
   t->qid = qid;
 
   hdr_desc = &txvq->desc[desc_current];
-  if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
+  if (txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
     {
       t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
       /* Header is the first here */
       hdr_desc = map_guest_mem (vui, txvq->desc[desc_current].addr, &hint);
     }
-  if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
+  if (txvq->desc[desc_current].flags & VRING_DESC_F_NEXT)
     {
       t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
     }
-  if (!(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
-      !(txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
+  if (!(txvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
+      !(txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
     {
       t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
     }
@@ -254,6 +256,7 @@ vhost_user_handle_rx_offload (vlib_buffer_t * b0, u8 * b0_data,
   ethernet_header_t *eh = (ethernet_header_t *) b0_data;
   u16 ethertype = clib_net_to_host_u16 (eh->type);
   u16 l2hdr_sz = sizeof (ethernet_header_t);
+  vnet_buffer_oflags_t oflags = 0;
 
   if (ethernet_frame_is_tagged (ethertype))
     {
@@ -279,7 +282,8 @@ vhost_user_handle_rx_offload (vlib_buffer_t * b0, u8 * b0_data,
     {
       ip4_header_t *ip4 = (ip4_header_t *) (b0_data + l2hdr_sz);
       l4_proto = ip4->protocol;
-      b0->flags |= VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
+      b0->flags |= VNET_BUFFER_F_IS_IP4;
+      oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
     }
   else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
     {
@@ -293,16 +297,12 @@ vhost_user_handle_rx_offload (vlib_buffer_t * b0, u8 * b0_data,
       tcp_header_t *tcp = (tcp_header_t *)
        (b0_data + vnet_buffer (b0)->l4_hdr_offset);
       l4_hdr_sz = tcp_header_bytes (tcp);
-      tcp->checksum = 0;
-      b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+      oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
     }
   else if (l4_proto == IP_PROTOCOL_UDP)
     {
-      udp_header_t *udp =
-       (udp_header_t *) (b0_data + vnet_buffer (b0)->l4_hdr_offset);
-      l4_hdr_sz = sizeof (*udp);
-      udp->checksum = 0;
-      b0->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+      l4_hdr_sz = sizeof (udp_header_t);
+      oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
     }
 
   if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP)
@@ -323,19 +323,23 @@ vhost_user_handle_rx_offload (vlib_buffer_t * b0, u8 * b0_data,
       vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
       b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6);
     }
+
+  if (oflags)
+    vnet_buffer_offload_flags_set (b0, oflags);
 }
 
 static_always_inline void
-vhost_user_input_do_interrupt (vlib_main_t * vm, vhost_user_vring_t * txvq,
+vhost_user_input_do_interrupt (vlib_main_t * vm, vhost_user_intf_t * vui,
+                              vhost_user_vring_t * txvq,
                               vhost_user_vring_t * rxvq)
 {
   f64 now = vlib_time_now (vm);
 
   if ((txvq->n_since_last_int) && (txvq->int_deadline < now))
-    vhost_user_send_call (vm, txvq);
+    vhost_user_send_call (vm, vui, txvq);
 
   if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now))
-    vhost_user_send_call (vm, rxvq);
+    vhost_user_send_call (vm, vui, rxvq);
 }
 
 static_always_inline void
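
Note: the hunks above replace the per-protocol VNET_BUFFER_F_OFFLOAD_*_CKSUM
buffer flags with offload requests collected in a vnet_buffer_oflags_t and
applied once through vnet_buffer_offload_flags_set(); the TCP/UDP checksum
fields are also no longer zeroed here. A minimal sketch of the new pattern
(example_mark_rx_csum() and its parameters are hypothetical; the flag names
and the setter are the ones used above):

  static_always_inline void
  example_mark_rx_csum (vlib_buffer_t *b0, int is_ip4, u8 l4_proto)
  {
    vnet_buffer_oflags_t oflags = 0;

    if (is_ip4)
      oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;  /* IPv4 header checksum */
    if (l4_proto == IP_PROTOCOL_TCP)
      oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM; /* TCP checksum */
    else if (l4_proto == IP_PROTOCOL_UDP)
      oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM; /* UDP checksum */

    if (oflags)
      /* record all requests on the buffer in one step */
      vnet_buffer_offload_flags_set (b0, oflags);
  }
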
@@ -377,11 +381,9 @@ vhost_user_input_setup_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
 }
 
 static_always_inline u32
-vhost_user_if_input (vlib_main_t * vm,
-                    vhost_user_main_t * vum,
-                    vhost_user_intf_t * vui,
-                    u16 qid, vlib_node_runtime_t * node,
-                    vnet_hw_interface_rx_mode mode, u8 enable_csum)
+vhost_user_if_input (vlib_main_t *vm, vhost_user_main_t *vum,
+                    vhost_user_intf_t *vui, u16 qid,
+                    vlib_node_runtime_t *node, u8 enable_csum)
 {
   vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
   vnet_feature_main_t *fm = &feature_main;
@@ -406,7 +408,7 @@ vhost_user_if_input (vlib_main_t * vm,
   {
     /* do we have pending interrupts ? */
     vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
-    vhost_user_input_do_interrupt (vm, txvq, rxvq);
+    vhost_user_input_do_interrupt (vm, vui, txvq, rxvq);
   }
 
   /*
@@ -416,7 +418,7 @@ vhost_user_if_input (vlib_main_t * vm,
    * When the traffic subsides, the scheduler switches the node back to
   * interrupt mode. We must tell the driver we want interrupts.
    */
-  if (PREDICT_FALSE (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
+  if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
     {
       if ((node->flags &
           VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
@@ -546,10 +548,10 @@ vhost_user_if_input (vlib_main_t * vm,
       b_head->total_length_not_including_first_buffer = 0;
       b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
 
-      if (PREDICT_FALSE (n_trace))
+      if (PREDICT_FALSE
+         (n_trace > 0 && vlib_trace_buffer (vm, node, next_index, b_head,
+                                            /* follow_chain */ 0)))
        {
-         vlib_trace_buffer (vm, node, next_index, b_head,
-                            /* follow_chain */ 0);
          vhost_trace_t *t0 =
            vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
          vhost_user_rx_trace (t0, vui, qid, b_head, txvq, last_avail_idx);
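
Note: both trace sites now gate vlib_add_trace() on the return value of
vlib_trace_buffer(), which can decline to trace a buffer (for instance when
trace filtering is in effect), instead of calling it unconditionally. A sketch
of the full idiom; the vlib_get_trace_count() / vlib_set_trace_count()
bookkeeping is the usual pattern and only partly visible inside this hunk:

  u32 n_trace = vlib_get_trace_count (vm, node);

  if (PREDICT_FALSE (n_trace > 0 &&
                     vlib_trace_buffer (vm, node, next_index, b_head,
                                        /* follow_chain */ 0)))
    {
      vhost_trace_t *t0 = vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
      vhost_user_rx_trace (t0, vui, qid, b_head, txvq, last_avail_idx);
      n_trace--;
      vlib_set_trace_count (vm, node, n_trace);
    }
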
@@ -560,7 +562,7 @@ vhost_user_if_input (vlib_main_t * vm,
      /* This depends on the setup but is very consistent,
       * so I think the CPU branch predictor will do a pretty good job
       * at optimizing the decision. */
-      if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
+      if (txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
        {
          desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr,
                                      &map_hint);
@@ -591,7 +593,7 @@ vhost_user_if_input (vlib_main_t * vm,
          if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
            {
              if ((desc_data_offset == desc_table[desc_current].len) &&
-                 (desc_table[desc_current].flags & VIRTQ_DESC_F_NEXT))
+                 (desc_table[desc_current].flags & VRING_DESC_F_NEXT))
                {
                  current = desc_table[desc_current].next;
                  b_data = map_guest_mem (vui, desc_table[current].addr,
@@ -617,7 +619,7 @@ vhost_user_if_input (vlib_main_t * vm,
          if (desc_data_offset == desc_table[desc_current].len)
            {
              if (PREDICT_FALSE (desc_table[desc_current].flags &
-                                VIRTQ_DESC_F_NEXT))
+                                VRING_DESC_F_NEXT))
                {
                  desc_current = desc_table[desc_current].next;
                  desc_data_offset = 0;
@@ -687,8 +689,6 @@ vhost_user_if_input (vlib_main_t * vm,
       last_avail_idx++;
       last_used_idx++;
 
-      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head);
-
       vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
       vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
       b_head->error = 0;
@@ -748,7 +748,7 @@ stop:
       txvq->n_since_last_int += n_rx_packets;
 
       if (txvq->n_since_last_int > vum->coalesce_frames)
-       vhost_user_send_call (vm, txvq);
+       vhost_user_send_call (vm, vui, txvq);
     }
 
   /* increase rx counters */
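
Note: guest notifications stay coalesced. The vring is signalled either when
more than vum->coalesce_frames packets have accumulated since the last
interrupt (the hunk above) or, in vhost_user_input_do_interrupt(), when
packets are pending and int_deadline has passed. A sketch combining the two
triggers; example_maybe_notify() is hypothetical, the fields and the
vhost_user_send_call (vm, vui, vq) signature are the ones used in this diff:

  static_always_inline void
  example_maybe_notify (vlib_main_t *vm, vhost_user_main_t *vum,
                        vhost_user_intf_t *vui, vhost_user_vring_t *vq)
  {
    f64 now = vlib_time_now (vm);

    /* enough packets accumulated since the last interrupt ... */
    if (vq->n_since_last_int > vum->coalesce_frames)
      vhost_user_send_call (vm, vui, vq);
    /* ... or some are pending and the coalescing deadline has expired */
    else if (vq->n_since_last_int && vq->int_deadline < now)
      vhost_user_send_call (vm, vui, vq);
  }
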
@@ -776,10 +776,10 @@ vhost_user_mark_desc_consumed (vhost_user_intf_t * vui,
     {
       if (txvq->used_wrap_counter)
        desc_table[(desc_head + desc_idx) & mask].flags |=
-         (VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
+         (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
       else
        desc_table[(desc_head + desc_idx) & mask].flags &=
-         ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
+         ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
       vhost_user_advance_last_used_idx (txvq);
     }
 }
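
Note: on the packed ring the device returns a descriptor by writing both
VRING_DESC_F_AVAIL and VRING_DESC_F_USED to the value of its
used_wrap_counter (both bits set during one wrap of the ring, both clear
during the next). A sketch for a single descriptor; example_mark_used() is
hypothetical, the fields and flags are those used by
vhost_user_mark_desc_consumed() above:

  static_always_inline void
  example_mark_used (vhost_user_vring_t *txvq,
                     vring_packed_desc_t *desc_table, u16 slot, u16 mask)
  {
    if (txvq->used_wrap_counter)
      desc_table[slot & mask].flags |=
        (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
    else
      desc_table[slot & mask].flags &=
        ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
    vhost_user_advance_last_used_idx (txvq);
  }
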
@@ -799,18 +799,18 @@ vhost_user_rx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
   t->qid = qid;
 
   hdr_desc = &txvq->packed_desc[desc_current];
-  if (txvq->packed_desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
+  if (txvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)
     {
       t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
       /* Header is the first here */
       hdr_desc = map_guest_mem (vui, txvq->packed_desc[desc_current].addr,
                                &hint);
     }
-  if (txvq->packed_desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
+  if (txvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT)
     t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
 
-  if (!(txvq->packed_desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
-      !(txvq->packed_desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
+  if (!(txvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
+      !(txvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
     t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
 
   t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
@@ -1018,7 +1018,7 @@ vhost_user_compute_chained_desc_len (vhost_user_intf_t * vui,
   u32 desc_len = 0;
   u16 mask = txvq->qsz_mask;
 
-  while (desc_table[*current].flags & VIRTQ_DESC_F_NEXT)
+  while (desc_table[*current].flags & VRING_DESC_F_NEXT)
     {
       desc_len += desc_table[*current].len;
       (*n_left)++;
@@ -1086,10 +1086,9 @@ vhost_user_assemble_packet (vring_packed_desc_t * desc_table,
 }
 
 static_always_inline u32
-vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
-                           vhost_user_intf_t * vui, u16 qid,
-                           vlib_node_runtime_t * node,
-                           vnet_hw_interface_rx_mode mode, u8 enable_csum)
+vhost_user_if_input_packed (vlib_main_t *vm, vhost_user_main_t *vum,
+                           vhost_user_intf_t *vui, u16 qid,
+                           vlib_node_runtime_t *node, u8 enable_csum)
 {
   vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
   vnet_feature_main_t *fm = &feature_main;
@@ -1122,7 +1121,7 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
 
   /* do we have pending interrupts ? */
   vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
-  vhost_user_input_do_interrupt (vm, txvq, rxvq);
+  vhost_user_input_do_interrupt (vm, vui, txvq, rxvq);
 
   /*
    * For adaptive mode, it is optimized to reduce interrupts.
@@ -1131,7 +1130,7 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
    * When the traffic subsides, the scheduler switches the node back to
   * interrupt mode. We must tell the driver we want interrupts.
    */
-  if (PREDICT_FALSE (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
+  if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
     {
       if ((node->flags &
           VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
@@ -1177,7 +1176,7 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
   while (vhost_user_packed_desc_available (txvq, current) &&
         (n_left < VLIB_FRAME_SIZE))
     {
-      if (desc_table[current].flags & VIRTQ_DESC_F_INDIRECT)
+      if (desc_table[current].flags & VRING_DESC_F_INDIRECT)
        {
          buffers_required +=
            vhost_user_compute_indirect_desc_len (vui, txvq, buffer_data_size,
@@ -1244,7 +1243,7 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
       desc_data_offset = vui->virtio_net_hdr_sz;
       n_descs_to_process = 1;
 
-      if (desc_table[desc_idx].flags & VIRTQ_DESC_F_INDIRECT)
+      if (desc_table[desc_idx].flags & VRING_DESC_F_INDIRECT)
        {
          n_descs = desc_table[desc_idx].len >> 4;
          desc_table = map_guest_mem (vui, desc_table[desc_idx].addr,
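
Note: the len >> 4 above converts the indirect descriptor's byte length into
a descriptor count, since each packed descriptor is 16 bytes. Spelled out
(sketch only):

  /* equivalent to n_descs = desc_table[desc_idx].len >> 4 */
  n_descs = desc_table[desc_idx].len / sizeof (vring_packed_desc_t);
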
@@ -1298,7 +1297,7 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
           * loop. So count how many descriptors in the chain.
           */
          n_descs_to_process = 1;
-         while (desc_table[desc_idx].flags & VIRTQ_DESC_F_NEXT)
+         while (desc_table[desc_idx].flags & VRING_DESC_F_NEXT)
            {
              vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
                                          &b_current, &next, &b, &bi_current,
@@ -1320,8 +1319,6 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
       b_head->total_length_not_including_first_buffer -=
        b_head->current_length;
 
-      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head);
-
       vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
       vnet_buffer (b_head)->sw_if_index[VLIB_TX] = ~0;
       b_head->error = 0;
@@ -1368,17 +1365,19 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
       b = cpu->rx_buffers_pdesc;
       while (n_trace && left)
        {
-         vhost_trace_t *t0;
-
-         vlib_trace_buffer (vm, node, next_index, b[0],
-                            /* follow_chain */ 0);
-         t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0]));
-         b++;
-         vhost_user_rx_trace_packed (t0, vui, qid, txvq, last_used_idx);
-         last_used_idx = (last_used_idx + 1) & mask;
-         n_trace--;
+         if (PREDICT_TRUE
+             (vlib_trace_buffer
+              (vm, node, next_index, b[0], /* follow_chain */ 0)))
+           {
+             vhost_trace_t *t0;
+             t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0]));
+             vhost_user_rx_trace_packed (t0, vui, qid, txvq, last_used_idx);
+             last_used_idx = (last_used_idx + 1) & mask;
+             n_trace--;
+             vlib_set_trace_count (vm, node, n_trace);
+           }
          left--;
-         vlib_set_trace_count (vm, node, n_trace);
+         b++;
        }
     }
 
@@ -1393,7 +1392,7 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
     {
       txvq->n_since_last_int += n_rx_packets;
       if (txvq->n_since_last_int > vum->coalesce_frames)
-       vhost_user_send_call (vm, txvq);
+       vhost_user_send_call (vm, vui, txvq);
     }
 
   /* increase rx counters */
@@ -1418,39 +1417,31 @@ VLIB_NODE_FN (vhost_user_input_node) (vlib_main_t * vm,
   vhost_user_main_t *vum = &vhost_user_main;
   uword n_rx_packets = 0;
   vhost_user_intf_t *vui;
-  vnet_device_input_runtime_t *rt =
-    (vnet_device_input_runtime_t *) node->runtime_data;
-  vnet_device_and_queue_t *dq;
+  vnet_hw_if_rxq_poll_vector_t *pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
+  vnet_hw_if_rxq_poll_vector_t *pve;
 
-  vec_foreach (dq, rt->devices_and_queues)
-  {
-    if ((node->state == VLIB_NODE_STATE_POLLING) ||
-       clib_atomic_swap_acq_n (&dq->interrupt_pending, 0))
-      {
-       vui =
-         pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance);
-       if (vhost_user_is_packed_ring_supported (vui))
-         {
-           if (vui->features & (1ULL << FEAT_VIRTIO_NET_F_CSUM))
-             n_rx_packets += vhost_user_if_input_packed (vm, vum, vui,
-                                                         dq->queue_id, node,
-                                                         dq->mode, 1);
-           else
-             n_rx_packets += vhost_user_if_input_packed (vm, vum, vui,
-                                                         dq->queue_id, node,
-                                                         dq->mode, 0);
-         }
-       else
-         {
-           if (vui->features & (1ULL << FEAT_VIRTIO_NET_F_CSUM))
-             n_rx_packets += vhost_user_if_input (vm, vum, vui, dq->queue_id,
-                                                  node, dq->mode, 1);
-           else
-             n_rx_packets += vhost_user_if_input (vm, vum, vui, dq->queue_id,
-                                                  node, dq->mode, 0);
-         }
-      }
-  }
+  vec_foreach (pve, pv)
+    {
+      vui = pool_elt_at_index (vum->vhost_user_interfaces, pve->dev_instance);
+      if (vhost_user_is_packed_ring_supported (vui))
+       {
+         if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
+           n_rx_packets += vhost_user_if_input_packed (
+             vm, vum, vui, pve->queue_id, node, 1);
+         else
+           n_rx_packets += vhost_user_if_input_packed (
+             vm, vum, vui, pve->queue_id, node, 0);
+       }
+      else
+       {
+         if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
+           n_rx_packets +=
+             vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 1);
+         else
+           n_rx_packets +=
+             vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 0);
+       }
+    }
 
   return n_rx_packets;
 }
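
Note: the node function no longer walks vnet_device_input_runtime_t and its
devices_and_queues; it asks the interface layer for a poll vector holding the
(dev_instance, queue_id) pairs that need service on this thread (typically
all assigned queues in polling mode and only the signalled ones in interrupt
mode). A sketch of the same pattern for a generic input node;
my_input_node_fn, my_interface_t, my_interfaces and my_device_rx are
hypothetical, the rx-queue calls are the ones used above:

  #include <vnet/interface/rx_queue_funcs.h>

  static uword
  my_input_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
                    vlib_frame_t *frame)
  {
    uword n_rx = 0;
    vnet_hw_if_rxq_poll_vector_t *pv =
      vnet_hw_if_get_rxq_poll_vector (vm, node);
    vnet_hw_if_rxq_poll_vector_t *pve;

    vec_foreach (pve, pv)
      {
        my_interface_t *mi =
          pool_elt_at_index (my_interfaces, pve->dev_instance);
        n_rx += my_device_rx (vm, node, mi, pve->queue_id);
      }

    return n_rx;
  }
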