avf: support generic flow
[vpp.git] / src/vnet/devices/virtio/vhost_user_input.c
index 8d0ee4a..841a979 100644
 #include <vlib/vlib.h>
 #include <vlib/unix/unix.h>
 
-#include <vnet/ip/ip.h>
-
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/devices/devices.h>
 #include <vnet/feature/feature.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
 #include <vnet/devices/virtio/vhost_user.h>
 #include <vnet/devices/virtio/vhost_user_inline.h>
 
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
 /*
  * When an RX queue is down but active, received packets
  * must be discarded. This value controls up to how many
@@ -98,8 +102,8 @@ vhost_user_rx_trace (vhost_trace_t * t,
 {
   vhost_user_main_t *vum = &vhost_user_main;
   u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask];
-  vring_desc_t *hdr_desc = 0;
-  virtio_net_hdr_mrg_rxbuf_t *hdr;
+  vnet_virtio_vring_desc_t *hdr_desc = 0;
+  vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
   u32 hint = 0;
 
   clib_memset (t, 0, sizeof (*t));
@@ -160,8 +164,8 @@ vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
              (!(src3 = map_guest_mem (vui, cpy[3].src, map_hint))))
            return 1;
 
-         CLIB_PREFETCH (src2, 64, LOAD);
-         CLIB_PREFETCH (src3, 64, LOAD);
+         clib_prefetch_load (src2);
+         clib_prefetch_load (src3);
 
          clib_memcpy_fast ((void *) cpy[0].dst, src0, cpy[0].len);
          clib_memcpy_fast ((void *) cpy[1].dst, src1, cpy[1].len);
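
For reference, clib_prefetch_load () is the newer one-cache-line read-prefetch helper that replaces the CLIB_PREFETCH (p, 64, LOAD) pattern seen on the removed lines. A minimal sketch of the intended semantics, assuming a 64-byte cache line; the expansion shown is an illustration built on the standard GCC/Clang builtin, not the verbatim clib implementation:

    /* Illustrative only: a read prefetch of one cache line. */
    static inline void
    prefetch_load_sketch (const void *p)
    {
      __builtin_prefetch (p, 0 /* read */, 3 /* keep in all cache levels */);
    }
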
@@ -245,14 +249,15 @@ vhost_user_input_rewind_buffers (vlib_main_t * vm,
 }
 
 static_always_inline void
-vhost_user_handle_rx_offload (vlib_buffer_t * b0, u8 * b0_data,
-                             virtio_net_hdr_t * hdr)
+vhost_user_handle_rx_offload (vlib_buffer_t *b0, u8 *b0_data,
+                             vnet_virtio_net_hdr_t *hdr)
 {
   u8 l4_hdr_sz = 0;
   u8 l4_proto = 0;
   ethernet_header_t *eh = (ethernet_header_t *) b0_data;
   u16 ethertype = clib_net_to_host_u16 (eh->type);
   u16 l2hdr_sz = sizeof (ethernet_header_t);
+  vnet_buffer_oflags_t oflags = 0;
 
   if (ethernet_frame_is_tagged (ethertype))
     {
@@ -278,7 +283,8 @@ vhost_user_handle_rx_offload (vlib_buffer_t * b0, u8 * b0_data,
     {
       ip4_header_t *ip4 = (ip4_header_t *) (b0_data + l2hdr_sz);
       l4_proto = ip4->protocol;
-      b0->flags |= VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
+      b0->flags |= VNET_BUFFER_F_IS_IP4;
+      oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
     }
   else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
     {
@@ -292,16 +298,12 @@ vhost_user_handle_rx_offload (vlib_buffer_t * b0, u8 * b0_data,
       tcp_header_t *tcp = (tcp_header_t *)
        (b0_data + vnet_buffer (b0)->l4_hdr_offset);
       l4_hdr_sz = tcp_header_bytes (tcp);
-      tcp->checksum = 0;
-      b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+      oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
     }
   else if (l4_proto == IP_PROTOCOL_UDP)
     {
-      udp_header_t *udp =
-       (udp_header_t *) (b0_data + vnet_buffer (b0)->l4_hdr_offset);
-      l4_hdr_sz = sizeof (*udp);
-      udp->checksum = 0;
-      b0->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+      l4_hdr_sz = sizeof (udp_header_t);
+      oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
     }
 
   if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP)
@@ -322,19 +324,23 @@ vhost_user_handle_rx_offload (vlib_buffer_t * b0, u8 * b0_data,
       vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
       b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6);
     }
+
+  if (oflags)
+    vnet_buffer_offload_flags_set (b0, oflags);
 }
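
The refactor above accumulates checksum requests in a vnet_buffer_oflags_t and records them once via vnet_buffer_offload_flags_set (), instead of OR-ing VNET_BUFFER_F_OFFLOAD_* bits directly into b->flags (and it no longer zeroes the TCP/UDP checksum fields in the packet). A condensed sketch of the same idiom; request_l3_l4_csum is a hypothetical helper name:

    /* Hypothetical helper showing the oflags accumulate-then-set idiom. */
    static_always_inline void
    request_l3_l4_csum (vlib_buffer_t *b, u8 l4_proto)
    {
      vnet_buffer_oflags_t oflags = VNET_BUFFER_OFFLOAD_F_IP_CKSUM;

      if (l4_proto == IP_PROTOCOL_TCP)
        oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
      else if (l4_proto == IP_PROTOCOL_UDP)
        oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;

      /* One call records all requested offloads on the buffer. */
      vnet_buffer_offload_flags_set (b, oflags);
    }
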
 
 static_always_inline void
-vhost_user_input_do_interrupt (vlib_main_t * vm, vhost_user_vring_t * txvq,
+vhost_user_input_do_interrupt (vlib_main_t * vm, vhost_user_intf_t * vui,
+                              vhost_user_vring_t * txvq,
                               vhost_user_vring_t * rxvq)
 {
   f64 now = vlib_time_now (vm);
 
   if ((txvq->n_since_last_int) && (txvq->int_deadline < now))
-    vhost_user_send_call (vm, txvq);
+    vhost_user_send_call (vm, vui, txvq);
 
   if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now))
-    vhost_user_send_call (vm, rxvq);
+    vhost_user_send_call (vm, vui, rxvq);
 }
 
 static_always_inline void
@@ -376,11 +382,9 @@ vhost_user_input_setup_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
 }
 
 static_always_inline u32
-vhost_user_if_input (vlib_main_t * vm,
-                    vhost_user_main_t * vum,
-                    vhost_user_intf_t * vui,
-                    u16 qid, vlib_node_runtime_t * node,
-                    vnet_hw_if_rx_mode mode, u8 enable_csum)
+vhost_user_if_input (vlib_main_t *vm, vhost_user_main_t *vum,
+                    vhost_user_intf_t *vui, u16 qid,
+                    vlib_node_runtime_t *node, u8 enable_csum)
 {
   vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
   vnet_feature_main_t *fm = &feature_main;
@@ -405,7 +409,7 @@ vhost_user_if_input (vlib_main_t * vm,
   {
    /* do we have pending interrupts? */
     vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
-    vhost_user_input_do_interrupt (vm, txvq, rxvq);
+    vhost_user_input_do_interrupt (vm, vui, txvq, rxvq);
   }
 
   /*
@@ -415,7 +419,7 @@ vhost_user_if_input (vlib_main_t * vm,
    * When the traffic subsides, the scheduler switches the node back to
    * interrupt mode. We must tell the driver we want interrupts.
    */
-  if (PREDICT_FALSE (mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
+  if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
     {
       if ((node->flags &
           VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
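
The hunk above ends mid-condition. For context, a sketch of how an adaptive-mode check of this shape typically completes, assuming the vring's used->flags field and the pre-existing VRING_USED_F_NO_NOTIFY flag are the notification knobs (illustrative, not the verbatim file contents):

    if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
      {
        if ((node->flags &
             VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
            !(node->flags &
              VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
          /* Back in interrupt mode: ask the driver to notify us. */
          txvq->used->flags = 0;
        else
          /* Polling under load: suppress driver notifications. */
          txvq->used->flags = VRING_USED_F_NO_NOTIFY;
      }
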
@@ -513,7 +517,7 @@ vhost_user_if_input (vlib_main_t * vm,
       u32 bi_current;
       u16 desc_current;
       u32 desc_data_offset;
-      vring_desc_t *desc_table = txvq->desc;
+      vnet_virtio_vring_desc_t *desc_table = txvq->desc;
 
       if (PREDICT_FALSE (cpu->rx_buffers_len <= 1))
        {
@@ -545,10 +549,10 @@ vhost_user_if_input (vlib_main_t * vm,
       b_head->total_length_not_including_first_buffer = 0;
       b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
 
-      if (PREDICT_FALSE (n_trace))
+      if (PREDICT_FALSE
+         (n_trace > 0 && vlib_trace_buffer (vm, node, next_index, b_head,
+                                            /* follow_chain */ 0)))
        {
-         vlib_trace_buffer (vm, node, next_index, b_head,
-                            /* follow_chain */ 0);
          vhost_trace_t *t0 =
            vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
          vhost_user_rx_trace (t0, vui, qid, b_head, txvq, last_avail_idx);
@@ -576,7 +580,7 @@ vhost_user_if_input (vlib_main_t * vm,
 
       if (enable_csum)
        {
-         virtio_net_hdr_mrg_rxbuf_t *hdr;
+         vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
          u8 *b_data;
          u16 current;
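
Note the reworked trace gating a few lines up: vlib_trace_buffer () now returns whether the buffer was actually accepted for tracing, so the trace record is added only on success. The idiom in isolation, reusing the surrounding loop's variables, with my_trace_t and its field as placeholders:

    if (PREDICT_FALSE (n_trace > 0 &&
                       vlib_trace_buffer (vm, node, next_index, b,
                                          /* follow_chain */ 0)))
      {
        my_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
        t->sw_if_index = sw_if_index; /* fill per-driver trace fields */
        n_trace--;
        vlib_set_trace_count (vm, node, n_trace);
      }
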
 
@@ -686,8 +690,6 @@ vhost_user_if_input (vlib_main_t * vm,
       last_avail_idx++;
       last_used_idx++;
 
-      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head);
-
       vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
       vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
       b_head->error = 0;
@@ -747,7 +749,7 @@ stop:
       txvq->n_since_last_int += n_rx_packets;
 
       if (txvq->n_since_last_int > vum->coalesce_frames)
-       vhost_user_send_call (vm, txvq);
+       vhost_user_send_call (vm, vui, txvq);
     }
 
   /* increase rx counters */
@@ -767,7 +769,7 @@ vhost_user_mark_desc_consumed (vhost_user_intf_t * vui,
                               vhost_user_vring_t * txvq, u16 desc_head,
                               u16 n_descs_processed)
 {
-  vring_packed_desc_t *desc_table = txvq->packed_desc;
+  vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc;
   u16 desc_idx;
   u16 mask = txvq->qsz_mask;
 
@@ -789,8 +791,8 @@ vhost_user_rx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
                            u16 desc_current)
 {
   vhost_user_main_t *vum = &vhost_user_main;
-  vring_packed_desc_t *hdr_desc;
-  virtio_net_hdr_mrg_rxbuf_t *hdr;
+  vnet_virtio_vring_packed_desc_t *hdr_desc;
+  vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
   u32 hint = 0;
 
   clib_memset (t, 0, sizeof (*t));
@@ -871,10 +873,10 @@ vhost_user_input_copy_packed (vhost_user_intf_t * vui, vhost_copy_t * cpy,
       bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
       if (PREDICT_FALSE (bad))
        goto one_by_one;
-      CLIB_PREFETCH (src4, 64, LOAD);
-      CLIB_PREFETCH (src5, 64, LOAD);
-      CLIB_PREFETCH (src6, 64, LOAD);
-      CLIB_PREFETCH (src7, 64, LOAD);
+      clib_prefetch_load (src4);
+      clib_prefetch_load (src5);
+      clib_prefetch_load (src6);
+      clib_prefetch_load (src7);
 
       while (PREDICT_TRUE (copy_len >= 8))
        {
@@ -891,10 +893,10 @@ vhost_user_input_copy_packed (vhost_user_intf_t * vui, vhost_copy_t * cpy,
          if (PREDICT_FALSE (bad))
            break;
 
-         CLIB_PREFETCH (src4, 64, LOAD);
-         CLIB_PREFETCH (src5, 64, LOAD);
-         CLIB_PREFETCH (src6, 64, LOAD);
-         CLIB_PREFETCH (src7, 64, LOAD);
+         clib_prefetch_load (src4);
+         clib_prefetch_load (src5);
+         clib_prefetch_load (src6);
+         clib_prefetch_load (src7);
 
          clib_memcpy_fast ((void *) cpy[0].dst, src0, cpy[0].len);
          clib_memcpy_fast ((void *) cpy[1].dst, src1, cpy[1].len);
@@ -921,12 +923,13 @@ one_by_one:
 }
 
 static_always_inline u32
-vhost_user_do_offload (vhost_user_intf_t * vui,
-                      vring_packed_desc_t * desc_table, u16 desc_current,
-                      u16 mask, vlib_buffer_t * b_head, u32 * map_hint)
+vhost_user_do_offload (vhost_user_intf_t *vui,
+                      vnet_virtio_vring_packed_desc_t *desc_table,
+                      u16 desc_current, u16 mask, vlib_buffer_t *b_head,
+                      u32 *map_hint)
 {
   u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
-  virtio_net_hdr_mrg_rxbuf_t *hdr;
+  vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
   u8 *b_data;
   u32 desc_data_offset = vui->virtio_net_hdr_sz;
 
@@ -987,7 +990,7 @@ vhost_user_compute_indirect_desc_len (vhost_user_intf_t * vui,
                                      u32 buffer_data_size, u16 desc_current,
                                      u32 * map_hint)
 {
-  vring_packed_desc_t *desc_table = txvq->packed_desc;
+  vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc;
   u32 desc_len = 0;
   u16 desc_data_offset = vui->virtio_net_hdr_sz;
   u16 desc_idx = desc_current;
@@ -1013,7 +1016,7 @@ vhost_user_compute_chained_desc_len (vhost_user_intf_t * vui,
                                     u32 buffer_data_size, u16 * current,
                                     u16 * n_left)
 {
-  vring_packed_desc_t *desc_table = txvq->packed_desc;
+  vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc;
   u32 desc_len = 0;
   u16 mask = txvq->qsz_mask;
 
@@ -1036,14 +1039,13 @@ vhost_user_compute_chained_desc_len (vhost_user_intf_t * vui,
 }
 
 static_always_inline void
-vhost_user_assemble_packet (vring_packed_desc_t * desc_table,
-                           u16 * desc_idx, vlib_buffer_t * b_head,
-                           vlib_buffer_t ** b_current, u32 ** next,
-                           vlib_buffer_t *** b, u32 * bi_current,
-                           vhost_cpu_t * cpu, u16 * copy_len,
-                           u32 * buffers_used, u32 buffers_required,
-                           u32 * desc_data_offset, u32 buffer_data_size,
-                           u16 mask)
+vhost_user_assemble_packet (vnet_virtio_vring_packed_desc_t *desc_table,
+                           u16 *desc_idx, vlib_buffer_t *b_head,
+                           vlib_buffer_t **b_current, u32 **next,
+                           vlib_buffer_t ***b, u32 *bi_current,
+                           vhost_cpu_t *cpu, u16 *copy_len, u32 *buffers_used,
+                           u32 buffers_required, u32 *desc_data_offset,
+                           u32 buffer_data_size, u16 mask)
 {
   u32 desc_data_l;
 
@@ -1085,10 +1087,9 @@ vhost_user_assemble_packet (vring_packed_desc_t * desc_table,
 }
 
 static_always_inline u32
-vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
-                           vhost_user_intf_t * vui, u16 qid,
-                           vlib_node_runtime_t * node,
-                           vnet_hw_if_rx_mode mode, u8 enable_csum)
+vhost_user_if_input_packed (vlib_main_t *vm, vhost_user_main_t *vum,
+                           vhost_user_intf_t *vui, u16 qid,
+                           vlib_node_runtime_t *node, u8 enable_csum)
 {
   vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
   vnet_feature_main_t *fm = &feature_main;
@@ -1107,7 +1108,7 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
   u32 current_config_index = ~0;
   u16 mask = txvq->qsz_mask;
   u16 desc_current, desc_head, last_used_idx;
-  vring_packed_desc_t *desc_table = 0;
+  vnet_virtio_vring_packed_desc_t *desc_table = 0;
   u32 n_descs_processed = 0;
   u32 rv;
   vlib_buffer_t **b;
@@ -1121,7 +1122,7 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
 
  /* do we have pending interrupts? */
   vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
-  vhost_user_input_do_interrupt (vm, txvq, rxvq);
+  vhost_user_input_do_interrupt (vm, vui, txvq, rxvq);
 
   /*
   * For adaptive mode, the node is optimized to reduce interrupts.
@@ -1130,7 +1131,7 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
    * When the traffic subsides, the scheduler switches the node back to
    * interrupt mode. We must tell the driver we want interrupt.
    */
-  if (PREDICT_FALSE (mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
+  if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
     {
       if ((node->flags &
           VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
@@ -1319,8 +1320,6 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
       b_head->total_length_not_including_first_buffer -=
        b_head->current_length;
 
-      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head);
-
       vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
       vnet_buffer (b_head)->sw_if_index[VLIB_TX] = ~0;
       b_head->error = 0;
@@ -1367,17 +1366,19 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
       b = cpu->rx_buffers_pdesc;
       while (n_trace && left)
        {
-         vhost_trace_t *t0;
-
-         vlib_trace_buffer (vm, node, next_index, b[0],
-                            /* follow_chain */ 0);
-         t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0]));
-         b++;
-         vhost_user_rx_trace_packed (t0, vui, qid, txvq, last_used_idx);
-         last_used_idx = (last_used_idx + 1) & mask;
-         n_trace--;
+         if (PREDICT_TRUE
+             (vlib_trace_buffer
+              (vm, node, next_index, b[0], /* follow_chain */ 0)))
+           {
+             vhost_trace_t *t0;
+             t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0]));
+             vhost_user_rx_trace_packed (t0, vui, qid, txvq, last_used_idx);
+             last_used_idx = (last_used_idx + 1) & mask;
+             n_trace--;
+             vlib_set_trace_count (vm, node, n_trace);
+           }
          left--;
-         vlib_set_trace_count (vm, node, n_trace);
+         b++;
        }
     }
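
The packed-ring loop above applies the same gating: a trace slot is consumed and the count updated only when vlib_trace_buffer () accepts the buffer, while left still decrements every iteration so the walk terminates.
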
 
@@ -1392,7 +1393,7 @@ vhost_user_if_input_packed (vlib_main_t * vm, vhost_user_main_t * vum,
     {
       txvq->n_since_last_int += n_rx_packets;
       if (txvq->n_since_last_int > vum->coalesce_frames)
-       vhost_user_send_call (vm, txvq);
+       vhost_user_send_call (vm, vui, txvq);
     }
 
   /* increase rx counters */
@@ -1417,39 +1418,31 @@ VLIB_NODE_FN (vhost_user_input_node) (vlib_main_t * vm,
   vhost_user_main_t *vum = &vhost_user_main;
   uword n_rx_packets = 0;
   vhost_user_intf_t *vui;
-  vnet_device_input_runtime_t *rt =
-    (vnet_device_input_runtime_t *) node->runtime_data;
-  vnet_device_and_queue_t *dq;
+  vnet_hw_if_rxq_poll_vector_t *pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
+  vnet_hw_if_rxq_poll_vector_t *pve;
 
-  vec_foreach (dq, rt->devices_and_queues)
-  {
-    if ((node->state == VLIB_NODE_STATE_POLLING) ||
-       clib_atomic_swap_acq_n (&dq->interrupt_pending, 0))
-      {
-       vui =
-         pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance);
-       if (vhost_user_is_packed_ring_supported (vui))
-         {
-           if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
-             n_rx_packets += vhost_user_if_input_packed (vm, vum, vui,
-                                                         dq->queue_id, node,
-                                                         dq->mode, 1);
-           else
-             n_rx_packets += vhost_user_if_input_packed (vm, vum, vui,
-                                                         dq->queue_id, node,
-                                                         dq->mode, 0);
-         }
-       else
-         {
-           if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
-             n_rx_packets += vhost_user_if_input (vm, vum, vui, dq->queue_id,
-                                                  node, dq->mode, 1);
-           else
-             n_rx_packets += vhost_user_if_input (vm, vum, vui, dq->queue_id,
-                                                  node, dq->mode, 0);
-         }
-      }
-  }
+  vec_foreach (pve, pv)
+    {
+      vui = pool_elt_at_index (vum->vhost_user_interfaces, pve->dev_instance);
+      if (vhost_user_is_packed_ring_supported (vui))
+       {
+         if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
+           n_rx_packets += vhost_user_if_input_packed (
+             vm, vum, vui, pve->queue_id, node, 1);
+         else
+           n_rx_packets += vhost_user_if_input_packed (
+             vm, vum, vui, pve->queue_id, node, 0);
+       }
+      else
+       {
+         if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
+           n_rx_packets +=
+             vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 1);
+         else
+           n_rx_packets +=
+             vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 0);
+       }
+    }
 
   return n_rx_packets;
 }
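
The rewritten node body above is the general shape of the new rx-queue infrastructure from <vnet/interface/rx_queue_funcs.h>: instead of walking vnet_device_input_runtime_t, an input node fetches the per-thread vector of queues it should service. A minimal sketch for a hypothetical driver; my_input_node and my_if_input () are stand-ins, while the poll-vector call and fields are those used above:

    /* Hypothetical per-queue receive routine of the driver. */
    static uword my_if_input (vlib_main_t *vm, vlib_node_runtime_t *node,
                              u32 dev_instance, u32 queue_id);

    /* Sketch of an input node built on the rx-queue poll-vector API. */
    VLIB_NODE_FN (my_input_node)
    (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
    {
      uword n_rx = 0;
      vnet_hw_if_rxq_poll_vector_t *pv, *pve;

      /* Queues this thread should service on this dispatch. */
      pv = vnet_hw_if_get_rxq_poll_vector (vm, node);

      vec_foreach (pve, pv)
        n_rx += my_if_input (vm, node, pve->dev_instance, pve->queue_id);

      return n_rx;
    }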