Add support for multiple microarchitectures in single binary
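
Build the hot graph-node functions in this file once per supported
microarchitecture so a single image can use newer ISA extensions where the
CPU provides them: handoff_dispatch_node_fn is registered through
VLIB_NODE_FUNCTION_MULTIARCH, and dpdk_input / dpdk_input_rss /
dpdk_input_efd get per-arch clones plus *_multiarch_select() helpers.
While touching these paths, also:

  * gate per-buffer trace handling behind vm->trace_main.trace_active_hint
  * replace open-coded rte_mbuf/vlib_buffer pointer arithmetic with the
    rte_mbuf_from_vlib_buffer() / vlib_buffer_from_rte_mbuf() helpers
  * use clib_memcpy() in the rx trace path
  * specialize dpdk_device_input() on a use_efd argument and add a
    dpdk_input_efd input variant
  * mix the IPv6 worker-selection hash key with rotate_left() to spread
    flows more evenly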
diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c
index 07273cb..ca94511 100644
--- a/vnet/vnet/devices/dpdk/node.c
+++ b/vnet/vnet/devices/dpdk/node.c
@@ -68,7 +68,7 @@ static u8 * format_handoff_dispatch_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   handoff_dispatch_trace_t * t = va_arg (*args, handoff_dispatch_trace_t *);
-  
+
   s = format (s, "HANDOFF_DISPATCH: sw_if_index %d next_index %d buffer 0x%x",
       t->sw_if_index,
       t->next_index,
@@ -154,25 +154,28 @@ handoff_dispatch_node_fn (vlib_main_t * vm,
           next0 = vnet_buffer(b0)->io_handoff.next_index;
           next1 = vnet_buffer(b1)->io_handoff.next_index;
 
-          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+          if (PREDICT_FALSE(vm->trace_main.trace_active_hint))
             {
-              vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
-              handoff_dispatch_trace_t *t =
-                vlib_add_trace (vm, node, b0, sizeof (*t));
-              sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
-              t->sw_if_index = sw_if_index0;
-              t->next_index = next0;
-              t->buffer_index = bi0;
-            }
-          if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
-            {
-              vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */ 0);
-              handoff_dispatch_trace_t *t =
-                vlib_add_trace (vm, node, b1, sizeof (*t));
-              sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
-              t->sw_if_index = sw_if_index1;
-              t->next_index = next1;
-              t->buffer_index = bi1;
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+              {
+                vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
+                handoff_dispatch_trace_t *t =
+                  vlib_add_trace (vm, node, b0, sizeof (*t));
+                sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+                t->sw_if_index = sw_if_index0;
+                t->next_index = next0;
+                t->buffer_index = bi0;
+              }
+            if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+              {
+                vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */ 0);
+                handoff_dispatch_trace_t *t =
+                  vlib_add_trace (vm, node, b1, sizeof (*t));
+                sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+                t->sw_if_index = sw_if_index1;
+                t->next_index = next1;
+                t->buffer_index = bi1;
+              }
             }
             
           /* verify speculative enqueues, maybe switch current next frame */
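
Note: the dual-buffer loop above (and the single-buffer loop in the next
hunk) now tests vm->trace_main.trace_active_hint once before looking at
per-buffer flags, so the common no-tracing case pays a single
predicted-not-taken branch instead of one VLIB_BUFFER_IS_TRACED test per
packet. A minimal sketch of the pattern, with trace_one() standing in
(hypothetically) for the add-trace code:

    if (PREDICT_FALSE (vm->trace_main.trace_active_hint))
      {
        if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
          trace_one (vm, node, next0, b0, bi0);   /* hypothetical helper */
        if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
          trace_one (vm, node, next1, b1, bi1);
      }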
@@ -200,16 +203,19 @@ handoff_dispatch_node_fn (vlib_main_t * vm,
 
           next0 = vnet_buffer(b0)->io_handoff.next_index;
 
-          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+          if (PREDICT_FALSE(vm->trace_main.trace_active_hint))
             {
-              vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
-              handoff_dispatch_trace_t *t =
-                vlib_add_trace (vm, node, b0, sizeof (*t));
-              sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
-              t->sw_if_index = sw_if_index0;
-              t->next_index = next0;
-              t->buffer_index = bi0;
-           }
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+              {
+                vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
+                handoff_dispatch_trace_t *t =
+                  vlib_add_trace (vm, node, b0, sizeof (*t));
+                sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+                t->sw_if_index = sw_if_index0;
+                t->next_index = next0;
+                t->buffer_index = bi0;
+              }
+            }
 
           /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
@@ -245,6 +251,8 @@ VLIB_REGISTER_NODE (handoff_dispatch_node) = {
   },
 };
 
+VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn)
+
 clib_error_t *handoff_dispatch_init (vlib_main_t *vm)
 {
   handoff_dispatch_main_t * mp = &handoff_dispatch_main;
@@ -268,53 +276,6 @@ static char * dpdk_error_strings[] = {
 #undef _
 };
 
-typedef struct {
-  u32 buffer_index;
-  u16 device_index;
-  u16 queue_index;
-  struct rte_mbuf mb;
-  vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */
-} dpdk_rx_dma_trace_t;
-
-static u8 * format_dpdk_rx_dma_trace (u8 * s, va_list * va)
-{
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
-  CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main();
-  dpdk_rx_dma_trace_t * t = va_arg (*va, dpdk_rx_dma_trace_t *);
-  dpdk_main_t * dm = &dpdk_main;
-  dpdk_device_t * xd = vec_elt_at_index (dm->devices, t->device_index);
-  format_function_t * f;
-  uword indent = format_get_indent (s);
-  vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
-
-  s = format (s, "%U rx queue %d",
-             format_vnet_sw_interface_name, vnm, sw,
-             t->queue_index);
-
-  s = format (s, "\n%Ubuffer 0x%x: %U",
-             format_white_space, indent,
-             t->buffer_index,
-             format_vlib_buffer, &t->buffer);
-
-#ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS
-  s = format (s, "\n%U%U",
-             format_white_space, indent,
-             format_dpdk_rx_rte_mbuf, &t->mb);
-#else
-  s = format (s, "\n%U%U",
-             format_white_space, indent,
-             format_dpdk_rte_mbuf, &t->mb);
-#endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */
-  f = node->format_buffer;
-  if (!f)
-    f = format_hex_bytes;
-  s = format (s, "\n%U%U", format_white_space, indent,
-             f, t->buffer.pre_data, sizeof (t->buffer.pre_data));
-
-  return s;
-}
-
 always_inline void
 dpdk_rx_next_and_error_from_mb_flags_x1 (dpdk_device_t *xd, struct rte_mbuf *mb,
                                          vlib_buffer_t *b0,
@@ -407,7 +368,7 @@ void dpdk_rx_trace (dpdk_main_t * dm,
       n_left -= 1;
 
       b0 = vlib_get_buffer (vm, bi0);
-      mb = ((struct rte_mbuf *)b0) - 1;
+      mb = rte_mbuf_from_vlib_buffer(b0);
       dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb, b0,
                                               &next0, &error0);
       vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
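
The removed cast, ((struct rte_mbuf *)b0) - 1, encoded the buffer layout
inline: each rte_mbuf header sits immediately in front of its vlib_buffer_t.
The new helpers name that relationship in one place. A sketch of what the
accessors presumably reduce to (illustrative; the real definitions live in
the dpdk device headers):

    /* Assumed layout: | struct rte_mbuf | vlib_buffer_t | packet data ... | */
    #define rte_mbuf_from_vlib_buffer(b)  (((struct rte_mbuf *) (b)) - 1)
    #define vlib_buffer_from_rte_mbuf(mb) ((vlib_buffer_t *) ((mb) + 1))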
@@ -416,9 +377,9 @@ void dpdk_rx_trace (dpdk_main_t * dm,
       t0->device_index = xd->device_index;
       t0->buffer_index = bi0;
 
-      memcpy (&t0->mb, mb, sizeof (t0->mb));
-      memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
-      memcpy (t0->buffer.pre_data, b0->data, sizeof (t0->buffer.pre_data));
+      clib_memcpy (&t0->mb, mb, sizeof (t0->mb));
+      clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
+      clib_memcpy (t0->buffer.pre_data, b0->data, sizeof (t0->buffer.pre_data));
 
 #ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS
       /*
@@ -529,7 +490,8 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm,
                                       dpdk_device_t * xd,
                                       vlib_node_runtime_t * node,
                                       u32 cpu_index,
-                                      u16 queue_id)
+                                      u16 queue_id,
+                                      int use_efd)
 {
   u32 n_buffers;
   u32 next_index = DPDK_RX_NEXT_ETHERNET_INPUT;
@@ -551,7 +513,7 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm,
   if (n_buffers == 0)
     {
       /* check if EFD (dpdk) is enabled */
-      if (PREDICT_FALSE(dm->efd.enabled))
+      if (PREDICT_FALSE(use_efd && dm->efd.enabled))
         {
           /* reset a few stats */
           xd->efd_agent.last_poll_time = 0;
@@ -587,7 +549,7 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm,
   /* Check for congestion if EFD (Early-Fast-Discard) is enabled
    * in any mode (e.g. dpdk, monitor, or drop_all)
    */
-  if (PREDICT_FALSE(dm->efd.enabled))
+  if (PREDICT_FALSE(use_efd && dm->efd.enabled))
     {
       /* update EFD counters */
       dpdk_efd_update_counters(xd, n_buffers, dm->efd.enabled);
@@ -643,20 +605,20 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm,
           if (PREDICT_TRUE(n_buffers > 2))
           {
               struct rte_mbuf *pfmb = xd->rx_vectors[queue_id][mb_index+2];
-              vlib_buffer_t *bp = (vlib_buffer_t *)(pfmb+1);
+              vlib_buffer_t *bp = vlib_buffer_from_rte_mbuf(pfmb);
               CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, STORE);
               CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE);
           }
 
           ASSERT(mb);
 
-          b0 = (vlib_buffer_t *)(mb+1);
+          b0 = vlib_buffer_from_rte_mbuf(mb);
 
           /* check whether EFD is looking for packets to discard */
           if (PREDICT_FALSE(efd_discard_burst))
             {
               vlib_thread_main_t * tm = vlib_get_thread_main();
-              
+
               if (PREDICT_TRUE(cntr_type = is_efd_discardable(tm, b0, mb)))
                 {
                   rte_pktmbuf_free(mb);
@@ -674,7 +636,7 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm,
           if (PREDICT_FALSE(mb->nb_segs > 1))
             {
               struct rte_mbuf *pfmb = mb->next;
-              vlib_buffer_t *bp = (vlib_buffer_t *)(pfmb+1);
+              vlib_buffer_t *bp = vlib_buffer_from_rte_mbuf(pfmb);
               CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD);
               CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE);
              b_chain = b0;
@@ -734,7 +696,7 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm,
            {
              ASSERT(mb_seg != 0);
 
-             b_seg = (vlib_buffer_t *)(mb_seg+1);
+             b_seg = vlib_buffer_from_rte_mbuf(mb_seg);
              vlib_buffer_init_for_free_list (b_seg, fl);
               b_seg->clone_count = 0;
 
@@ -834,7 +796,7 @@ dpdk_input (vlib_main_t * vm,
     {
       xd = vec_elt_at_index(dm->devices, dq->device);
       ASSERT(dq->queue_id == 0);
-      n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0);
+      n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0, 0);
     }
 
   VIRL_SPEED_LIMIT()
@@ -859,7 +821,32 @@ dpdk_input_rss (vlib_main_t * vm,
   vec_foreach (dq, dm->devices_by_cpu[cpu_index])
     {
       xd = vec_elt_at_index(dm->devices, dq->device);
-      n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id);
+      n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 0);
+    }
+
+  VIRL_SPEED_LIMIT()
+
+  return n_rx_packets;
+}
+
+uword
+dpdk_input_efd (vlib_main_t * vm,
+      vlib_node_runtime_t * node,
+      vlib_frame_t * f)
+{
+  dpdk_main_t * dm = &dpdk_main;
+  dpdk_device_t * xd;
+  uword n_rx_packets = 0;
+  dpdk_device_and_queue_t * dq;
+  u32 cpu_index = os_get_cpu_number();
+
+  /*
+   * Poll all devices on this cpu for input/interrupts.
+   */
+  vec_foreach (dq, dm->devices_by_cpu[cpu_index])
+    {
+      xd = vec_elt_at_index(dm->devices, dq->device);
+      n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 1);
     }
 
   VIRL_SPEED_LIMIT()
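
dpdk_device_input() is static inline, so each caller passing a literal
use_efd gets its own specialization: the EFD branches fold away in
dpdk_input() and dpdk_input_rss(), and stay live in dpdk_input_efd(). An
illustrative, self-contained example of the pattern (not VPP code):

    /* The inline worker takes a compile-time constant flag; each thin
     * wrapper passes a literal, so the dead branch disappears from that
     * wrapper's instantiation. */
    static inline unsigned
    device_input (unsigned n_pkts, int use_efd)
    {
      if (use_efd && n_pkts > 64)   /* folded away when use_efd == 0 */
        n_pkts = 64;                /* stand-in for the EFD discard logic */
      return n_pkts;
    }

    unsigned input_plain (unsigned n) { return device_input (n, 0); }
    unsigned input_efd   (unsigned n) { return device_input (n, 1); }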
@@ -867,6 +854,7 @@ dpdk_input_rss (vlib_main_t * vm,
   return n_rx_packets;
 }
 
+
 VLIB_REGISTER_NODE (dpdk_input_node) = {
   .function = dpdk_input,
   .type = VLIB_NODE_TYPE_INPUT,
@@ -891,6 +879,17 @@ VLIB_REGISTER_NODE (dpdk_input_node) = {
   },
 };
 
+
+/* build per-microarchitecture clones of the dpdk input functions */
+VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input)
+VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_rss)
+VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_efd)
+
+/* these macros define dpdk_input_multiarch_select() and friends */
+CLIB_MULTIARCH_SELECT_FN(dpdk_input);
+CLIB_MULTIARCH_SELECT_FN(dpdk_input_rss);
+CLIB_MULTIARCH_SELECT_FN(dpdk_input_efd);
+
 /*
  * Override the next nodes for the dpdk input nodes.
  * Must be invoked prior to VLIB_INIT_FUNCTION calls.
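
This hunk is the heart of the change: each input function is also emitted as
per-microarchitecture clones, and CLIB_MULTIARCH_SELECT_FN generates a
<fn>_multiarch_select() helper that picks a clone at runtime. The exact
expansion lives in the vppinfra/vlib headers; the assumed shape, sketched
with hypothetical names and a GCC CPU-feature check:

    /* Illustrative only; not the real macro expansion. */
    typedef unsigned long (my_node_fn_t) (void *vm, void *node, void *frame);

    my_node_fn_t my_node_fn;        /* baseline variant                     */
    my_node_fn_t my_node_fn_avx2;   /* hypothetical clone built with -mavx2 */

    static my_node_fn_t *
    my_node_fn_multiarch_select (void)
    {
      return __builtin_cpu_supports ("avx2") ? my_node_fn_avx2 : my_node_fn;
    }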
@@ -1004,9 +1003,9 @@ static inline u64 ipv6_get_key (ip6_header_t *ip)
    u64  hash_key;
 
    hash_key = ip->src_address.as_u64[0] ^
-              ip->src_address.as_u64[1] ^
-              ip->dst_address.as_u64[0] ^
-              ip->dst_address.as_u64[1] ^
+              rotate_left(ip->src_address.as_u64[1],13) ^
+              rotate_left(ip->dst_address.as_u64[0],26) ^
+              rotate_left(ip->dst_address.as_u64[1],39) ^
               ip->protocol;
 
    return hash_key;
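
Rationale for the rotations: a plain XOR of the four address words is
symmetric, so words that happen to match cancel out; with src == dst the key
degenerates to the protocol value and every such flow lands on the same
worker. Rotating each word by a different amount keeps the bits from lining
up. A small, self-contained illustration (hypothetical addresses, rot64()
standing in for rotate_left()):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t rot64 (uint64_t x, int n)
    {
      return (x << n) | (x >> (64 - n));
    }

    int main (void)
    {
      /* src and dst deliberately identical to show the cancellation */
      uint64_t src[2] = { 0x20010db800000001ULL, 0x00000000000000aaULL };
      uint64_t dst[2] = { 0x20010db800000001ULL, 0x00000000000000aaULL };
      uint64_t proto  = 6;

      uint64_t plain = src[0] ^ src[1] ^ dst[0] ^ dst[1] ^ proto;
      uint64_t mixed = src[0] ^ rot64 (src[1], 13) ^ rot64 (dst[0], 26)
                              ^ rot64 (dst[1], 39) ^ proto;

      /* plain collapses to the protocol value; mixed still depends on the
       * addresses */
      printf ("plain = 0x%016llx\nmixed = 0x%016llx\n",
              (unsigned long long) plain, (unsigned long long) mixed);
      return 0;
    }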
@@ -1142,7 +1141,7 @@ void dpdk_io_thread (vlib_worker_thread_t * w,
   u32 num_devices = 0;
   uword * p;
   u16 queue_id = 0;
-  vlib_node_runtime_t * node_trace;
+  vlib_node_runtime_t * node_trace = 0;
   u32 first_worker_index = 0;
   u32 buffer_flags_template;
   
@@ -1186,9 +1185,6 @@ void dpdk_io_thread (vlib_worker_thread_t * w,
                            first_worker_index + num_workers - 1,
                            (vlib_frame_queue_t *)(~0));
 
-  /* packet tracing is triggered on the dpdk-input node for ease-of-use */
-  node_trace = vlib_node_get_runtime (vm, dpdk_input_node.index);
-
   buffer_flags_template = dm->buffer_flags_template;
 
   /* And handle them... */
@@ -1245,8 +1241,19 @@ void dpdk_io_thread (vlib_worker_thread_t * w,
               continue;
             }
 
-          vec_reset_length (xd->d_trace_buffers);
-          trace_cnt = n_trace = vlib_get_trace_count (vm, node_trace);
+          trace_cnt = n_trace = 0;
+          if (PREDICT_FALSE(vm->trace_main.trace_active_hint))
+            {
+              /*
+               * packet tracing is triggered on the dpdk-input node for
+               * ease-of-use. Re-fetch the node_runtime for dpdk-input
+               * in case it has changed.
+               */
+              node_trace = vlib_node_get_runtime (vm, dpdk_input_node.index);
+
+              vec_reset_length (xd->d_trace_buffers);
+              trace_cnt = n_trace = vlib_get_trace_count (vm, node_trace);
+            }
         
           /*
            * DAW-FIXME: VMXNET3 device stop/start doesn't work, 
@@ -1321,13 +1328,13 @@ void dpdk_io_thread (vlib_worker_thread_t * w,
               if (PREDICT_TRUE(n_buffers > 1))
                 {
                   struct rte_mbuf *pfmb = xd->rx_vectors[queue_id][mb_index+2];
-                  vlib_buffer_t *bp = (vlib_buffer_t *)(pfmb+1);
+                  vlib_buffer_t *bp = vlib_buffer_from_rte_mbuf(pfmb);
                   CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD);
                   CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE);
                   CLIB_PREFETCH (bp->data, CLIB_CACHE_LINE_BYTES, LOAD);
                 }
                 
-              b0 = (vlib_buffer_t *)(mb+1);
+              b0 = vlib_buffer_from_rte_mbuf(mb);
 
               /* check whether EFD is looking for packets to discard */
               if (PREDICT_FALSE(efd_discard_burst))
@@ -1351,7 +1358,7 @@ void dpdk_io_thread (vlib_worker_thread_t * w,
               if (PREDICT_FALSE(mb->nb_segs > 1))
                 {
                   struct rte_mbuf *pfmb = mb->next;
-                  vlib_buffer_t *bp = (vlib_buffer_t *)(pfmb+1);
+                  vlib_buffer_t *bp = vlib_buffer_from_rte_mbuf(pfmb);
                   CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD);
                   CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE);
                   b_chain = b0;
@@ -1402,7 +1409,7 @@ void dpdk_io_thread (vlib_worker_thread_t * w,
                 {
                   ASSERT(mb_seg != 0);
  
-                  b_seg = (vlib_buffer_t *)(mb_seg+1);
+                  b_seg = vlib_buffer_from_rte_mbuf(mb_seg);
                   vlib_buffer_init_for_free_list (b_seg, fl);
                   b_seg->clone_count = 0;
  
@@ -1722,14 +1729,14 @@ dpdk_io_input (vlib_main_t * vm,
           if (PREDICT_TRUE(n_buffers > 1))
             {
               struct rte_mbuf *pfmb = xd->rx_vectors[queue_id][mb_index+2];
-              vlib_buffer_t *bp = (vlib_buffer_t *)(pfmb+1);
+              vlib_buffer_t *bp = vlib_buffer_from_rte_mbuf(pfmb);
               CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD);
               CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE);
               CLIB_PREFETCH (bp->data, CLIB_CACHE_LINE_BYTES, LOAD);
             }
-                
-          b0 = (vlib_buffer_t *)(mb+1);
-                
+
+          b0 = vlib_buffer_from_rte_mbuf(mb);
+
           /* check whether EFD is looking for packets to discard */
           if (PREDICT_FALSE(efd_discard_burst))
             {
@@ -1752,7 +1759,7 @@ dpdk_io_input (vlib_main_t * vm,
           if (PREDICT_FALSE(mb->nb_segs > 1))
             {
               struct rte_mbuf *pfmb = mb->next;
-              vlib_buffer_t *bp = (vlib_buffer_t *)(pfmb+1);
+              vlib_buffer_t *bp = vlib_buffer_from_rte_mbuf(pfmb);
               CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD);
               CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE);
               b_chain = b0;
@@ -1803,7 +1810,7 @@ dpdk_io_input (vlib_main_t * vm,
             {
               ASSERT(mb_seg != 0);
  
-              b_seg = (vlib_buffer_t *)(mb_seg+1);
+              b_seg = vlib_buffer_from_rte_mbuf(mb_seg);
               vlib_buffer_init_for_free_list (b_seg, fl);
               b_seg->clone_count = 0;