Revert "L2-input: use vlib_buffer_enqueue_to_next" 17/14817/2
author: John Lo <loj@cisco.com>
Thu, 13 Sep 2018 22:10:25 +0000 (22:10 +0000)
committer: Neale Ranns <nranns@cisco.com>
Fri, 14 Sep 2018 08:38:43 +0000 (08:38 +0000)
The patch did not improve single-input-link performance and
degraded performance with multiple input links.

This reverts commit b1232555e91b286feab5667b5a22f29aa8e96626.

Change-Id: Ib9336a2e0610088b9145a43cacbdadb52dabd522
Signed-off-by: John Lo <loj@cisco.com>
src/vnet/ethernet/packet.h
src/vnet/l2/l2_input.c

index 04ce420..d70960b 100644 (file)
@@ -64,20 +64,20 @@ typedef struct
 
 /* I/G bit: individual (unicast)/group (broadcast/multicast). */
 always_inline uword
-ethernet_address_cast (const u8 * a)
+ethernet_address_cast (u8 * a)
 {
   return (a[0] >> 0) & 1;
 }
 
 always_inline int
-ethernet_address_is_broadcast (const u8 * a)
+ethernet_address_is_broadcast (u8 * a)
 {
   return clib_mem_unaligned (a, u32) == 0xffffffff &&
     clib_mem_unaligned (a + 4, u16) == 0xffff;
 }
 
 always_inline uword
-ethernet_address_is_locally_administered (const u8 * a)
+ethernet_address_is_locally_administered (u8 * a)
 {
   return (a[0] >> 1) & 1;
 }
index fbf4a6b..7e41c88 100644 (file)
@@ -138,85 +138,9 @@ typedef enum
   L2INPUT_N_NEXT,
 } l2input_next_t;
 
-static_always_inline u32
-l2_input_classiy_unicast (vlib_buffer_t * b, const ethernet_header_t * h0)
-{
-  u32 feat_mask = ~0;
-  u8 *l3h0 = (u8 *) h0 + vnet_buffer (b)->l2.l2_len;
-
-#define get_u16(addr) ( *((u16 *)(addr)) )
-  u16 ethertype = clib_net_to_host_u16 (get_u16 (l3h0 - 2));
-  u8 protocol = ((ip6_header_t *) l3h0)->protocol;
-
-  /* Disable bridge forwarding (flooding will execute instead if not xconnect) */
-  feat_mask &= ~(L2INPUT_FEAT_FWD |
-                L2INPUT_FEAT_UU_FLOOD | L2INPUT_FEAT_GBP_FWD);
-
-  /* Disable ARP-term for non-ARP and non-ICMP6 packet */
-  if (ethertype != ETHERNET_TYPE_ARP &&
-      (ethertype != ETHERNET_TYPE_IP6 || protocol != IP_PROTOCOL_ICMP6))
-    feat_mask &= ~(L2INPUT_FEAT_ARP_TERM);
-
-  /*
-   * For packet from BVI - set SHG of ARP request or ICMPv6 neighbor
-   * solicitation packet from BVI to 0 so it can also flood to VXLAN
-   * tunnels or other ports with the same SHG as that of the BVI.
-   */
-  else if (PREDICT_FALSE (vnet_buffer (b)->sw_if_index[VLIB_TX] ==
-                         L2INPUT_BVI))
-    {
-      if (ethertype == ETHERNET_TYPE_ARP)
-       {
-         ethernet_arp_header_t *arp0 = (ethernet_arp_header_t *) l3h0;
-         if (arp0->opcode ==
-             clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request))
-           vnet_buffer (b)->l2.shg = 0;
-       }
-      else                     /* must be ICMPv6 */
-       {
-         ip6_header_t *iph0 = (ip6_header_t *) l3h0;
-         icmp6_neighbor_solicitation_or_advertisement_header_t *ndh0;
-         ndh0 = ip6_next_header (iph0);
-         if (ndh0->icmp.type == ICMP6_neighbor_solicitation)
-           vnet_buffer (b)->l2.shg = 0;
-       }
-    }
-
-  return (feat_mask);
-}
 
 static_always_inline void
-l2_input_classify_bridge (l2_bridge_domain_t * bd_config,
-                         u16 bd_index,
-                         u32 sw_if_index, vlib_buffer_t * b, u32 * feat_mask)
-{
-  /* save BD ID for next feature graph nodes */
-  vnet_buffer (b)->l2.bd_index = bd_index;
-
-  /* Save bridge domain and interface seq_num */
-  /* *INDENT-OFF* */
-  l2fib_seq_num_t sn = {
-    .swif = *l2fib_swif_seq_num(sw_if_index),
-    .bd = bd_config->seq_num,
-  };
-  /* *INDENT-ON* */
-  vnet_buffer (b)->l2.l2fib_sn = sn.as_u16;;
-  vnet_buffer (b)->l2.bd_age = bd_config->mac_age;
-
-  /*
-   * Process bridge domain feature enables.
-   * To perform learning/flooding/forwarding, the corresponding bit
-   * must be enabled in both the input interface config and in the
-   * bridge domain config. In the bd_bitmap, bits for features other
-   * than learning/flooding/forwarding should always be set.
-   */
-  *feat_mask = *feat_mask & bd_config->feature_bitmap;
-}
-
-static_always_inline void
-classify_and_dispatch (l2input_main_t * msm,
-                      u16 n_left,
-                      u32 sw_if_index, vlib_buffer_t ** b, u16 * next)
+classify_and_dispatch (l2input_main_t * msm, vlib_buffer_t * b0, u32 * next0)
 {
   /*
    * Load L2 input feature struct
@@ -232,202 +156,119 @@ classify_and_dispatch (l2input_main_t * msm,
    *   set tx sw-if-handle
    */
 
-  l2_input_config_t *config;
+  u32 feat_mask = ~0;
+  u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+  ethernet_header_t *h0 = vlib_buffer_get_current (b0);
 
   /* Get config for the input interface */
-  config = vec_elt_at_index (msm->configs, sw_if_index);
+  l2_input_config_t *config = vec_elt_at_index (msm->configs, sw_if_index0);
 
-  while (n_left > 8)
-    {
-      const ethernet_header_t *h0, *h1, *h2, *h3;
-      u32 fm0, fm1, fm2, fm3;
-      u32 fb0, fb1, fb2, fb3;
+  /* Save split horizon group */
+  vnet_buffer (b0)->l2.shg = config->shg;
 
-      if (n_left >= 4)
-       {
-         vlib_prefetch_buffer_header (b[0], LOAD);
-         vlib_prefetch_buffer_header (b[1], LOAD);
-         vlib_prefetch_buffer_header (b[2], LOAD);
-         vlib_prefetch_buffer_header (b[3], LOAD);
-       }
+  /* determine layer2 kind for stat and mask */
+  if (PREDICT_FALSE (ethernet_address_cast (h0->dst_address)))
+    {
+      u8 *l3h0 = (u8 *) h0 + vnet_buffer (b0)->l2.l2_len;
 
-      fm0 = fm1 = fm2 = fm3 = ~0;
-      h0 = vlib_buffer_get_current (b[0]);
-      h1 = vlib_buffer_get_current (b[1]);
-      h2 = vlib_buffer_get_current (b[2]);
-      h3 = vlib_buffer_get_current (b[3]);
+#define get_u16(addr) ( *((u16 *)(addr)) )
+      u16 ethertype = clib_net_to_host_u16 (get_u16 (l3h0 - 2));
+      u8 protocol = ((ip6_header_t *) l3h0)->protocol;
 
-      /* Save split horizon group */
-      vnet_buffer (b[0])->l2.shg = config->shg;
-      vnet_buffer (b[1])->l2.shg = config->shg;
-      vnet_buffer (b[2])->l2.shg = config->shg;
-      vnet_buffer (b[3])->l2.shg = config->shg;
+      /* Disable bridge forwarding (flooding will execute instead if not xconnect) */
+      feat_mask &= ~(L2INPUT_FEAT_FWD |
+                    L2INPUT_FEAT_UU_FLOOD | L2INPUT_FEAT_GBP_FWD);
 
-      /* determine layer2 kind for stat and mask */
-      if (PREDICT_FALSE (ethernet_address_cast (h0->dst_address)))
-       {
-         fm0 = l2_input_classiy_unicast (b[0], h0);
-       }
-      else
-       {
-         if (PREDICT_FALSE (sw_if_index == L2INPUT_BVI))
-           vnet_buffer (b[0])->l2.shg = 0;
-       }
-      if (PREDICT_FALSE (ethernet_address_cast (h1->dst_address)))
-       {
-         fm1 = l2_input_classiy_unicast (b[1], h1);
-       }
-      else
-       {
-         if (PREDICT_FALSE (sw_if_index == L2INPUT_BVI))
-           vnet_buffer (b[1])->l2.shg = 0;
-       }
-      if (PREDICT_FALSE (ethernet_address_cast (h2->dst_address)))
-       {
-         fm2 = l2_input_classiy_unicast (b[2], h2);
-       }
-      else
-       {
-         if (PREDICT_FALSE (sw_if_index == L2INPUT_BVI))
-           vnet_buffer (b[1])->l2.shg = 0;
-       }
-      if (PREDICT_FALSE (ethernet_address_cast (h3->dst_address)))
-       {
-         fm3 = l2_input_classiy_unicast (b[3], h3);
-       }
-      else
-       {
-         if (PREDICT_FALSE (sw_if_index == L2INPUT_BVI))
-           vnet_buffer (b[1])->l2.shg = 0;
-       }
+      /* Disable ARP-term for non-ARP and non-ICMP6 packet */
+      if (ethertype != ETHERNET_TYPE_ARP &&
+         (ethertype != ETHERNET_TYPE_IP6 || protocol != IP_PROTOCOL_ICMP6))
+       feat_mask &= ~(L2INPUT_FEAT_ARP_TERM);
 
-      if (config->bridge)
-       {
-         /* Do bridge-domain processing */
-         l2_bridge_domain_t *bd_config;
-         u16 bd_index;
-
-         bd_index = config->bd_index;
-         bd_config = vec_elt_at_index (msm->bd_configs, bd_index);
-
-         l2_input_classify_bridge (bd_config, bd_index, sw_if_index,
-                                   b[0], &fm0);
-         l2_input_classify_bridge (bd_config, bd_index, sw_if_index,
-                                   b[1], &fm1);
-         l2_input_classify_bridge (bd_config, bd_index, sw_if_index,
-                                   b[2], &fm2);
-         l2_input_classify_bridge (bd_config, bd_index, sw_if_index,
-                                   b[3], &fm3);
-       }
-      else if (config->xconnect)
-       {
-         /* Set the output interface */
-         vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
-           config->output_sw_if_index;
-         vnet_buffer (b[1])->sw_if_index[VLIB_TX] =
-           config->output_sw_if_index;
-         vnet_buffer (b[2])->sw_if_index[VLIB_TX] =
-           config->output_sw_if_index;
-         vnet_buffer (b[3])->sw_if_index[VLIB_TX] =
-           config->output_sw_if_index;
-       }
-      else
+      /*
+       * For packet from BVI - set SHG of ARP request or ICMPv6 neighbor
+       * solicitation packet from BVI to 0 so it can also flood to VXLAN
+       * tunnels or other ports with the same SHG as that of the BVI.
+       */
+      else if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] ==
+                             L2INPUT_BVI))
        {
-         fm0 = L2INPUT_FEAT_DROP;
-         fm1 = L2INPUT_FEAT_DROP;
-         fm2 = L2INPUT_FEAT_DROP;
-         fm3 = L2INPUT_FEAT_DROP;
+         if (ethertype == ETHERNET_TYPE_ARP)
+           {
+             ethernet_arp_header_t *arp0 = (ethernet_arp_header_t *) l3h0;
+             if (arp0->opcode ==
+                 clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request))
+               vnet_buffer (b0)->l2.shg = 0;
+           }
+         else                  /* must be ICMPv6 */
+           {
+             ip6_header_t *iph0 = (ip6_header_t *) l3h0;
+             icmp6_neighbor_solicitation_or_advertisement_header_t *ndh0;
+             ndh0 = ip6_next_header (iph0);
+             if (ndh0->icmp.type == ICMP6_neighbor_solicitation)
+               vnet_buffer (b0)->l2.shg = 0;
+           }
        }
-
-      /* mask out features from bitmap using packet type and bd config */
-      fb0 = config->feature_bitmap & fm0;
-      fb1 = config->feature_bitmap & fm1;
-      fb2 = config->feature_bitmap & fm2;
-      fb3 = config->feature_bitmap & fm3;
-
-      /* save for next feature graph nodes */
-      vnet_buffer (b[0])->l2.feature_bitmap = fb0;
-      vnet_buffer (b[1])->l2.feature_bitmap = fb1;
-      vnet_buffer (b[2])->l2.feature_bitmap = fb2;
-      vnet_buffer (b[3])->l2.feature_bitmap = fb3;
-
-      /* Determine the next node */
-      *next = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
-                                              fb0);
-      next++;
-      *next = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
-                                              fb1);
-      next++;
-      *next = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
-                                              fb2);
-      next++;
-      *next = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
-                                              fb3);
-      next++;
-
-      b += 4;
-      n_left -= 4;
     }
-  while (n_left)
+  else
     {
-      const ethernet_header_t *h0;
-      u32 fm0, fb0;
-
-      fm0 = ~0;
-      h0 = vlib_buffer_get_current (b[0]);
+      /*
+       * For packet from BVI - set SHG of unicast packet from BVI to 0 so it
+       * is not dropped on output to VXLAN tunnels or other ports with the
+       * same SHG as that of the BVI.
+       */
+      if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] ==
+                        L2INPUT_BVI))
+       vnet_buffer (b0)->l2.shg = 0;
+    }
 
-      /* Save split horizon group */
-      vnet_buffer (b[0])->l2.shg = config->shg;
 
-      /* determine layer2 kind for stat and mask */
-      if (PREDICT_FALSE (ethernet_address_cast (h0->dst_address)))
-       {
-         fm0 = l2_input_classiy_unicast (b[0], h0);
-       }
-      else
-       {
-         /*
-          * For packet from BVI - set SHG of unicast packet from BVI to 0 so it
-          * is not dropped on output to VXLAN tunnels or other ports with the
-          * same SHG as that of the BVI.
-          */
-         if (PREDICT_FALSE (sw_if_index == L2INPUT_BVI))
-           vnet_buffer (b[0])->l2.shg = 0;
-       }
+  if (config->bridge)
+    {
+      /* Do bridge-domain processing */
+      u16 bd_index0 = config->bd_index;
+      /* save BD ID for next feature graph nodes */
+      vnet_buffer (b0)->l2.bd_index = bd_index0;
 
-      if (config->bridge)
-       {
-         /* Do bridge-domain processing */
-         u16 bd_index = config->bd_index;
-         l2_bridge_domain_t *bd_config =
-           vec_elt_at_index (msm->bd_configs, bd_index);
+      /* Get config for the bridge domain interface */
+      l2_bridge_domain_t *bd_config =
+       vec_elt_at_index (msm->bd_configs, bd_index0);
 
-         l2_input_classify_bridge (bd_config, bd_index, sw_if_index,
-                                   b[0], &fm0);
-       }
-      else if (config->xconnect)
-       {
-         /* Set the output interface */
-         vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
-           config->output_sw_if_index;
-       }
-      else
-       fm0 = L2INPUT_FEAT_DROP;
+      /* Save bridge domain and interface seq_num */
+      /* *INDENT-OFF* */
+      l2fib_seq_num_t sn = {
+        .swif = *l2fib_swif_seq_num(sw_if_index0),
+       .bd = bd_config->seq_num,
+      };
+      /* *INDENT-ON* */
+      vnet_buffer (b0)->l2.l2fib_sn = sn.as_u16;;
+      vnet_buffer (b0)->l2.bd_age = bd_config->mac_age;
 
-      /* mask out features from bitmap using packet type and bd config */
-      fb0 = config->feature_bitmap & fm0;
+      /*
+       * Process bridge domain feature enables.
+       * To perform learning/flooding/forwarding, the corresponding bit
+       * must be enabled in both the input interface config and in the
+       * bridge domain config. In the bd_bitmap, bits for features other
+       * than learning/flooding/forwarding should always be set.
+       */
+      feat_mask = feat_mask & bd_config->feature_bitmap;
+    }
+  else if (config->xconnect)
+    {
+      /* Set the output interface */
+      vnet_buffer (b0)->sw_if_index[VLIB_TX] = config->output_sw_if_index;
+    }
+  else
+    feat_mask = L2INPUT_FEAT_DROP;
 
-      /* save for next feature graph nodes */
-      vnet_buffer (b[0])->l2.feature_bitmap = fb0;
+  /* mask out features from bitmap using packet type and bd config */
+  u32 feature_bitmap = config->feature_bitmap & feat_mask;
 
-      /* Determine the next node */
-      *next = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
-                                              fb0);
+  /* save for next feature graph nodes */
+  vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap;
 
-      next += 1;
-      b += 1;
-      n_left -= 1;
-    }
+  /* Determine the next node */
+  *next0 = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
+                                           feature_bitmap);
 }
 
 static_always_inline uword
@@ -435,88 +276,169 @@ l2input_node_inline (vlib_main_t * vm,
                     vlib_node_runtime_t * node, vlib_frame_t * frame,
                     int do_trace)
 {
-  u32 n_left, *from;
+  u32 n_left_from, *from, *to_next;
+  l2input_next_t next_index;
   l2input_main_t *msm = &l2input_main;
-  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
-  u16 nexts[VLIB_FRAME_SIZE];
-  u32 sw_if_indices[VLIB_FRAME_SIZE], *sw_if_index;
 
   from = vlib_frame_vector_args (frame);
-  n_left = frame->n_vectors;   /* number of packets to process */
-  vlib_get_buffers (vm, from, bufs, n_left);
-  b = bufs;
-  sw_if_index = sw_if_indices;
+  n_left_from = frame->n_vectors;      /* number of packets to process */
+  next_index = node->cached_next_index;
 
-  /* extract data from buffer metadata */
-  while (n_left >= 8)
-    {
-      /* Prefetch the buffer header for the N+2 loop iteration */
-      vlib_prefetch_buffer_header (b[4], LOAD);
-      vlib_prefetch_buffer_header (b[5], LOAD);
-      vlib_prefetch_buffer_header (b[6], LOAD);
-      vlib_prefetch_buffer_header (b[7], LOAD);
-
-      sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
-      sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_RX];
-      sw_if_index[2] = vnet_buffer (b[2])->sw_if_index[VLIB_RX];
-      sw_if_index[3] = vnet_buffer (b[3])->sw_if_index[VLIB_RX];
-
-      /* next */
-      sw_if_index += 4;
-      n_left -= 4;
-      b += 4;
-    }
-  while (n_left)
+  while (n_left_from > 0)
     {
-      sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
-
-      /* next */
-      sw_if_index += 1;
-      n_left -= 1;
-      b += 1;
-    }
+      u32 n_left_to_next;
 
-  n_left = frame->n_vectors;
+      /* get space to enqueue frame to graph node "next_index" */
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 
-  while (n_left)
-    {
-      u16 count, *next;
-      u16 off = frame->n_vectors - n_left;
-      b = bufs + off;
-
-      sw_if_index = sw_if_indices + off;
-      next = nexts + off;
-
-      count = clib_count_equal_u32 (sw_if_index, n_left);
-      n_left -= count;
+      while (n_left_from >= 8 && n_left_to_next >= 4)
+       {
+         u32 bi0, bi1, bi2, bi3;
+         vlib_buffer_t *b0, *b1, *b2, *b3;
+         u32 next0, next1, next2, next3;
+         u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
+
+         /* Prefetch next iteration. */
+         {
+           vlib_buffer_t *p4, *p5, *p6, *p7;
+
+           p4 = vlib_get_buffer (vm, from[4]);
+           p5 = vlib_get_buffer (vm, from[5]);
+           p6 = vlib_get_buffer (vm, from[6]);
+           p7 = vlib_get_buffer (vm, from[7]);
+
+           /* Prefetch the buffer header and packet for the N+2 loop iteration */
+           vlib_prefetch_buffer_header (p4, LOAD);
+           vlib_prefetch_buffer_header (p5, LOAD);
+           vlib_prefetch_buffer_header (p6, LOAD);
+           vlib_prefetch_buffer_header (p7, LOAD);
+
+           CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+           CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+           CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+           CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+
+           /*
+            * Don't bother prefetching the bridge-domain config (which
+            * depends on the input config above). Only a small number of
+            * bridge domains are expected. Plus the structure is small
+            * and several fit in a cache line.
+            */
+         }
+
+         /* speculatively enqueue b0 and b1 to the current next frame */
+         /* bi is "buffer index", b is pointer to the buffer */
+         to_next[0] = bi0 = from[0];
+         to_next[1] = bi1 = from[1];
+         to_next[2] = bi2 = from[2];
+         to_next[3] = bi3 = from[3];
+         from += 4;
+         to_next += 4;
+         n_left_from -= 4;
+         n_left_to_next -= 4;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         b1 = vlib_get_buffer (vm, bi1);
+         b2 = vlib_get_buffer (vm, bi2);
+         b3 = vlib_get_buffer (vm, bi3);
+
+         if (do_trace)
+           {
+             /* RX interface handles */
+             sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+             sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+             sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
+             sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
 
-      classify_and_dispatch (msm, count, sw_if_index[0], b, next);
-    }
+             if (b0->flags & VLIB_BUFFER_IS_TRACED)
+               {
+                 ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+                 l2input_trace_t *t =
+                   vlib_add_trace (vm, node, b0, sizeof (*t));
+                 t->sw_if_index = sw_if_index0;
+                 clib_memcpy (t->src, h0->src_address, 6);
+                 clib_memcpy (t->dst, h0->dst_address, 6);
+               }
+             if (b1->flags & VLIB_BUFFER_IS_TRACED)
+               {
+                 ethernet_header_t *h1 = vlib_buffer_get_current (b1);
+                 l2input_trace_t *t =
+                   vlib_add_trace (vm, node, b1, sizeof (*t));
+                 t->sw_if_index = sw_if_index1;
+                 clib_memcpy (t->src, h1->src_address, 6);
+                 clib_memcpy (t->dst, h1->dst_address, 6);
+               }
+             if (b2->flags & VLIB_BUFFER_IS_TRACED)
+               {
+                 ethernet_header_t *h2 = vlib_buffer_get_current (b2);
+                 l2input_trace_t *t =
+                   vlib_add_trace (vm, node, b2, sizeof (*t));
+                 t->sw_if_index = sw_if_index2;
+                 clib_memcpy (t->src, h2->src_address, 6);
+                 clib_memcpy (t->dst, h2->dst_address, 6);
+               }
+             if (b3->flags & VLIB_BUFFER_IS_TRACED)
+               {
+                 ethernet_header_t *h3 = vlib_buffer_get_current (b3);
+                 l2input_trace_t *t =
+                   vlib_add_trace (vm, node, b3, sizeof (*t));
+                 t->sw_if_index = sw_if_index3;
+                 clib_memcpy (t->src, h3->src_address, 6);
+                 clib_memcpy (t->dst, h3->dst_address, 6);
+               }
+           }
 
-  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
-    {
-      n_left = frame->n_vectors;       /* number of packets to process */
-      b = bufs;
+         classify_and_dispatch (msm, b0, &next0);
+         classify_and_dispatch (msm, b1, &next1);
+         classify_and_dispatch (msm, b2, &next2);
+         classify_and_dispatch (msm, b3, &next3);
+
+         /* verify speculative enqueues, maybe switch current next frame */
+         /* if next0==next1==next_index then nothing special needs to be done */
+         vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+                                          to_next, n_left_to_next,
+                                          bi0, bi1, bi2, bi3,
+                                          next0, next1, next2, next3);
+       }
 
-      while (n_left)
+      while (n_left_from > 0 && n_left_to_next > 0)
        {
-         if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+         u32 bi0;
+         vlib_buffer_t *b0;
+         u32 next0;
+         u32 sw_if_index0;
+
+         /* speculatively enqueue b0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+
+         if (do_trace && PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
            {
-             ethernet_header_t *h0 = vlib_buffer_get_current (b[0]);
-             l2input_trace_t *t =
-               vlib_add_trace (vm, node, b[0], sizeof (*t));
-
-             t->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+             ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+             l2input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+             sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+             t->sw_if_index = sw_if_index0;
              clib_memcpy (t->src, h0->src_address, 6);
              clib_memcpy (t->dst, h0->dst_address, 6);
            }
-         /* next */
-         n_left--;
-         b++;
+
+         classify_and_dispatch (msm, b0, &next0);
+
+         /* verify speculative enqueue, maybe switch current next frame */
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                          to_next, n_left_to_next,
+                                          bi0, next0);
        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
 
-  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
   vlib_node_increment_counter (vm, l2input_node.index,
                               L2INPUT_ERROR_L2INPUT, frame->n_vectors);