misc: remove GNU Indent directives
[vpp.git] / src / vnet / pg / input.c
index ee6aad4..f81485d 100644 (file)
  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+  /*
+   * To be honest, the packet generator needs an extreme
+   * makeover. Two key assumptions which drove the current implementation
+   * are no longer true: first, that buffer managers implement a
+   * post-TX recycle list; second, that packet generator performance
+   * is of first-order importance.
+   */
+
 #include <vlib/vlib.h>
 #include <vnet/pg/pg.h>
 #include <vnet/vnet.h>
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/feature/feature.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp_packet.h>
 #include <vnet/devices/devices.h>
+#include <vnet/gso/gro_func.h>
 
 static int
 validate_buffer_data2 (vlib_buffer_t * b, pg_stream_t * s,
@@ -66,7 +78,7 @@ validate_buffer_data2 (vlib_buffer_t * b, pg_stream_t * s,
   if (i >= n_bytes)
     return 1;
 
-  clib_warning ("buffer %U", format_vnet_buffer, b);
+  clib_warning ("buffer %U", format_vnet_buffer_no_chain, b);
   clib_warning ("differ at index %d", i);
   clib_warning ("is     %U", format_hex_bytes, bd, n_bytes);
   clib_warning ("mask   %U", format_hex_bytes, pm, n_bytes);
@@ -953,7 +965,7 @@ pg_generate_fix_multi_buffer_lengths (pg_main_t * pg,
   if (vec_len (unused_buffers) > 0)
     {
       vlib_buffer_free_no_next (vm, unused_buffers, vec_len (unused_buffers));
-      _vec_len (unused_buffers) = 0;
+      vec_set_len (unused_buffers, 0);
     }
 }
 
@@ -1054,49 +1066,6 @@ pg_set_next_buffer_pointers (pg_main_t * pg,
     }
 }
 
-static_always_inline void
-init_replay_buffers_inline (vlib_main_t * vm,
-                           pg_stream_t * s,
-                           u32 * buffers,
-                           u32 n_buffers, u32 data_offset, u32 n_data)
-{
-  u32 n_left, *b, i, l;
-
-  n_left = n_buffers;
-  b = buffers;
-  i = s->current_replay_packet_index;
-  l = vec_len (s->replay_packet_templates);
-
-  while (n_left >= 1)
-    {
-      u32 bi0, n0;
-      vlib_buffer_t *b0;
-      u8 *d0;
-
-      bi0 = b[0];
-      b += 1;
-      n_left -= 1;
-
-      b0 = vlib_get_buffer (vm, bi0);
-
-      vnet_buffer (b0)->sw_if_index[VLIB_RX] = s->sw_if_index[VLIB_RX];
-      /* was s->sw_if_index[VLIB_TX]; */
-      vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
-
-      d0 = vec_elt (s->replay_packet_templates, i);
-      vnet_buffer2 (b0)->pg_replay_timestamp = s->replay_packet_timestamps[i];
-
-      n0 = n_data;
-      if (data_offset + n_data >= vec_len (d0))
-       n0 = vec_len (d0) > data_offset ? vec_len (d0) - data_offset : 0;
-
-      b0->current_length = n0;
-
-      clib_memcpy_fast (b0->data, d0 + data_offset, n0);
-      i = i + 1 == l ? 0 : i + 1;
-    }
-}
-
 static_always_inline void
 init_buffers_inline (vlib_main_t * vm,
                     pg_stream_t * s,
@@ -1106,9 +1075,7 @@ init_buffers_inline (vlib_main_t * vm,
   u32 n_left, *b;
   u8 *data, *mask;
 
-  if (vec_len (s->replay_packet_templates) > 0)
-    return init_replay_buffers_inline (vm, s, buffers, n_buffers, data_offset,
-                                      n_data);
+  ASSERT (s->replay_packet_templates == 0);
 
   data = s->fixed_packet_data + data_offset;
   mask = s->fixed_packet_data_mask + data_offset;
@@ -1140,12 +1107,14 @@ init_buffers_inline (vlib_main_t * vm,
 
       b0 = vlib_get_buffer (vm, bi0);
       b1 = vlib_get_buffer (vm, bi1);
+      b0->flags |= s->buffer_flags;
+      b1->flags |= s->buffer_flags;
 
       vnet_buffer (b0)->sw_if_index[VLIB_RX] =
        vnet_buffer (b1)->sw_if_index[VLIB_RX] = s->sw_if_index[VLIB_RX];
 
       vnet_buffer (b0)->sw_if_index[VLIB_TX] =
-       vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+       vnet_buffer (b1)->sw_if_index[VLIB_TX] = s->sw_if_index[VLIB_TX];
 
       if (set_data)
        {
@@ -1169,9 +1138,9 @@ init_buffers_inline (vlib_main_t * vm,
       n_left -= 1;
 
       b0 = vlib_get_buffer (vm, bi0);
+      b0->flags |= s->buffer_flags;
       vnet_buffer (b0)->sw_if_index[VLIB_RX] = s->sw_if_index[VLIB_RX];
-      /* s->sw_if_index[VLIB_TX]; */
-      vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+      vnet_buffer (b0)->sw_if_index[VLIB_TX] = s->sw_if_index[VLIB_TX];
 
       if (set_data)
        clib_memcpy_fast (b0->data, data, n_data);
@@ -1190,10 +1159,9 @@ pg_stream_fill_helper (pg_main_t * pg,
   uword is_start_of_packet = bi == s->buffer_indices;
   u32 n_allocated;
 
-  n_allocated = vlib_buffer_alloc_from_free_list (vm,
-                                                 buffers,
-                                                 n_alloc,
-                                                 bi->free_list_index);
+  ASSERT (vec_len (s->replay_packet_templates) == 0);
+
+  n_allocated = vlib_buffer_alloc (vm, buffers, n_alloc);
   if (n_allocated == 0)
     return 0;
 
@@ -1216,37 +1184,134 @@ pg_stream_fill_helper (pg_main_t * pg,
 
   if (is_start_of_packet)
     {
-      if (vec_len (s->replay_packet_templates) > 0)
+      pg_generate_set_lengths (pg, s, buffers, n_alloc);
+      if (vec_len (s->buffer_indices) > 1)
+       pg_generate_fix_multi_buffer_lengths (pg, s, buffers, n_alloc);
+
+      pg_generate_edit (pg, s, buffers, n_alloc);
+    }
+
+  return n_alloc;
+}
+
+/*
+ * Fill a pcap-replay stream with n_alloc packets.
+ *
+ * Packets are built from s->replay_packet_templates, starting at
+ * s->current_replay_packet_index and wrapping around at the end of the
+ * template vector. A template larger than the default buffer data size is
+ * spread over a chain of buffers. The head buffer index of every packet is
+ * added to the fifo of the stream's first buffer index, the RX interface
+ * counters are bumped and the replay index is advanced.
+ *
+ * Returns n_alloc on success. Returns 0 when n_alloc is 0, or when fewer
+ * buffers than required could be allocated (the partial allocation is
+ * freed and nothing is enqueued).
+ */
+static u32
+pg_stream_fill_replay (pg_main_t * pg, pg_stream_t * s, u32 n_alloc)
+{
+  pg_buffer_index_t *bi;
+  u32 n_left, i, l;
+  u32 buffer_alloc_request = 0;
+  u32 buffer_alloc_result;
+  u32 current_buffer_index;
+  u32 *buffers;
+  u64 n_bytes = 0;
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 buf_sz = vlib_buffer_get_default_data_size (vm);
+  vnet_interface_main_t *im = &vnm->interface_main;
+  vnet_sw_interface_t *si;
+
+  /*
+   * Nothing to generate. Bail out early: otherwise the request count
+   * stays 0 and "buffer_alloc_request - 1" below wraps around.
+   */
+  if (n_alloc == 0)
+    return 0;
+
+  buffers = pg->replay_buffers_by_thread[vm->thread_index];
+  vec_reset_length (buffers);
+  bi = s->buffer_indices;
+
+  n_left = n_alloc;
+  i = s->current_replay_packet_index;
+  l = vec_len (s->replay_packet_templates);
+
+  /* Figure out how many buffers we need */
+  while (n_left > 0)
+    {
+      u8 *d0;
+
+      d0 = vec_elt (s->replay_packet_templates, i);
+      buffer_alloc_request += (vec_len (d0) + (buf_sz - 1)) / buf_sz;
+
+      i = ((i + 1) == l) ? 0 : i + 1;
+      n_left--;
+    }
+
+  ASSERT (buffer_alloc_request > 0);
+  vec_validate (buffers, buffer_alloc_request - 1);
+
+  /* Allocate that many buffers */
+  buffer_alloc_result = vlib_buffer_alloc (vm, buffers, buffer_alloc_request);
+  if (buffer_alloc_result < buffer_alloc_request)
+    {
+      clib_warning ("alloc failure, got %d not %d", buffer_alloc_result,
+                   buffer_alloc_request);
+      vlib_buffer_free_no_next (vm, buffers, buffer_alloc_result);
+      /* vec_validate may have moved the vector: store it back */
+      pg->replay_buffers_by_thread[vm->thread_index] = buffers;
+      return 0;
+    }
+
+  /* Now go generate the buffers, and add them to the FIFO */
+  n_left = n_alloc;
+
+  current_buffer_index = 0;
+  i = s->current_replay_packet_index;
+  l = vec_len (s->replay_packet_templates);
+  while (n_left > 0)
+    {
+      u8 *d0;
+      int not_last;
+      u32 data_offset;
+      u32 bytes_to_copy, bytes_this_chunk;
+      u32 head_index;
+      vlib_buffer_t *b;
+
+      d0 = vec_elt (s->replay_packet_templates, i);
+      data_offset = 0;
+      bytes_to_copy = vec_len (d0);
+
+      /* Remember which slot of buffers[] holds this packet's head */
+      head_index = current_buffer_index;
+
+      /* Add head chunk to pg fifo */
+      clib_fifo_add1 (bi->buffer_fifo, buffers[current_buffer_index]);
+
+      /* Copy the data */
+      while (bytes_to_copy)
        {
-         vnet_main_t *vnm = vnet_get_main ();
-         vnet_interface_main_t *im = &vnm->interface_main;
-         vnet_sw_interface_t *si =
-           vnet_get_sw_interface (vnm, s->sw_if_index[VLIB_RX]);
-         u32 l = 0;
-         u32 i;
-         for (i = 0; i < n_alloc; i++)
-           l += vlib_buffer_index_length_in_chain (vm, buffers[i]);
-         vlib_increment_combined_counter (im->combined_sw_if_counters
-                                          + VNET_INTERFACE_COUNTER_RX,
-                                          vlib_get_thread_index (),
-                                          si->sw_if_index, n_alloc, l);
-         s->current_replay_packet_index += n_alloc;
-         s->current_replay_packet_index %=
-           vec_len (s->replay_packet_templates);
+         bytes_this_chunk = clib_min (bytes_to_copy, buf_sz);
+         ASSERT (current_buffer_index < vec_len (buffers));
+         b = vlib_get_buffer (vm, buffers[current_buffer_index]);
+         clib_memcpy_fast (b->data, d0 + data_offset, bytes_this_chunk);
+         vnet_buffer (b)->sw_if_index[VLIB_RX] = s->sw_if_index[VLIB_RX];
+         vnet_buffer (b)->sw_if_index[VLIB_TX] = s->sw_if_index[VLIB_TX];
+         b->flags = s->buffer_flags;
+         b->next_buffer = 0;
+         b->current_data = 0;
+         b->current_length = bytes_this_chunk;
+
+         /* More template data left: link to the next allocated buffer */
+         not_last = bytes_this_chunk < bytes_to_copy;
+         if (not_last)
+           {
+             ASSERT (current_buffer_index < (vec_len (buffers) - 1));
+             b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+             b->next_buffer = buffers[current_buffer_index + 1];
+           }
+         bytes_to_copy -= bytes_this_chunk;
+         data_offset += bytes_this_chunk;
+         current_buffer_index++;
        }
-      else
-       {
-         pg_generate_set_lengths (pg, s, buffers, n_alloc);
-         if (vec_len (s->buffer_indices) > 1)
-           pg_generate_fix_multi_buffer_lengths (pg, s, buffers, n_alloc);
 
-         pg_generate_edit (pg, s, buffers, n_alloc);
-       }
+      /*
+       * Account bytes per packet, starting from the packet's head buffer.
+       * Chained packets occupy several consecutive slots in buffers[], so
+       * slot k is not, in general, the head of packet k.
+       */
+      if (vec_len (d0) > 0)
+       n_bytes += vlib_buffer_index_length_in_chain (vm, buffers[head_index]);
+
+      i = ((i + 1) == l) ? 0 : i + 1;
+      n_left--;
     }
 
+  /* Update the interface counters */
+  si = vnet_get_sw_interface (vnm, s->sw_if_index[VLIB_RX]);
+  vlib_increment_combined_counter (im->combined_sw_if_counters
+                                  + VNET_INTERFACE_COUNTER_RX,
+                                  vlib_get_thread_index (),
+                                  si->sw_if_index, n_alloc, n_bytes);
+
+  s->current_replay_packet_index += n_alloc;
+  s->current_replay_packet_index %= vec_len (s->replay_packet_templates);
+
+  pg->replay_buffers_by_thread[vm->thread_index] = buffers;
   return n_alloc;
 }
 
+
 static u32
 pg_stream_fill (pg_main_t * pg, pg_stream_t * s, u32 n_buffers)
 {
@@ -1273,6 +1338,12 @@ pg_stream_fill (pg_main_t * pg, pg_stream_t * s, u32 n_buffers)
        n_alloc = 0;
     }
 
+  /*
+   * Handle pcap replay directly
+   */
+  if (s->replay_packet_templates)
+    return pg_stream_fill_replay (pg, s, n_alloc);
+
   /* All buffer fifos should have the same size. */
   if (CLIB_DEBUG > 0)
     {
@@ -1362,10 +1433,10 @@ format_pg_input_trace (u8 * s, va_list * va)
     s = format (s, "stream %d", t->stream_index);
 
   s = format (s, ", %d bytes", t->packet_length);
-  s = format (s, ", %d sw_if_index", t->sw_if_index);
+  s = format (s, ", sw_if_index %d", t->sw_if_index);
 
-  s = format (s, "\n%U%U",
-             format_white_space, indent, format_vnet_buffer, &t->buffer);
+  s = format (s, "\n%U%U", format_white_space, indent,
+             format_vnet_buffer_no_chain, &t->buffer);
 
   s = format (s, "\n%U", format_white_space, indent);
 
@@ -1383,18 +1454,17 @@ format_pg_input_trace (u8 * s, va_list * va)
   return s;
 }
 
-static void
+static int
 pg_input_trace (pg_main_t * pg,
-               vlib_node_runtime_t * node,
-               pg_stream_t * s, u32 * buffers, u32 n_buffers)
+               vlib_node_runtime_t * node, u32 stream_index, u32 next_index,
+               u32 * buffers, const u32 n_buffers, const u32 n_trace)
 {
   vlib_main_t *vm = vlib_get_main ();
-  u32 *b, n_left, stream_index, next_index;
+  u32 *b, n_left;
+  u32 n_trace0 = 0, n_trace1 = 0;
 
-  n_left = n_buffers;
+  n_left = clib_min (n_buffers, n_trace);
   b = buffers;
-  stream_index = s - pg->streams;
-  next_index = s->next_index;
 
   while (n_left >= 2)
     {
@@ -1410,8 +1480,10 @@ pg_input_trace (pg_main_t * pg,
       b0 = vlib_get_buffer (vm, bi0);
       b1 = vlib_get_buffer (vm, bi1);
 
-      vlib_trace_buffer (vm, node, next_index, b0, /* follow_chain */ 1);
-      vlib_trace_buffer (vm, node, next_index, b1, /* follow_chain */ 1);
+      n_trace0 +=
+       vlib_trace_buffer (vm, node, next_index, b0, /* follow_chain */ 1);
+      n_trace1 +=
+       vlib_trace_buffer (vm, node, next_index, b1, /* follow_chain */ 1);
 
       t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
       t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
@@ -1448,7 +1520,8 @@ pg_input_trace (pg_main_t * pg,
 
       b0 = vlib_get_buffer (vm, bi0);
 
-      vlib_trace_buffer (vm, node, next_index, b0, /* follow_chain */ 1);
+      n_trace0 +=
+       vlib_trace_buffer (vm, node, next_index, b0, /* follow_chain */ 1);
       t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
 
       t0->stream_index = stream_index;
@@ -1459,6 +1532,93 @@ pg_input_trace (pg_main_t * pg,
       clib_memcpy_fast (t0->buffer.pre_data, b0->data,
                        sizeof (t0->buffer.pre_data));
     }
+
+  return n_trace - n_trace0 - n_trace1;
+}
+
+/*
+ * Populate header offsets, IP version flags, checksum-offload flags and,
+ * when enabled, GSO metadata on each of the n_buffers packets in buffers[].
+ *
+ * Each packet is read from its current data pointer as an ethernet frame,
+ * optionally carrying one or two VLAN tags. buffer_oflags selects which
+ * checksum offloads (IP / TCP / UDP) may be requested on a packet;
+ * gso_enabled and gso_size control GSO marking of chained TCP packets.
+ */
+static_always_inline void
+fill_buffer_offload_flags (vlib_main_t *vm, u32 *buffers, u32 n_buffers,
+                          u32 buffer_oflags, int gso_enabled, u32 gso_size)
+{
+  const u32 hdr_offsets_valid = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+                               VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+                               VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
+  int idx;
+
+  for (idx = 0; idx < n_buffers; idx++)
+    {
+      vlib_buffer_t *buf = vlib_get_buffer (vm, buffers[idx]);
+      ethernet_header_t *eth =
+       (ethernet_header_t *) vlib_buffer_get_current (buf);
+      u16 l3_offset = sizeof (ethernet_header_t);
+      u16 etype = clib_net_to_host_u16 (eth->type);
+      vnet_buffer_oflags_t wanted = 0;
+      u8 proto = 0;
+
+      /* Step over the first VLAN tag, and over a second one if present */
+      if (ethernet_frame_is_tagged (etype))
+       {
+         ethernet_vlan_header_t *tag = (ethernet_vlan_header_t *) (eth + 1);
+
+         l3_offset += sizeof (*tag);
+         etype = clib_net_to_host_u16 (tag->type);
+         if (etype == ETHERNET_TYPE_VLAN)
+           {
+             tag += 1;
+             l3_offset += sizeof (*tag);
+             etype = clib_net_to_host_u16 (tag->type);
+           }
+       }
+
+      vnet_buffer (buf)->l2_hdr_offset = 0;
+      vnet_buffer (buf)->l3_hdr_offset = l3_offset;
+
+      if (PREDICT_TRUE (etype == ETHERNET_TYPE_IP4))
+       {
+         ip4_header_t *ip4 =
+           (ip4_header_t *) (vlib_buffer_get_current (buf) + l3_offset);
+
+         proto = ip4->protocol;
+         vnet_buffer (buf)->l4_hdr_offset =
+           l3_offset + ip4_header_bytes (ip4);
+         buf->flags |= VNET_BUFFER_F_IS_IP4 | hdr_offsets_valid;
+         if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+           wanted |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
+       }
+      else if (PREDICT_TRUE (etype == ETHERNET_TYPE_IP6))
+       {
+         ip6_header_t *ip6 =
+           (ip6_header_t *) (vlib_buffer_get_current (buf) + l3_offset);
+
+         /* FIXME IPv6 EH traversal */
+         proto = ip6->protocol;
+         vnet_buffer (buf)->l4_hdr_offset =
+           l3_offset + sizeof (ip6_header_t);
+         buf->flags |= VNET_BUFFER_F_IS_IP6 | hdr_offsets_valid;
+       }
+
+      switch (proto)
+       {
+       case IP_PROTOCOL_TCP:
+         if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
+           wanted |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
+
+         /* only set GSO flag for chained buffers */
+         if (gso_enabled && (buf->flags & VLIB_BUFFER_NEXT_PRESENT))
+           {
+             tcp_header_t *tcp;
+
+             buf->flags |= VNET_BUFFER_F_GSO;
+             tcp = (tcp_header_t *) (vlib_buffer_get_current (buf) +
+                                     vnet_buffer (buf)->l4_hdr_offset);
+             vnet_buffer2 (buf)->gso_l4_hdr_sz = tcp_header_bytes (tcp);
+             vnet_buffer2 (buf)->gso_size = gso_size;
+           }
+         break;
+
+       case IP_PROTOCOL_UDP:
+         if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
+           wanted |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+         break;
+
+       default:
+         break;
+       }
+
+      if (wanted)
+       vnet_buffer_offload_flags_set (buf, wanted);
+    }
 }
 
 static uword
@@ -1476,8 +1636,11 @@ pg_generate_packets (vlib_node_runtime_t * node,
   u8 feature_arc_index = fm->device_input_feature_arc_index;
   cm = &fm->feature_config_mains[feature_arc_index];
   u32 current_config_index = ~(u32) 0;
+  pg_interface_t *pi;
   int i;
 
+  pi = pool_elt_at_index (
+    pg->interfaces, pg->if_index_by_sw_if_index[s->sw_if_index[VLIB_RX]]);
   bi0 = s->buffer_indices;
 
   n_packets_in_fifo = pg_stream_fill (pg, s, n_packets_to_generate);
@@ -1493,6 +1656,13 @@ pg_generate_packets (vlib_node_runtime_t * node,
                            &next_index, 0);
     }
 
+  if (PREDICT_FALSE (pi->coalesce_enabled))
+    {
+      vnet_hw_if_tx_queue_t txq = { 0 };
+      vnet_gro_flow_table_schedule_node_on_dispatcher (vm, &txq,
+                                                      pi->flow_table);
+    }
+
   while (n_packets_to_generate > 0)
     {
       u32 *head, *start, *end;
@@ -1502,16 +1672,15 @@ pg_generate_packets (vlib_node_runtime_t * node,
          vlib_next_frame_t *nf;
          vlib_frame_t *f;
          ethernet_input_frame_t *ef;
-         pg_interface_t *pi;
          vlib_get_new_next_frame (vm, node, next_index, to_next, n_left);
          nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
-         f = vlib_get_frame (vm, nf->frame_index);
+         f = vlib_get_frame (vm, nf->frame);
          f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
 
          ef = vlib_frame_scalar_args (f);
-         pi = pool_elt_at_index (pg->interfaces, s->pg_if_index);
          ef->sw_if_index = pi->sw_if_index;
          ef->hw_if_index = pi->hw_if_index;
+         vlib_frame_no_append (f);
        }
       else
        vlib_get_next_frame (vm, node, next_index, to_next, n_left);
@@ -1525,16 +1694,23 @@ pg_generate_packets (vlib_node_runtime_t * node,
       head = clib_fifo_head (bi0->buffer_fifo);
 
       if (head + n_this_frame <= end)
-       vlib_copy_buffers (to_next, head, n_this_frame);
+       vlib_buffer_copy_indices (to_next, head, n_this_frame);
       else
        {
          u32 n = end - head;
-         vlib_copy_buffers (to_next + 0, head, n);
-         vlib_copy_buffers (to_next + n, start, n_this_frame - n);
+         vlib_buffer_copy_indices (to_next + 0, head, n);
+         vlib_buffer_copy_indices (to_next + n, start, n_this_frame - n);
        }
 
-      vec_foreach (bi, s->buffer_indices)
-       clib_fifo_advance_head (bi->buffer_fifo, n_this_frame);
+      if (s->replay_packet_templates == 0)
+       {
+         vec_foreach (bi, s->buffer_indices)
+           clib_fifo_advance_head (bi->buffer_fifo, n_this_frame);
+       }
+      else
+       {
+         clib_fifo_advance_head (bi0->buffer_fifo, n_this_frame);
+       }
 
       if (current_config_index != ~(u32) 0)
        for (i = 0; i < n_this_frame; i++)
@@ -1545,16 +1721,36 @@ pg_generate_packets (vlib_node_runtime_t * node,
            vnet_buffer (b)->feature_arc_index = feature_arc_index;
          }
 
+      if (pi->gso_enabled || (s->buffer_flags & VNET_BUFFER_F_OFFLOAD))
+       {
+         fill_buffer_offload_flags (vm, to_next, n_this_frame,
+                                    s->buffer_oflags, pi->gso_enabled,
+                                    pi->gso_size);
+       }
+
       n_trace = vlib_get_trace_count (vm, node);
-      if (n_trace > 0)
+      if (PREDICT_FALSE (n_trace > 0))
        {
-         u32 n = clib_min (n_trace, n_this_frame);
-         pg_input_trace (pg, node, s, to_next, n);
-         vlib_set_trace_count (vm, node, n_trace - n);
+         n_trace =
+           pg_input_trace (pg, node, s - pg->streams, next_index, to_next,
+                           n_this_frame, n_trace);
+         vlib_set_trace_count (vm, node, n_trace);
        }
       n_packets_to_generate -= n_this_frame;
       n_packets_generated += n_this_frame;
       n_left -= n_this_frame;
+      if (CLIB_DEBUG > 0)
+       {
+         int i;
+         vlib_buffer_t *b;
+
+         for (i = 0; i < n_this_frame; i++)
+           {
+             b = vlib_get_buffer (vm, to_next[i]);
+             ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0 ||
+                     b->current_length >= VLIB_BUFFER_MIN_CHAIN_SEG_SIZE);
+           }
+       }
       vlib_put_next_frame (vm, node, next_index, n_left);
     }
 
@@ -1598,8 +1794,8 @@ pg_input_stream (vlib_node_runtime_t * node, pg_main_t * pg, pg_stream_t * s)
     n_packets = s->n_packets_limit - s->n_packets_generated;
 
   /* Generate up to one frame's worth of packets. */
-  if (n_packets > VLIB_FRAME_SIZE)
-    n_packets = VLIB_FRAME_SIZE;
+  if (n_packets > s->n_max_frame)
+    n_packets = s->n_max_frame;
 
   if (n_packets > 0)
     n_packets = pg_generate_packets (node, pg, s, n_packets);
@@ -1620,19 +1816,17 @@ pg_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
   if (vlib_num_workers ())
     worker_index = vlib_get_current_worker_index ();
 
-  /* *INDENT-OFF* */
-  clib_bitmap_foreach (i, pg->enabled_streams[worker_index], ({
+  clib_bitmap_foreach (i, pg->enabled_streams[worker_index])  {
     pg_stream_t *s = vec_elt_at_index (pg->streams, i);
     n_packets += pg_input_stream (node, pg, s);
-  }));
-  /* *INDENT-ON* */
+  }
 
   return n_packets;
 }
 
-/* *INDENT-OFF* */
 VLIB_REGISTER_NODE (pg_input_node) = {
   .function = pg_input,
+  .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
   .name = "pg-input",
   .sibling_of = "device-input",
   .type = VLIB_NODE_TYPE_INPUT,
@@ -1642,7 +1836,120 @@ VLIB_REGISTER_NODE (pg_input_node) = {
   /* Input node will be left disabled until a stream is active. */
   .state = VLIB_NODE_STATE_DISABLED,
 };
-/* *INDENT-ON* */
+
+/*
+ * Feature node: destination MAC filter for packets received on pg
+ * interfaces.
+ *
+ * Every buffer starts out with next index 0. A frame whose destination
+ * address satisfies ethernet_address_cast () is handed to the next feature
+ * on the arc only when that address matches an entry of the receiving
+ * interface's allowed_mcast_macs vector.
+ *
+ * NOTE(review): frames whose destination does not satisfy
+ * ethernet_address_cast () are never passed on and keep next index 0 -
+ * confirm that this is intended.
+ *
+ * Returns the number of buffers in the frame.
+ */
+VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm,
+                                   vlib_node_runtime_t * node,
+                                   vlib_frame_t * frame)
+{
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+  u16 nexts[VLIB_FRAME_SIZE], *next;
+  pg_main_t *pg = &pg_main;
+  u32 n_left, *from;
+
+  from = vlib_frame_vector_args (frame);
+  n_left = frame->n_vectors;
+  next = nexts;
+
+  /* Default disposition for every buffer: next index 0 */
+  clib_memset_u16 (next, 0, VLIB_FRAME_SIZE);
+
+  vlib_get_buffers (vm, from, bufs, n_left);
+
+  while (n_left)
+    {
+      const ethernet_header_t *eth;
+      pg_interface_t *pi;
+      mac_address_t in;
+
+      pi = pool_elt_at_index (
+       pg->interfaces,
+       pg->if_index_by_sw_if_index[vnet_buffer (b[0])->sw_if_index[VLIB_RX]]);
+      eth = vlib_buffer_get_current (b[0]);
+
+      mac_address_from_bytes (&in, eth->dst_address);
+
+      if (PREDICT_FALSE (ethernet_address_cast (in.bytes)))
+       {
+         mac_address_t *allowed;
+         int is_allowed = 0;
+
+         /*
+          * Look the destination up in the allow list.
+          * mac_address_cmp () is assumed to follow the memcmp convention
+          * (0 means equal), so the search stops on the first entry that
+          * compares equal, not on the first one that differs.
+          */
+         vec_foreach (allowed, pi->allowed_mcast_macs)
+           {
+             if (0 == mac_address_cmp (allowed, &in))
+               {
+                 is_allowed = 1;
+                 break;
+               }
+           }
+
+         if (is_allowed)
+           vnet_feature_next_u16 (&next[0], b[0]);
+       }
+
+      b += 1;
+      next += 1;
+      n_left -= 1;
+    }
+
+  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
+  return (frame->n_vectors);
+}
+
+/* Graph node registration for the pg input MAC filter */
+VLIB_REGISTER_NODE (pg_input_mac_filter) = {
+  .name = "pg-input-mac-filter",
+  .format_trace = format_pg_input_trace,
+  .vector_size = sizeof (u32),
+  /* Single next node, at next index 0 */
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+  .n_next_nodes = 1,
+};
+/* Register the MAC filter node as a feature on the "device-input" arc */
+VNET_FEATURE_INIT (pg_input_mac_filter_feat, static) = {
+  .node_name = "pg-input-mac-filter",
+  .arc_name = "device-input",
+};
+
+/*
+ * CLI handler for "packet-generator mac-filter".
+ *
+ * Parses an interface and an optional enable/disable keyword (enable is
+ * the default) from one line of input, then enables or disables the
+ * "pg-input-mac-filter" feature on the "device-input" arc for that
+ * interface.
+ *
+ * Returns NULL on success (or when there is no line to parse), and an
+ * error when the input contains an unknown token or names no interface.
+ */
+static clib_error_t *
+pg_input_mac_filter_cfg (vlib_main_t * vm,
+                        unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  u32 sw_if_index = ~0;
+  int is_enable = 1;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%U",
+                   unformat_vnet_sw_interface,
+                   vnet_get_main (), &sw_if_index))
+       ;
+      else if (unformat (line_input, "%U",
+                        unformat_vlib_enable_disable, &is_enable))
+       ;
+      else
+       {
+         /*
+          * Build the message while line_input is still valid, then fall
+          * through to the common unformat_free () so the line buffer is
+          * released on the error path as well.
+          */
+         error = clib_error_create ("unknown input `%U'",
+                                    format_unformat_error, line_input);
+         break;
+       }
+    }
+  unformat_free (line_input);
+
+  if (error)
+    return error;
+
+  if (~0 == sw_if_index)
+    return clib_error_create ("specify interface");
+
+  vnet_feature_enable_disable ("device-input",
+                              "pg-input-mac-filter",
+                              sw_if_index, is_enable, 0, 0);
+
+  return NULL;
+}
+
+/* CLI command: turn the pg input MAC filter on or off for an interface */
+VLIB_CLI_COMMAND (enable_streams_cli, static) = {
+  .function = pg_input_mac_filter_cfg,
+  .path = "packet-generator mac-filter",
+  .short_help = "packet-generator mac-filter <INTERFACE> <on|off>",
+};
+
 
 /*
  * fd.io coding-style-patch-verification: ON