gso: packet coalesce library 09/28209/4
authorMohsin Kazmi <sykazmi@cisco.com>
Tue, 11 Aug 2020 13:00:44 +0000 (15:00 +0200)
committerBenoît Ganne <bganne@cisco.com>
Fri, 14 Aug 2020 09:38:58 +0000 (09:38 +0000)
Type: feature

Change-Id: Ia19d3611e596d9ec47509889b34e8fe793a0ccc3
Signed-off-by: Mohsin Kazmi <sykazmi@cisco.com>
15 files changed:
src/vat/api_format.c
src/vnet/CMakeLists.txt
src/vnet/gso/FEATURE.yaml
src/vnet/gso/gro.h [new file with mode: 0644]
src/vnet/gso/gro_func.h [new file with mode: 0644]
src/vnet/pg/cli.c
src/vnet/pg/input.c
src/vnet/pg/output.c
src/vnet/pg/pg.api
src/vnet/pg/pg.h
src/vnet/pg/pg_api.c
src/vnet/pg/stream.c
src/vpp/api/custom_dump.c
test/test_gro.py [new file with mode: 0644]
test/vpp_pg_interface.py

index cc0f5be..950096d 100644 (file)
@@ -5149,6 +5149,7 @@ _(flow_classify_set_interface_reply)                    \
 _(sw_interface_span_enable_disable_reply)               \
 _(pg_capture_reply)                                     \
 _(pg_enable_disable_reply)                              \
+_(pg_interface_enable_disable_coalesce_reply)           \
 _(ip_source_and_port_range_check_add_del_reply)         \
 _(ip_source_and_port_range_check_interface_add_del_reply)\
 _(delete_subif_reply)                                   \
@@ -5438,6 +5439,7 @@ _(GET_NEXT_INDEX_REPLY, get_next_index_reply)                           \
 _(PG_CREATE_INTERFACE_REPLY, pg_create_interface_reply)                 \
 _(PG_CAPTURE_REPLY, pg_capture_reply)                                   \
 _(PG_ENABLE_DISABLE_REPLY, pg_enable_disable_reply)                     \
+_(PG_INTERFACE_ENABLE_DISABLE_COALESCE_REPLY, pg_interface_enable_disable_coalesce_reply) \
 _(IP_SOURCE_AND_PORT_RANGE_CHECK_ADD_DEL_REPLY,                         \
  ip_source_and_port_range_check_add_del_reply)                          \
 _(IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL_REPLY,               \
@@ -18620,6 +18622,44 @@ api_pg_enable_disable (vat_main_t * vam)
   return ret;
 }
 
+/* VAT CLI handler: enable or disable GRO packet coalescing on a pg
+ * interface.  Accepts "<intf-name>" or "sw_if_index <nn>" plus an optional
+ * "disable" keyword; coalescing is enabled by default.
+ * Returns the API reply code, or -99 if no interface was specified. */
+int
+api_pg_interface_enable_disable_coalesce (vat_main_t * vam)
+{
+  unformat_input_t *input = vam->input;
+  vl_api_pg_interface_enable_disable_coalesce_t *mp;
+
+  u32 sw_if_index = ~0;
+  u8 enable = 1;
+  int ret;
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+       ;
+      else if (unformat (input, "sw_if_index %d", &sw_if_index))
+       ;
+      else if (unformat (input, "disable"))
+       enable = 0;
+      else
+       break;
+    }
+
+  if (sw_if_index == ~0)
+    {
+      errmsg ("Interface required but not specified");
+      return -99;
+    }
+
+  /* Construct the API message */
+  M (PG_INTERFACE_ENABLE_DISABLE_COALESCE, mp);
+  /* NOTE(review): the M macro normally fills in context; verify that
+   * overwriting it with 0 here does not break reply matching. */
+  mp->context = 0;
+  mp->coalesce_enabled = enable;
+  mp->sw_if_index = htonl (sw_if_index);
+
+  S (mp);
+  W (ret);
+  return ret;
+}
+
 int
 api_ip_source_and_port_range_check_add_del (vat_main_t * vam)
 {
@@ -20955,6 +20995,7 @@ _(get_next_index, "node-name <node-name> next-node-name <node-name>")   \
 _(pg_create_interface, "if_id <nn> [gso-enabled gso-size <size>]")      \
 _(pg_capture, "if_id <nnn> pcap <file_name> count <nnn> [disable]")     \
 _(pg_enable_disable, "[stream <id>] disable")                           \
+_(pg_interface_enable_disable_coalesce, "<intf> | sw_if_index <nn> enable | disable")  \
 _(ip_source_and_port_range_check_add_del,                               \
   "<ip-addr>/<mask> range <nn>-<nn> vrf <id>")                          \
 _(ip_source_and_port_range_check_interface_add_del,                     \
index 7757301..a7f97be 100644 (file)
@@ -1001,6 +1001,8 @@ list(APPEND VNET_SOURCES
 )
 
 list(APPEND VNET_HEADERS
+  gso/gro.h
+  gso/gro_func.h
   gso/hdr_offset_parser.h
   gso/gso.h
 )
index 79b506d..d3db0cc 100644 (file)
@@ -8,6 +8,8 @@ features:
   - GSO for IP-IP tunnel
   - GSO for IPSec tunnel  
   - Provide inline function to get header offsets
+  - Basic GRO support
+  - Flow table support
 description: "Generic Segmentation Offload"
 missing:
   - Thorough Testing, GRE, Geneve
diff --git a/src/vnet/gso/gro.h b/src/vnet/gso/gro.h
new file mode 100644 (file)
index 0000000..bfa5920
--- /dev/null
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_gro_h
+#define included_gro_h
+
+#include <vlib/vlib.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip46_address.h>
+
+#define GRO_FLOW_TABLE_MAX_SIZE 16
+#define GRO_FLOW_TABLE_FLUSH 1e-5
+#define GRO_FLOW_N_BUFFERS 64
+#define GRO_FLOW_TIMEOUT 1e-5  /* 10 micro-seconds */
+#define GRO_TO_VECTOR_SIZE(X)   (X + GRO_FLOW_TABLE_MAX_SIZE)
+
+typedef union
+{
+  struct
+  {
+    u32 sw_if_index[VLIB_N_RX_TX];
+    ip46_address_t src_address;
+    ip46_address_t dst_address;
+    u16 src_port;
+    u16 dst_port;
+  };
+
+  u64 flow_data[5];
+  u32 flow_data_u32;
+} gro_flow_key_t;
+
+typedef struct
+{
+  gro_flow_key_t flow_key;
+  f64 next_timeout_ts;
+  u32 last_ack_number;
+  u32 buffer_index;
+  u16 n_buffers;
+} gro_flow_t;
+
+typedef struct
+{
+  f64 timeout_ts;
+  u64 total_vectors;
+  u32 n_vectors;
+  u32 node_index;
+  u8 is_enable;
+  u8 is_l2;
+  u8 flow_table_size;
+  gro_flow_t gro_flow[GRO_FLOW_TABLE_MAX_SIZE];
+} gro_flow_table_t;
+
+/* Copy every word of a flow key (5 x u64 plus the trailing u32) into a
+ * flow entry. */
+static_always_inline void
+gro_flow_set_flow_key (gro_flow_t * to, gro_flow_key_t * from)
+{
+  to->flow_key.flow_data[0] = from->flow_data[0];
+  to->flow_key.flow_data[1] = from->flow_data[1];
+  to->flow_key.flow_data[2] = from->flow_data[2];
+  to->flow_key.flow_data[3] = from->flow_data[3];
+  to->flow_key.flow_data[4] = from->flow_data[4];
+  to->flow_key.flow_data_u32 = from->flow_data_u32;
+}
+
+/* Word-wise flow key comparison; returns 1 on a full match, 0 otherwise. */
+static_always_inline u8
+gro_flow_is_equal (gro_flow_key_t * first, gro_flow_key_t * second)
+{
+  if (first->flow_data[0] == second->flow_data[0] &&
+      first->flow_data[1] == second->flow_data[1] &&
+      first->flow_data[2] == second->flow_data[2] &&
+      first->flow_data[3] == second->flow_data[3] &&
+      first->flow_data[4] == second->flow_data[4] &&
+      first->flow_data_u32 == second->flow_data_u32)
+    return 1;
+
+  return 0;
+}
+
+/**
+ * Arm the per-flow flush timer.
+ * timeout_expire is expected to lie between 3 and 10 microseconds
+ * (3e-6 .. 1e-5 seconds).
+ */
+static_always_inline void
+gro_flow_set_timeout (vlib_main_t * vm, gro_flow_t * gro_flow,
+                     f64 timeout_expire)
+{
+  gro_flow->next_timeout_ts = vlib_time_now (vm) + timeout_expire;
+}
+
+/* Returns 1 if the flow's flush deadline has passed. */
+static_always_inline u8
+gro_flow_is_timeout (vlib_main_t * vm, gro_flow_t * gro_flow)
+{
+  if (gro_flow->next_timeout_ts < vlib_time_now (vm))
+    return 1;
+  return 0;
+}
+
+/* Account a packet against a flow.  Only the first (head) buffer index is
+ * remembered; subsequent packets are chained onto the head buffer by the
+ * caller (see gro_merge_buffers), so only the counter is bumped here. */
+static_always_inline void
+gro_flow_store_packet (gro_flow_t * gro_flow, u32 bi0)
+{
+  if (gro_flow->n_buffers == 0)
+    {
+      gro_flow->buffer_index = bi0;
+    }
+  gro_flow->n_buffers++;
+}
+
+/* Allocate and zero-initialize a flow table, record the dispatch node and
+ * L2/L3 mode, and mark it enabled.  Returns 1 on success; 0 when the table
+ * already exists or allocation fails. */
+static_always_inline u32
+gro_flow_table_init (gro_flow_table_t ** flow_table, u8 is_l2, u32 node_index)
+{
+  if (*flow_table)
+    return 0;
+
+  gro_flow_table_t *flow_table_temp = 0;
+  flow_table_temp =
+    (gro_flow_table_t *) clib_mem_alloc (sizeof (gro_flow_table_t));
+  if (!flow_table_temp)
+    return 0;
+  clib_memset (flow_table_temp, 0, sizeof (gro_flow_table_t));
+  flow_table_temp->node_index = node_index;
+  flow_table_temp->is_enable = 1;
+  flow_table_temp->is_l2 = is_l2;
+  *flow_table = flow_table_temp;
+  return 1;
+}
+
+/* Arm the table-wide flush timer (no-op on a null table). */
+static_always_inline void
+gro_flow_table_set_timeout (vlib_main_t * vm, gro_flow_table_t * flow_table,
+                           f64 timeout_expire)
+{
+  if (flow_table)
+    flow_table->timeout_ts = vlib_time_now (vm) + timeout_expire;
+}
+
+/* Returns 1 if the table-wide flush deadline has passed (0 for a null
+ * table). */
+static_always_inline u8
+gro_flow_table_is_timeout (vlib_main_t * vm, gro_flow_table_t * flow_table)
+{
+  if (flow_table && (flow_table->timeout_ts < vlib_time_now (vm)))
+    return 1;
+  return 0;
+}
+
+/* Returns the table's enable flag; 0 for a null table. */
+static_always_inline u8
+gro_flow_table_is_enable (gro_flow_table_t * flow_table)
+{
+  if (flow_table)
+    return flow_table->is_enable;
+
+  return 0;
+}
+
+/* Set or clear the table's enable flag (no-op on a null table). */
+static_always_inline void
+gro_flow_table_set_is_enable (gro_flow_table_t * flow_table, u8 is_enable)
+{
+  if (flow_table)
+    {
+      if (is_enable)
+       {
+         flow_table->is_enable = 1;
+       }
+      else
+       {
+         flow_table->is_enable = 0;
+       }
+    }
+}
+
+/* Release the flow table memory (safe to call with a null pointer). */
+static_always_inline void
+gro_flow_table_free (gro_flow_table_t * flow_table)
+{
+  if (flow_table)
+    clib_mem_free (flow_table);
+}
+
+/* Update the node that flushed buffers are handed to (no-op on null). */
+static_always_inline void
+gro_flow_table_set_node_index (gro_flow_table_t * flow_table, u32 node_index)
+{
+  if (flow_table)
+    flow_table->node_index = node_index;
+}
+
+/* Claim a free slot (identified by n_buffers == 0) via linear scan.
+ * Returns 0 when the table is full. */
+static_always_inline gro_flow_t *
+gro_flow_table_new_flow (gro_flow_table_t * flow_table)
+{
+  if (PREDICT_TRUE (flow_table->flow_table_size < GRO_FLOW_TABLE_MAX_SIZE))
+    {
+      gro_flow_t *gro_flow;
+      u32 i = 0;
+      while (i < GRO_FLOW_TABLE_MAX_SIZE)
+       {
+         gro_flow = &flow_table->gro_flow[i];
+         if (gro_flow->n_buffers == 0)
+           {
+             flow_table->flow_table_size++;
+             return gro_flow;
+           }
+         i++;
+       }
+    }
+
+  return (0);
+}
+
+/* Linear lookup of a flow by key; returns 0 when not found.
+ * NOTE(review): unused slots keep an all-zero key and are compared too, so
+ * a legitimately all-zero flow key would alias an empty slot -- confirm
+ * callers can never produce one. */
+static_always_inline gro_flow_t *
+gro_flow_table_get_flow (gro_flow_table_t * flow_table,
+                        gro_flow_key_t * flow_key)
+{
+  gro_flow_t *gro_flow = 0;
+  u32 i = 0;
+  while (i < GRO_FLOW_TABLE_MAX_SIZE)
+    {
+      gro_flow = &flow_table->gro_flow[i];
+      if (gro_flow_is_equal (flow_key, &gro_flow->flow_key))
+       return gro_flow;
+      i++;
+    }
+  return (0);
+}
+
+/* Return the existing flow for the key, or claim a fresh slot and stamp
+ * the key into it.  Returns 0 when the table is full. */
+static_always_inline gro_flow_t *
+gro_flow_table_find_or_add_flow (gro_flow_table_t * flow_table,
+                                gro_flow_key_t * flow_key)
+{
+  gro_flow_t *gro_flow = 0;
+
+  gro_flow = gro_flow_table_get_flow (flow_table, flow_key);
+  if (gro_flow)
+    return gro_flow;
+
+  gro_flow = gro_flow_table_new_flow (flow_table);
+
+  if (gro_flow)
+    {
+      gro_flow_set_flow_key (gro_flow, flow_key);
+      return gro_flow;
+    }
+
+  return (0);
+}
+
+/* Zero out a flow entry and return its slot to the free pool. */
+static_always_inline void
+gro_flow_table_reset_flow (gro_flow_table_t * flow_table,
+                          gro_flow_t * gro_flow)
+{
+  if (PREDICT_TRUE (flow_table->flow_table_size > 0))
+    {
+      clib_memset (gro_flow, 0, sizeof (gro_flow_t));
+      flow_table->flow_table_size--;
+    }
+}
+
+/* format() callback: print table occupancy, packet counters and the
+ * average number of packets coalesced per output vector. */
+static_always_inline u8 *
+gro_flow_table_format (u8 * s, va_list * args)
+{
+  gro_flow_table_t *flow_table = va_arg (*args, gro_flow_table_t *);
+
+  s =
+    format (s,
+           "flow-table: size %u gro-total-vectors %lu gro-n-vectors %u",
+           flow_table->flow_table_size, flow_table->total_vectors,
+           flow_table->n_vectors);
+  if (flow_table->n_vectors)
+    {
+      double average_rate =
+       (double) flow_table->total_vectors / (double) flow_table->n_vectors;
+      s = format (s, " gro-average-rate %.2f", average_rate);
+    }
+  else
+    s = format (s, " gro-average-rate 0.00");
+
+  return s;
+}
+#endif /* included_gro_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/gso/gro_func.h b/src/vnet/gso/gro_func.h
new file mode 100644 (file)
index 0000000..a410a65
--- /dev/null
@@ -0,0 +1,593 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_gro_func_h
+#define included_gro_func_h
+
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/gso/gro.h>
+#include <vnet/gso/hdr_offset_parser.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp.h>
+#include <vnet/vnet.h>
+
+/* A packet is unsuitable for GRO if it carries no TCP payload or if any
+ * TCP flag other than ACK is set.  (Plain bitwise AND here: the original
+ * used `flags &= ~TCP_FLAG_ACK`, a pointless mutation of the parameter
+ * copy inside the condition; behavior is identical.) */
+static_always_inline u8
+gro_is_bad_packet (vlib_buffer_t * b, u8 flags, i16 l234_sz)
+{
+  if (((b->current_length - l234_sz) <= 0) || ((flags & ~TCP_FLAG_ACK) != 0))
+    return 1;
+  return 0;
+}
+
+/* Build an IPv4 flow key from rx/tx sw_if_index, IP addresses and TCP
+ * ports (ports kept in network byte order).  is_l2 is currently unused. */
+static_always_inline void
+gro_get_ip4_flow_from_packet (u32 * sw_if_index,
+                             ip4_header_t * ip4, tcp_header_t * tcp,
+                             gro_flow_key_t * flow_key, int is_l2)
+{
+  flow_key->sw_if_index[VLIB_RX] = sw_if_index[VLIB_RX];
+  flow_key->sw_if_index[VLIB_TX] = sw_if_index[VLIB_TX];
+  ip46_address_set_ip4 (&flow_key->src_address, &ip4->src_address);
+  ip46_address_set_ip4 (&flow_key->dst_address, &ip4->dst_address);
+  flow_key->src_port = tcp->src_port;
+  flow_key->dst_port = tcp->dst_port;
+}
+
+/* Build an IPv6 flow key from rx/tx sw_if_index, IP addresses and TCP
+ * ports (ports kept in network byte order).  is_l2 is currently unused. */
+static_always_inline void
+gro_get_ip6_flow_from_packet (u32 * sw_if_index,
+                             ip6_header_t * ip6, tcp_header_t * tcp,
+                             gro_flow_key_t * flow_key, int is_l2)
+{
+  flow_key->sw_if_index[VLIB_RX] = sw_if_index[VLIB_RX];
+  flow_key->sw_if_index[VLIB_TX] = sw_if_index[VLIB_TX];
+  ip46_address_set_ip6 (&flow_key->src_address, &ip6->src_address);
+  ip46_address_set_ip6 (&flow_key->dst_address, &ip6->dst_address);
+  flow_key->src_port = tcp->src_port;
+  flow_key->dst_port = tcp->dst_port;
+}
+
+/* Classify a buffer as IPv4 or IPv6.  Trusts VNET_BUFFER_F_IS_IP4/6 when
+ * already set; otherwise parses the ethernet header (skipping up to two
+ * VLAN tags) in L2 mode, or the IP version nibble in L3 mode.  Returns the
+ * matching VNET_BUFFER_F_IS_IP* flag, or 0 for non-IP traffic. */
+static_always_inline u32
+gro_is_ip4_or_ip6_packet (vlib_buffer_t * b0, int is_l2)
+{
+  if (b0->flags & VNET_BUFFER_F_IS_IP4)
+    return VNET_BUFFER_F_IS_IP4;
+  if (b0->flags & VNET_BUFFER_F_IS_IP6)
+    return VNET_BUFFER_F_IS_IP6;
+  if (is_l2)
+    {
+      ethernet_header_t *eh =
+       (ethernet_header_t *) vlib_buffer_get_current (b0);
+      u16 ethertype = clib_net_to_host_u16 (eh->type);
+
+      if (ethernet_frame_is_tagged (ethertype))
+       {
+         ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eh + 1);
+
+         ethertype = clib_net_to_host_u16 (vlan->type);
+         if (ethertype == ETHERNET_TYPE_VLAN)
+           {
+             vlan++;
+             ethertype = clib_net_to_host_u16 (vlan->type);
+           }
+       }
+      if (ethertype == ETHERNET_TYPE_IP4)
+       return VNET_BUFFER_F_IS_IP4;
+      if (ethertype == ETHERNET_TYPE_IP6)
+       return VNET_BUFFER_F_IS_IP6;
+    }
+  else
+    {
+      /* L3 path: read the IP version nibble of the first byte */
+      if ((((u8 *) vlib_buffer_get_current (b0))[0] & 0xf0) == 0x40)
+       return VNET_BUFFER_F_IS_IP4;
+      if ((((u8 *) vlib_buffer_get_current (b0))[0] & 0xf0) == 0x60)
+       return VNET_BUFFER_F_IS_IP6;
+    }
+
+  return 0;
+}
+
+/* Disposition of an incoming packet relative to a stored one. */
+typedef enum
+{
+  GRO_PACKET_ACTION_NONE = 0,
+  GRO_PACKET_ACTION_ENQUEUE = 1,
+  GRO_PACKET_ACTION_FLUSH = 2,
+} gro_packet_action_t;
+
+/* Enqueue if tcp1 is the in-order successor of tcp0
+ * (seq0 + payload_len0 == seq1); anything else forces a flush. */
+static_always_inline gro_packet_action_t
+gro_tcp_sequence_check (tcp_header_t * tcp0, tcp_header_t * tcp1,
+                       u32 payload_len0)
+{
+  u32 next_tcp_seq0 = clib_net_to_host_u32 (tcp0->seq_number);
+  u32 next_tcp_seq1 = clib_net_to_host_u32 (tcp1->seq_number);
+
+  /* next packet, enqueue */
+  if (PREDICT_TRUE (next_tcp_seq0 + payload_len0 == next_tcp_seq1))
+    return GRO_PACKET_ACTION_ENQUEUE;
+  /* flush all packets */
+  else
+    return GRO_PACKET_ACTION_FLUSH;
+}
+
+/* Append b1's TCP payload to the tail of b0's buffer chain: b1's L2/L3/L4
+ * headers are stripped with vlib_buffer_advance, the chain tail is linked
+ * to bi1, and b0's total-length accounting is grown by payload_len1. */
+static_always_inline void
+gro_merge_buffers (vlib_main_t * vm, vlib_buffer_t * b0,
+                  vlib_buffer_t * b1, u32 bi1, u32 payload_len1,
+                  u16 l234_sz1)
+{
+  vlib_buffer_t *pb = b0;
+
+  /* first merge: start counting chained bytes from zero */
+  if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0))
+    b0->total_length_not_including_first_buffer = 0;
+
+  /* walk to the current tail of the chain */
+  while (pb->flags & VLIB_BUFFER_NEXT_PRESENT)
+    pb = vlib_get_buffer (vm, pb->next_buffer);
+
+  vlib_buffer_advance (b1, l234_sz1);
+  pb->flags |= VLIB_BUFFER_NEXT_PRESENT;
+  pb->next_buffer = bi1;
+  b0->total_length_not_including_first_buffer += payload_len1;
+  b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+}
+
+/* Parse b0 and, if it is GRO-eligible pure-ACK TCP, fill in gho0 and
+ * flow_key0 and return the chain length.  Returns 0 for anything that
+ * cannot be coalesced: non-IP, non-TCP, extra TCP flags/no payload, or a
+ * packet already at or above TCP_MAX_GSO_SZ. */
+static_always_inline u32
+gro_get_packet_data (vlib_main_t * vm, vlib_buffer_t * b0,
+                    generic_header_offset_t * gho0,
+                    gro_flow_key_t * flow_key0, int is_l2)
+{
+  ip4_header_t *ip4_0 = 0;
+  ip6_header_t *ip6_0 = 0;
+  tcp_header_t *tcp0 = 0;
+  u32 pkt_len0 = 0;
+  u16 l234_sz0 = 0;
+  u32 sw_if_index0[VLIB_N_RX_TX] = { ~0 };
+
+  u32 is_ip0 = gro_is_ip4_or_ip6_packet (b0, is_l2);
+
+  if (is_ip0 & VNET_BUFFER_F_IS_IP4)
+    vnet_generic_header_offset_parser (b0, gho0, is_l2, 1 /* is_ip4 */ ,
+                                      0 /* is_ip6 */ );
+  else if (is_ip0 & VNET_BUFFER_F_IS_IP6)
+    vnet_generic_header_offset_parser (b0, gho0, is_l2, 0 /* is_ip4 */ ,
+                                      1 /* is_ip6 */ );
+  else
+    return 0;
+
+  if (PREDICT_FALSE ((gho0->gho_flags & GHO_F_TCP) == 0))
+    return 0;
+
+  /* both aliases point at the L3 header; only the matching one is used */
+  ip4_0 =
+    (ip4_header_t *) (vlib_buffer_get_current (b0) + gho0->l3_hdr_offset);
+  ip6_0 =
+    (ip6_header_t *) (vlib_buffer_get_current (b0) + gho0->l3_hdr_offset);
+  tcp0 =
+    (tcp_header_t *) (vlib_buffer_get_current (b0) + gho0->l4_hdr_offset);
+
+  l234_sz0 = gho0->hdr_sz;
+  if (PREDICT_FALSE (gro_is_bad_packet (b0, tcp0->flags, l234_sz0)))
+    return 0;
+
+  sw_if_index0[VLIB_RX] = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+  sw_if_index0[VLIB_TX] = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+  if (gho0->gho_flags & GHO_F_IP4)
+    {
+      gro_get_ip4_flow_from_packet (sw_if_index0, ip4_0, tcp0, flow_key0,
+                                   is_l2);
+    }
+  else if (gho0->gho_flags & GHO_F_IP6)
+    {
+      gro_get_ip6_flow_from_packet (sw_if_index0, ip6_0, tcp0, flow_key0,
+                                   is_l2);
+    }
+  else
+    return 0;
+
+  pkt_len0 = vlib_buffer_length_in_chain (vm, b0);
+  if (PREDICT_FALSE (pkt_len0 >= TCP_MAX_GSO_SZ))
+    return 0;
+
+  return pkt_len0;
+}
+
+/* Try to coalesce b1 into b0 (same TCP flow, in-sequence, pure-ACK).  On
+ * success the buffers are merged and b1's ACK number (network byte order)
+ * is returned so the caller can patch the merged header; 0 means "not
+ * coalesced".
+ * NOTE(review): a genuine ACK number of 0 is indistinguishable from the
+ * failure return -- the caller would then stop coalescing and skip the
+ * header fixup for an already-merged chain; confirm this is acceptable. */
+static_always_inline u32
+gro_coalesce_buffers (vlib_main_t * vm, vlib_buffer_t * b0,
+                     vlib_buffer_t * b1, u32 bi1, int is_l2)
+{
+  generic_header_offset_t gho0 = { 0 };
+  generic_header_offset_t gho1 = { 0 };
+  gro_flow_key_t flow_key0, flow_key1;
+  ip4_header_t *ip4_0, *ip4_1;
+  ip6_header_t *ip6_0, *ip6_1;
+  tcp_header_t *tcp0, *tcp1;
+  u16 l234_sz0, l234_sz1;
+  u32 pkt_len0, pkt_len1, payload_len0, payload_len1;
+  u32 sw_if_index0[VLIB_N_RX_TX] = { ~0 };
+  u32 sw_if_index1[VLIB_N_RX_TX] = { ~0 };
+
+  u32 is_ip0 = gro_is_ip4_or_ip6_packet (b0, is_l2);
+  u32 is_ip1 = gro_is_ip4_or_ip6_packet (b1, is_l2);
+
+  if (is_ip0 & VNET_BUFFER_F_IS_IP4)
+    vnet_generic_header_offset_parser (b0, &gho0, is_l2, 1 /* is_ip4 */ ,
+                                      0 /* is_ip6 */ );
+  else if (is_ip0 & VNET_BUFFER_F_IS_IP6)
+    vnet_generic_header_offset_parser (b0, &gho0, is_l2, 0 /* is_ip4 */ ,
+                                      1 /* is_ip6 */ );
+  else
+    return 0;
+
+  if (is_ip1 & VNET_BUFFER_F_IS_IP4)
+    vnet_generic_header_offset_parser (b1, &gho1, is_l2, 1 /* is_ip4 */ ,
+                                      0 /* is_ip6 */ );
+  else if (is_ip1 & VNET_BUFFER_F_IS_IP6)
+    vnet_generic_header_offset_parser (b1, &gho1, is_l2, 0 /* is_ip4 */ ,
+                                      1 /* is_ip6 */ );
+  else
+    return 0;
+
+  pkt_len0 = vlib_buffer_length_in_chain (vm, b0);
+  pkt_len1 = vlib_buffer_length_in_chain (vm, b1);
+
+  if (((gho0.gho_flags & GHO_F_TCP) == 0)
+      || ((gho1.gho_flags & GHO_F_TCP) == 0))
+    return 0;
+
+  /* ip4/ip6 aliases share the L3 offset; only the matching one is used */
+  ip4_0 =
+    (ip4_header_t *) (vlib_buffer_get_current (b0) + gho0.l3_hdr_offset);
+  ip4_1 =
+    (ip4_header_t *) (vlib_buffer_get_current (b1) + gho1.l3_hdr_offset);
+  ip6_0 =
+    (ip6_header_t *) (vlib_buffer_get_current (b0) + gho0.l3_hdr_offset);
+  ip6_1 =
+    (ip6_header_t *) (vlib_buffer_get_current (b1) + gho1.l3_hdr_offset);
+
+  tcp0 = (tcp_header_t *) (vlib_buffer_get_current (b0) + gho0.l4_hdr_offset);
+  tcp1 = (tcp_header_t *) (vlib_buffer_get_current (b1) + gho1.l4_hdr_offset);
+
+  l234_sz0 = gho0.hdr_sz;
+  l234_sz1 = gho1.hdr_sz;
+
+  if (gro_is_bad_packet (b0, tcp0->flags, l234_sz0)
+      || gro_is_bad_packet (b1, tcp1->flags, l234_sz1))
+    return 0;
+
+  sw_if_index0[VLIB_RX] = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+  sw_if_index0[VLIB_TX] = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+  sw_if_index1[VLIB_RX] = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+  sw_if_index1[VLIB_TX] = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+
+  if ((gho0.gho_flags & GHO_F_IP4) && (gho1.gho_flags & GHO_F_IP4))
+    {
+      gro_get_ip4_flow_from_packet (sw_if_index0, ip4_0, tcp0, &flow_key0,
+                                   is_l2);
+      gro_get_ip4_flow_from_packet (sw_if_index1, ip4_1, tcp1, &flow_key1,
+                                   is_l2);
+    }
+  else if ((gho0.gho_flags & GHO_F_IP6) && (gho1.gho_flags & GHO_F_IP6))
+    {
+      gro_get_ip6_flow_from_packet (sw_if_index0, ip6_0, tcp0, &flow_key0,
+                                   is_l2);
+      gro_get_ip6_flow_from_packet (sw_if_index1, ip6_1, tcp1, &flow_key1,
+                                   is_l2);
+    }
+  else
+    return 0;
+
+  if (gro_flow_is_equal (&flow_key0, &flow_key1) == 0)
+    return 0;
+
+  payload_len0 = pkt_len0 - l234_sz0;
+  payload_len1 = pkt_len1 - l234_sz1;
+
+  /* never grow the merged packet past the max GSO size */
+  if (pkt_len0 >= TCP_MAX_GSO_SZ || pkt_len1 >= TCP_MAX_GSO_SZ
+      || (pkt_len0 + payload_len1) >= TCP_MAX_GSO_SZ)
+    return 0;
+
+  if (gro_tcp_sequence_check (tcp0, tcp1, payload_len0) ==
+      GRO_PACKET_ACTION_ENQUEUE)
+    {
+      gro_merge_buffers (vm, b0, b1, bi1, payload_len1, l234_sz1);
+      return tcp1->ack_number;
+    }
+
+  return 0;
+}
+
+/* Rewrite the head buffer's headers after coalescing: set gso_size to the
+ * per-segment payload, fix the IP length field to the merged chain length,
+ * set the GSO/checksum-offload flags and stamp the final ACK number. */
+static_always_inline void
+gro_fixup_header (vlib_main_t * vm, vlib_buffer_t * b0, u32 ack_number,
+                 int is_l2)
+{
+  generic_header_offset_t gho0 = { 0 };
+
+  u32 is_ip0 = gro_is_ip4_or_ip6_packet (b0, is_l2);
+
+  if (is_ip0 & VNET_BUFFER_F_IS_IP4)
+    vnet_generic_header_offset_parser (b0, &gho0, is_l2, 1 /* is_ip4 */ ,
+                                      0 /* is_ip6 */ );
+  else if (is_ip0 & VNET_BUFFER_F_IS_IP6)
+    vnet_generic_header_offset_parser (b0, &gho0, is_l2, 0 /* is_ip4 */ ,
+                                      1 /* is_ip6 */ );
+
+  /* first-buffer payload bytes become the GSO segment size */
+  vnet_buffer2 (b0)->gso_size = b0->current_length - gho0.hdr_sz;
+
+  if (gho0.gho_flags & GHO_F_IP4)
+    {
+      ip4_header_t *ip4 =
+       (ip4_header_t *) (vlib_buffer_get_current (b0) + gho0.l3_hdr_offset);
+      ip4->length =
+       clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
+                             gho0.l3_hdr_offset);
+      b0->flags |=
+       (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4 |
+        VNET_BUFFER_F_OFFLOAD_TCP_CKSUM | VNET_BUFFER_F_OFFLOAD_IP_CKSUM);
+    }
+  else if (gho0.gho_flags & GHO_F_IP6)
+    {
+      /* NOTE(review): payload_length derived from l4_hdr_offset assumes no
+       * IPv6 extension headers -- confirm the parser guarantees this. */
+      ip6_header_t *ip6 =
+       (ip6_header_t *) (vlib_buffer_get_current (b0) + gho0.l3_hdr_offset);
+      ip6->payload_length =
+       clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
+                             gho0.l4_hdr_offset);
+      b0->flags |=
+       (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6 |
+        VNET_BUFFER_F_OFFLOAD_TCP_CKSUM);
+    }
+
+  tcp_header_t *tcp0 =
+    (tcp_header_t *) (vlib_buffer_get_current (b0) + gho0.l4_hdr_offset);
+  tcp0->ack_number = ack_number;
+  b0->flags &= ~VLIB_BUFFER_IS_TRACED;
+}
+
+/* Flush every timed-out flow: fix up its head buffer, write the buffer
+ * index into 'to' and release the slot.  Returns the number of indices
+ * written ('to' must hold up to GRO_FLOW_TABLE_MAX_SIZE entries). */
+static_always_inline u32
+vnet_gro_flow_table_flush (vlib_main_t * vm, gro_flow_table_t * flow_table,
+                          u32 * to)
+{
+  if (flow_table->flow_table_size > 0)
+    {
+      gro_flow_t *gro_flow;
+      u32 i = 0, j = 0;
+      while (i < GRO_FLOW_TABLE_MAX_SIZE)
+       {
+         gro_flow = &flow_table->gro_flow[i];
+         if (gro_flow->n_buffers && gro_flow_is_timeout (vm, gro_flow))
+           {
+             // flush the packet
+             vlib_buffer_t *b0 =
+               vlib_get_buffer (vm, gro_flow->buffer_index);
+             gro_fixup_header (vm, b0, gro_flow->last_ack_number,
+                               flow_table->is_l2);
+             to[j] = gro_flow->buffer_index;
+             gro_flow_table_reset_flow (flow_table, gro_flow);
+             flow_table->n_vectors++;
+             j++;
+           }
+         i++;
+       }
+
+      return j;
+    }
+  return 0;
+}
+
+/* Periodic flush path (called from the dispatcher): when the table-wide
+ * timer expires, flush timed-out flows and hand the resulting buffers to
+ * the table's registered node via a fresh frame, then re-arm the timer. */
+static_always_inline void
+vnet_gro_flow_table_schedule_node_on_dispatcher (vlib_main_t * vm,
+                                                gro_flow_table_t *
+                                                flow_table)
+{
+  if (gro_flow_table_is_timeout (vm, flow_table))
+    {
+      u32 to[GRO_FLOW_TABLE_MAX_SIZE] = { 0 };
+      u32 n_to = vnet_gro_flow_table_flush (vm, flow_table, to);
+
+      if (n_to > 0)
+       {
+         u32 node_index = flow_table->node_index;
+         vlib_frame_t *f = vlib_get_frame_to_node (vm, node_index);
+         u32 *f_to = vlib_frame_vector_args (f);
+         u32 i = 0;
+
+         while (i < n_to)
+           {
+             f_to[f->n_vectors] = to[i];
+             i++;
+             f->n_vectors++;
+           }
+         vlib_put_frame_to_node (vm, node_index, f);
+       }
+      gro_flow_table_set_timeout (vm, flow_table, GRO_FLOW_TABLE_FLUSH);
+    }
+}
+
+/* Per-packet GRO entry point for the flow-table path.  Either absorbs bi0
+ * into a flow (returns 0) or writes one or two buffer indices to emit into
+ * 'to' and returns that count.  Packets that cannot be coalesced (table
+ * disabled, already GSO, unparsable, or table full) pass straight through. */
+static_always_inline u32
+vnet_gro_flow_table_inline (vlib_main_t * vm, gro_flow_table_t * flow_table,
+                           u32 bi0, u32 * to)
+{
+  vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+  generic_header_offset_t gho0 = { 0 };
+  gro_flow_t *gro_flow = 0;
+  gro_flow_key_t flow_key0 = { };
+  tcp_header_t *tcp0 = 0;
+  u32 pkt_len0 = 0;
+  int is_l2 = flow_table->is_l2;
+
+  /* pass-through: coalescing disabled */
+  if (!gro_flow_table_is_enable (flow_table))
+    {
+      to[0] = bi0;
+      return 1;
+    }
+
+  /* pass-through: already a GSO packet */
+  if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_GSO))
+    {
+      to[0] = bi0;
+      return 1;
+    }
+
+  /* pass-through: not a GRO-eligible TCP packet */
+  pkt_len0 = gro_get_packet_data (vm, b0, &gho0, &flow_key0, is_l2);
+  if (pkt_len0 == 0)
+    {
+      to[0] = bi0;
+      return 1;
+    }
+
+  /* pass-through: flow table full */
+  gro_flow = gro_flow_table_find_or_add_flow (flow_table, &flow_key0);
+  if (!gro_flow)
+    {
+      to[0] = bi0;
+      return 1;
+    }
+
+  if (PREDICT_FALSE (gro_flow->n_buffers == 0))
+    {
+      /* first packet of the flow: buffer it and arm the flush timer */
+      flow_table->total_vectors++;
+      gro_flow_store_packet (gro_flow, bi0);
+      tcp0 =
+       (tcp_header_t *) (vlib_buffer_get_current (b0) + gho0.l4_hdr_offset);
+      gro_flow->last_ack_number = tcp0->ack_number;
+      gro_flow_set_timeout (vm, gro_flow, GRO_FLOW_TIMEOUT);
+      return 0;
+    }
+  else
+    {
+      /* flow already has a stored head buffer (suffix _s): re-parse it and
+       * decide whether bi0 extends it in-sequence */
+      tcp0 =
+       (tcp_header_t *) (vlib_buffer_get_current (b0) + gho0.l4_hdr_offset);
+      generic_header_offset_t gho_s = { 0 };
+      tcp_header_t *tcp_s;
+      u16 l234_sz0, l234_sz_s;
+      u32 pkt_len_s, payload_len0, payload_len_s;
+      u32 bi_s = gro_flow->buffer_index;
+
+      vlib_buffer_t *b_s = vlib_get_buffer (vm, bi_s);
+      u32 is_ip_s = gro_is_ip4_or_ip6_packet (b_s, is_l2);
+      if (is_ip_s & VNET_BUFFER_F_IS_IP4)
+       vnet_generic_header_offset_parser (b_s, &gho_s, is_l2,
+                                          1 /* is_ip4 */ , 0 /* is_ip6 */ );
+      else if (is_ip_s & VNET_BUFFER_F_IS_IP6)
+       vnet_generic_header_offset_parser (b_s, &gho_s, is_l2,
+                                          0 /* is_ip4 */ , 1 /* is_ip6 */ );
+
+      tcp_s =
+       (tcp_header_t *) (vlib_buffer_get_current (b_s) +
+                         gho_s.l4_hdr_offset);
+      pkt_len_s = vlib_buffer_length_in_chain (vm, b_s);
+      l234_sz0 = gho0.hdr_sz;
+      l234_sz_s = gho_s.hdr_sz;
+      payload_len0 = pkt_len0 - l234_sz0;
+      payload_len_s = pkt_len_s - l234_sz_s;
+      gro_packet_action_t action =
+       gro_tcp_sequence_check (tcp_s, tcp0, payload_len_s);
+
+      if (PREDICT_TRUE (action == GRO_PACKET_ACTION_ENQUEUE))
+       {
+         if (PREDICT_TRUE ((pkt_len_s + payload_len0) < TCP_MAX_GSO_SZ))
+           {
+             flow_table->total_vectors++;
+             gro_merge_buffers (vm, b_s, b0, bi0, payload_len0, l234_sz0);
+             gro_flow_store_packet (gro_flow, bi0);
+             gro_flow->last_ack_number = tcp0->ack_number;
+             return 0;
+           }
+         else
+           {
+             // flush the stored GSO size packet and buffer the current packet
+             flow_table->n_vectors++;
+             flow_table->total_vectors++;
+             gro_fixup_header (vm, b_s, gro_flow->last_ack_number, is_l2);
+             gro_flow->n_buffers = 0;
+             gro_flow_store_packet (gro_flow, bi0);
+             gro_flow->last_ack_number = tcp0->ack_number;
+             gro_flow_set_timeout (vm, gro_flow, GRO_FLOW_TIMEOUT);
+             to[0] = bi_s;
+             return 1;
+           }
+       }
+      else
+       {
+         // flush the all (current and stored) packets
+         flow_table->n_vectors++;
+         flow_table->total_vectors++;
+         gro_fixup_header (vm, b_s, gro_flow->last_ack_number, is_l2);
+         gro_flow->n_buffers = 0;
+         gro_flow_table_reset_flow (flow_table, gro_flow);
+         to[0] = bi_s;
+         to[1] = bi0;
+         return 2;
+       }
+    }
+}
+
+/**
+ * Batch driver for the flow-table path: feed each input buffer through
+ * vnet_gro_flow_table_inline, packing any flushed/pass-through indices
+ * into 'to'.  Returns the number of entries written to 'to' (each call
+ * can emit 0-2, so 'to' must be sized with GRO_TO_VECTOR_SIZE()).
+ */
+static_always_inline u32
+vnet_gro_inline (vlib_main_t * vm, gro_flow_table_t * flow_table, u32 * from,
+                u16 n_left_from, u32 * to)
+{
+  u16 count = 0, i = 0;
+
+  for (i = 0; i < n_left_from; i++)
+    count += vnet_gro_flow_table_inline (vm, flow_table, from[i], &to[count]);
+
+  return count;
+}
+
+/**
+ * Opportunistically coalesce a leading run of same-flow, in-sequence
+ * buffers into b[0] without a flow table.  Returns the number of input
+ * buffers consumed (>= 1); on any mismatch the run stops and the
+ * remaining buffers are left untouched for the caller.
+ */
+static_always_inline u32
+vnet_gro_simple_inline (vlib_main_t * vm, u32 * from, u16 n_left_from,
+                       int is_l2)
+{
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+  vlib_get_buffers (vm, from, b, n_left_from);
+  u32 bi = 1, ack_number = 0;
+  if (PREDICT_TRUE (((b[0]->flags & VNET_BUFFER_F_GSO) == 0)))
+    {
+      while (n_left_from > 1)
+       {
+         if (PREDICT_TRUE (((b[bi]->flags & VNET_BUFFER_F_GSO) == 0)))
+           {
+             u32 ret;
+             /* ret is b[bi]'s ACK number on success, 0 on no-coalesce */
+             if ((ret =
+                  gro_coalesce_buffers (vm, b[0], b[bi], from[bi],
+                                        is_l2)) != 0)
+               {
+                 n_left_from -= 1;
+                 bi += 1;
+                 ack_number = ret;
+                 continue;
+               }
+             else
+               break;
+           }
+         else
+           break;
+       }
+
+      /* something was merged into b[0]: rewrite its headers once */
+      if (bi >= 2)
+       {
+         gro_fixup_header (vm, b[0], ack_number, is_l2);
+       }
+    }
+  return bi;
+}
+#endif /* included_gro_func_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
index b3aaddf..cb8b5bb 100644 (file)
@@ -661,7 +661,7 @@ create_pg_if_cmd_fn (vlib_main_t * vm,
 {
   pg_main_t *pg = &pg_main;
   unformat_input_t _line_input, *line_input = &_line_input;
-  u32 if_id, gso_enabled = 0, gso_size = 0;
+  u32 if_id, gso_enabled = 0, gso_size = 0, coalesce_enabled = 0;
   clib_error_t *error = NULL;
 
   if (!unformat_user (input, unformat_line_input, line_input))
@@ -681,6 +681,8 @@ create_pg_if_cmd_fn (vlib_main_t * vm,
              error = clib_error_create ("gso enabled but gso size missing");
              goto done;
            }
+         if (unformat (line_input, "coalesce-enabled"))
+           coalesce_enabled = 1;
        }
       else
        {
@@ -690,7 +692,8 @@ create_pg_if_cmd_fn (vlib_main_t * vm,
        }
     }
 
-  pg_interface_add_or_get (pg, if_id, gso_enabled, gso_size);
+  pg_interface_add_or_get (pg, if_id, gso_enabled, gso_size,
+                          coalesce_enabled);
 
 done:
   unformat_free (line_input);
@@ -701,7 +704,8 @@ done:
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (create_pg_if_cmd, static) = {
   .path = "create packet-generator",
-  .short_help = "create packet-generator interface <interface name> [gso-enabled gso-size <size>]",
+  .short_help = "create packet-generator interface <interface name>"
+                " [gso-enabled gso-size <size> [coalesce-enabled]]",
   .function = create_pg_if_cmd_fn,
 };
 /* *INDENT-ON* */
index 6968cce..785592f 100644 (file)
@@ -54,6 +54,7 @@
 #include <vnet/ip/ip6_packet.h>
 #include <vnet/udp/udp_packet.h>
 #include <vnet/devices/devices.h>
+#include <vnet/gso/gro_func.h>
 
 static int
 validate_buffer_data2 (vlib_buffer_t * b, pg_stream_t * s,
@@ -1640,6 +1641,9 @@ pg_generate_packets (vlib_node_runtime_t * node,
                            &next_index, 0);
     }
 
+  if (PREDICT_FALSE (pi->coalesce_enabled))
+    vnet_gro_flow_table_schedule_node_on_dispatcher (vm, pi->flow_table);
+
   while (n_packets_to_generate > 0)
     {
       u32 *head, *start, *end;
index d8059fa..042591a 100644 (file)
@@ -42,6 +42,7 @@
 #include <vnet/vnet.h>
 #include <vnet/pg/pg.h>
 #include <vnet/ethernet/ethernet.h>
+#include <vnet/gso/gro_func.h>
 
 uword
 pg_output (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
@@ -50,6 +51,8 @@ pg_output (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
   u32 *buffers = vlib_frame_vector_args (frame);
   uword n_buffers = frame->n_vectors;
   uword n_left = n_buffers;
+  u32 to[GRO_TO_VECTOR_SIZE (n_buffers)];
+  uword n_to = 0;
   vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
   pg_interface_t *pif = pool_elt_at_index (pg->interfaces, rd->dev_instance);
 
@@ -57,6 +60,13 @@ pg_output (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
     while (clib_atomic_test_and_set (pif->lockp))
       ;
 
+  if (PREDICT_FALSE (pif->coalesce_enabled))
+    {
+      n_to = vnet_gro_inline (vm, pif->flow_table, buffers, n_left, to);
+      buffers = to;
+      n_left = n_to;
+    }
+
   while (n_left > 0)
     {
       n_left--;
@@ -84,7 +94,13 @@ pg_output (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
       pif->pcap_main.n_packets_to_capture)
     pcap_close (&pif->pcap_main);
 
-  vlib_buffer_free (vm, vlib_frame_vector_args (frame), n_buffers);
+  if (PREDICT_FALSE (pif->coalesce_enabled))
+    {
+      n_buffers = n_to;
+      vlib_buffer_free (vm, to, n_to);
+    }
+  else
+    vlib_buffer_free (vm, vlib_frame_vector_args (frame), n_buffers);
   if (PREDICT_FALSE (pif->lockp != 0))
     clib_atomic_release (pif->lockp);
 
index 86343d5..3a44f1d 100644 (file)
@@ -49,6 +49,21 @@ define pg_create_interface_reply
   vl_api_interface_index_t sw_if_index;
 };
 
+/** \brief PacketGenerator interface enable/disable packet coalesce
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - interface index of the packet-generator interface
+    @param coalesce_enabled - enable/disable packet coalesce on this interface
+*/
+autoreply define pg_interface_enable_disable_coalesce
+{
+  u32 client_index;
+  u32 context;
+  vl_api_interface_index_t sw_if_index;
+  bool coalesce_enabled;
+  option status="in_progress";
+};
+
 /** \brief PacketGenerator capture packets on given interface request
     @param client_index - opaque cookie to identify the sender
     @param context - sender context, to match reply w/ request
index a6616d9..06e6126 100644 (file)
@@ -45,6 +45,7 @@
 #include <vppinfra/fifo.h>     /* for buffer_fifo */
 #include <vppinfra/pcap.h>
 #include <vnet/interface.h>
+#include <vnet/gso/gro.h>
 
 extern vnet_device_class_t pg_dev_class;
 
@@ -305,6 +306,8 @@ typedef struct
   /* Identifies stream for this interface. */
   u32 id;
 
+  u8 coalesce_enabled;
+  gro_flow_table_t *flow_table;
   u8 gso_enabled;
   u32 gso_size;
   pcap_main_t pcap_main;
@@ -358,9 +361,14 @@ void pg_stream_change (pg_main_t * pg, pg_stream_t * s);
 void pg_stream_enable_disable (pg_main_t * pg, pg_stream_t * s,
                               int is_enable);
 
+/* Enable/disable packet coalesce on given interface */
+void pg_interface_enable_disable_coalesce (pg_interface_t * pi, u8 enable,
+                                          u32 tx_node_index);
+
 /* Find/create free packet-generator interface index. */
 u32 pg_interface_add_or_get (pg_main_t * pg, uword stream_index,
-                            u8 gso_enabled, u32 gso_size);
+                            u8 gso_enabled, u32 gso_size,
+                            u8 coalesce_enabled);
 
 always_inline pg_node_t *
 pg_get_node (uword node_index)
index bb58a4f..554e8ea 100644 (file)
@@ -44,7 +44,8 @@
 #define foreach_pg_api_msg                                              \
 _(PG_CREATE_INTERFACE, pg_create_interface)                             \
 _(PG_CAPTURE, pg_capture)                                               \
-_(PG_ENABLE_DISABLE, pg_enable_disable)
+_(PG_ENABLE_DISABLE, pg_enable_disable)                                 \
+_(PG_INTERFACE_ENABLE_DISABLE_COALESCE, pg_interface_enable_disable_coalesce)
 
 static void
 vl_api_pg_create_interface_t_handler (vl_api_pg_create_interface_t * mp)
@@ -55,7 +56,7 @@ vl_api_pg_create_interface_t_handler (vl_api_pg_create_interface_t * mp)
   pg_main_t *pg = &pg_main;
   u32 pg_if_id = pg_interface_add_or_get (pg, ntohl (mp->interface_id),
                                          mp->gso_enabled,
-                                         ntohl (mp->gso_size));
+                                         ntohl (mp->gso_size), 0);
   pg_interface_t *pi = pool_elt_at_index (pg->interfaces, pg_if_id);
 
   /* *INDENT-OFF* */
@@ -66,6 +67,41 @@ vl_api_pg_create_interface_t_handler (vl_api_pg_create_interface_t * mp)
   /* *INDENT-ON* */
 }
 
+static void
+  vl_api_pg_interface_enable_disable_coalesce_t_handler
+  (vl_api_pg_interface_enable_disable_coalesce_t * mp)
+{
+  vl_api_pg_interface_enable_disable_coalesce_reply_t *rmp;
+  int rv = 0;
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+
+  pg_main_t *pg = &pg_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hw =
+    vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
+
+  if (hw)
+    {
+      pg_interface_t *pi =
+       pool_elt_at_index (pg->interfaces, hw->dev_instance);
+      if (pi->gso_enabled)
+       pg_interface_enable_disable_coalesce (pi, mp->coalesce_enabled,
+                                             hw->tx_node_index);
+      else
+       rv = VNET_API_ERROR_CANNOT_ENABLE_DISABLE_FEATURE;
+    }
+  else
+    {
+      rv = VNET_API_ERROR_NO_MATCHING_INTERFACE;
+    }
+
+  BAD_SW_IF_INDEX_LABEL;
+  REPLY_MACRO (VL_API_PG_INTERFACE_ENABLE_DISABLE_COALESCE_REPLY);
+}
+
 static void
 vl_api_pg_capture_t_handler (vl_api_pg_capture_t * mp)
 {
index f09e9a4..88c8937 100644 (file)
@@ -178,9 +178,26 @@ pg_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
   return 0;
 }
 
+void
+pg_interface_enable_disable_coalesce (pg_interface_t * pi, u8 enable,
+                                     u32 tx_node_index)
+{
+  if (enable)
+    {
+      gro_flow_table_init (&pi->flow_table, 1 /* is_l2 */ ,
+                          tx_node_index);
+      pi->coalesce_enabled = 1;
+    }
+  else
+    {
+      pi->coalesce_enabled = 0;
+      gro_flow_table_free (pi->flow_table);
+    }
+}
+
 u32
 pg_interface_add_or_get (pg_main_t * pg, uword if_id, u8 gso_enabled,
-                        u32 gso_size)
+                        u32 gso_size, u8 coalesce_enabled)
 {
   vnet_main_t *vnm = vnet_get_main ();
   vlib_main_t *vm = vlib_get_main ();
@@ -219,6 +236,10 @@ pg_interface_add_or_get (pg_main_t * pg, uword if_id, u8 gso_enabled,
          hi->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO;
          pi->gso_enabled = 1;
          pi->gso_size = gso_size;
+         if (coalesce_enabled)
+           {
+             pg_interface_enable_disable_coalesce (pi, 1, hi->tx_node_index);
+           }
        }
       pi->sw_if_index = hi->sw_if_index;
 
@@ -454,7 +475,7 @@ pg_stream_add (pg_main_t * pg, pg_stream_t * s_init)
   /* Find an interface to use. */
   s->pg_if_index =
     pg_interface_add_or_get (pg, s->if_id, 0 /* gso_enabled */ ,
-                            0 /* gso_size */ );
+                            0 /* gso_size */ , 0 /* coalesce_enabled */ );
 
   if (s->sw_if_index[VLIB_RX] == ~0)
     {
index 451f263..27d1c0c 100644 (file)
@@ -2562,6 +2562,21 @@ static void *vl_api_pg_enable_disable_t_print
   FINISH;
 }
 
+static void *vl_api_pg_interface_enable_disable_coalesce_t_print
+  (vl_api_pg_interface_enable_disable_coalesce_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: pg_interface_enable_disable_coalesce ");
+  s = format (s, "sw_if_index %d ", (mp->sw_if_index));
+  if (!mp->coalesce_enabled)
+    s = format (s, "disable");
+  else
+    s = format (s, "enable");
+  FINISH;
+}
+
+
 static void *vl_api_ip_source_and_port_range_check_add_del_t_print
   (vl_api_ip_source_and_port_range_check_add_del_t * mp, void *handle)
 {
@@ -3656,6 +3671,7 @@ _(GET_NEXT_INDEX, get_next_index)                                       \
 _(PG_CREATE_INTERFACE,pg_create_interface)                              \
 _(PG_CAPTURE, pg_capture)                                               \
 _(PG_ENABLE_DISABLE, pg_enable_disable)                                 \
+_(PG_INTERFACE_ENABLE_DISABLE_COALESCE, pg_interface_enable_disable_coalesce) \
 _(POLICER_ADD_DEL, policer_add_del)                                     \
 _(POLICER_DUMP, policer_dump)                                           \
 _(POLICER_CLASSIFY_SET_INTERFACE, policer_classify_set_interface)       \
diff --git a/test/test_gro.py b/test/test_gro.py
new file mode 100644 (file)
index 0000000..33215d6
--- /dev/null
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+"""GRO functional tests"""
+
+#
+# Add tests for:
+# - GRO
+# - Verify that a 1500-byte frame is forwarded correctly with GRO disabled
+# - Verify that 1500-byte frames are coalesced correctly with GRO enabled
+#
+import unittest
+
+from scapy.packet import Raw
+from scapy.layers.inet6 import IPv6, Ether, IP, UDP, ICMPv6PacketTooBig
+from scapy.layers.inet6 import ipv6nh, IPerror6
+from scapy.layers.inet import TCP, ICMP
+from scapy.data import ETH_P_IP, ETH_P_IPV6, ETH_P_ARP
+
+from framework import VppTestCase, VppTestRunner
+from vpp_object import VppObject
+from vpp_interface import VppInterface
+
+
+""" TestGRO is a subclass of VppTestCase.
+    GRO tests.
+"""
+
+
+class TestGRO(VppTestCase):
+    """ GRO Test Case """
+
+    @classmethod
+    def setUpClass(self):
+        super(TestGRO, self).setUpClass()
+        res = self.create_pg_interfaces(range(2))
+        res_gro = self.create_pg_interfaces(range(2, 3), 1, 1460)
+        self.create_pg_interfaces(range(3, 4), 1, 8940)
+        self.pg_interfaces.append(res[0])
+        self.pg_interfaces.append(res[1])
+        self.pg_interfaces.append(res_gro[0])
+        self.pg2.coalesce_enable()
+        self.pg3.coalesce_enable()
+
+    @classmethod
+    def tearDownClass(self):
+        super(TestGRO, self).tearDownClass()
+
+    def setUp(self):
+        super(TestGRO, self).setUp()
+        for i in self.pg_interfaces:
+            i.admin_up()
+            i.config_ip4()
+            i.config_ip6()
+            i.disable_ipv6_ra()
+            i.resolve_arp()
+            i.resolve_ndp()
+
+    def tearDown(self):
+        super(TestGRO, self).tearDown()
+        if not self.vpp_dead:
+            for i in self.pg_interfaces:
+                i.unconfig_ip4()
+                i.unconfig_ip6()
+                i.admin_down()
+
+    def test_gro(self):
+        """ GRO test """
+
+        n_packets = 124
+        #
+        # Send 1500-byte frames with GRO disabled
+        #
+        p4 = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) /
+              IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4,
+                 flags='DF') /
+              TCP(sport=1234, dport=4321) /
+              Raw(b'\xa5' * 1460))
+
+        rxs = self.send_and_expect(self.pg0, n_packets * p4, self.pg1)
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, self.pg1.local_mac)
+            self.assertEqual(rx[Ether].dst, self.pg1.remote_mac)
+            self.assertEqual(rx[IP].src, self.pg0.remote_ip4)
+            self.assertEqual(rx[IP].dst, self.pg1.remote_ip4)
+            self.assertEqual(rx[TCP].sport, 1234)
+            self.assertEqual(rx[TCP].dport, 4321)
+
+        #
+        # Send 1500-byte frames with GRO enabled on an
+        # output interface that supports GRO
+        #
+        p = []
+        s = 0
+        for n in range(0, n_packets):
+            p.append((Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) /
+                      IP(src=self.pg0.remote_ip4, dst=self.pg2.remote_ip4,
+                         flags='DF') /
+                      TCP(sport=1234, dport=4321, seq=s, ack=n, flags='A') /
+                      Raw(b'\xa5' * 1460)))
+            s += 1460
+
+        rxs = self.send_and_expect(self.pg0, p, self.pg2, n_rx=2)
+
+        i = 0
+        for rx in rxs:
+            i += 1
+            self.assertEqual(rx[Ether].src, self.pg2.local_mac)
+            self.assertEqual(rx[Ether].dst, self.pg2.remote_mac)
+            self.assertEqual(rx[IP].src, self.pg0.remote_ip4)
+            self.assertEqual(rx[IP].dst, self.pg2.remote_ip4)
+            self.assertEqual(rx[IP].len, 64280)  # 1460 * 44 + 40 < 65536
+            self.assertEqual(rx[TCP].sport, 1234)
+            self.assertEqual(rx[TCP].dport, 4321)
+            self.assertEqual(rx[TCP].ack, (44*i - 1))
+
+        p4_temp = (Ether(src=self.pg2.remote_mac, dst=self.pg2.local_mac) /
+                   IP(src=self.pg2.remote_ip4, dst=self.pg0.remote_ip4,
+                      flags='DF') /
+                   TCP(sport=1234, dport=4321, flags='F'))
+
+        rxs = self.send_and_expect(self.pg2, 100*[p4_temp], self.pg0, n_rx=100)
+        rx_coalesce = self.pg2.get_capture(1, timeout=1)
+
+        rx0 = rx_coalesce[0]
+        self.assertEqual(rx0[Ether].src, self.pg2.local_mac)
+        self.assertEqual(rx0[Ether].dst, self.pg2.remote_mac)
+        self.assertEqual(rx0[IP].src, self.pg0.remote_ip4)
+        self.assertEqual(rx0[IP].dst, self.pg2.remote_ip4)
+        self.assertEqual(rx0[IP].len, 52600)  # 1460 * 36 + 40
+        self.assertEqual(rx0[TCP].sport, 1234)
+        self.assertEqual(rx0[TCP].dport, 4321)
+
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, self.pg0.local_mac)
+            self.assertEqual(rx[Ether].dst, self.pg0.remote_mac)
+            self.assertEqual(rx[IP].src, self.pg2.remote_ip4)
+            self.assertEqual(rx[IP].dst, self.pg0.remote_ip4)
+            self.assertEqual(rx[IP].len, 40)
+            self.assertEqual(rx[TCP].sport, 1234)
+            self.assertEqual(rx[TCP].dport, 4321)
+
+if __name__ == '__main__':
+    unittest.main(testRunner=VppTestRunner)
index 32c0eae..4858aa7 100755 (executable)
@@ -57,6 +57,13 @@ class VppPGInterface(VppInterface):
         """gso size on packet-generator interface"""
         return self._gso_size
 
+    @property
+    def coalesce_is_enabled(self):
+        """coalesce enabled on packet-generator interface"""
+        if self._coalesce_enabled == 0:
+            return "coalesce-disabled"
+        return "coalesce-enabled"
+
     @property
     def out_path(self):
         """pcap file path - captured packets"""
@@ -113,6 +120,7 @@ class VppPGInterface(VppInterface):
         self._pg_index = pg_index
         self._gso_enabled = gso
         self._gso_size = gso_size
+        self._coalesce_enabled = 0
         self._out_file = "pg%u_out.pcap" % self.pg_index
         self._out_path = self.test.tempdir + "/" + self._out_file
         self._in_file = "pg%u_in.pcap" % self.pg_index
@@ -160,6 +168,18 @@ class VppPGInterface(VppInterface):
     def disable_capture(self):
         self.test.vapi.cli("%s disable" % self.capture_cli)
 
+    def coalesce_enable(self):
+        """ Enable packet coalesce on this packet-generator interface"""
+        self._coalesce_enabled = 1
+        self.test.vapi.pg_interface_enable_disable_coalesce(self.sw_if_index,
+                                                            1)
+
+    def coalesce_disable(self):
+        """ Disable packet coalesce on this packet-generator interface"""
+        self._coalesce_enabled = 0
+        self.test.vapi.pg_interface_enable_disable_coalesce(self.sw_if_index,
+                                                            0)
+
     def add_stream(self, pkts, nb_replays=None, worker=None):
         """
         Add a stream of packets to this packet-generator