dpdk: add l2_hdr_offset and l3_hdr_offset in vlib_buffer_t 06/9506/1
author Damjan Marion <damarion@cisco.com>
Tue, 21 Nov 2017 12:12:41 +0000 (13:12 +0100)
committer Damjan Marion <damarion@cisco.com>
Tue, 21 Nov 2017 12:44:22 +0000 (13:44 +0100)
Change-Id: I0a6d1257e391c3b6f7da6498bd5f7d4c545d17e9
Signed-off-by: Damjan Marion <damarion@cisco.com>
src/plugins/dpdk/device/format.c
src/plugins/dpdk/device/init.c
src/plugins/dpdk/device/node.c
src/plugins/ixge/ixge.c
src/vnet.am
src/vnet/buffer.c [new file with mode: 0644]
src/vnet/buffer.h
src/vnet/devices/devices.c
src/vnet/devices/devices.h
src/vnet/pg/input.c
src/vnet/pg/stream.c

index 8f1f729..faf59e7 100644 (file)
@@ -591,7 +591,7 @@ format_dpdk_tx_dma_trace (u8 * s, va_list * va)
 
   s = format (s, "\n%Ubuffer 0x%x: %U",
              format_white_space, indent,
-             t->buffer_index, format_vlib_buffer, &t->buffer);
+             t->buffer_index, format_vnet_buffer, &t->buffer);
 
   s = format (s, "\n%U%U",
              format_white_space, indent,
@@ -622,7 +622,7 @@ format_dpdk_rx_dma_trace (u8 * s, va_list * va)
 
   s = format (s, "\n%Ubuffer 0x%x: %U",
              format_white_space, indent,
-             t->buffer_index, format_vlib_buffer, &t->buffer);
+             t->buffer_index, format_vnet_buffer, &t->buffer);
 
   s = format (s, "\n%U%U",
              format_white_space, indent,
index 3cd040a..8d08da1 100755 (executable)
@@ -266,7 +266,6 @@ dpdk_lib_init (dpdk_main_t * dm)
                                      VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
       vlib_buffer_init_for_free_list (bt, fl);
       bt->flags = dm->buffer_flags_template;
-      bt->current_data = -RTE_PKTMBUF_HEADROOM;
       vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0;
     }
 
@@ -1600,7 +1599,7 @@ dpdk_init (vlib_main_t * vm)
   dm->buffer_flags_template =
     (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID
      | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
-     VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+     VNET_BUFFER_F_L4_CHECKSUM_CORRECT | VNET_BUFFER_F_L2_HDR_OFFSET_VALID);
 
   dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL;
   dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL;
index 1240b8d..4863390 100644 (file)
@@ -36,40 +36,21 @@ static char *dpdk_error_strings[] = {
 };
 #endif
 
-always_inline int
-vlib_buffer_is_ip4 (vlib_buffer_t * b)
-{
-  ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b);
-  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4));
-}
-
-always_inline int
-vlib_buffer_is_ip6 (vlib_buffer_t * b)
-{
-  ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b);
-  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6));
-}
-
-always_inline int
-vlib_buffer_is_mpls (vlib_buffer_t * b)
-{
-  ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b);
-  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS));
-}
-
 always_inline u32
-dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0)
+dpdk_rx_next_from_etype (struct rte_mbuf *mb)
 {
-  if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0)))
+  ethernet_header_t *h = rte_pktmbuf_mtod (mb, ethernet_header_t *);
+  if (PREDICT_TRUE (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)))
     {
       if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0))
        return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
       else
        return VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
     }
-  else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0)))
+  else if (PREDICT_TRUE (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)))
     return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
-  else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
+  else
+    if (PREDICT_TRUE (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)))
     return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
   else
     return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
@@ -78,6 +59,7 @@ dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0)
 always_inline u32
 dpdk_rx_next_from_packet_start (struct rte_mbuf * mb, vlib_buffer_t * b0)
 {
+  ethernet_header_t *h = rte_pktmbuf_mtod (mb, ethernet_header_t *);
   word start_delta;
   int rv;
 
@@ -86,16 +68,17 @@ dpdk_rx_next_from_packet_start (struct rte_mbuf * mb, vlib_buffer_t * b0)
 
   vlib_buffer_advance (b0, -start_delta);
 
-  if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0)))
+  if (PREDICT_TRUE (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)))
     {
       if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0))
        rv = VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
       else
        rv = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
     }
-  else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0)))
+  else if (PREDICT_TRUE (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)))
     rv = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
-  else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
+  else
+    if (PREDICT_TRUE (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)))
     rv = VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
   else
     rv = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
@@ -326,6 +309,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
       u32 bi2, next2;
       u32 bi3, next3;
       u8 error0, error1, error2, error3;
+      i16 offset0, offset1, offset2, offset3;
       u64 or_ol_flags;
 
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -366,21 +350,10 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
          b2 = vlib_buffer_from_rte_mbuf (mb2);
          b3 = vlib_buffer_from_rte_mbuf (mb3);
 
-         clib_memcpy64_x4 (b0, b1, b2, b3, bt);
-
          dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 9]);
          dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 5]);
 
-         /* current_data must be set to -RTE_PKTMBUF_HEADROOM in template */
-         b0->current_data += mb0->data_off;
-         b1->current_data += mb1->data_off;
-         b2->current_data += mb2->data_off;
-         b3->current_data += mb3->data_off;
-
-         b0->current_length = mb0->data_len;
-         b1->current_length = mb1->data_len;
-         b2->current_length = mb2->data_len;
-         b3->current_length = mb3->data_len;
+         clib_memcpy64_x4 (b0, b1, b2, b3, bt);
 
          dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 10]);
          dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 7]);
@@ -403,10 +376,10 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
            }
          else
            {
-             next0 = dpdk_rx_next_from_etype (mb0, b0);
-             next1 = dpdk_rx_next_from_etype (mb1, b1);
-             next2 = dpdk_rx_next_from_etype (mb2, b2);
-             next3 = dpdk_rx_next_from_etype (mb3, b3);
+             next0 = dpdk_rx_next_from_etype (mb0);
+             next1 = dpdk_rx_next_from_etype (mb1);
+             next2 = dpdk_rx_next_from_etype (mb2);
+             next3 = dpdk_rx_next_from_etype (mb3);
            }
 
          dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 11]);
@@ -426,16 +399,43 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
              b3->error = node->errors[error3];
            }
 
-         vlib_buffer_advance (b0, device_input_next_node_advance[next0]);
-         vlib_buffer_advance (b1, device_input_next_node_advance[next1]);
-         vlib_buffer_advance (b2, device_input_next_node_advance[next2]);
-         vlib_buffer_advance (b3, device_input_next_node_advance[next3]);
-
+         offset0 = device_input_next_node_advance[next0];
+         b0->current_data = mb0->data_off + offset0 - RTE_PKTMBUF_HEADROOM;
+         b0->flags |= device_input_next_node_flags[next0];
+         vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
+         vnet_buffer (b0)->l2_hdr_offset =
+           mb0->data_off - RTE_PKTMBUF_HEADROOM;
+         b0->current_length = mb0->data_len - offset0;
          n_rx_bytes += mb0->pkt_len;
+
+         offset1 = device_input_next_node_advance[next1];
+         b1->current_data = mb1->data_off + offset1 - RTE_PKTMBUF_HEADROOM;
+         b1->flags |= device_input_next_node_flags[next1];
+         vnet_buffer (b1)->l3_hdr_offset = b1->current_data;
+         vnet_buffer (b1)->l2_hdr_offset =
+           mb1->data_off - RTE_PKTMBUF_HEADROOM;
+         b1->current_length = mb1->data_len - offset1;
          n_rx_bytes += mb1->pkt_len;
+
+         offset2 = device_input_next_node_advance[next2];
+         b2->current_data = mb2->data_off + offset2 - RTE_PKTMBUF_HEADROOM;
+         b2->flags |= device_input_next_node_flags[next2];
+         vnet_buffer (b2)->l3_hdr_offset = b2->current_data;
+         vnet_buffer (b2)->l2_hdr_offset =
+           mb2->data_off - RTE_PKTMBUF_HEADROOM;
+         b2->current_length = mb2->data_len - offset2;
          n_rx_bytes += mb2->pkt_len;
+
+         offset3 = device_input_next_node_advance[next3];
+         b3->current_data = mb3->data_off + offset3 - RTE_PKTMBUF_HEADROOM;
+         b3->flags |= device_input_next_node_flags[next3];
+         vnet_buffer (b3)->l3_hdr_offset = b3->current_data;
+         vnet_buffer (b3)->l2_hdr_offset =
+           mb3->data_off - RTE_PKTMBUF_HEADROOM;
+         b3->current_length = mb3->data_len - offset3;
          n_rx_bytes += mb3->pkt_len;
 
+
          /* Process subsequent segments of multi-segment packets */
          if (maybe_multiseg)
            {
@@ -488,10 +488,6 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
 
          clib_memcpy (b0, bt, CLIB_CACHE_LINE_BYTES);
 
-         ASSERT (b0->current_data == -RTE_PKTMBUF_HEADROOM);
-         b0->current_data += mb0->data_off;
-         b0->current_length = mb0->data_len;
-
          bi0 = vlib_get_buffer_index (vm, b0);
 
          to_next[0] = bi0;
@@ -501,13 +497,18 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
          if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
            next0 = xd->per_interface_next_index;
          else
-           next0 = dpdk_rx_next_from_etype (mb0, b0);
+           next0 = dpdk_rx_next_from_etype (mb0);
 
          dpdk_rx_error_from_mb (mb0, &next0, &error0);
          b0->error = node->errors[error0];
 
-         vlib_buffer_advance (b0, device_input_next_node_advance[next0]);
-
+         offset0 = device_input_next_node_advance[next0];
+         b0->current_data = mb0->data_off + offset0 - RTE_PKTMBUF_HEADROOM;
+         b0->flags |= device_input_next_node_flags[next0];
+         vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
+         vnet_buffer (b0)->l2_hdr_offset =
+           mb0->data_off - RTE_PKTMBUF_HEADROOM;
+         b0->current_length = mb0->data_len - offset0;
          n_rx_bytes += mb0->pkt_len;
 
          /* Process subsequent segments of multi-segment packets */
index f9d01d5..bf94242 100644 (file)
@@ -601,7 +601,7 @@ format_ixge_rx_dma_trace (u8 * s, va_list * va)
 
   s = format (s, "\n%Ubuffer 0x%x: %U",
              format_white_space, indent,
-             t->buffer_index, format_vlib_buffer, &t->buffer);
+             t->buffer_index, format_vnet_buffer, &t->buffer);
 
   s = format (s, "\n%U", format_white_space, indent);
 
@@ -885,7 +885,7 @@ format_ixge_tx_dma_trace (u8 * s, va_list * va)
 
   s = format (s, "\n%Ubuffer 0x%x: %U",
              format_white_space, indent,
-             t->buffer_index, format_vlib_buffer, &t->buffer);
+             t->buffer_index, format_vnet_buffer, &t->buffer);
 
   s = format (s, "\n%U", format_white_space, indent);
 
index f5e4478..7dc4b7d 100644 (file)
@@ -31,6 +31,7 @@ endif
 # Generic stuff
 ########################################
 libvnet_la_SOURCES +=                          \
+  vnet/buffer.c                                        \
   vnet/config.c                                        \
   vnet/devices/devices.c                       \
   vnet/handoff.c                               \
diff --git a/src/vnet/buffer.c b/src/vnet/buffer.c
new file mode 100644 (file)
index 0000000..78b32ea
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/buffer.h>
+
+/* Format vlib buffer plus set vnet flags and valid l2/l3/l4 hdr offsets */
+u8 *
+format_vnet_buffer (u8 * s, va_list * args)
+{
+  vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
+  u32 indent = format_get_indent (s);
+  u8 *a = 0;
+  /* flag entries with a null name (hdr-offset flags) are printed below */
+#define _(bit, name, v) \
+  if (v && (b->flags & VNET_BUFFER_F_##name)) \
+    a = format (a, "%s ", v);
+  foreach_vnet_buffer_field
+#undef _
+    if (b->flags & VNET_BUFFER_F_L2_HDR_OFFSET_VALID)
+    a = format (a, "l2-hdr-offset %d ", vnet_buffer (b)->l2_hdr_offset);
+
+  if (b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID)
+    a = format (a, "l3-hdr-offset %d ", vnet_buffer (b)->l3_hdr_offset);
+
+  if (b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID)
+    a = format (a, "l4-hdr-offset %d ", vnet_buffer (b)->l4_hdr_offset);
+
+  s = format (s, "%U", format_vlib_buffer, b);
+  if (a)
+    s = format (s, "\n%U%v", format_white_space, indent, a);
+  vec_free (a);
+
+  return s;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
index 6518fb6..50b94f7 100644 (file)
 #include <vlib/vlib.h>
 
 #define foreach_vnet_buffer_field \
-  _( 1, L4_CHECKSUM_COMPUTED)                          \
-  _( 2, L4_CHECKSUM_CORRECT)                           \
-  _( 3, VLAN_2_DEEP)                                   \
-  _( 4, VLAN_1_DEEP)                                   \
-  _( 8, SPAN_CLONE)                                    \
-  _( 6, HANDOFF_NEXT_VALID)                            \
-  _( 7, LOCALLY_ORIGINATED)                            \
-  _( 8, IS_IP4)                                                \
-  _( 9, IS_IP6)                                                \
-  _(10, OFFLOAD_IP_CKSUM)                              \
-  _(11, OFFLOAD_TCP_CKSUM)                             \
-  _(12, OFFLOAD_UDP_CKSUM)                              \
-  _(13, IS_NATED)
+  _( 1, L4_CHECKSUM_COMPUTED, "l4-cksum-computed")     \
+  _( 2, L4_CHECKSUM_CORRECT, "l4-cksum-correct")       \
+  _( 3, VLAN_2_DEEP, "vlan-2-deep")                    \
+  _( 4, VLAN_1_DEEP, "vlan-1-deep")                    \
+  _( 5, SPAN_CLONE, "span-clone")                      \
+  _( 6, HANDOFF_NEXT_VALID, "handoff-next-valid")      \
+  _( 7, LOCALLY_ORIGINATED, "local")                   \
+  _( 8, IS_IP4, "ip4")                                 \
+  _( 9, IS_IP6, "ip6")                                 \
+  _(10, OFFLOAD_IP_CKSUM, "offload-ip-cksum")          \
+  _(11, OFFLOAD_TCP_CKSUM, "offload-tcp-cksum")                \
+  _(12, OFFLOAD_UDP_CKSUM, "offload-udp-cksum")                \
+  _(13, IS_NATED, "nated")                             \
+  _(14, L2_HDR_OFFSET_VALID, 0)                                \
+  _(15, L3_HDR_OFFSET_VALID, 0)                                \
+  _(16, L4_HDR_OFFSET_VALID, 0)
 
 #define VNET_BUFFER_FLAGS_VLAN_BITS \
   (VNET_BUFFER_F_VLAN_1_DEEP | VNET_BUFFER_F_VLAN_2_DEEP)
 
 enum
 {
-#define _(bit, name) VNET_BUFFER_F_##name  = (1 << LOG2_VLIB_BUFFER_FLAG_USER(bit)),
+#define _(bit, name, v) VNET_BUFFER_F_##name  = (1 << LOG2_VLIB_BUFFER_FLAG_USER(bit)),
   foreach_vnet_buffer_field
 #undef _
 };
 
 enum
 {
-#define _(bit, name) VNET_BUFFER_F_LOG2_##name  = LOG2_VLIB_BUFFER_FLAG_USER(bit),
+#define _(bit, name, v) VNET_BUFFER_F_LOG2_##name  = LOG2_VLIB_BUFFER_FLAG_USER(bit),
   foreach_vnet_buffer_field
 #undef _
 };
@@ -350,6 +353,7 @@ STATIC_ASSERT (sizeof (vnet_buffer_opaque2_t) <=
               STRUCT_SIZE_OF (vlib_buffer_t, opaque2),
               "VNET buffer opaque2 meta-data too large for vlib_buffer");
 
+format_function_t format_vnet_buffer;
 
 #endif /* included_vnet_buffer_h */
 
index a38ecd2..99011da 100644 (file)
@@ -52,6 +52,16 @@ device_input_next_node_advance[((VNET_DEVICE_INPUT_N_NEXT_NODES /
       [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = sizeof (ethernet_header_t),
 };
 
+const u32 __attribute__((aligned (CLIB_CACHE_LINE_BYTES)))
+device_input_next_node_flags[((VNET_DEVICE_INPUT_N_NEXT_NODES /
+                               CLIB_CACHE_LINE_BYTES) +1) * CLIB_CACHE_LINE_BYTES] =
+{ /* flags OR'ed into b->flags per next node; unlisted slots are 0 */
+      [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
+      [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
+      [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
+      [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
+};
+
 VNET_FEATURE_ARC_INIT (device_input, static) =
 {
   .arc_name  = "device-input",
index b74e371..c303cb6 100644 (file)
@@ -73,6 +73,7 @@ typedef struct
 extern vnet_device_main_t vnet_device_main;
 extern vlib_node_registration_t device_input_node;
 extern const u32 device_input_next_node_advance[];
+extern const u32 device_input_next_node_flags[];
 
 static inline void
 vnet_hw_interface_set_input_node (vnet_main_t * vnm, u32 hw_if_index,
index b70f3ce..f31152c 100644 (file)
@@ -65,7 +65,7 @@ validate_buffer_data2 (vlib_buffer_t * b, pg_stream_t * s,
   if (i >= n_bytes)
     return 1;
 
-  clib_warning ("buffer %U", format_vlib_buffer, b);
+  clib_warning ("buffer %U", format_vnet_buffer, b);
   clib_warning ("differ at index %d", i);
   clib_warning ("is     %U", format_hex_bytes, bd, n_bytes);
   clib_warning ("mask   %U", format_hex_bytes, pm, n_bytes);
@@ -1403,7 +1403,7 @@ format_pg_input_trace (u8 * s, va_list * va)
   s = format (s, ", %d sw_if_index", t->sw_if_index);
 
   s = format (s, "\n%U%U",
-             format_white_space, indent, format_vlib_buffer, &t->buffer);
+             format_white_space, indent, format_vnet_buffer, &t->buffer);
 
   s = format (s, "\n%U", format_white_space, indent);
 
index 2dfbf5a..762f241 100644 (file)
@@ -104,7 +104,7 @@ format_pg_output_trace (u8 * s, va_list * va)
 
   s = format (s, "%Ubuffer 0x%x: %U",
              format_white_space, indent,
-             t->buffer_index, format_vlib_buffer, &t->buffer);
+             t->buffer_index, format_vnet_buffer, &t->buffer);
 
   s = format (s, "\n%U%U", format_white_space, indent,
              format_ethernet_header_with_length, t->buffer.pre_data,