Improve dpdk-input node to handle drivers not setting mbuf PTYPE 76/4376/6
authorJohn Lo <loj@cisco.com>
Sat, 17 Dec 2016 08:09:58 +0000 (03:09 -0500)
committerDamjan Marion <dmarion.lists@gmail.com>
Tue, 20 Dec 2016 12:09:10 +0000 (12:09 +0000)
For drivers that do not provide dpdk rte_mbuf PTYPE information,
check ethernet header Etype to accelerate IP4 and IP6 forwarding
path. Update packet trace for dpdk-input node to provide more info
from DPDK rte_mbuf offload flags and packet types.

Change-Id: I207158797a155305314d002726c0af97b8cb0eb3
Signed-off-by: John Lo <loj@cisco.com>
vnet/vnet/devices/devices.h
vnet/vnet/devices/dpdk/dpdk.h
vnet/vnet/devices/dpdk/format.c
vnet/vnet/devices/dpdk/init.c
vnet/vnet/devices/dpdk/node.c

index 3bd700a..c46dab9 100644 (file)
@@ -21,6 +21,7 @@
 
 typedef enum
 {
+  VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT,
   VNET_DEVICE_INPUT_NEXT_IP4_INPUT,
   VNET_DEVICE_INPUT_NEXT_IP6_INPUT,
   VNET_DEVICE_INPUT_NEXT_MPLS_INPUT,
@@ -32,7 +33,8 @@ typedef enum
 #define VNET_DEVICE_INPUT_NEXT_NODES {                                 \
     [VNET_DEVICE_INPUT_NEXT_DROP] = "error-drop",                      \
     [VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input",                \
-    [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",      \
+    [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = "ip4-input-no-checksum",  \
+    [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = "ip4-input",                  \
     [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = "ip6-input",                  \
     [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = "mpls-input",                        \
 }
index 771c91f..3669bc5 100644 (file)
@@ -189,13 +189,13 @@ typedef struct
   i8 cpu_socket;
 
   u16 flags;
-#define DPDK_DEVICE_FLAG_ADMIN_UP       (1 << 0)
-#define DPDK_DEVICE_FLAG_PROMISC        (1 << 1)
-#define DPDK_DEVICE_FLAG_PMD            (1 << 2)
-#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 3)
-
-#define DPDK_DEVICE_FLAG_HAVE_SUBIF     (1 << 5)
-#define DPDK_DEVICE_FLAG_HQOS           (1 << 6)
+#define DPDK_DEVICE_FLAG_ADMIN_UP           (1 << 0)
+#define DPDK_DEVICE_FLAG_PROMISC            (1 << 1)
+#define DPDK_DEVICE_FLAG_PMD                (1 << 2)
+#define DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE (1 << 3)
+#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG     (1 << 4)
+#define DPDK_DEVICE_FLAG_HAVE_SUBIF         (1 << 5)
+#define DPDK_DEVICE_FLAG_HQOS               (1 << 6)
 
   u16 nb_tx_desc;
     CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
index 212f109..0b8a693 100644 (file)
   _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \
   _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert")
 
+#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
+/* New ol_flags bits added in DPDK-16.11 */
+#define PKT_RX_IP_CKSUM_GOOD    (1ULL << 7)
+#define PKT_RX_L4_CKSUM_GOOD    (1ULL << 8)
+#endif
+
 #define foreach_dpdk_pkt_rx_offload_flag                                \
   _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet")              \
   _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result")                 \
   _ (PKT_RX_FDIR, "RX packet with FDIR infos")                          \
   _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK")              \
   _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK")              \
+  _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped")               \
+  _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid")              \
+  _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. is valid")              \
   _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet")          \
-  _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet")
+  _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet")      \
+  _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped")
+
+#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
+/* PTYPE added in DPDK-16.11 */
+#define RTE_PTYPE_L2_ETHER_VLAN             0x00000006
+#define RTE_PTYPE_L2_ETHER_QINQ             0x00000007
+#endif
 
 #define foreach_dpdk_pkt_type                                           \
   _ (L2, ETHER, "Ethernet packet")                                      \
   _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync")               \
   _ (L2, ETHER_ARP, "ARP packet")                                       \
   _ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet")     \
+  _ (L2, ETHER_NSH, "NSH (Network Service Header) packet")              \
+  _ (L2, ETHER_VLAN, "VLAN packet")                                     \
+  _ (L2, ETHER_QINQ, "QinQ packet")                                     \
   _ (L3, IPV4, "IPv4 packet without extension headers")                 \
   _ (L3, IPV4_EXT, "IPv4 packet with extension headers")                \
   _ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \
@@ -642,7 +661,8 @@ format_dpdk_rte_mbuf (u8 * s, va_list * va)
     s = format (s, "\n%U%U", format_white_space, indent,
                format_dpdk_pkt_offload_flags, &mb->ol_flags);
 
-  if (mb->ol_flags & PKT_RX_VLAN_PKT)
+  if ((mb->ol_flags & PKT_RX_VLAN_PKT) &&
+      ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0))
     {
       ethernet_vlan_header_tv_t *vlan_hdr =
        ((ethernet_vlan_header_tv_t *) & (eth_hdr->type));
index c9e7dc8..0448c15 100755 (executable)
@@ -473,6 +473,7 @@ dpdk_lib_init (dpdk_main_t * dm)
              /* Cisco VIC */
            case VNET_DPDK_PMD_ENIC:
              rte_eth_link_get_nowait (i, &l);
+             xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
              xd->nb_rx_desc = DPDK_NB_RX_DESC_ENIC;
              if (l.link_speed == 40000)
                {
@@ -489,6 +490,7 @@ dpdk_lib_init (dpdk_main_t * dm)
              /* Intel Fortville */
            case VNET_DPDK_PMD_I40E:
            case VNET_DPDK_PMD_I40EVF:
+             xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
              xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
              xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE;
              xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE;
@@ -561,6 +563,7 @@ dpdk_lib_init (dpdk_main_t * dm)
              break;
 
            case VNET_DPDK_PMD_BOND:
+             xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
              xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND;
              break;
 
index 4e66877..e541cdb 100644 (file)
@@ -34,14 +34,61 @@ static char *dpdk_error_strings[] = {
 #undef _
 };
 
-#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
+always_inline int
+vlib_buffer_is_ip4 (vlib_buffer_t * b)
+{
+  ethernet_header_t *h = (ethernet_header_t *) b->data;
+  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4));
+}
+
+always_inline int
+vlib_buffer_is_ip6 (vlib_buffer_t * b)
+{
+  ethernet_header_t *h = (ethernet_header_t *) b->data;
+  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6));
+}
+
+always_inline int
+vlib_buffer_is_mpls (vlib_buffer_t * b)
+{
+  ethernet_header_t *h = (ethernet_header_t *) b->data;
+  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST));
+}
+
+#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
+/* New ol_flags bits added in DPDK-16.11 */
+#define PKT_RX_IP_CKSUM_GOOD    (1ULL << 7)
+#endif
+
+always_inline u32
+dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0)
+{
+  if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0)))
+    if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0))
+      return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
+    else
+      return VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+  else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0)))
+    return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+  else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
+    return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
+  else
+    return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+}
+
 always_inline int
 dpdk_mbuf_is_vlan (struct rte_mbuf *mb)
 {
+#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
   return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) ==
     RTE_PTYPE_L2_ETHER_VLAN;
-}
+#else
+  return
+    (mb->ol_flags &
+     (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) ==
+    PKT_RX_VLAN_PKT;
 #endif
+}
 
 always_inline int
 dpdk_mbuf_is_ip4 (struct rte_mbuf *mb)
@@ -55,35 +102,19 @@ dpdk_mbuf_is_ip6 (struct rte_mbuf *mb)
   return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0;
 }
 
-always_inline int
-vlib_buffer_is_mpls (vlib_buffer_t * b)
-{
-  ethernet_header_t *h = (ethernet_header_t *) b->data;
-  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST));
-}
-
-always_inline void
-dpdk_rx_next_from_mb (struct rte_mbuf *mb, vlib_buffer_t * b0, u32 * next0)
+always_inline u32
+dpdk_rx_next_from_mb (struct rte_mbuf * mb, vlib_buffer_t * b0)
 {
-  u32 n0;
-
-#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
-  if (PREDICT_FALSE
-      ((mb->ol_flags & (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED)) ==
-       PKT_RX_VLAN_PKT))
-#else
   if (PREDICT_FALSE (dpdk_mbuf_is_vlan (mb)))
-#endif
-    n0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+    return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
   else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb)))
-    n0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+    return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
   else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb)))
-    n0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+    return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
   else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
-    n0 = VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
+    return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
   else
-    n0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
-  *next0 = n0;
+    return dpdk_rx_next_from_etype (mb, b0);
 }
 
 always_inline void
@@ -127,10 +158,12 @@ dpdk_rx_trace (dpdk_main_t * dm,
 
       if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
        next0 = xd->per_interface_next_index;
-      else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF))
-       next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+      else if (PREDICT_TRUE
+              ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0))
+       next0 = dpdk_rx_next_from_mb (mb, b0);
       else
-       dpdk_rx_next_from_mb (mb, b0, &next0);
+       next0 = dpdk_rx_next_from_etype (mb, b0);
+
       dpdk_rx_error_from_mb (mb, &next0, &error0);
 
       vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
@@ -350,17 +383,20 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
            {
              next0 = next1 = next2 = next3 = xd->per_interface_next_index;
            }
-         else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF))
+         else if (PREDICT_TRUE
+                  ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0))
            {
-             next0 = next1 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
-             next2 = next3 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+             next0 = dpdk_rx_next_from_mb (mb0, b0);
+             next1 = dpdk_rx_next_from_mb (mb1, b1);
+             next2 = dpdk_rx_next_from_mb (mb2, b2);
+             next3 = dpdk_rx_next_from_mb (mb3, b3);
            }
          else
            {
-             dpdk_rx_next_from_mb (mb0, b0, &next0);
-             dpdk_rx_next_from_mb (mb1, b1, &next1);
-             dpdk_rx_next_from_mb (mb2, b2, &next2);
-             dpdk_rx_next_from_mb (mb3, b3, &next3);
+             next0 = dpdk_rx_next_from_etype (mb0, b0);
+             next1 = dpdk_rx_next_from_etype (mb1, b1);
+             next2 = dpdk_rx_next_from_etype (mb2, b2);
+             next3 = dpdk_rx_next_from_etype (mb3, b3);
            }
 
          if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD))
@@ -475,10 +511,12 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
 
          if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
            next0 = xd->per_interface_next_index;
-         else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF))
-           next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+         else if (PREDICT_TRUE
+                  ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0))
+           next0 = dpdk_rx_next_from_mb (mb0, b0);
          else
-           dpdk_rx_next_from_mb (mb0, b0, &next0);
+           next0 = dpdk_rx_next_from_etype (mb0, b0);
+
          dpdk_rx_error_from_mb (mb0, &next0, &error0);
          b0->error = node->errors[error0];