dpdk: tx code rework
[vpp.git] / src / plugins / dpdk / device / init.c
index 1c630be..83d26ce 100755 (executable)
@@ -19,6 +19,7 @@
 #include <vppinfra/bitmap.h>
 #include <vppinfra/linux/sysfs.h>
 #include <vlib/unix/unix.h>
+#include <vlib/log.h>
 
 #include <vnet/ethernet/ethernet.h>
 #include <dpdk/device/dpdk.h>
@@ -62,14 +63,22 @@ port_type_from_speed_capa (struct rte_eth_dev_info *dev_info)
 
   if (dev_info->speed_capa & ETH_LINK_SPEED_100G)
     return VNET_DPDK_PORT_TYPE_ETH_100G;
+  else if (dev_info->speed_capa & ETH_LINK_SPEED_56G)
+    return VNET_DPDK_PORT_TYPE_ETH_56G;
   else if (dev_info->speed_capa & ETH_LINK_SPEED_50G)
     return VNET_DPDK_PORT_TYPE_ETH_50G;
   else if (dev_info->speed_capa & ETH_LINK_SPEED_40G)
     return VNET_DPDK_PORT_TYPE_ETH_40G;
   else if (dev_info->speed_capa & ETH_LINK_SPEED_25G)
     return VNET_DPDK_PORT_TYPE_ETH_25G;
+  else if (dev_info->speed_capa & ETH_LINK_SPEED_20G)
+    return VNET_DPDK_PORT_TYPE_ETH_20G;
   else if (dev_info->speed_capa & ETH_LINK_SPEED_10G)
     return VNET_DPDK_PORT_TYPE_ETH_10G;
+  else if (dev_info->speed_capa & ETH_LINK_SPEED_5G)
+    return VNET_DPDK_PORT_TYPE_ETH_5G;
+  else if (dev_info->speed_capa & ETH_LINK_SPEED_2_5G)
+    return VNET_DPDK_PORT_TYPE_ETH_2_5G;
   else if (dev_info->speed_capa & ETH_LINK_SPEED_1G)
     return VNET_DPDK_PORT_TYPE_ETH_1G;
 
@@ -180,6 +189,7 @@ static clib_error_t *
 dpdk_lib_init (dpdk_main_t * dm)
 {
   u32 nports;
+  u32 mtu, max_rx_frame;
   u32 nb_desc = 0;
   int i;
   clib_error_t *error;
@@ -218,44 +228,34 @@ dpdk_lib_init (dpdk_main_t * dm)
   nports = rte_eth_dev_count ();
   if (nports < 1)
     {
-      clib_warning ("DPDK drivers found no ports...");
+      dpdk_log_notice ("DPDK drivers found no ports...");
     }
 
   if (CLIB_DEBUG > 0)
-    clib_warning ("DPDK drivers found %d ports...", nports);
-
-  /*
-   * All buffers are all allocated from the same rte_mempool.
-   * Thus they all have the same number of data bytes.
-   */
-  dm->vlib_buffer_free_list_index =
-    vlib_buffer_get_or_create_free_list (vm,
-                                        VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
-                                        "dpdk rx");
+    dpdk_log_notice ("DPDK drivers found %d ports...", nports);
 
   if (dm->conf->enable_tcp_udp_checksum)
     dm->buffer_flags_template &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT
                                   | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
 
   /* vlib_buffer_t template */
-  vec_validate_aligned (dm->buffer_templates, tm->n_vlib_mains - 1,
+  vec_validate_aligned (dm->per_thread_data, tm->n_vlib_mains - 1,
                        CLIB_CACHE_LINE_BYTES);
   for (i = 0; i < tm->n_vlib_mains; i++)
     {
       vlib_buffer_free_list_t *fl;
-      vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, i);
+      dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data, i);
       fl = vlib_buffer_get_free_list (vm,
                                      VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
-      vlib_buffer_init_for_free_list (bt, fl);
-      bt->flags = dm->buffer_flags_template;
-      vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+      vlib_buffer_init_for_free_list (&ptd->buffer_template, fl);
+      ptd->buffer_template.flags = dm->buffer_flags_template;
+      vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_TX] = (u32) ~ 0;
     }
 
   for (i = 0; i < nports; i++)
     {
       u8 addr[6];
       u8 vlan_strip = 0;
-      int j;
       struct rte_eth_dev_info dev_info;
       struct rte_eth_link l;
       dpdk_device_config_t *devconf = 0;
@@ -416,6 +416,7 @@ dpdk_lib_init (dpdk_main_t * dm)
 
            case VNET_DPDK_PMD_ENA:
              xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
+             xd->port_conf.rxmode.enable_scatter = 0;
              break;
 
            case VNET_DPDK_PMD_DPAA2:
@@ -464,6 +465,10 @@ dpdk_lib_init (dpdk_main_t * dm)
              xd->port_type = VNET_DPDK_PORT_TYPE_VIRTIO_USER;
              break;
 
+           case VNET_DPDK_PMD_VHOST_ETHER:
+             xd->port_type = VNET_DPDK_PORT_TYPE_VHOST_ETHER;
+             break;
+
            default:
              xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
            }
@@ -475,39 +480,6 @@ dpdk_lib_init (dpdk_main_t * dm)
            xd->nb_tx_desc = devconf->num_tx_desc;
        }
 
-      /*
-       * Ensure default mtu is not > the mtu read from the hardware.
-       * Otherwise rte_eth_dev_configure() will fail and the port will
-       * not be available.
-       */
-      if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
-       {
-         /*
-          * This device does not support the platforms's max frame
-          * size. Use it's advertised mru instead.
-          */
-         xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
-       }
-      else
-       {
-         xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES;
-
-         /*
-          * Some platforms do not account for Ethernet FCS (4 bytes) in
-          * MTU calculations. To interop with them increase mru but only
-          * if the device's settings can support it.
-          */
-         if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) &&
-             xd->port_conf.rxmode.hw_strip_crc)
-           {
-             /*
-              * Allow additional 4 bytes (for Ethernet FCS). These bytes are
-              * stripped by h/w and so will not consume any buffer memory.
-              */
-             xd->port_conf.rxmode.max_rx_pkt_len += 4;
-           }
-       }
-
       if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
        {
          f64 now = vlib_time_now (vm);
@@ -564,28 +536,6 @@ dpdk_lib_init (dpdk_main_t * dm)
          dq->queue_id = 0;
        }
 
-      vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains,
-                           CLIB_CACHE_LINE_BYTES);
-      for (j = 0; j < tm->n_vlib_mains; j++)
-       {
-         vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc,
-                          sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES);
-         vec_reset_length (xd->tx_vectors[j]);
-       }
-
-      vec_validate_aligned (xd->rx_vectors, xd->rx_q_used,
-                           CLIB_CACHE_LINE_BYTES);
-      for (j = 0; j < xd->rx_q_used; j++)
-       {
-         vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1,
-                               CLIB_CACHE_LINE_BYTES);
-         vec_reset_length (xd->rx_vectors[j]);
-       }
-
-      vec_validate_aligned (xd->d_trace_buffers, tm->n_vlib_mains,
-                           CLIB_CACHE_LINE_BYTES);
-
-
       /* count the number of descriptors used for this device */
       nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used;
 
@@ -596,8 +546,63 @@ dpdk_lib_init (dpdk_main_t * dm)
       if (error)
        return error;
 
+      /*
+       * Ensure default mtu is not > the mtu read from the hardware.
+       * Otherwise rte_eth_dev_configure() will fail and the port will
+       * not be available.
+       * Calculate max_frame_size and mtu supported by NIC
+       */
+      if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
+       {
+         /*
+          * This device does not support the platform's max frame
+          * size. Use its advertised mru instead.
+          */
+         max_rx_frame = dev_info.max_rx_pktlen;
+         mtu = dev_info.max_rx_pktlen - sizeof (ethernet_header_t);
+       }
+      else
+       {
+         /* VPP treats MTU and max_rx_pktlen both equal to
+          * ETHERNET_MAX_PACKET_BYTES, if dev_info.max_rx_pktlen >=
+          * ETHERNET_MAX_PACKET_BYTES + sizeof(ethernet_header_t)
+          */
+         if (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES +
+                                        sizeof (ethernet_header_t)))
+           {
+             mtu = ETHERNET_MAX_PACKET_BYTES;
+             max_rx_frame = ETHERNET_MAX_PACKET_BYTES;
+
+             /*
+              * Some platforms do not account for Ethernet FCS (4 bytes) in
+              * MTU calculations. To interop with them increase mru but only
+              * if the device's settings can support it.
+              */
+             if (xd->port_conf.rxmode.hw_strip_crc &&
+                 (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES +
+                                             sizeof (ethernet_header_t) +
+                                             4)))
+               {
+                 max_rx_frame += 4;
+               }
+           }
+         else
+           {
+             max_rx_frame = ETHERNET_MAX_PACKET_BYTES;
+             mtu = ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t);
+
+             if (xd->port_conf.rxmode.hw_strip_crc &&
+                 (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)))
+               {
+                 max_rx_frame += 4;
+               }
+           }
+       }
+      /*Set port rxmode config */
+      xd->port_conf.rxmode.max_rx_pkt_len = max_rx_frame;
+
       sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->hw_if_index);
-      xd->vlib_sw_if_index = sw->sw_if_index;
+      xd->sw_if_index = sw->sw_if_index;
       vnet_hw_interface_set_input_node (dm->vnet_main, xd->hw_if_index,
                                        dpdk_input_node.index);
 
@@ -619,8 +624,17 @@ dpdk_lib_init (dpdk_main_t * dm)
                                                ~1);
          }
 
+      /*Get vnet hardware interface */
       hi = vnet_get_hw_interface (dm->vnet_main, xd->hw_if_index);
 
+      /*Override default max_packet_bytes and max_supported_bytes set in
+       * ethernet_register_interface() above*/
+      if (hi)
+       {
+         hi->max_packet_bytes = max_rx_frame;
+         hi->max_supported_packet_bytes = max_rx_frame;
+       }
+
       if (dm->conf->no_tx_checksum_offload == 0)
        if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD)
          hi->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD;
@@ -628,7 +642,7 @@ dpdk_lib_init (dpdk_main_t * dm)
       dpdk_device_setup (xd);
 
       if (vec_len (xd->errors))
-       clib_warning ("setup failed for device %U. Errors:\n  %U",
+       dpdk_log_err ("setup failed for device %U. Errors:\n  %U",
                      format_dpdk_device_name, i,
                      format_dpdk_device_errors, xd);
 
@@ -651,7 +665,7 @@ dpdk_lib_init (dpdk_main_t * dm)
          if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF)
            vlan_strip = 1;     /* remove vlan tag from VIC port by default */
          else
-           clib_warning ("VLAN strip disabled for interface\n");
+           dpdk_log_warn ("VLAN strip disabled for interface\n");
        }
       else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON)
        vlan_strip = 1;
@@ -663,19 +677,22 @@ dpdk_lib_init (dpdk_main_t * dm)
          vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
          xd->port_conf.rxmode.hw_vlan_strip = vlan_off;
          if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0)
-           clib_warning ("VLAN strip enabled for interface\n");
+           dpdk_log_info ("VLAN strip enabled for interface\n");
          else
-           clib_warning ("VLAN strip cannot be supported by interface\n");
+           dpdk_log_warn ("VLAN strip cannot be supported by interface\n");
        }
 
-      hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] =
-       xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t);
+      if (hi)
+       hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] =
+         xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t);
+      else
+       clib_warning ("hi NULL");
 
-      rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes);
+      rte_eth_dev_set_mtu (xd->device_index, mtu);
     }
 
   if (nb_desc > dm->conf->num_mbufs)
-    clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n",
+    dpdk_log_err ("%d mbufs allocated but total rx/tx ring size is %d\n",
                  dm->conf->num_mbufs, nb_desc);
 
   return 0;
@@ -753,7 +770,7 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
       }
     else
       {
-        clib_warning ("Unsupported PCI device 0x%04x:0x%04x found "
+        dpdk_log_warn ("Unsupported PCI device 0x%04x:0x%04x found "
                      "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id,
                      pci_addr);
         continue;
@@ -879,6 +896,37 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr,
   return error;
 }
 
+/*
+ * Read-ready callback for a file descriptor carrying DPDK log text.
+ *
+ * Drains whatever is currently readable from uf->file_descriptor in
+ * 4096-byte chunks, then re-emits the collected text line by line
+ * through dpdk_log_notice().
+ *
+ * Returns 0 on success, or a unix error when read() fails with anything
+ * other than EAGAIN.
+ */
+static clib_error_t *
+dpdk_log_read_ready (clib_file_t * uf)
+{
+  unformat_input_t input;
+  u8 *line, *s = 0;
+  int n, n_try;
+
+  /* A completely filled chunk means more data may be pending: loop. */
+  n = n_try = 4096;
+  while (n == n_try)
+    {
+      uword len = vec_len (s);
+      /* NOTE(review): vec_resize() appears to ADD elements, so this grows
+       * the vector by len + n_try rather than n_try; harmless because the
+       * length is trimmed below, but worth confirming against vec.h. */
+      vec_resize (s, len + n_try);
+
+      n = read (uf->file_descriptor, s + len, n_try);
+      if (n < 0 && errno != EAGAIN)
+       {
+         /* Build the error first so errno is captured before any
+          * cleanup call can disturb it, then release the buffer that
+          * would otherwise be leaked on this path. */
+         clib_error_t *err = clib_error_return_unix (0, "read");
+         vec_free (s);
+         return err;
+       }
+      /* Trim to the bytes actually read; EAGAIN contributes nothing. */
+      _vec_len (s) = len + (n < 0 ? 0 : n);
+    }
+
+  /* NOTE(review): presumably input takes over s here and unformat_free()
+   * below releases it -- confirm against the unformat API. */
+  unformat_init_vector (&input, s);
+
+  /* Forward each complete line to the dpdk log class; every parsed line
+   * is freed once it has been logged. */
+  while (unformat_user (&input, unformat_line, &line))
+    {
+      dpdk_log_notice ("%v", line);
+      vec_free (line);
+    }
+
+  unformat_free (&input);
+  return 0;
+}
+
 static clib_error_t *
 dpdk_config (vlib_main_t * vm, unformat_input_t * input)
 {
@@ -1229,6 +1277,19 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
   /* Set up DPDK eal and packet mbuf pool early. */
 
   rte_log_set_global_level (log_level);
+  int log_fds[2] = { 0 };
+  if (pipe (log_fds) == 0)
+    {
+      FILE *f = fdopen (log_fds[1], "a");
+      if (f && rte_openlog_stream (f) == 0)
+       {
+         clib_file_t t = { 0 };
+         t.read_function = dpdk_log_read_ready;
+         t.file_descriptor = log_fds[0];
+         t.description = format (0, "DPDK logging pipe");
+         clib_file_add (&file_main, &t);
+       }
+    }
 
   vm = vlib_get_main ();
 
@@ -1237,10 +1298,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
     conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ",
                                      conf->eal_init_args[i]);
 
-  clib_warning ("EAL init args: %s", conf->eal_init_args_str);
-  ret =
-    rte_eal_init (vec_len (conf->eal_init_args),
-                 (char **) conf->eal_init_args);
+  dpdk_log_warn ("EAL init args: %s", conf->eal_init_args_str);
+  ret = rte_eal_init (vec_len (conf->eal_init_args),
+                     (char **) conf->eal_init_args);
 
   /* lazy umount hugepages */
   umount2 ((char *) huge_dir_path, MNT_DETACH);
@@ -1250,10 +1310,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
   if (ret < 0)
     return clib_error_return (0, "rte_eal_init returned %d", ret);
 
-  /* Dump the physical memory layout prior to creating the mbuf_pool */
-  fprintf (stdout, "DPDK physical memory layout:\n");
-  rte_dump_physmem_layout (stdout);
-
   /* set custom ring memory allocator */
   {
     struct rte_mempool_ops *ops = NULL;
@@ -1323,7 +1379,7 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
        u8 new_link_state;
       } *ed;
       ed = ELOG_DATA (&vm->elog_main, e);
-      ed->sw_if_index = xd->vlib_sw_if_index;
+      ed->sw_if_index = xd->sw_if_index;
       ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0;
       ed->old_link_state = (u8)
        vnet_hw_interface_is_link_up (vnm, xd->hw_if_index);
@@ -1367,16 +1423,37 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
        case ETH_SPEED_NUM_1G:
          hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G;
          break;
+       case ETH_SPEED_NUM_2_5G:
+         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_2_5G;
+         break;
+       case ETH_SPEED_NUM_5G:
+         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_5G;
+         break;
        case ETH_SPEED_NUM_10G:
          hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G;
          break;
+       case ETH_SPEED_NUM_20G:
+         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_20G;
+         break;
+       case ETH_SPEED_NUM_25G:
+         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_25G;
+         break;
        case ETH_SPEED_NUM_40G:
          hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G;
          break;
+       case ETH_SPEED_NUM_50G:
+         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_50G;
+         break;
+       case ETH_SPEED_NUM_56G:
+         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_56G;
+         break;
+       case ETH_SPEED_NUM_100G:
+         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100G;
+         break;
        case 0:
          break;
        default:
-         clib_warning ("unknown link speed %d", xd->link.link_speed);
+         dpdk_log_warn ("unknown link speed %d", xd->link.link_speed);
          break;
        }
     }
@@ -1398,7 +1475,7 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
            u32 flags;
          } *ed;
          ed = ELOG_DATA (&vm->elog_main, e);
-         ed->sw_if_index = xd->vlib_sw_if_index;
+         ed->sw_if_index = xd->sw_if_index;
          ed->flags = hw_flags;
        }
       vnet_hw_interface_set_flags (vnm, xd->hw_if_index, hw_flags);
@@ -1461,12 +1538,12 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
                      (slink[0], (struct ether_addr *) addr);
 
                    /* Set MAC of bounded interface to that of 1st slave link */
-                   clib_warning ("Set MAC for bond port %d BondEthernet%d",
-                                 i, xd->port_id);
+                   dpdk_log_info ("Set MAC for bond port %d BondEthernet%d",
+                                  i, xd->port_id);
                    rv = rte_eth_bond_mac_address_set
                      (i, (struct ether_addr *) addr);
                    if (rv)
-                     clib_warning ("Set MAC addr failure rv=%d", rv);
+                     dpdk_log_warn ("Set MAC addr failure rv=%d", rv);
 
                    /* Populate MAC of bonded interface in VPP hw tables */
                    bhi = vnet_get_hw_interface
@@ -1491,11 +1568,13 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
                        /* Add MAC to all slave links except the first one */
                        if (nlink)
                          {
-                           clib_warning ("Add MAC for slave port %d", slave);
+                           dpdk_log_info ("Add MAC for slave port %d",
+                                          slave);
                            rv = rte_eth_dev_mac_addr_add
                              (slave, (struct ether_addr *) addr, 0);
                            if (rv)
-                             clib_warning ("Add MAC addr failure rv=%d", rv);
+                             dpdk_log_warn ("Add MAC addr failure rv=%d",
+                                            rv);
                          }
                        /* Setup slave link state change callback handling */
                        rte_eth_dev_callback_register
@@ -1509,8 +1588,7 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
                          (bhi->bond_info, sdev->hw_if_index, 1);
                        /* Set MACs and slave link flags on slave interface */
                        shi = vnet_get_hw_interface (vnm, sdev->hw_if_index);
-                       ssi = vnet_get_sw_interface
-                         (vnm, sdev->vlib_sw_if_index);
+                       ssi = vnet_get_sw_interface (vnm, sdev->sw_if_index);
                        sei = pool_elt_at_index
                          (em->interfaces, shi->hw_instance);
                        shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE;
@@ -1609,6 +1687,8 @@ dpdk_init (vlib_main_t * vm)
   if ((error = vlib_call_init_function (vm, dpdk_cli_init)))
     return error;
 
+  dm->log_default = vlib_log_register_class ("dpdk", 0);
+
   return error;
 }