vnet: store hw interface speed in kbps instead of using flags
[vpp.git] / src / plugins / dpdk / device / init.c
index c811c47..fcdc29b 100644 (file)
@@ -46,15 +46,6 @@ dpdk_config_main_t dpdk_config_main;
 
 /* Port configuration, mildly modified Intel app values */
 
-static struct rte_eth_conf port_conf_template = {
-  .rxmode = {
-            .split_hdr_size = 0,
-            },
-  .txmode = {
-            .mq_mode = ETH_MQ_TX_NONE,
-            },
-};
-
 static dpdk_port_type_t
 port_type_from_speed_capa (struct rte_eth_dev_info *dev_info)
 {
@@ -83,6 +74,35 @@ port_type_from_speed_capa (struct rte_eth_dev_info *dev_info)
   return VNET_DPDK_PORT_TYPE_UNKNOWN;
 }
 
+static dpdk_port_type_t /* port type for a link speed; VNET_DPDK_PORT_TYPE_UNKNOWN when no case matches */
+port_type_from_link_speed (u32 link_speed) /* link_speed is matched against the ETH_SPEED_NUM_* constants */
+{
+  switch (link_speed) /* cases cover 1G through 100G only */
+    {
+    case ETH_SPEED_NUM_1G:
+      return VNET_DPDK_PORT_TYPE_ETH_1G;
+    case ETH_SPEED_NUM_2_5G:
+      return VNET_DPDK_PORT_TYPE_ETH_2_5G;
+    case ETH_SPEED_NUM_5G:
+      return VNET_DPDK_PORT_TYPE_ETH_5G;
+    case ETH_SPEED_NUM_10G:
+      return VNET_DPDK_PORT_TYPE_ETH_10G;
+    case ETH_SPEED_NUM_20G:
+      return VNET_DPDK_PORT_TYPE_ETH_20G;
+    case ETH_SPEED_NUM_25G:
+      return VNET_DPDK_PORT_TYPE_ETH_25G;
+    case ETH_SPEED_NUM_40G:
+      return VNET_DPDK_PORT_TYPE_ETH_40G;
+    case ETH_SPEED_NUM_50G:
+      return VNET_DPDK_PORT_TYPE_ETH_50G;
+    case ETH_SPEED_NUM_56G:
+      return VNET_DPDK_PORT_TYPE_ETH_56G;
+    case ETH_SPEED_NUM_100G:
+      return VNET_DPDK_PORT_TYPE_ETH_100G;
+    default: /* everything else, including ETH_SPEED_NUM_10M and ETH_SPEED_NUM_100M */
+      return VNET_DPDK_PORT_TYPE_UNKNOWN;
+    }
+}
 
 static u32
 dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
@@ -125,7 +145,7 @@ dpdk_device_lock_init (dpdk_device_t * xd)
     {
       xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
                                             CLIB_CACHE_LINE_BYTES);
-      memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES);
+      clib_memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES);
     }
 }
 
@@ -187,12 +207,12 @@ static int
 dpdk_port_crc_strip_enabled (dpdk_device_t * xd)
 {
 #if RTE_VERSION < RTE_VERSION_NUM(18, 8, 0, 0)
-  if (xd->port_conf.rxmode.hw_strip_crc)
+  return ! !(xd->port_conf.rxmode.hw_strip_crc); /* before 18.08: normalise the hw_strip_crc field to 0/1 */
+#elif RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
+  return ! !(xd->port_conf.rxmode.offloads & DEV_RX_OFFLOAD_CRC_STRIP); /* 18.08 up to (not including) 18.11: 1 when the CRC_STRIP rx offload bit is set */
 #else
-  if (xd->port_conf.rxmode.offloads & DEV_RX_OFFLOAD_CRC_STRIP)
+  return !(xd->port_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC); /* 18.11 onwards: 1 unless the KEEP_CRC rx offload bit is set */
 #endif
-    return 1;
-  return 0;
 }
 
 static clib_error_t *
@@ -283,11 +303,12 @@ dpdk_lib_init (dpdk_main_t * dm)
 
       if (dev_info.device == 0)
        {
-         clib_warning ("DPDK bug: missing device info. Skipping  %s device",
+         clib_warning ("DPDK bug: missing device info. Skipping %s device",
                        dev_info.driver_name);
          continue;
        }
-      pci_dev = RTE_DEV_TO_PCI (dev_info.device);
+
+      pci_dev = dpdk_get_pci_device (&dev_info);
 
       if (pci_dev)     /* bonded interface has no pci info */
        {
@@ -295,9 +316,8 @@ dpdk_lib_init (dpdk_main_t * dm)
          pci_addr.bus = pci_dev->addr.bus;
          pci_addr.slot = pci_dev->addr.devid;
          pci_addr.function = pci_dev->addr.function;
-         p =
-           hash_get (dm->conf->device_config_index_by_pci_addr,
-                     pci_addr.as_u32);
+         p = hash_get (dm->conf->device_config_index_by_pci_addr,
+                       pci_addr.as_u32);
        }
 
       if (p)
@@ -347,8 +367,8 @@ dpdk_lib_init (dpdk_main_t * dm)
        {
 #if RTE_VERSION < RTE_VERSION_NUM(18, 8, 0, 0)
          xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
-         port_conf_template.rxmode.jumbo_frame = 0;
-         port_conf_template.rxmode.enable_scatter = 0;
+         xd->port_conf.rxmode.jumbo_frame = 0;
+         xd->port_conf.rxmode.enable_scatter = 0;
 #else
          xd->port_conf.txmode.offloads &= ~DEV_TX_OFFLOAD_MULTI_SEGS;
          xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
@@ -359,8 +379,8 @@ dpdk_lib_init (dpdk_main_t * dm)
        {
 #if RTE_VERSION < RTE_VERSION_NUM(18, 8, 0, 0)
          xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
-         port_conf_template.rxmode.jumbo_frame = 1;
-         port_conf_template.rxmode.enable_scatter = 1;
+         xd->port_conf.rxmode.jumbo_frame = 1;
+         xd->port_conf.rxmode.enable_scatter = 1;
 #else
          xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
          xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
@@ -369,9 +389,6 @@ dpdk_lib_init (dpdk_main_t * dm)
          xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG;
        }
 
-      clib_memcpy (&xd->port_conf, &port_conf_template,
-                  sizeof (struct rte_eth_conf));
-
       xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains);
 
       if (devconf->num_tx_queues > 0
@@ -471,7 +488,7 @@ dpdk_lib_init (dpdk_main_t * dm)
              xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
 #if RTE_VERSION < RTE_VERSION_NUM(18, 8, 0, 0)
              xd->port_conf.rxmode.hw_strip_crc = 1;
-#else
+#elif RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
              xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
 #endif
              break;
@@ -480,7 +497,7 @@ dpdk_lib_init (dpdk_main_t * dm)
              xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
 #if RTE_VERSION < RTE_VERSION_NUM(18, 8, 0, 0)
              xd->port_conf.rxmode.hw_strip_crc = 1;
-#else
+#elif RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
              xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
 #endif
              break;
@@ -490,7 +507,7 @@ dpdk_lib_init (dpdk_main_t * dm)
 #if RTE_VERSION < RTE_VERSION_NUM(18, 8, 0, 0)
              xd->port_conf.rxmode.enable_scatter = 0;
 #else
-             xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
+             xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_SCATTER;
 #endif
              break;
 
@@ -511,7 +528,7 @@ dpdk_lib_init (dpdk_main_t * dm)
              xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH;
 #if RTE_VERSION < RTE_VERSION_NUM(18, 8, 0, 0)
              xd->port_conf.rxmode.hw_strip_crc = 1;
-#else
+#elif RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
              xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
 #endif
              break;
@@ -560,6 +577,10 @@ dpdk_lib_init (dpdk_main_t * dm)
              xd->port_conf.intr_conf.lsc = 1;
              break;
 
+           case VNET_DPDK_PMD_NETVSC:
+             xd->port_type = port_type_from_link_speed (l.link_speed);
+             break;
+
            default:
              xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
            }
@@ -767,7 +788,7 @@ dpdk_lib_init (dpdk_main_t * dm)
        * For cisco VIC vNIC, set default to VLAN strip enabled, unless
        * specified otherwise in the startup config.
        * For other NICs default to VLAN strip disabled, unless specified
-       * otherwis in the startup config.
+       * otherwise in the startup config.
        */
       if (xd->pmd == VNET_DPDK_PMD_ENIC)
        {
@@ -821,6 +842,7 @@ dpdk_lib_init (dpdk_main_t * dm)
 static void
 dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
 {
+  vlib_main_t *vm = vlib_get_main ();
   clib_error_t *error;
   u8 *pci_addr = 0;
   int num_whitelisted = vec_len (conf->dev_confs);
@@ -839,7 +861,7 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
       vlib_pci_free_device_info (d);
       d = 0;
       }
-    d = vlib_pci_get_device_info (addr, &error);
+    d = vlib_pci_get_device_info (vm, addr, &error);
     if (error)
     {
       clib_error_report (error);
@@ -866,7 +888,20 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
       ;
     /* vmxnet3 */
     else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0)
-      ;
+      {
+       /*
+        * For vmxnet3 PCI, unless it is explicitly specified in the whitelist,
+        * the default is to put it in the blacklist.
+        */
+       if (devconf == 0)
+         {
+           pool_get (conf->dev_confs, devconf);
+           hash_set (conf->device_config_index_by_pci_addr, addr->as_u32,
+                     devconf - conf->dev_confs);
+           devconf->pci_addr.as_u32 = addr->as_u32;
+           devconf->is_blacklisted = 1;
+         }
+      }
     /* all Intel network devices */
     else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_NETWORK_ETHERNET)
       ;
@@ -907,7 +942,7 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
         continue;
       }
 
-    error = vlib_pci_bind_to_uio (addr, (char *) conf->uio_driver_name);
+    error = vlib_pci_bind_to_uio (vm, addr, (char *) conf->uio_driver_name);
 
     if (error)
       {
@@ -1020,7 +1055,7 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr,
           devconf->num_rx_queues)
     error =
       clib_error_return (0,
-                        "%U: number of worker threadds must be "
+                        "%U: number of worker threads must be "
                         "equal to number of rx queues", format_vlib_pci_addr,
                         &pci_addr);
 
@@ -1262,15 +1297,18 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
          /* *INDENT-ON* */
        }
 
+      uword default_hugepage_sz = clib_mem_get_default_hugepage_size ();
       /* *INDENT-OFF* */
       clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
         {
          clib_error_t *e;
-
+         uword n_pages;
          vec_validate(mem_by_socket, c);
+         n_pages = round_pow2 ((uword) mem_by_socket[c]<<20,
+                               default_hugepage_sz);
+         n_pages /= default_hugepage_sz;
 
-         e = clib_sysfs_prealloc_hugepages(c, 2 << 10, mem_by_socket[c] / 2);
-         if (e)
+         if ((e = clib_sysfs_prealloc_hugepages(c, 0, n_pages)))
            clib_error_report (e);
       }));
       /* *INDENT-ON* */
@@ -1496,7 +1534,7 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
     return;
 
   xd->time_last_link_update = now ? now : xd->time_last_link_update;
-  memset (&xd->link, 0, sizeof (xd->link));
+  clib_memset (&xd->link, 0, sizeof (xd->link));
   rte_eth_link_get_nowait (xd->port_id, &xd->link);
 
   if (LINK_STATE_ELOGS)
@@ -1546,54 +1584,10 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
          break;
        }
     }
-  if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed))
-    {
-      hw_flags_chg = 1;
-      switch (xd->link.link_speed)
-       {
-       case ETH_SPEED_NUM_10M:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M;
-         break;
-       case ETH_SPEED_NUM_100M:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M;
-         break;
-       case ETH_SPEED_NUM_1G:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G;
-         break;
-       case ETH_SPEED_NUM_2_5G:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_2_5G;
-         break;
-       case ETH_SPEED_NUM_5G:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_5G;
-         break;
-       case ETH_SPEED_NUM_10G:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G;
-         break;
-       case ETH_SPEED_NUM_20G:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_20G;
-         break;
-       case ETH_SPEED_NUM_25G:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_25G;
-         break;
-       case ETH_SPEED_NUM_40G:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G;
-         break;
-       case ETH_SPEED_NUM_50G:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_50G;
-         break;
-       case ETH_SPEED_NUM_56G:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_56G;
-         break;
-       case ETH_SPEED_NUM_100G:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100G;
-         break;
-       case 0:
-         break;
-       default:
-         dpdk_log_warn ("unknown link speed %d", xd->link.link_speed);
-         break;
-       }
-    }
+  if (xd->link.link_speed != prev_link.link_speed)
+    vnet_hw_interface_set_link_speed (vnm, xd->hw_if_index,
+                                     xd->link.link_speed * 1000);
+
   if (hw_flags_chg)
     {
       if (LINK_STATE_ELOGS)