Allow DPDK per-interface startup config to enable/disable VLAN stripping
[vpp.git] / vnet / vnet / devices / dpdk / init.c
index 63af5b3..7514ff8 100644 (file)
@@ -152,8 +152,7 @@ static u32 dpdk_flag_change (vnet_main_t * vnm,
        *            driver to dynamically change the mtu.  If/when the 
        *            VIC firmware gets fixed, then this should be removed.
        */
-      if (xd->pmd == VNET_DPDK_PMD_VICE ||
-          xd->pmd == VNET_DPDK_PMD_ENIC)
+      if (xd->pmd == VNET_DPDK_PMD_ENIC)
        {
          struct rte_eth_dev_info dev_info;
 
@@ -196,10 +195,6 @@ static u32 dpdk_flag_change (vnet_main_t * vnm,
   return old;
 }
 
-#ifdef NETMAP
-extern int rte_netmap_probe(void);
-#endif
-
 void
 dpdk_device_lock_init(dpdk_device_t * xd)
 {
@@ -252,20 +247,9 @@ dpdk_lib_init (dpdk_main_t * dm)
   rt->function = dpdk_input_multiarch_select();
 
   /* find out which cpus will be used for input */
-  p = hash_get_mem (tm->thread_registrations_by_name, "io");
+  p = hash_get_mem (tm->thread_registrations_by_name, "workers");
   tr = p ? (vlib_thread_registration_t *) p[0] : 0;
 
-  if (!tr || tr->count == 0)
-    {
-      /* no io threads, workers doing input */
-      p = hash_get_mem (tm->thread_registrations_by_name, "workers");
-      tr = p ? (vlib_thread_registration_t *) p[0] : 0;
-    }
-  else
-    {
-      dm->have_io_threads = 1;
-    }
-
   if (tr && tr->count > 0)
     {
       dm->input_cpu_first_index = tr->first_index;
@@ -278,11 +262,6 @@ dpdk_lib_init (dpdk_main_t * dm)
   vec_validate_aligned (dm->workers, tm->n_vlib_mains - 1,
                         CLIB_CACHE_LINE_BYTES);
 
-#ifdef NETMAP
-  if(rte_netmap_probe() < 0)
-    return clib_error_return (0, "rte netmap probe failed");
-#endif
-
   nports = rte_eth_dev_count();
   if (nports < 1) 
     {
@@ -307,20 +286,24 @@ dpdk_lib_init (dpdk_main_t * dm)
   for (i = 0; i < nports; i++)
     {
       u8 addr[6];
+      u8 vlan_strip = 0;
       int j;
       struct rte_eth_dev_info dev_info;
       clib_error_t * rv;
       struct rte_eth_link l;
       dpdk_device_config_t * devconf = 0;
       vlib_pci_addr_t pci_addr;
-      uword * p;
+      uword * p = 0;
 
       rte_eth_dev_info_get(i, &dev_info);
-      pci_addr.domain = dev_info.pci_dev->addr.domain;
-      pci_addr.bus = dev_info.pci_dev->addr.bus;
-      pci_addr.slot = dev_info.pci_dev->addr.devid;
-      pci_addr.function = dev_info.pci_dev->addr.function;
-      p = hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+      if (dev_info.pci_dev) /* bonded interface has no pci info */
+        {
+         pci_addr.domain = dev_info.pci_dev->addr.domain;
+         pci_addr.bus = dev_info.pci_dev->addr.bus;
+         pci_addr.slot = dev_info.pci_dev->addr.devid;
+         pci_addr.function = dev_info.pci_dev->addr.function;
+         p = hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+        }
 
       if (p)
        devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
@@ -363,7 +346,10 @@ dpdk_lib_init (dpdk_main_t * dm)
         {
           xd->rx_q_used = devconf->num_rx_queues;
           xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
-          xd->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
+          if (devconf->rss_fn == 0)
+            xd->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
+          else
+            xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn;
         }
       else
         xd->rx_q_used = 1;
@@ -371,7 +357,7 @@ dpdk_lib_init (dpdk_main_t * dm)
       xd->dev_type = VNET_DPDK_DEV_ETH;
 
       /* workaround for drivers not setting driver_name */
-      if (!dev_info.driver_name)
+      if ((!dev_info.driver_name) && (dev_info.pci_dev))
         dev_info.driver_name = dev_info.pci_dev->driver->name;
       ASSERT(dev_info.driver_name);
 
@@ -404,9 +390,11 @@ dpdk_lib_init (dpdk_main_t * dm)
             xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE;
             xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE;
             break;
+         case VNET_DPDK_PMD_DPAA2:
+           xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
+           break;
 
           /* Cisco VIC */
-          case VNET_DPDK_PMD_VICE:
           case VNET_DPDK_PMD_ENIC:
             rte_eth_link_get_nowait(i, &l);
            xd->nb_rx_desc = DPDK_NB_RX_DESC_ENIC;
@@ -496,14 +484,6 @@ dpdk_lib_init (dpdk_main_t * dm)
             xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
         }
 
-  #ifdef NETMAP
-       if(strncmp(dev_info.driver_name, "vale", 4) == 0
-            || strncmp(dev_info.driver_name, "netmap", 6) == 0)
-          {
-            xd->pmd = VNET_DPDK_PMD_NETMAP;
-            xd->port_type = VNET_DPDK_PORT_TYPE_NETMAP;
-          }
-  #endif
        if (devconf->num_rx_desc)
          xd->nb_rx_desc = devconf->num_rx_desc;
 
@@ -579,29 +559,44 @@ dpdk_lib_init (dpdk_main_t * dm)
       dpdk_device_and_queue_t * dq;
       int q;
 
-      for (q = 0; q < xd->rx_q_used; q++)
-        {
-          int cpu = dm->input_cpu_first_index + next_cpu;
-          unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
-
-          /*
-           * numa node for worker thread handling this queue
-           * needed for taking buffers from the right mempool
-           */
-          vec_validate(xd->cpu_socket_id_by_queue, q);
-          xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore);
-
-          /*
-           * construct vector of (device,queue) pairs for each worker thread
-           */
-          vec_add2(dm->devices_by_cpu[cpu], dq, 1);
-          dq->device = xd->device_index;
-          dq->queue_id = q;
-
-          next_cpu++;
-          if (next_cpu == dm->input_cpu_count)
-            next_cpu = 0;
-        }
+      if (devconf->workers)
+       {
+         int i;
+         q = 0;
+         clib_bitmap_foreach (i, devconf->workers, ({
+           int cpu = dm->input_cpu_first_index + i;
+           unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
+           vec_validate(xd->cpu_socket_id_by_queue, q);
+           xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore);
+           vec_add2(dm->devices_by_cpu[cpu], dq, 1);
+           dq->device = xd->device_index;
+           dq->queue_id = q++;
+         }));
+       }
+      else
+       for (q = 0; q < xd->rx_q_used; q++)
+         {
+           int cpu = dm->input_cpu_first_index + next_cpu;
+           unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
+
+           /*
+            * numa node for worker thread handling this queue
+            * needed for taking buffers from the right mempool
+            */
+           vec_validate(xd->cpu_socket_id_by_queue, q);
+           xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore);
+
+           /*
+            * construct vector of (device,queue) pairs for each worker thread
+            */
+           vec_add2(dm->devices_by_cpu[cpu], dq, 1);
+           dq->device = xd->device_index;
+           dq->queue_id = q;
+
+           next_cpu++;
+           if (next_cpu == dm->input_cpu_count)
+             next_cpu = 0;
+         }
 
       vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains,
                             CLIB_CACHE_LINE_BYTES);
@@ -651,22 +646,30 @@ dpdk_lib_init (dpdk_main_t * dm)
        *            driver to dynamically change the mtu.  If/when the 
        *            VIC firmware gets fixed, then this should be removed.
        */
-      if (xd->pmd == VNET_DPDK_PMD_VICE ||
-          xd->pmd == VNET_DPDK_PMD_ENIC)
+      if (xd->pmd == VNET_DPDK_PMD_ENIC)
        {
          /*
           * Initialize mtu to what has been set by CIMC in the firmware cfg.
           */
          hi->max_packet_bytes = dev_info.max_rx_pktlen;
-          /*
-           * remove vlan tag from VIC port to fix VLAN0 issue.
-           * TODO Handle VLAN tagged traffic
-           */
-          int vlan_off;
-          vlan_off = rte_eth_dev_get_vlan_offload(xd->device_index);
-          vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
-          rte_eth_dev_set_vlan_offload(xd->device_index, vlan_off);
+         if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF)
+           vlan_strip = 1; /* remove vlan tag from VIC port by default */
+         else
+           clib_warning("VLAN strip disabled for interface\n");
        }
+      else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON)
+       vlan_strip = 1;
+
+      if (vlan_strip)
+        { 
+         int vlan_off;
+         vlan_off = rte_eth_dev_get_vlan_offload(xd->device_index);
+         vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
+         if (rte_eth_dev_set_vlan_offload(xd->device_index, vlan_off) == 0)
+           clib_warning("VLAN strip enabled for interface\n");
+         else
+           clib_warning("VLAN strip cannot be supported by interface\n");
+        }
 
 #if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0) 
       /*
@@ -847,6 +850,7 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, unforma
   clib_error_t * error = 0;
   uword * p;
   dpdk_device_config_t * devconf;
+  unformat_input_t sub_input;
 
   if (is_default)
     {
@@ -881,6 +885,19 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, unforma
        ;
       else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc))
        ;
+      else if (unformat (input, "workers %U", unformat_bitmap_list,
+                        &devconf->workers))
+       ;
+      else if (unformat (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input))
+        {
+          error = unformat_rss_fn(&sub_input, &devconf->rss_fn);
+          if (error)
+            break;
+        }
+      else if (unformat (input, "vlan-strip-offload off"))
+         devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF;
+      else if (unformat (input, "vlan-strip-offload on"))
+         devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON;
       else
        {
          error = clib_error_return (0, "unknown input `%U'",
@@ -888,6 +905,18 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, unforma
          break;
        }
     }
+
+  if (error)
+    return error;
+
+  if (devconf->workers && devconf->num_rx_queues == 0)
+    devconf->num_rx_queues = clib_bitmap_count_set_bits(devconf->workers);
+  else if (devconf->workers &&
+          clib_bitmap_count_set_bits(devconf->workers) != devconf->num_rx_queues)
+    error = clib_error_return (0, "%U: number of worker threadds must be "
+                              "equal to number of rx queues",
+                              format_vlib_pci_addr, &pci_addr);
+
   return error;
 }
 
@@ -906,10 +935,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
   u32 log_level;
   int ret, i;
   int num_whitelisted = 0;
-#ifdef NETMAP
-  int rxrings, txrings, rxslots, txslots, txburst;
-  char * nmnam;
-#endif
   u8 no_pci = 0;
   u8 no_huge = 0;
   u8 huge_dir = 0;
@@ -966,28 +991,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
 
          num_whitelisted++;
        }
-
-#ifdef NETMAP
-     else if (unformat(input, "netmap %s/%d:%d/%d:%d/%d",
-                  &nmname, &rxrings, &rxslots, &txrings, &txslots, &txburst)) {
-        char * rv;
-        rv = (char *)
-          eth_nm_args(nmname, rxrings, rxslots, txrings, txslots, txburst);
-        if (rv) {
-          error = clib_error_return (0, "%s", rv);
-          goto done;
-        }
-      }else if (unformat(input, "netmap %s", &nmname)) {
-        char * rv;
-        rv = (char *)
-          eth_nm_args(nmname, 0, 0, 0, 0, 0);
-        if (rv) {
-          error = clib_error_return (0, "%s", rv);
-          goto done;
-        }
-      }
-#endif
-
       else if (unformat (input, "num-mbufs %d", &conf->num_mbufs))
         ;
       else if (unformat (input, "kni %d", &conf->num_kni))
@@ -1112,7 +1115,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
          clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
            {
              vec_validate(mem_by_socket, c);
-             mem_by_socket[c] = 512; /* default per-socket mem */
+             mem_by_socket[c] = 256; /* default per-socket mem */
            }
          ));
        }
@@ -1518,10 +1521,7 @@ dpdk_process (vlib_main_t * vm,
         if (tm->n_vlib_mains == 1)
           vlib_node_set_state (vm, dpdk_input_node.index,
                                VLIB_NODE_STATE_POLLING);
-        else if (tm->main_thread_is_io_node)
-          vlib_node_set_state (vm, dpdk_io_input_node.index,
-                               VLIB_NODE_STATE_POLLING);
-        else if (!dm->have_io_threads)
+        else
           for (i=0; i < tm->n_vlib_mains; i++)
             if (vec_len(dm->devices_by_cpu[i]) > 0)
               vlib_node_set_state (vlib_mains[i], dpdk_input_node.index,
@@ -1533,7 +1533,7 @@ dpdk_process (vlib_main_t * vm,
 
   dpdk_vhost_user_process_init(&vu_state);
 
-  dm->io_thread_release = 1;
+  tm->worker_thread_release = 1;
 
   f64 now = vlib_time_now (vm);
   vec_foreach (xd, dm->devices)