dpdk: remove support for dpdk 16.04
[vpp.git] / vnet / vnet / devices / dpdk / init.c
index 7b65720..7045e45 100644 (file)
@@ -221,7 +221,6 @@ dpdk_device_lock_init (dpdk_device_t * xd)
                                             CLIB_CACHE_LINE_BYTES);
       memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES);
     }
-  xd->need_txlock = 1;
 }
 
 void
@@ -233,7 +232,6 @@ dpdk_device_lock_free (dpdk_device_t * xd)
     clib_mem_free ((void *) xd->lockp[q]);
   vec_free (xd->lockp);
   xd->lockp = 0;
-  xd->need_txlock = 0;
 }
 
 static clib_error_t *
@@ -251,10 +249,10 @@ dpdk_lib_init (dpdk_main_t * dm)
   dpdk_device_t *xd;
   vlib_pci_addr_t last_pci_addr;
   u32 last_pci_addr_port = 0;
-  vlib_thread_registration_t *tr;
-  uword *p;
+  vlib_thread_registration_t *tr, *tr_hqos;
+  uword *p, *p_hqos;
 
-  u32 next_cpu = 0;
+  u32 next_cpu = 0, next_hqos_cpu = 0;
   u8 af_packet_port_id = 0;
   last_pci_addr.as_u32 = ~0;
 
@@ -280,6 +278,30 @@ dpdk_lib_init (dpdk_main_t * dm)
   vec_validate_aligned (dm->workers, tm->n_vlib_mains - 1,
                        CLIB_CACHE_LINE_BYTES);
 
+  dm->hqos_cpu_first_index = 0;
+  dm->hqos_cpu_count = 0;
+
+  /* find out which cpus will be used for I/O TX */
+  p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads");
+  tr_hqos = p_hqos ? (vlib_thread_registration_t *) p_hqos[0] : 0;
+
+  if (tr_hqos && tr_hqos->count > 0)
+    {
+      dm->hqos_cpu_first_index = tr_hqos->first_index;
+      dm->hqos_cpu_count = tr_hqos->count;
+    }
+
+  vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1,
+                       CLIB_CACHE_LINE_BYTES);
+
+  vec_validate_aligned (dm->hqos_threads, tm->n_vlib_mains - 1,
+                       CLIB_CACHE_LINE_BYTES);
+
+#ifdef NETMAP
+  if (rte_netmap_probe () < 0)
+    return clib_error_return (0, "rte netmap probe failed");
+#endif
+
   nports = rte_eth_dev_count ();
   if (nports < 1)
     {
@@ -405,7 +427,7 @@ dpdk_lib_init (dpdk_main_t * dm)
       else
        xd->rx_q_used = 1;
 
-      xd->dev_type = VNET_DPDK_DEV_ETH;
+      xd->flags |= DPDK_DEVICE_FLAG_PMD;
 
       /* workaround for drivers not setting driver_name */
       if ((!dev_info.driver_name) && (dev_info.pci_dev))
@@ -586,17 +608,6 @@ dpdk_lib_init (dpdk_main_t * dm)
            }
        }
 
-#if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0)
-      /*
-       * Older VMXNET3 driver doesn't support jumbo / multi-buffer pkts
-       */
-      if (xd->pmd == VNET_DPDK_PMD_VMXNET3)
-       {
-         xd->port_conf.rxmode.max_rx_pkt_len = 1518;
-         xd->port_conf.rxmode.jumbo_frame = 0;
-       }
-#endif
-
       if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
        {
          f64 now = vlib_time_now (vm);
@@ -628,7 +639,7 @@ dpdk_lib_init (dpdk_main_t * dm)
          /* *INDENT-OFF* */
          clib_bitmap_foreach (i, devconf->workers, ({
            int cpu = dm->input_cpu_first_index + i;
-           unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
+           unsigned lcore = vlib_worker_threads[cpu].lcore_id;
            vec_validate(xd->cpu_socket_id_by_queue, q);
            xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore);
            vec_add2(dm->devices_by_cpu[cpu], dq, 1);
@@ -641,7 +652,7 @@ dpdk_lib_init (dpdk_main_t * dm)
        for (q = 0; q < xd->rx_q_used; q++)
          {
            int cpu = dm->input_cpu_first_index + next_cpu;
-           unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
+           unsigned lcore = vlib_worker_threads[cpu].lcore_id;
 
            /*
             * numa node for worker thread handling this queue
@@ -662,11 +673,47 @@ dpdk_lib_init (dpdk_main_t * dm)
              next_cpu = 0;
          }
 
+
+      if (devconf->hqos_enabled)
+       {
+         xd->flags |= DPDK_DEVICE_FLAG_HQOS;
+
+         if (devconf->hqos.hqos_thread_valid)
+           {
+             int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread;
+
+             if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count)
+               return clib_error_return (0, "invalid HQoS thread index");
+
+             vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
+             dq->device = xd->device_index;
+             dq->queue_id = 0;
+           }
+         else
+           {
+             int cpu = dm->hqos_cpu_first_index + next_hqos_cpu;
+
+             if (dm->hqos_cpu_count == 0)
+               return clib_error_return (0, "no HQoS threads available");
+
+             vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
+             dq->device = xd->device_index;
+             dq->queue_id = 0;
+
+             next_hqos_cpu++;
+             if (next_hqos_cpu == dm->hqos_cpu_count)
+               next_hqos_cpu = 0;
+
+             devconf->hqos.hqos_thread_valid = 1;
+             devconf->hqos.hqos_thread = cpu;
+           }
+       }
+
       vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains,
                            CLIB_CACHE_LINE_BYTES);
       for (j = 0; j < tm->n_vlib_mains; j++)
        {
-         vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE,
+         vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc,
                           sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES);
          vec_reset_length (xd->tx_vectors[j]);
        }
@@ -685,6 +732,13 @@ dpdk_lib_init (dpdk_main_t * dm)
       if (rv)
        return rv;
 
+      if (devconf->hqos_enabled)
+       {
+         rv = dpdk_port_setup_hqos (xd, &devconf->hqos);
+         if (rv < 0)
+           return rv;
+       }
+
       /* count the number of descriptors used for this device */
       nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used;
 
@@ -729,101 +783,16 @@ dpdk_lib_init (dpdk_main_t * dm)
            clib_warning ("VLAN strip cannot be supported by interface\n");
        }
 
-#if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0)
-      /*
-       * Older VMXNET3 driver doesn't support jumbo / multi-buffer pkts
-       */
-      else if (xd->pmd == VNET_DPDK_PMD_VMXNET3)
-       hi->max_packet_bytes = 1518;
-#endif
-
       hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] =
        xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t);
 
       rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes);
     }
 
-#ifdef RTE_LIBRTE_KNI
-  if (dm->conf->num_kni)
-    {
-      clib_warning ("Initializing KNI interfaces...");
-      rte_kni_init (dm->conf->num_kni);
-      for (i = 0; i < dm->conf->num_kni; i++)
-       {
-         u8 addr[6];
-         int j;
-
-         /* Create vnet interface */
-         vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES);
-         xd->dev_type = VNET_DPDK_DEV_KNI;
-
-         xd->device_index = xd - dm->devices;
-         ASSERT (nports + i == xd->device_index);
-         xd->per_interface_next_index = ~0;
-         xd->kni_port_id = i;
-         xd->cpu_socket = -1;
-         hash_set (dm->dpdk_device_by_kni_port_id, i, xd - dm->devices);
-         xd->rx_q_used = 1;
-
-         /* assign interface to input thread */
-         dpdk_device_and_queue_t *dq;
-         vec_add2 (dm->devices_by_cpu[dm->input_cpu_first_index], dq, 1);
-         dq->device = xd->device_index;
-         dq->queue_id = 0;
-
-         vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains,
-                               CLIB_CACHE_LINE_BYTES);
-         for (j = 0; j < tm->n_vlib_mains; j++)
-           {
-             vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE,
-                              sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES);
-             vec_reset_length (xd->tx_vectors[j]);
-           }
-
-         vec_validate_aligned (xd->rx_vectors, xd->rx_q_used,
-                               CLIB_CACHE_LINE_BYTES);
-         for (j = 0; j < xd->rx_q_used; j++)
-           {
-             vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1,
-                                   CLIB_CACHE_LINE_BYTES);
-             vec_reset_length (xd->rx_vectors[j]);
-           }
-
-         /* FIXME Set up one TX-queue per worker thread */
-
-         {
-           f64 now = vlib_time_now (vm);
-           u32 rnd;
-           rnd = (u32) (now * 1e6);
-           rnd = random_u32 (&rnd);
-
-           clib_memcpy (addr + 2, &rnd, sizeof (rnd));
-           addr[0] = 2;
-           addr[1] = 0xfe;
-         }
-
-         error = ethernet_register_interface
-           (dm->vnet_main, dpdk_device_class.index, xd->device_index,
-            /* ethernet address */ addr,
-            &xd->vlib_hw_if_index, dpdk_flag_change);
-
-         if (error)
-           return error;
-
-         sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index);
-         xd->vlib_sw_if_index = sw->sw_if_index;
-         hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index);
-       }
-    }
-#endif
-
   if (nb_desc > dm->conf->num_mbufs)
     clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n",
                  dm->conf->num_mbufs, nb_desc);
 
-  /* init next vhost-user if index */
-  dm->next_vu_if_id = 0;
-
   return 0;
 }
 
@@ -927,6 +896,8 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr,
     }
 
   devconf->pci_addr.as_u32 = pci_addr.as_u32;
+  devconf->hqos_enabled = 0;
+  dpdk_device_config_hqos_default (&devconf->hqos);
 
   if (!input)
     return 0;
@@ -956,6 +927,19 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr,
        devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF;
       else if (unformat (input, "vlan-strip-offload on"))
        devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON;
+      else
+       if (unformat
+           (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input))
+       {
+         devconf->hqos_enabled = 1;
+         error = unformat_hqos (&sub_input, &devconf->hqos);
+         if (error)
+           break;
+       }
+      else if (unformat (input, "hqos"))
+       {
+         devconf->hqos_enabled = 1;
+       }
       else
        {
          error = clib_error_return (0, "unknown input `%U'",
@@ -1004,9 +988,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
 
   conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
 
-  // MATT-FIXME: inverted virtio-vhost logic to use virtio by default
-  conf->use_virtio_vhost = 1;
-
   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
       /* Prime the pump */
@@ -1064,18 +1045,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
        ;
       else if (unformat (input, "socket-mem %s", &socket_mem))
        ;
-      else
-       if (unformat
-           (input, "vhost-user-coalesce-frames %d",
-            &conf->vhost_coalesce_frames))
-       ;
-      else
-       if (unformat
-           (input, "vhost-user-coalesce-time %f",
-            &conf->vhost_coalesce_time))
-       ;
-      else if (unformat (input, "enable-vhost-user"))
-       conf->use_virtio_vhost = 0;
       else if (unformat (input, "no-pci"))
        {
          no_pci = 1;
@@ -1131,7 +1100,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
        else if (unformat (input, "default"))
        ;
 
-      else if (unformat (input, " "))
+      else if (unformat_skip_white_space (input))
        ;
       else
        {
@@ -1199,57 +1168,22 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
       /* *INDENT-OFF* */
       clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
         {
-         u32 pages_avail, page_size, mem;
-         u8 *s = 0;
-          u8 *p = 0;
-         char * numa_path = "/sys/devices/system/node/node%u/";
-          char * nonnuma_path = "/sys/kernel/mm/";
-          char * suffix = "hugepages/hugepages-%ukB/free_hugepages%c";
-          char * path = NULL;
-          struct stat sb_numa, sb_nonnuma;
-
-          p = format(p, numa_path, c);
-          if (stat(numa_path, &sb_numa) < 0)
-            sb_numa.st_mode = 0;
-
-          if (stat(nonnuma_path, &sb_nonnuma) < 0)
-            sb_nonnuma.st_mode = 0;
-
-          if (S_ISDIR(sb_numa.st_mode)) {
-            path = (char*)format((u8*)path, "%s%s", p, suffix);
-          } else if (S_ISDIR(sb_nonnuma.st_mode)) {
-            path = (char*)format((u8*)path, "%s%s", nonnuma_path, suffix);
-          } else {
-            use_1g = 0;
-            use_2m = 0;
-            vec_free(p);
-            break;
-          }
+         int pages_avail, page_size, mem;
 
          vec_validate(mem_by_socket, c);
          mem = mem_by_socket[c];
 
          page_size = 1024;
-         pages_avail = 0;
-         s = format (s, path, page_size * 1024, 0);
-         vlib_sysfs_read ((char *) s, "%u", &pages_avail);
-         vec_reset_length (s);
+         pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024);
 
-         if (page_size * pages_avail < mem)
+         if (pages_avail < 0 || page_size * pages_avail < mem)
            use_1g = 0;
 
          page_size = 2;
-         pages_avail = 0;
-         s = format (s, path, page_size * 1024, 0);
-         vlib_sysfs_read ((char *) s, "%u", &pages_avail);
-         vec_reset_length (s);
+         pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024);
 
-         if (page_size * pages_avail < mem)
+         if (pages_avail < 0 || page_size * pages_avail < mem)
            use_2m = 0;
-
-         vec_free(s);
-         vec_free(p);
-         vec_free(path);
       }));
       /* *INDENT-ON* */
 
@@ -1458,7 +1392,7 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
   u8 hw_flags_chg = 0;
 
   /* only update link state for PMD interfaces */
-  if (xd->dev_type != VNET_DPDK_DEV_ETH)
+  if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
     return;
 
   xd->time_last_link_update = now ? now : xd->time_last_link_update;
@@ -1512,7 +1446,6 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
          break;
        }
     }
-#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
   if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed))
     {
       hw_flags_chg = 1;
@@ -1540,35 +1473,6 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
          break;
        }
     }
-#else
-  if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed))
-    {
-      hw_flags_chg = 1;
-      switch (xd->link.link_speed)
-       {
-       case ETH_LINK_SPEED_10:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M;
-         break;
-       case ETH_LINK_SPEED_100:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M;
-         break;
-       case ETH_LINK_SPEED_1000:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G;
-         break;
-       case ETH_LINK_SPEED_10000:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G;
-         break;
-       case ETH_LINK_SPEED_40G:
-         hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G;
-         break;
-       case 0:
-         break;
-       default:
-         clib_warning ("unknown link speed %d", xd->link.link_speed);
-         break;
-       }
-    }
-#endif
   if (hw_flags_chg)
     {
       if (LINK_STATE_ELOGS)
@@ -1603,9 +1507,6 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
   ethernet_main_t *em = &ethernet_main;
   dpdk_device_t *xd;
   vlib_thread_main_t *tm = vlib_get_thread_main ();
-#if DPDK_VHOST_USER
-  void *vu_state;
-#endif
   int i;
 
   error = dpdk_lib_init (dm);
@@ -1630,10 +1531,6 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
   if (error)
     clib_error_report (error);
 
-#if DPDK_VHOST_USER
-  dpdk_vhost_user_process_init (&vu_state);
-#endif
-
   tm->worker_thread_release = 1;
 
   f64 now = vlib_time_now (vm);
@@ -1756,18 +1653,9 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
        if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval)
          dpdk_update_link_state (xd, now);
 
-#if DPDK_VHOST_USER
-       if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER)
-         if (dpdk_vhost_user_process_if (vm, xd, vu_state) != 0)
-           continue;
-#endif
       }
     }
 
-#if DPDK_VHOST_USER
-  dpdk_vhost_user_process_cleanup (vu_state);
-#endif
-
   return 0;
 }
 
@@ -1844,10 +1732,6 @@ dpdk_init (vlib_main_t * vm)
   dm->efd.consec_full_frames_hi_thresh =
     DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH;
 
-  /* vhost-user coalescence frames defaults */
-  dm->conf->vhost_coalesce_frames = 32;
-  dm->conf->vhost_coalesce_time = 1e-3;
-
   /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
   dm->buffer_flags_template =
     (VLIB_BUFFER_TOTAL_LENGTH_VALID