Add support for multiple microarchitectures in single binary
[vpp.git] / vnet / vnet / devices / dpdk / init.c
index 891190d..8ee59ff 100644 (file)
@@ -22,7 +22,6 @@
 #include <vnet/devices/dpdk/dpdk.h>
 #include <vlib/unix/physmem.h>
 #include <vlib/pci/pci.h>
-#include <vlib/unix/pci.h>
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -89,6 +88,11 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd)
     {
       rv = rte_eth_tx_queue_setup(xd->device_index, j, xd->nb_tx_desc,
                                  xd->cpu_socket, &xd->tx_conf);
+
+      /* retry with any other CPU socket */
+      if (rv < 0)
+        rv = rte_eth_tx_queue_setup(xd->device_index, j, xd->nb_tx_desc,
+                                   SOCKET_ID_ANY, &xd->tx_conf);
       if (rv < 0)
         break;
     }
@@ -103,6 +107,12 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd)
       rv = rte_eth_rx_queue_setup(xd->device_index, j, xd->nb_rx_desc,
                                   xd->cpu_socket, 0,
                                   bm->pktmbuf_pools[xd->cpu_socket_id_by_queue[j]]);
+
+      /* retry with any other CPU socket */
+      if (rv < 0)
+        rv = rte_eth_rx_queue_setup(xd->device_index, j, xd->nb_rx_desc,
+                                    SOCKET_ID_ANY, 0,
+                                    bm->pktmbuf_pools[xd->cpu_socket_id_by_queue[j]]);
       if (rv < 0)
         return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d",
                                   xd->device_index, rv);
@@ -301,7 +311,7 @@ dpdk_lib_init (dpdk_main_t * dm)
       xd->cpu_socket = (i8) rte_eth_dev_socket_id(i);
       rte_eth_dev_info_get(i, &dev_info);
 
-      memcpy(&xd->tx_conf, &dev_info.default_txconf,
+      clib_memcpy(&xd->tx_conf, &dev_info.default_txconf,
              sizeof(struct rte_eth_txconf));
       if (dm->no_multi_seg)
         {
@@ -314,7 +324,7 @@ dpdk_lib_init (dpdk_main_t * dm)
           port_conf_template.rxmode.jumbo_frame = 1;
         }
 
-      memcpy(&xd->port_conf, &port_conf_template, sizeof(struct rte_eth_conf));
+      clib_memcpy(&xd->port_conf, &port_conf_template, sizeof(struct rte_eth_conf));
 
       xd->tx_q_used = clib_min(dev_info.max_tx_queues, tm->n_vlib_mains);
 
@@ -503,13 +513,24 @@ dpdk_lib_init (dpdk_main_t * dm)
             }
         }
 
+#if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0) 
+      /*
+       * Older VMXNET3 driver doesn't support jumbo / multi-buffer pkts
+       */
+      if (xd->pmd == VNET_DPDK_PMD_VMXNET3)
+        {
+          xd->port_conf.rxmode.max_rx_pkt_len = 1518;
+          xd->port_conf.rxmode.jumbo_frame = 0;
+        }
+#endif
+
       if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
         {
           f64 now = vlib_time_now(vm);
           u32 rnd;
           rnd = (u32) (now * 1e6);
           rnd = random_u32 (&rnd);
-          memcpy (addr+2, &rnd, sizeof(rnd));
+          clib_memcpy (addr+2, &rnd, sizeof(rnd));
           addr[0] = 2;
           addr[1] = 0xfe;
         }
@@ -616,6 +637,14 @@ dpdk_lib_init (dpdk_main_t * dm)
           rte_eth_dev_set_vlan_offload(xd->device_index, vlan_off);
        }
 
+#if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0) 
+      /*
+       * Older VMXNET3 driver doesn't support jumbo / multi-buffer pkts
+       */
+      else if (xd->pmd == VNET_DPDK_PMD_VMXNET3)
+         hi->max_packet_bytes = 1518;
+#endif
+
       hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 
              xd->port_conf.rxmode.max_rx_pkt_len - sizeof(ethernet_header_t);
 
@@ -678,7 +707,7 @@ dpdk_lib_init (dpdk_main_t * dm)
         rnd = (u32) (now * 1e6);
         rnd = random_u32 (&rnd);
 
-        memcpy (addr+2, &rnd, sizeof(rnd));
+        clib_memcpy (addr+2, &rnd, sizeof(rnd));
         addr[0] = 2;
         addr[1] = 0xfe;
       }
@@ -714,7 +743,7 @@ dpdk_lib_init (dpdk_main_t * dm)
 static void
 dpdk_bind_devices_to_uio (dpdk_main_t * dm)
 {
-  linux_pci_main_t * pm = &linux_pci_main;
+  vlib_pci_main_t * pm = &pci_main;
   clib_error_t * error;
   vlib_pci_device_t * d;
   pci_config_header_t * c;
@@ -775,6 +804,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
   clib_error_t * error = 0;
   dpdk_main_t * dm = &dpdk_main;
   vlib_thread_main_t * tm = vlib_get_thread_main();
+  vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, dpdk_input_node.index);
   u8 * s, * tmp = 0;
   u8 * pci_dev_id = 0;
   u8 * rte_cmd = 0, * ethname = 0;
@@ -973,7 +1003,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
              if (x > 1023)
                less_than_1g = 0;
            }
+          /* unformat_free() vec_free()s in.buffer (aka socket_mem): clear the stale pointer */
          unformat_free(&in);
+          socket_mem = 0;
        }
       else
        {
@@ -990,16 +1022,35 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
         {
          u32 pages_avail, page_size, mem;
          u8 *s = 0;
-         char * path = "/sys/devices/system/node/node%u/hugepages/"
-                       "hugepages-%ukB/free_hugepages%c";
+          u8 *p = 0;
+         char * numa_path = "/sys/devices/system/node/node%u/";
+          char * nonnuma_path = "/sys/kernel/mm/";
+          char * suffix = "hugepages/hugepages-%ukB/free_hugepages%c";
+          char * path = NULL;
+          struct stat sb_numa, sb_nonnuma;
+
+          p = format(p, numa_path, c);
+          stat(numa_path, &sb_numa);
+          stat(nonnuma_path, &sb_nonnuma);
+
+          if (S_ISDIR(sb_numa.st_mode)) {
+            path = (char*)format((u8*)path, "%s%s", p, suffix);
+          } else if (S_ISDIR(sb_nonnuma.st_mode)) {
+            path = (char*)format((u8*)path, "%s%s", nonnuma_path, suffix);
+          } else {
+            use_1g = 0;
+            use_2m = 0;
+            vec_free(p);
+            break;
+          }
 
          vec_validate(mem_by_socket, c);
          mem = mem_by_socket[c];
 
          page_size = 1024;
          pages_avail = 0;
-         s = format (s, path, c, page_size * 1024, 0);
-         read_sys_fs ((char *) s, "%u", &pages_avail);
+         s = format (s, path, page_size * 1024, 0);
+         vlib_sysfs_read ((char *) s, "%u", &pages_avail);
          vec_reset_length (s);
 
          if (page_size * pages_avail < mem)
@@ -1007,19 +1058,20 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
 
          page_size = 2;
          pages_avail = 0;
-         s = format (s, path, c, page_size * 1024, 0);
-         read_sys_fs ((char *) s, "%u", &pages_avail);
+         s = format (s, path, page_size * 1024, 0);
+         vlib_sysfs_read ((char *) s, "%u", &pages_avail);
          vec_reset_length (s);
 
          if (page_size * pages_avail < mem)
            use_2m = 0;
 
          vec_free(s);
+         vec_free(p);
+         vec_free(path);
       }));
       _vec_len (mem_by_socket) = c + 1;
 
       /* regenerate socket_mem string */
-      vec_free (socket_mem);
       vec_foreach_index (x, mem_by_socket)
        socket_mem = format (socket_mem, "%s%u",
                             socket_mem ? "," : "",
@@ -1200,10 +1252,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
     }
 
   if (dm->use_rss)
-    {
-      vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, dpdk_input_node.index);
-      rt->function = dpdk_input_rss;
-    }
+    rt->function = dpdk_input_rss_multiarch_select();
+  else
+    rt->function = dpdk_input_multiarch_select();
  done:
   return error;
 }
@@ -1424,8 +1475,8 @@ dpdk_process (vlib_main_t * vm,
                  bhi = vnet_get_hw_interface(
                      vnm, dm->devices[i].vlib_hw_if_index);
                  bei = pool_elt_at_index(em->interfaces, bhi->hw_instance);
-                 memcpy(bhi->hw_address, addr, 6);
-                 memcpy(bei->address, addr, 6);
+                 clib_memcpy(bhi->hw_address, addr, 6);
+                 clib_memcpy(bei->address, addr, 6);
                  /* Init l3 packet size allowed on bonded interface */
                  bhi->max_l3_packet_bytes[VLIB_RX] = 
                  bhi->max_l3_packet_bytes[VLIB_TX] =