Fix duplicate free
[vpp.git] / vnet / vnet / devices / dpdk / init.c
index 716377c..de141a5 100644 (file)
@@ -89,6 +89,11 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd)
     {
       rv = rte_eth_tx_queue_setup(xd->device_index, j, xd->nb_tx_desc,
                                  xd->cpu_socket, &xd->tx_conf);
+
+      /* retry with any other CPU socket */
+      if (rv < 0)
+        rv = rte_eth_tx_queue_setup(xd->device_index, j, xd->nb_tx_desc,
+                                   SOCKET_ID_ANY, &xd->tx_conf);
       if (rv < 0)
         break;
     }
@@ -103,6 +108,12 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd)
       rv = rte_eth_rx_queue_setup(xd->device_index, j, xd->nb_rx_desc,
                                   xd->cpu_socket, 0,
                                   bm->pktmbuf_pools[xd->cpu_socket_id_by_queue[j]]);
+
+      /* retry with any other CPU socket */
+      if (rv < 0)
+        rv = rte_eth_rx_queue_setup(xd->device_index, j, xd->nb_rx_desc,
+                                    SOCKET_ID_ANY, 0,
+                                    bm->pktmbuf_pools[xd->cpu_socket_id_by_queue[j]]);
       if (rv < 0)
         return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d",
                                   xd->device_index, rv);
@@ -503,15 +514,6 @@ dpdk_lib_init (dpdk_main_t * dm)
             }
         }
 
-      /*
-       * DAW-FIXME: VMXNET3 driver doesn't support jumbo / multi-buffer pkts
-       */
-      if (xd->pmd == VNET_DPDK_PMD_VMXNET3)
-        {
-          xd->port_conf.rxmode.max_rx_pkt_len = 1518;
-          xd->port_conf.rxmode.jumbo_frame = 0;
-        }
-
       if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
         {
           f64 now = vlib_time_now(vm);
@@ -624,11 +626,6 @@ dpdk_lib_init (dpdk_main_t * dm)
           vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
           rte_eth_dev_set_vlan_offload(xd->device_index, vlan_off);
        }
-      /*
-       * DAW-FIXME: VMXNET3 driver doesn't support jumbo / multi-buffer pkts
-       */
-      else if (xd->pmd == VNET_DPDK_PMD_VMXNET3)
-         hi->max_packet_bytes = 1518;
 
       hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 
              xd->port_conf.rxmode.max_rx_pkt_len - sizeof(ethernet_header_t);
@@ -783,38 +780,6 @@ dpdk_bind_devices_to_uio (dpdk_main_t * dm)
   vec_free (pci_addr);
 }
 
-static u32
-get_node_free_hugepages_num (u32 node, u32 page_size)
-{
-  FILE * fp;
-  u8 * tmp;
-
-  tmp = format (0, "/sys/devices/system/node/node%u/hugepages/hugepages-%ukB/"
-                "free_hugepages%c", node, page_size, 0);
-  fp = fopen ((char *) tmp, "r");
-  vec_free(tmp);
-
-  if (fp != NULL)
-    {
-      u8 * buffer = 0;
-      u32 pages_avail = 0;
-
-      vec_validate (buffer, 256-1);
-      if (fgets ((char *)buffer, 256, fp))
-        {
-          unformat_input_t in;
-          unformat_init_string (&in, (char *) buffer, strlen ((char *) buffer));
-          unformat(&in, "%u", &pages_avail);
-          unformat_free (&in);
-        }
-      vec_free(buffer);
-      fclose(fp);
-      return pages_avail;
-    }
-
-  return 0;
-}
-
 static clib_error_t *
 dpdk_config (vlib_main_t * vm, unformat_input_t * input)
 {
@@ -910,6 +875,8 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
         ;
       else if (unformat (input, "uio-driver %s", &dm->uio_driver_name))
        ;
+      else if (unformat (input, "socket-mem %s", &socket_mem))
+       ;
       else if (unformat (input, "vhost-user-coalesce-frames %d", &dm->vhost_coalesce_frames))
         ;
       else if (unformat (input, "vhost-user-coalesce-time %f", &dm->vhost_coalesce_time))
@@ -937,8 +904,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
               huge_dir = 1;                           \
             else if (!strncmp(#a, "file-prefix", 11)) \
               file_prefix = 1;                        \
-            else if (!strncmp(#a, "socket-mem", 10))  \
-              socket_mem = vec_dup (s);               \
            tmp = format (0, "--%s%c", #a, 0);        \
            vec_add1 (dm->eal_init_args, tmp);        \
            vec_add1 (s, 0);                          \
@@ -982,56 +947,98 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
     }
 
   if (!dm->uio_driver_name)
-    dm->uio_driver_name = format (0, "igb_uio");
+    dm->uio_driver_name = format (0, "igb_uio%c", 0);
 
   /*
    * Use 1G huge pages if available.
    */
   if (!no_huge && !huge_dir)
     {
-      uword * mem_by_socket = hash_create (0, sizeof (uword));
-      uword c;
+      u32 x, * mem_by_socket = 0;
+      uword c = 0;
       u8 use_1g = 1;
       u8 use_2m = 1;
+      u8 less_than_1g = 1;
       int rv;
 
       umount(DEFAULT_HUGE_DIR);
 
       /* Process "socket-mem" parameter value */
       if (vec_len (socket_mem))
-        {
-          unformat_input_t in;
-          unformat_init_vector(&in, socket_mem);
-          unformat(&in, "%U", unformat_socket_mem, &mem_by_socket);
-          unformat_free(&in);
-        }
+       {
+         unformat_input_t in;
+         unformat_init_vector(&in, socket_mem);
+         while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT)
+           {
+             if (unformat (&in, "%u,", &x))
+               ;
+             else if (unformat (&in, "%u", &x))
+               ;
+             else if (unformat (&in, ","))
+               x = 0;
+             else
+               break;
+
+             vec_add1(mem_by_socket, x);
+
+             if (x > 1023)
+               less_than_1g = 0;
+           }
+          /* Note: unformat_free() vec_frees in.buffer, i.e. socket_mem itself */
+         unformat_free(&in);
+          socket_mem = 0;
+       }
       else
-        use_1g = 0;
+       {
+         clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
+           {
+             vec_validate(mem_by_socket, c);
+             mem_by_socket[c] = 512; /* default per-socket mem */
+           }
+         ));
+       }
 
       /* check if available enough 1GB pages for each socket */
-      clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ({
-         uword * p = hash_get (mem_by_socket, c);
-         if (p)
-           {
-             u32 mem = p[0];
-             if (mem)
-               {
-                 u32 pages_num_1g = mem / 1024;
-                 u32 pages_num_2m = mem / 2;
-                 u32 pages_avail;
-
-                 pages_avail = get_node_free_hugepages_num(c, 1048576);
-                 if (!pages_avail || !(pages_avail >= pages_num_1g))
-                   use_1g = 0;
-
-                 pages_avail = get_node_free_hugepages_num(c, 2048);
-                 if (!pages_avail || !(pages_avail >= pages_num_2m))
-                   use_2m = 0;
-              }
-           }
+      clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
+        {
+         u32 pages_avail, page_size, mem;
+         u8 *s = 0;
+         char * path = "/sys/devices/system/node/node%u/hugepages/"
+                       "hugepages-%ukB/free_hugepages%c";
+
+         vec_validate(mem_by_socket, c);
+         mem = mem_by_socket[c];
+
+         page_size = 1024;
+         pages_avail = 0;
+         s = format (s, path, c, page_size * 1024, 0);
+         read_sys_fs ((char *) s, "%u", &pages_avail);
+         vec_reset_length (s);
+
+         if (page_size * pages_avail < mem)
+           use_1g = 0;
+
+         page_size = 2;
+         pages_avail = 0;
+         s = format (s, path, c, page_size * 1024, 0);
+         read_sys_fs ((char *) s, "%u", &pages_avail);
+         vec_reset_length (s);
+
+         if (page_size * pages_avail < mem)
+           use_2m = 0;
+
+         vec_free(s);
       }));
+      _vec_len (mem_by_socket) = c + 1;
+
+      /* regenerate socket_mem string */
+      vec_foreach_index (x, mem_by_socket)
+       socket_mem = format (socket_mem, "%s%u",
+                            socket_mem ? "," : "",
+                            mem_by_socket[x]);
+      socket_mem = format (socket_mem, "%c", 0);
 
-      hash_free (mem_by_socket);
+      vec_free (mem_by_socket);
 
       rv = mkdir(VPP_RUN_DIR, 0755);
       if (rv && errno != EEXIST)
@@ -1049,7 +1056,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
           goto done;
         }
 
-      if (use_1g)
+      if (use_1g && !(less_than_1g && use_2m))
         {
           rv = mount("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G");
         }
@@ -1156,6 +1163,12 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
   tmp = format (0, "%u%c", tm->main_lcore, 0);
   vec_add1 (dm->eal_init_args, tmp);
 
+  /* set socket-mem */
+  tmp = format (0, "--socket-mem%c", 0);
+  vec_add1 (dm->eal_init_args, tmp);
+  tmp = format (0, "%s%c", socket_mem, 0);
+  vec_add1 (dm->eal_init_args, tmp);
+
   /* NULL terminate the "argv" vector, in case of stupidity */
   vec_add1 (dm->eal_init_args, 0);
   _vec_len(dm->eal_init_args) -= 1;
@@ -1394,8 +1407,12 @@ dpdk_process (vlib_main_t * vm,
       dpdk_update_link_state (xd, now);
     }
 
-{ // Setup MACs for bond interfaces and their links which was initialized in
-  // dpdk_port_setup() but needs to be done again here to take effect.
+{ // Extra set up for bond interfaces:
+  // 1. Set up MACs for bond interfaces and their slave links; this was done
+  //    in dpdk_port_setup() but needs to be done again here to take effect.
+  // 2. Set max L3 packet size of each bond interface to the lowest value of
+  //    its slave links.
+  // 3. Set up info for bond interface related CLI support.
   int nports = rte_eth_dev_count();
   if (nports > 0) {
       for (i = 0; i < nports; i++) {
@@ -1421,6 +1438,10 @@ dpdk_process (vlib_main_t * vm,
                  bei = pool_elt_at_index(em->interfaces, bhi->hw_instance);
                  memcpy(bhi->hw_address, addr, 6);
                  memcpy(bei->address, addr, 6);
+                 /* Init l3 packet size allowed on bonded interface */
+                 bhi->max_l3_packet_bytes[VLIB_RX] = 
+                 bhi->max_l3_packet_bytes[VLIB_TX] = 
+                     ETHERNET_MAX_PACKET_BYTES - sizeof(ethernet_header_t);
                  while (nlink >= 1) { /* for all slave links */
                      int slave = slink[--nlink];
                      dpdk_device_t * sdev = &dm->devices[slave];
@@ -1437,6 +1458,12 @@ dpdk_process (vlib_main_t * vm,
                      ssi = vnet_get_sw_interface(vnm, sdev->vlib_sw_if_index);
                      shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE;
                      ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE;
+                     /* Cap allowed l3 packet size at the lowest slave value */
+                     if (bhi->max_l3_packet_bytes[VLIB_RX] >
+                         shi->max_l3_packet_bytes[VLIB_RX]) 
+                         bhi->max_l3_packet_bytes[VLIB_RX] =
+                         bhi->max_l3_packet_bytes[VLIB_TX] =
+                             shi->max_l3_packet_bytes[VLIB_RX];
                  }
              }
          }