vppinfra: remove linux/syscall.h

author Damjan Marion <damarion@cisco.com>

Fri, 9 Oct 2020 15:16:55 +0000 (17:16 +0200)

committer Damjan Marion <damarion@cisco.com>

Sun, 18 Apr 2021 13:22:50 +0000 (15:22 +0200)
author Damjan Marion <damarion@cisco.com>
Fri, 9 Oct 2020 15:16:55 +0000 (17:16 +0200)
committer Damjan Marion <damarion@cisco.com>
Sun, 18 Apr 2021 13:22:50 +0000 (15:22 +0200)
diff --git a/extras/scripts/build_static_vppctl.sh b/extras/scripts/build_static_vppctl.sh

index 72408ad..1c09055 100755 (executable)
--- a/extras/scripts/build_static_vppctl.sh
+++ b/extras/scripts/build_static_vppctl.sh
@@ -11,8 +11,6 @@ ${CC:-cc} \
   -static \
   -I ${src} \
   -I ${tmp} \
- -DHAVE_MEMFD_CREATE \
- -DHAVE_GETCPU \
   ${src}/vppinfra/backtrace.c \
   ${src}/vppinfra/dlmalloc.c \
   ${src}/vppinfra/elf.c \
diff --git a/src/cmake/syscall.cmake b/src/cmake/syscall.cmake

index aa3bac4..6a4c75e 100644 (file)
--- a/src/cmake/syscall.cmake
+++ b/src/cmake/syscall.cmake
@@ -11,29 +11,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  
-##############################################################################
-# Check for memfd_create headers and libs
-##############################################################################
-check_c_source_compiles("
-  #define _GNU_SOURCE
-  #include <sys/mman.h>
-  int main() { return memfd_create (\"/dev/false\", 0); }
-" HAVE_MEMFD_CREATE)
-
-if (HAVE_MEMFD_CREATE)
-    add_definitions(-DHAVE_MEMFD_CREATE)
-endif()
-
-check_c_source_compiles("
-  #define _GNU_SOURCE
-  #include <sched.h>
-  int main() { return getcpu (0, 0); }
-" HAVE_GETCPU)
-
-if (HAVE_GETCPU)
-    add_definitions(-DHAVE_GETCPU)
-endif()
-
  check_c_source_compiles("
    #define _GNU_SOURCE
    #include <fcntl.h>
diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c

index 31a34cc..bce17ef 100644 (file)
--- a/src/plugins/memif/memif.c
+++ b/src/plugins/memif/memif.c
@@ -33,7 +33,6 @@
  
  #include <vlib/vlib.h>
  #include <vlib/unix/unix.h>
-#include <vppinfra/linux/syscall.h>
  #include <vnet/plugin/plugin.h>
  #include <vnet/ethernet/ethernet.h>
  #include <vnet/interface/rx_queue_funcs.h>
diff --git a/src/vlib/physmem.c b/src/vlib/physmem.c

index 846f9a0..a36444f 100644 (file)
--- a/src/vlib/physmem.c
+++ b/src/vlib/physmem.c
@@ -21,7 +21,6 @@
  #include <sys/stat.h>
  #include <unistd.h>
  
-#include <vppinfra/linux/syscall.h>
  #include <vppinfra/linux/sysfs.h>
  #include <vlib/vlib.h>
  #include <vlib/physmem.h>
diff --git a/src/vpp/stats/stat_segment.c b/src/vpp/stats/stat_segment.c

index 5042ba8..ddf6e19 100644 (file)
--- a/src/vpp/stats/stat_segment.c
+++ b/src/vpp/stats/stat_segment.c
@@ -19,8 +19,6 @@
  #include "stat_segment.h"
  #include <vnet/vnet.h>
  #include <vnet/devices/devices.h>      /* vnet_get_aggregate_rx_packets */
-#undef HAVE_MEMFD_CREATE
-#include <vppinfra/linux/syscall.h>
  #include <vpp-api/client/stat_client.h>
  
  stat_segment_main_t stat_segment_main;
diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt

index 3f6e9f5..8cebd32 100644 (file)
--- a/src/vppinfra/CMakeLists.txt
+++ b/src/vppinfra/CMakeLists.txt
@@ -192,7 +192,6 @@ set(VPPINFRA_HEADERS
    vector_sse42.h
    warnings.h
    xxhash.h
-  linux/syscall.h
    linux/sysfs.h
  )
  
diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c

index 555c2e0..0c6aa2a 100644 (file)
--- a/src/vppinfra/bihash_template.c
+++ b/src/vppinfra/bihash_template.c
@@ -272,7 +272,8 @@ void BV (clib_bihash_initiator_init_svm)
  
    ASSERT (memory_size < (1ULL << 32));
    /* Set up for memfd sharing */
-  if ((fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1)
+  if ((fd = clib_mem_vm_create_fd (CLIB_MEM_PAGE_SZ_DEFAULT, "%s", name)) ==
+      -1)
      {
        clib_unix_warning ("memfd_create");
        return;
diff --git a/src/vppinfra/bihash_template.h b/src/vppinfra/bihash_template.h

index 1ca0ae4..da2f684 100644 (file)
--- a/src/vppinfra/bihash_template.h
+++ b/src/vppinfra/bihash_template.h
@@ -34,7 +34,6 @@
  #endif
  
  #ifdef BIHASH_32_64_SVM
-#undef HAVE_MEMFD_CREATE
-#include <vppinfra/linux/syscall.h>
+#include <unistd.h>
  #include <fcntl.h>
  #define F_LINUX_SPECIFIC_BASE 1024
diff --git a/src/vppinfra/cpu.c b/src/vppinfra/cpu.c

index 4631eb3..d2edc61 100644 (file)
--- a/src/vppinfra/cpu.c
+++ b/src/vppinfra/cpu.c
@@ -222,7 +222,25 @@ format_cpu_flags (u8 * s, va_list * args)
  #endif
  }
  
+/* Return the id of the CPU reported by the getcpu system call for the
+   calling thread; 0 is returned if the system call fails. */
+__clib_export u32
+clib_get_current_cpu_id (void)
+{
+  unsigned cpu = 0, node = 0;
+  syscall (__NR_getcpu, &cpu, &node, 0);
+  return cpu;
+}
  
+/* Return the NUMA node reported by the getcpu system call for the
+   calling thread; 0 is returned if the system call fails. */
+__clib_export u32
+clib_get_current_numa_node (void)
+{
+  unsigned cpu = 0, node = 0;
+  syscall (__NR_getcpu, &cpu, &node, 0);
+  return node;
+}
  
  /*
   * fd.io coding-style-patch-verification: ON
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h

index bc4ee58..6925d58 100644 (file)
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -167,21 +167,8 @@ _ (asimddp,    20) \
  _ (sha512,     21) \
  _ (sve,        22)
  
-static inline u32
-clib_get_current_cpu_id ()
-{
-  unsigned cpu, node;
-  syscall (__NR_getcpu, &cpu, &node, 0);
-  return cpu;
-}
-
-static inline u32
-clib_get_current_numa_node ()
-{
-  unsigned cpu, node;
-  syscall (__NR_getcpu, &cpu, &node, 0);
-  return node;
-}
+u32 clib_get_current_cpu_id (void);
+u32 clib_get_current_numa_node (void);
  
  #if defined(__x86_64__)
  #include "cpuid.h"
diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c

index 11a1e9e..cb46df8 100644 (file)
--- a/src/vppinfra/linux/mem.c
+++ b/src/vppinfra/linux/mem.c
@@ -30,7 +30,6 @@
  #include <vppinfra/time.h>
  #include <vppinfra/format.h>
  #include <vppinfra/clib_error.h>
-#include <vppinfra/linux/syscall.h>
  #include <vppinfra/linux/sysfs.h>
  
  #ifndef F_LINUX_SPECIFIC_BASE
@@ -149,7 +148,7 @@ clib_mem_main_init ()
    mm->log2_page_sz = min_log2 (page_size);
  
    /* default system hugeppage size */
-  if ((fd = memfd_create ("test", MFD_HUGETLB)) != -1)
+  if ((fd = syscall (__NR_memfd_create, "test", MFD_HUGETLB)) != -1)
      {
        mm->log2_default_hugepage_sz = clib_mem_get_fd_log2_page_size (fd);
        close (fd);
@@ -169,7 +168,7 @@ clib_mem_main_init ()
    for (int i = 0; i < CLIB_MAX_NUMAS; i++)
      {
        int status;
-      if (move_pages (0, 1, &va, &i, &status, 0) == 0)
+      if (syscall (__NR_move_pages, 0, 1, &va, &i, &status, 0) == 0)
         mm->numa_node_bitmap |= 1ULL << i;
      }
  
@@ -298,7 +297,7 @@ clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...)
    vec_add1 (s, 0);
  
    /* memfd_create introduced in kernel 3.17, we don't support older kernels */
-  fd = memfd_create ((char *) s, memfd_flags);
+  fd = syscall (__NR_memfd_create, (char *) s, memfd_flags);
  
    /* kernel versions < 4.14 does not support memfd_create for huge pages */
    if (fd == -1 && errno == EINVAL &&
@@ -568,7 +567,7 @@ clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size,
    stats->total = n_pages;
    stats->log2_page_sz = log2_page_size;
  
-  if (move_pages (0, n_pages, ptr, 0, status, 0) != 0)
+  if (syscall (__NR_move_pages, 0, n_pages, ptr, 0, status, 0) != 0)
      {
        stats->unknown = n_pages;
        goto done;
@@ -658,7 +657,8 @@ clib_mem_set_numa_affinity (u8 numa_node, int force)
  
    mask[0] = 1 << numa_node;
  
-  if (set_mempolicy (force ? MPOL_BIND : MPOL_PREFERRED, mask, mask_len))
+  if (syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED, mask,
+              mask_len))
      goto error;
  
    vec_reset_length (mm->error);
@@ -675,7 +675,7 @@ clib_mem_set_default_numa_affinity ()
  {
    clib_mem_main_t *mm = &clib_mem_main;
  
-  if (set_mempolicy (MPOL_DEFAULT, 0, 0))
+  if (syscall (__NR_set_mempolicy, MPOL_DEFAULT, 0, 0))
      {
        vec_reset_length (mm->error);
        mm->error = clib_error_return_unix (mm->error, (char *) __func__);
diff --git a/src/vppinfra/linux/syscall.h b/src/vppinfra/linux/syscall.h

deleted file mode 100644 (file)

index c07cad6..0000000
--- a/src/vppinfra/linux/syscall.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2017 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_linux_syscall_h
-#define included_linux_syscall_h
-
-#include <unistd.h>
-#include <sys/syscall.h>
-
-#ifndef HAVE_GETCPU
-static inline int
-getcpu (unsigned *cpu, unsigned *node)
-{
-  return syscall (__NR_getcpu, cpu, node, 0);
-}
-#endif
-
-static inline long
-set_mempolicy (int mode, const unsigned long *nodemask, unsigned long maxnode)
-{
-  return syscall (__NR_set_mempolicy, mode, nodemask, maxnode);
-}
-
-static inline int
-get_mempolicy (int *mode, unsigned long *nodemask, unsigned long maxnode,
-              void *addr, unsigned long flags)
-{
-  return syscall (__NR_get_mempolicy, mode, nodemask, maxnode, addr, flags);
-}
-
-static inline long
-move_pages (int pid, unsigned long count, void **pages, const int *nodes,
-           int *status, int flags)
-{
-  return syscall (__NR_move_pages, pid, count, pages, nodes, status, flags);
-}
-
-#ifndef HAVE_MEMFD_CREATE
-static inline int
-memfd_create (const char *name, unsigned int flags)
-{
-  return syscall (__NR_memfd_create, name, flags);
-}
-#endif
-
-#endif /* included_linux_syscall_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/pmalloc.c b/src/vppinfra/pmalloc.c

index 546a4fe..27738e1 100644 (file)
--- a/src/vppinfra/pmalloc.c
+++ b/src/vppinfra/pmalloc.c
@@ -19,16 +19,14 @@
  #include <sys/stat.h>
  #include <fcntl.h>
  #include <unistd.h>
-#include <linux/mempolicy.h>
-#include <linux/memfd.h>
  #include <sched.h>
  
  #include <vppinfra/format.h>
-#include <vppinfra/linux/syscall.h>
  #include <vppinfra/linux/sysfs.h>
  #include <vppinfra/mem.h>
  #include <vppinfra/hash.h>
  #include <vppinfra/pmalloc.h>
+#include <vppinfra/cpu.h>
  
  #if __SIZEOF_POINTER__ >= 8
  #define DEFAULT_RESERVED_MB 16384
@@ -48,18 +46,6 @@ pmalloc_size2pages (uword size, u32 log2_page_sz)
    return round_pow2 (size, 1ULL << log2_page_sz) >> log2_page_sz;
  }
  
-static inline int
-pmalloc_validate_numa_node (u32 * numa_node)
-{
-  if (*numa_node == CLIB_PMALLOC_NUMA_LOCAL)
-    {
-      u32 cpu;
-      if (getcpu (&cpu, numa_node) != 0)
-       return 1;
-    }
-  return 0;
-}
-
  __clib_export int
  clib_pmalloc_init (clib_pmalloc_main_t * pm, uword base_addr, uword size)
  {
@@ -241,12 +227,10 @@ static inline clib_pmalloc_page_t *
  pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
                    u32 numa_node, u32 n_pages)
  {
+  clib_mem_page_stats_t stats = {};
    clib_pmalloc_page_t *pp = 0;
-  int status, rv, i, mmap_flags;
+  int rv, i, mmap_flags;
    void *va = MAP_FAILED;
-  int old_mpol = -1;
-  long unsigned int mask[16] = { 0 };
-  long unsigned int old_mask[16] = { 0 };
    uword size = (uword) n_pages << pm->def_log2_page_sz;
  
    clib_error_free (pm->error);
@@ -266,17 +250,8 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
         return 0;
      }
  
-  rv = get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1, 0, 0);
-  /* failure to get mempolicy means we can only proceed with numa 0 maps */
-  if (rv == -1 && numa_node != 0)
-    {
-      pm->error = clib_error_return_unix (0, "failed to get mempolicy");
-      return 0;
-    }
-
-  mask[0] = 1 << numa_node;
-  rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1);
-  if (rv == -1 && numa_node != 0)
+  rv = clib_mem_set_numa_affinity (numa_node, /* force */ 1);
+  if (rv == CLIB_MEM_ERROR && numa_node != 0)
      {
        pm->error = clib_error_return_unix (0, "failed to set mempolicy for "
                                           "numa node %u", numa_node);
@@ -323,8 +298,8 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
  
    clib_memset (va, 0, size);
  
-  rv = set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1);
-  if (rv == -1 && numa_node != 0)
+  rv = clib_mem_set_default_numa_affinity ();
+  if (rv == CLIB_MEM_ERROR && numa_node != 0)
      {
        pm->error = clib_error_return_unix (0, "failed to restore mempolicy");
        goto error;
@@ -332,14 +307,23 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
  
    /* we tolerate move_pages failure only if request os for numa node 0
       to support non-numa kernels */
-  rv = move_pages (0, 1, &va, 0, &status, 0);
-  if ((rv == 0 && status != numa_node) || (rv != 0 && numa_node != 0))
+  clib_mem_get_page_stats (va, CLIB_MEM_PAGE_SZ_DEFAULT, 1, &stats);
+
+  if (stats.per_numa[numa_node] != 1 &&
+      !(numa_node == 0 && stats.unknown == 1))
      {
-      pm->error = rv == -1 ?
-       clib_error_return_unix (0, "page allocated on wrong node, numa node "
-                               "%u status %d", numa_node, status) :
-       clib_error_return (0, "page allocated on wrong node, numa node "
-                          "%u status %d", numa_node, status);
+      u16 allocated_at = ~0;
+
+      for (u16 n = 0; n < CLIB_MAX_NUMAS; n++)
+        if (stats.per_numa[n] == 1)
+          allocated_at = n;
+
+      /* store the error in pm->error so it is neither lost nor leaked */
+      pm->error = stats.unknown ?
+        clib_error_return (0, "unable to get information about numa "
+                           "allocation") :
+        clib_error_return (0, "page allocated on the wrong numa node (%u), "
+                           "expected %u", allocated_at, numa_node);
  
        goto error;
      }
@@ -407,8 +391,8 @@ clib_pmalloc_create_shared_arena (clib_pmalloc_main_t * pm, char *name,
    if (n_pages + vec_len (pm->pages) > pm->max_pages)
      return 0;
  
-  if (pmalloc_validate_numa_node (&numa_node))
-    return 0;
+  if (numa_node == CLIB_PMALLOC_NUMA_LOCAL)
+    numa_node = clib_get_current_numa_node ();
  
    pool_get (pm->arenas, a);
    a->index = a - pm->arenas;
@@ -438,8 +422,8 @@ clib_pmalloc_alloc_inline (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
  
    ASSERT (is_pow2 (align));
  
-  if (pmalloc_validate_numa_node (&numa_node))
-    return 0;
+  if (numa_node == CLIB_PMALLOC_NUMA_LOCAL)
+    numa_node = clib_get_current_numa_node ();
  
    if (a == 0)
      {
diff --git a/src/vppinfra/unix-formats.c b/src/vppinfra/unix-formats.c

index af1eb1a..cd137e5 100644 (file)
--- a/src/vppinfra/unix-formats.c
+++ b/src/vppinfra/unix-formats.c
@@ -63,8 +63,6 @@
  
  #include <vppinfra/time.h>
  #if __linux__
-#include <vppinfra/linux/syscall.h>
-
  #ifdef AF_NETLINK
  #include <linux/types.h>
  #include <linux/netlink.h>
author	Damjan Marion <damarion@cisco.com>
	Fri, 9 Oct 2020 15:16:55 +0000 (17:16 +0200)
committer	Damjan Marion <damarion@cisco.com>
	Sun, 18 Apr 2021 13:22:50 +0000 (15:22 +0200)
extras/scripts/build_static_vppctl.sh		patch \| blob \| history
src/cmake/syscall.cmake		patch \| blob \| history
src/plugins/memif/memif.c		patch \| blob \| history
src/vlib/physmem.c		patch \| blob \| history
src/vpp/stats/stat_segment.c		patch \| blob \| history
src/vppinfra/CMakeLists.txt		patch \| blob \| history
src/vppinfra/bihash_template.c		patch \| blob \| history
src/vppinfra/bihash_template.h		patch \| blob \| history
src/vppinfra/cpu.c		patch \| blob \| history
src/vppinfra/cpu.h		patch \| blob \| history
src/vppinfra/linux/mem.c		patch \| blob \| history
src/vppinfra/linux/syscall.h	[deleted file]	patch \| blob \| history
src/vppinfra/pmalloc.c		patch \| blob \| history
src/vppinfra/unix-formats.c		patch \| blob \| history