#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
+#ifdef __FreeBSD__
+#include <sys/memrange.h>
+#endif /* __FreeBSD__ */
#include <fcntl.h>
#include <unistd.h>
-#include <linux/mempolicy.h>
-#include <linux/memfd.h>
#include <sched.h>
#include <vppinfra/format.h>
-#include <vppinfra/linux/syscall.h>
+#ifdef __linux__
#include <vppinfra/linux/sysfs.h>
+#endif
#include <vppinfra/mem.h>
#include <vppinfra/hash.h>
#include <vppinfra/pmalloc.h>
+#include <vppinfra/cpu.h>
#if __SIZEOF_POINTER__ >= 8
#define DEFAULT_RESERVED_MB 16384
return round_pow2 (size, 1ULL << log2_page_sz) >> log2_page_sz;
}
-static inline int
-pmalloc_validate_numa_node (u32 * numa_node)
-{
- if (*numa_node == CLIB_PMALLOC_NUMA_LOCAL)
- {
- u32 cpu;
- if (getcpu (&cpu, numa_node) != 0)
- return 1;
- }
- return 0;
-}
-
-int
+__clib_export int
clib_pmalloc_init (clib_pmalloc_main_t * pm, uword base_addr, uword size)
{
uword base, pagesize;
}
static void
-pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count)
+pmalloc_update_lookup_table (clib_pmalloc_main_t *pm, u32 first, u32 count)
{
+#ifdef __linux__
uword seek, va, pa, p;
int fd;
u32 elts_per_page = 1U << (pm->def_log2_page_sz - pm->lookup_log2_page_sz);
if (fd != -1)
close (fd);
+#elif defined(__FreeBSD__)
+ struct mem_extract meme;
+ uword p;
+ int fd;
+ u32 elts_per_page = 1U << (pm->def_log2_page_sz - pm->lookup_log2_page_sz);
+
+ vec_validate_aligned (pm->lookup_table,
+ vec_len (pm->pages) * elts_per_page - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ p = (uword) first * elts_per_page;
+ if (pm->flags & CLIB_PMALLOC_F_NO_PAGEMAP)
+ {
+ while (p < (uword) elts_per_page * count)
+ {
+ pm->lookup_table[p] =
+ pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz);
+ p++;
+ }
+ return;
+ }
+
+ fd = open ((char *) "/dev/mem", O_RDONLY);
+ if (fd == -1)
+ return;
+
+ while (p < (uword) elts_per_page * count)
+ {
+ meme.me_vaddr =
+ pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz);
+ if (ioctl (fd, MEM_EXTRACT_PADDR, &meme) == -1)
+ continue;
+ pm->lookup_table[p] = meme.me_vaddr - meme.me_paddr;
+ p++;
+ }
+ return;
+#else
+#error "Unsupported OS"
+#endif
}
static inline clib_pmalloc_page_t *
pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
u32 numa_node, u32 n_pages)
{
+ clib_mem_page_stats_t stats = {};
clib_pmalloc_page_t *pp = 0;
- int status, rv, i, mmap_flags;
+ int rv, i, mmap_flags;
void *va = MAP_FAILED;
- int old_mpol = -1;
- long unsigned int mask[16] = { 0 };
- long unsigned int old_mask[16] = { 0 };
uword size = (uword) n_pages << pm->def_log2_page_sz;
clib_error_free (pm->error);
return 0;
}
+#ifdef __linux__
if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
{
pm->error = clib_sysfs_prealloc_hugepages (numa_node,
if (pm->error)
return 0;
}
+#endif /* __linux__ */
- rv = get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1, 0, 0);
- /* failure to get mempolicy means we can only proceed with numa 0 maps */
- if (rv == -1 && numa_node != 0)
- {
- pm->error = clib_error_return_unix (0, "failed to get mempolicy");
- return 0;
- }
-
- mask[0] = 1 << numa_node;
- rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1);
- if (rv == -1 && numa_node != 0)
+ rv = clib_mem_set_numa_affinity (numa_node, /* force */ 1);
+ if (rv == CLIB_MEM_ERROR && numa_node != 0)
{
pm->error = clib_error_return_unix (0, "failed to set mempolicy for "
"numa node %u", numa_node);
}
else
{
+#ifdef __linux__
if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
mmap_flags |= MAP_HUGETLB;
+#endif /* __linux__ */
mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
a->fd = -1;
clib_memset (va, 0, size);
- rv = set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1);
- if (rv == -1 && numa_node != 0)
+ rv = clib_mem_set_default_numa_affinity ();
+ if (rv == CLIB_MEM_ERROR && numa_node != 0)
{
pm->error = clib_error_return_unix (0, "failed to restore mempolicy");
goto error;
/* we tolerate move_pages failure only if request os for numa node 0
to support non-numa kernels */
- rv = move_pages (0, 1, &va, 0, &status, 0);
- if ((rv == 0 && status != numa_node) || (rv != 0 && numa_node != 0))
+ clib_mem_get_page_stats (va, CLIB_MEM_PAGE_SZ_DEFAULT, 1, &stats);
+
+ if (stats.per_numa[numa_node] != 1 &&
+ !(numa_node == 0 && stats.unknown == 1))
{
- pm->error = rv == -1 ?
- clib_error_return_unix (0, "page allocated on wrong node, numa node "
- "%u status %d", numa_node, status) :
- clib_error_return (0, "page allocated on wrong node, numa node "
- "%u status %d", numa_node, status);
+ u16 allocated_at = ~0;
+ if (stats.unknown)
+ clib_error_return (0,
+ "unable to get information about numa allocation");
+
+ for (u16 i = 0; i < CLIB_MAX_NUMAS; i++)
+ if (stats.per_numa[i] == 1)
+ allocated_at = i;
+
+ clib_error_return (0,
+ "page allocated on the wrong numa node (%u), "
+ "expected %u",
+ allocated_at, numa_node);
goto error;
}
return 0;
}
-void *
+__clib_export void *
clib_pmalloc_create_shared_arena (clib_pmalloc_main_t * pm, char *name,
uword size, u32 log2_page_sz, u32 numa_node)
{
if (n_pages + vec_len (pm->pages) > pm->max_pages)
return 0;
- if (pmalloc_validate_numa_node (&numa_node))
- return 0;
+ if (numa_node == CLIB_PMALLOC_NUMA_LOCAL)
+ numa_node = clib_get_current_numa_node ();
pool_get (pm->arenas, a);
a->index = a - pm->arenas;
ASSERT (is_pow2 (align));
- if (pmalloc_validate_numa_node (&numa_node))
- return 0;
+ if (numa_node == CLIB_PMALLOC_NUMA_LOCAL)
+ numa_node = clib_get_current_numa_node ();
if (a == 0)
{
return 0;
}
-void *
+__clib_export void *
clib_pmalloc_alloc_aligned_on_numa (clib_pmalloc_main_t * pm, uword size,
uword align, u32 numa_node)
{
return clib_pmalloc_alloc_inline (pm, 0, size, align, numa_node);
}
-void *
-clib_pmalloc_alloc_aligned (clib_pmalloc_main_t * pm, uword size, uword align)
+__clib_export void *
+clib_pmalloc_alloc_aligned (clib_pmalloc_main_t *pm, uword size, uword align)
{
return clib_pmalloc_alloc_inline (pm, 0, size, align,
CLIB_PMALLOC_NUMA_LOCAL);
}
-void *
-clib_pmalloc_alloc_from_arena (clib_pmalloc_main_t * pm, void *arena_va,
+__clib_export void *
+clib_pmalloc_alloc_from_arena (clib_pmalloc_main_t *pm, void *arena_va,
uword size, uword align)
{
clib_pmalloc_arena_t *a = clib_pmalloc_get_arena (pm, arena_va);
return 1;
}
-void
+__clib_export void
clib_pmalloc_free (clib_pmalloc_main_t * pm, void *va)
{
clib_pmalloc_page_t *pp;
return s;
}
-u8 *
+__clib_export u8 *
format_pmalloc (u8 * s, va_list * va)
{
clib_pmalloc_main_t *pm = va_arg (*va, clib_pmalloc_main_t *);
format_clib_error, pm->error);
- /* *INDENT-OFF* */
- pool_foreach (a, pm->arenas,
+ pool_foreach (a, pm->arenas)
{
u32 *page_index;
s = format (s, "\n%Uarena '%s' pages %u subpage-size %U numa-node %u",
s = format (s, "\n%U%U", format_white_space, indent + 4,
format_pmalloc_page, pp, verbose);
}
- });
- /* *INDENT-ON* */
+ }
return s;
}
-u8 *
+__clib_export u8 *
format_pmalloc_map (u8 * s, va_list * va)
{
clib_pmalloc_main_t *pm = va_arg (*va, clib_pmalloc_main_t *);