#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <unistd.h>
#include <linux/mempolicy.h>
#include <linux/memfd.h>
+#include <sched.h>
#include <vppinfra/format.h>
#include <vppinfra/linux/syscall.h>
if (*numa_node == CLIB_PMALLOC_NUMA_LOCAL)
{
u32 cpu;
- if (getcpu (&cpu, numa_node, 0) != 0)
+ if (getcpu (&cpu, numa_node) != 0)
return 1;
}
return 0;
}
-int
-clib_pmalloc_init (clib_pmalloc_main_t * pm, uword size)
+/* Initialize the pmalloc main structure: record the default (huge) page
+ * size, check whether /proc/self/pagemap is readable (sets
+ * CLIB_PMALLOC_F_NO_PAGEMAP when it is not), and reserve a contiguous VA
+ * region of 'size' bytes (rounded up to the default page size), optionally
+ * at caller-requested 'base_addr'.
+ * Returns 0 on success, -1 on failure with pm->error set. */
+__clib_export int
+clib_pmalloc_init (clib_pmalloc_main_t * pm, uword base_addr, uword size)
{
- uword off, pagesize;
+ uword base, pagesize;
+ u64 *pt = 0;
ASSERT (pm->error == 0);
pagesize = clib_mem_get_default_hugepage_size ();
pm->def_log2_page_sz = min_log2 (pagesize);
- pm->sys_log2_page_sz = min_log2 (sysconf (_SC_PAGESIZE));
pm->lookup_log2_page_sz = pm->def_log2_page_sz;
+ /* check if pagemap is accessible */
+ pt = clib_mem_vm_get_paddr (&pt, CLIB_MEM_PAGE_SZ_DEFAULT, 1);
+ if (pt == 0 || pt[0] == 0)
+ pm->flags |= CLIB_PMALLOC_F_NO_PAGEMAP;
+
+ /* zero size means "use the compiled-in default reservation" */
size = size ? size : ((u64) DEFAULT_RESERVED_MB) << 20;
size = round_pow2 (size, pagesize);
pm->max_pages = size >> pm->def_log2_page_sz;
- /* reserve VA space for future growth */
- pm->base = mmap (0, size + pagesize, PROT_NONE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ /* reserve VA space; ~0 signals failure (replaces manual mmap + trim) */
+ base = clib_mem_vm_reserve (base_addr, size, pm->def_log2_page_sz);
- if (pm->base == MAP_FAILED)
+ if (base == ~0)
{
- pm->error = clib_error_return_unix (0, "failed to reserve %u pages");
+ pm->error = clib_error_return (0, "failed to reserve %u pages",
+ pm->max_pages);
return -1;
}
- off = round_pow2 (pointer_to_uword (pm->base), pagesize) -
- pointer_to_uword (pm->base);
-
- /* trim start and end of reservation to be page aligned */
- if (off)
- {
- munmap (pm->base, off);
- pm->base += off;
- }
-
- munmap (pm->base + (pm->max_pages * pagesize), pagesize - off);
+ pm->base = uword_to_pointer (base, void *);
return 0;
}
pp->n_free_chunks = a->subpages_per_page;
}
+ if (pp->n_free_blocks < n_blocks)
+ return 0;
+
alloc_chunk_index = pp->first_chunk_index;
next_chunk:
vec_validate_aligned (pm->lookup_table, vec_len (pm->pages) *
elts_per_page - 1, CLIB_CACHE_LINE_BYTES);
- fd = open ((char *) "/proc/self/pagemap", O_RDONLY);
+ p = (uword) first *elts_per_page;
+ if (pm->flags & CLIB_PMALLOC_F_NO_PAGEMAP)
+ {
+ while (p < (uword) elts_per_page * count)
+ {
+ pm->lookup_table[p] = pointer_to_uword (pm->base) +
+ (p << pm->lookup_log2_page_sz);
+ p++;
+ }
+ return;
+ }
- p = first * elts_per_page;
- while (p < elts_per_page * count)
+ fd = open ((char *) "/proc/self/pagemap", O_RDONLY);
+ while (p < (uword) elts_per_page * count)
{
va = pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz);
- seek = (va >> pm->sys_log2_page_sz) * sizeof (pa);
+ pa = 0;
+ seek = (va >> clib_mem_get_log2_page_size ()) * sizeof (pa);
if (fd != -1 && lseek (fd, seek, SEEK_SET) == seek &&
read (fd, &pa, sizeof (pa)) == (sizeof (pa)) &&
pa & (1ULL << 63) /* page present bit */ )
{
- pa = (pa & pow2_mask (55)) << pm->sys_log2_page_sz;
+ pa = (pa & pow2_mask (55)) << clib_mem_get_log2_page_size ();
}
pm->lookup_table[p] = va - pa;
p++;
{
clib_pmalloc_page_t *pp = 0;
int status, rv, i, mmap_flags;
- void *va;
+ void *va = MAP_FAILED;
int old_mpol = -1;
long unsigned int mask[16] = { 0 };
long unsigned int old_mask[16] = { 0 };
return 0;
}
- if (a->log2_subpage_sz != pm->sys_log2_page_sz)
+ if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
{
pm->error = clib_sysfs_prealloc_hugepages (numa_node,
a->log2_subpage_sz, n_pages);
return 0;
}
- mmap_flags = MAP_FIXED | MAP_ANONYMOUS | MAP_LOCKED;
-
- if (a->log2_subpage_sz != pm->sys_log2_page_sz)
- mmap_flags |= MAP_HUGETLB;
+ mmap_flags = MAP_FIXED;
if (a->flags & CLIB_PMALLOC_ARENA_F_SHARED_MEM)
{
mmap_flags |= MAP_SHARED;
- if (mmap_flags & MAP_HUGETLB)
- pm->error = clib_mem_create_hugetlb_fd ((char *) a->name, &a->fd);
- else
- pm->error = clib_mem_create_fd ((char *) a->name, &a->fd);
+ a->fd = clib_mem_vm_create_fd (a->log2_subpage_sz, "%s", a->name);
if (a->fd == -1)
goto error;
+ if ((ftruncate (a->fd, size)) == -1)
+ goto error;
}
else
{
- mmap_flags |= MAP_PRIVATE;
+ if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
+ mmap_flags |= MAP_HUGETLB;
+
+ mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
a->fd = -1;
}
pm->error = clib_error_return_unix (0, "failed to mmap %u pages at %p "
"fd %d numa %d flags 0x%x", n_pages,
va, a->fd, numa_node, mmap_flags);
+ va = MAP_FAILED;
+ goto error;
+ }
+
+ if (a->log2_subpage_sz != clib_mem_get_log2_page_size () &&
+ mlock (va, size) != 0)
+ {
+ pm->error = clib_error_return_unix (0, "Unable to lock pages");
goto error;
}
clib_error_return (0, "page allocated on wrong node, numa node "
"%u status %d", numa_node, status);
- /* unmap & reesrve */
- munmap (va, size);
- mmap (va, size, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
- -1, 0);
goto error;
}
return pp - (n_pages - 1);
error:
+ if (va != MAP_FAILED)
+ {
+ /* unmap & reserve */
+ munmap (va, size);
+ mmap (va, size, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ }
if (a->fd != -1)
close (a->fd);
return 0;
}
-void *
+__clib_export void *
clib_pmalloc_create_shared_arena (clib_pmalloc_main_t * pm, char *name,
uword size, u32 log2_page_sz, u32 numa_node)
{
if (log2_page_sz == 0)
log2_page_sz = pm->def_log2_page_sz;
else if (log2_page_sz != pm->def_log2_page_sz &&
- log2_page_sz != pm->sys_log2_page_sz)
+ log2_page_sz != clib_mem_get_log2_page_size ())
{
pm->error = clib_error_create ("unsupported page size (%uKB)",
1 << (log2_page_sz - 10));
return 0;
}
- return pm->base + (pp->index << pm->def_log2_page_sz);
+ return pm->base + ((uword) pp->index << pm->def_log2_page_sz);
}
static inline void *
return 0;
}
-void *
+__clib_export void *
clib_pmalloc_alloc_aligned_on_numa (clib_pmalloc_main_t * pm, uword size,
uword align, u32 numa_node)
{
return 1;
}
-void
+__clib_export void
clib_pmalloc_free (clib_pmalloc_main_t * pm, void *va)
{
clib_pmalloc_page_t *pp;
}
}
-static u8 *
-format_log2_page_size (u8 * s, va_list * va)
-{
- u32 log2_page_sz = va_arg (*va, u32);
-
- if (log2_page_sz >= 30)
- return format (s, "%uGB", 1 << (log2_page_sz - 30));
-
- if (log2_page_sz >= 20)
- return format (s, "%uMB", 1 << (log2_page_sz - 20));
-
- if (log2_page_sz >= 10)
- return format (s, "%uKB", 1 << (log2_page_sz - 10));
-
- return format (s, "%uB", 1 << log2_page_sz);
-}
-
-
static u8 *
format_pmalloc_page (u8 * s, va_list * va)
{
int verbose = va_arg (*va, int);
u32 indent = format_get_indent (s);
- s = format (s, "page %u: phys-addr %p ", pp->index, pp->pa);
-
if (pp->chunks == 0)
return s;
return s;
}
-u8 *
+__clib_export u8 *
format_pmalloc (u8 * s, va_list * va)
{
clib_pmalloc_main_t *pm = va_arg (*va, clib_pmalloc_main_t *);
clib_pmalloc_arena_t *a;
s = format (s, "used-pages %u reserved-pages %u default-page-size %U "
- "lookup-page-size %U", vec_len (pm->pages), pm->max_pages,
+ "lookup-page-size %U%s", vec_len (pm->pages), pm->max_pages,
format_log2_page_size, pm->def_log2_page_sz,
- format_log2_page_size, pm->lookup_log2_page_sz);
+ format_log2_page_size, pm->lookup_log2_page_sz,
+ pm->flags & CLIB_PMALLOC_F_NO_PAGEMAP ? " no-pagemap" : "");
if (verbose >= 2)
return s;
}
+/* Format the pmalloc lookup table as one "virtual-addr physical-addr size"
+ * row per lookup page.  Usage: format (s, "%U", format_pmalloc_map, pm).
+ * The lookup table stores (va - pa) per page, so pa = va - entry. */
+__clib_export u8 *
+format_pmalloc_map (u8 * s, va_list * va)
+{
+  clib_pmalloc_main_t *pm = va_arg (*va, clib_pmalloc_main_t *);
+
+  u32 index;
+  s = format (s, "%16s %13s %8s", "virtual-addr", "physical-addr", "size");
+  vec_foreach_index (index, pm->lookup_table)
+  {
+    /* named 'vaddr' to avoid shadowing the va_list parameter 'va' */
+    uword *lookup_val, pa, vaddr;
+    lookup_val = vec_elt_at_index (pm->lookup_table, index);
+    vaddr = pointer_to_uword (pm->base) +
+      ((uword) index << pm->lookup_log2_page_sz);
+    pa = vaddr - *lookup_val;
+    s = format (s, "\n %16p %13p %8U", uword_to_pointer (vaddr, u64),
+		uword_to_pointer (pa, u64), format_log2_page_size,
+		pm->lookup_log2_page_sz);
+  }
+  return s;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*