X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvppinfra%2Fpmalloc.c;h=85b9db9d56c59391f6751487ff1aa074c2c10654;hb=HEAD;hp=cb6c7e3ba3be9e960136d433bb584c54e6ffb82d;hpb=bdbb0c5436b52b4dc6c35d05f227cdf934306d83;p=vpp.git diff --git a/src/vppinfra/pmalloc.c b/src/vppinfra/pmalloc.c index cb6c7e3ba3b..85b9db9d56c 100644 --- a/src/vppinfra/pmalloc.c +++ b/src/vppinfra/pmalloc.c @@ -17,18 +17,21 @@ #include #include #include +#ifdef __FreeBSD__ +#include +#endif /* __FreeBSD__ */ #include #include -#include -#include #include #include -#include +#ifdef __linux__ #include +#endif #include #include #include +#include #if __SIZEOF_POINTER__ >= 8 #define DEFAULT_RESERVED_MB 16384 @@ -48,19 +51,7 @@ pmalloc_size2pages (uword size, u32 log2_page_sz) return round_pow2 (size, 1ULL << log2_page_sz) >> log2_page_sz; } -static inline int -pmalloc_validate_numa_node (u32 * numa_node) -{ - if (*numa_node == CLIB_PMALLOC_NUMA_LOCAL) - { - u32 cpu; - if (getcpu (&cpu, numa_node) != 0) - return 1; - } - return 0; -} - -int +__clib_export int clib_pmalloc_init (clib_pmalloc_main_t * pm, uword base_addr, uword size) { uword base, pagesize; @@ -196,8 +187,9 @@ next_chunk: } static void -pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count) +pmalloc_update_lookup_table (clib_pmalloc_main_t *pm, u32 first, u32 count) { +#ifdef __linux uword seek, va, pa, p; int fd; u32 elts_per_page = 1U << (pm->def_log2_page_sz - pm->lookup_log2_page_sz); @@ -235,18 +227,55 @@ pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count) if (fd != -1) close (fd); +#elif defined(__FreeBSD__) + struct mem_extract meme; + uword p; + int fd; + u32 elts_per_page = 1U << (pm->def_log2_page_sz - pm->lookup_log2_page_sz); + + vec_validate_aligned (pm->lookup_table, + vec_len (pm->pages) * elts_per_page - 1, + CLIB_CACHE_LINE_BYTES); + + p = (uword) first * elts_per_page; + if (pm->flags & CLIB_PMALLOC_F_NO_PAGEMAP) + { + while (p < (uword) elts_per_page * count) + { + pm->lookup_table[p] = + pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz); + p++; + } + return; + } + + fd = open ((char *) "/dev/mem", O_RDONLY); + if (fd == -1) + return; + + while (p < (uword) elts_per_page * count) + { + meme.me_vaddr = + pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz); + if (ioctl (fd, MEM_EXTRACT_PADDR, &meme) == -1) + continue; + pm->lookup_table[p] = meme.me_vaddr - meme.me_paddr; + p++; + } + return; +#else +#error "Unsupported OS" +#endif } static inline clib_pmalloc_page_t * pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a, u32 numa_node, u32 n_pages) { + clib_mem_page_stats_t stats = {}; clib_pmalloc_page_t *pp = 0; - int status, rv, i, mmap_flags; + int rv, i, mmap_flags; void *va = MAP_FAILED; - int old_mpol = -1; - long unsigned int mask[16] = { 0 }; - long unsigned int old_mask[16] = { 0 }; uword size = (uword) n_pages << pm->def_log2_page_sz; clib_error_free (pm->error); @@ -257,6 +286,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a, return 0; } +#ifdef __linux__ if (a->log2_subpage_sz != clib_mem_get_log2_page_size ()) { pm->error = clib_sysfs_prealloc_hugepages (numa_node, @@ -265,18 +295,10 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a, if (pm->error) return 0; } +#endif /* __linux__ */ - rv = get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1, 0, 0); - /* failure to get mempolicy means we can only proceed with numa 0 maps */ - if (rv == -1 && numa_node != 0) - { - pm->error = clib_error_return_unix (0, "failed to get mempolicy"); - return 0; - } - - mask[0] = 1 << numa_node; - rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1); - if (rv == -1 && numa_node != 0) + rv = clib_mem_set_numa_affinity (numa_node, /* force */ 1); + if (rv == CLIB_MEM_ERROR && numa_node != 0) { pm->error = clib_error_return_unix (0, "failed to set mempolicy for " "numa node %u", numa_node); @@ -296,8 +318,10 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a, } else { +#ifdef __linux__ if (a->log2_subpage_sz != clib_mem_get_log2_page_size ()) mmap_flags |= MAP_HUGETLB; +#endif /* __linux__ */ mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS; a->fd = -1; @@ -323,8 +347,8 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a, clib_memset (va, 0, size); - rv = set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1); - if (rv == -1 && numa_node != 0) + rv = clib_mem_set_default_numa_affinity (); + if (rv == CLIB_MEM_ERROR && numa_node != 0) { pm->error = clib_error_return_unix (0, "failed to restore mempolicy"); goto error; @@ -332,14 +356,24 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a, /* we tolerate move_pages failure only if request os for numa node 0 to support non-numa kernels */ - rv = move_pages (0, 1, &va, 0, &status, 0); - if ((rv == 0 && status != numa_node) || (rv != 0 && numa_node != 0)) + clib_mem_get_page_stats (va, CLIB_MEM_PAGE_SZ_DEFAULT, 1, &stats); + + if (stats.per_numa[numa_node] != 1 && + !(numa_node == 0 && stats.unknown == 1)) { - pm->error = rv == -1 ? - clib_error_return_unix (0, "page allocated on wrong node, numa node " - "%u status %d", numa_node, status) : - clib_error_return (0, "page allocated on wrong node, numa node " - "%u status %d", numa_node, status); + u16 allocated_at = ~0; + if (stats.unknown) + clib_error_return (0, + "unable to get information about numa allocation"); + + for (u16 i = 0; i < CLIB_MAX_NUMAS; i++) + if (stats.per_numa[i] == 1) + allocated_at = i; + + clib_error_return (0, + "page allocated on the wrong numa node (%u), " + "expected %u", + allocated_at, numa_node); goto error; } @@ -382,7 +416,7 @@ error: return 0; } -void * +__clib_export void * clib_pmalloc_create_shared_arena (clib_pmalloc_main_t * pm, char *name, uword size, u32 log2_page_sz, u32 numa_node) { @@ -407,8 +441,8 @@ clib_pmalloc_create_shared_arena (clib_pmalloc_main_t * pm, char *name, if (n_pages + vec_len (pm->pages) > pm->max_pages) return 0; - if (pmalloc_validate_numa_node (&numa_node)) - return 0; + if (numa_node == CLIB_PMALLOC_NUMA_LOCAL) + numa_node = clib_get_current_numa_node (); pool_get (pm->arenas, a); a->index = a - pm->arenas; @@ -438,8 +472,8 @@ clib_pmalloc_alloc_inline (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a, ASSERT (is_pow2 (align)); - if (pmalloc_validate_numa_node (&numa_node)) - return 0; + if (numa_node == CLIB_PMALLOC_NUMA_LOCAL) + numa_node = clib_get_current_numa_node (); if (a == 0) { @@ -484,22 +518,22 @@ clib_pmalloc_alloc_inline (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a, return 0; } -void * +__clib_export void * clib_pmalloc_alloc_aligned_on_numa (clib_pmalloc_main_t * pm, uword size, uword align, u32 numa_node) { return clib_pmalloc_alloc_inline (pm, 0, size, align, numa_node); } -void * -clib_pmalloc_alloc_aligned (clib_pmalloc_main_t * pm, uword size, uword align) +__clib_export void * +clib_pmalloc_alloc_aligned (clib_pmalloc_main_t *pm, uword size, uword align) { return clib_pmalloc_alloc_inline (pm, 0, size, align, CLIB_PMALLOC_NUMA_LOCAL); } -void * -clib_pmalloc_alloc_from_arena (clib_pmalloc_main_t * pm, void *arena_va, +__clib_export void * +clib_pmalloc_alloc_from_arena (clib_pmalloc_main_t *pm, void *arena_va, uword size, uword align) { clib_pmalloc_arena_t *a = clib_pmalloc_get_arena (pm, arena_va); @@ -528,7 +562,7 @@ pmalloc_chunks_mergeable (clib_pmalloc_arena_t * a, clib_pmalloc_page_t * pp, return 1; } -void +__clib_export void clib_pmalloc_free (clib_pmalloc_main_t * pm, void *va) { clib_pmalloc_page_t *pp; @@ -617,7 +651,7 @@ format_pmalloc_page (u8 * s, va_list * va) return s; } -u8 * +__clib_export u8 * format_pmalloc (u8 * s, va_list * va) { clib_pmalloc_main_t *pm = va_arg (*va, clib_pmalloc_main_t *); @@ -642,8 +676,7 @@ format_pmalloc (u8 * s, va_list * va) format_clib_error, pm->error); - /* *INDENT-OFF* */ - pool_foreach (a, pm->arenas, + pool_foreach (a, pm->arenas) { u32 *page_index; s = format (s, "\n%Uarena '%s' pages %u subpage-size %U numa-node %u", @@ -659,13 +692,12 @@ format_pmalloc (u8 * s, va_list * va) s = format (s, "\n%U%U", format_white_space, indent + 4, format_pmalloc_page, pp, verbose); } - }); - /* *INDENT-ON* */ + } return s; } -u8 * +__clib_export u8 * format_pmalloc_map (u8 * s, va_list * va) { clib_pmalloc_main_t *pm = va_arg (*va, clib_pmalloc_main_t *);