X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvppinfra%2Flinux%2Fmem.c;h=1b3694b1af436e4143e0ded26a1135eb771e979e;hb=459a0c4e3be1473c4c2b93811280c738e60d0524;hp=96fb0db5b733ee8724b5e396fd617525bd9028cc;hpb=6bfd07670b991c30761ef74fb09f42181dbfd182;p=vpp.git

Review note: the added lines below were amended after the blob named in the
URL and index lines was generated (mkdtemp template, open () error label,
errno ordering, NUMA mask shift, comments), so those hashes describe the
pre-review content; hunk headers were renumbered to match.

diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c
index 96fb0db5b73..1b3694b1af4 100644
--- a/src/vppinfra/linux/mem.c
+++ b/src/vppinfra/linux/mem.c
@@ -54,6 +54,10 @@
 #define MAP_HUGE_SHIFT 26
 #endif
 
+#ifndef MFD_HUGE_SHIFT
+#define MFD_HUGE_SHIFT 26
+#endif
+
 #ifndef MAP_FIXED_NOREPLACE
 #define MAP_FIXED_NOREPLACE 0x100000
 #endif
@@ -192,76 +196,10 @@ clib_mem_vm_randomize_va (uword * requested_va,
     (clib_cpu_time_now () & bit_mask) * (1ull << log2_page_size);
 }
 
-clib_error_t *
-clib_mem_create_fd (char *name, int *fdp)
-{
-  int fd;
-
-  ASSERT (name);
-
-  if ((fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1)
-    return clib_error_return_unix (0, "memfd_create");
-
-  if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
-    {
-      close (fd);
-      return clib_error_return_unix (0, "fcntl (F_ADD_SEALS)");
-    }
-
-  *fdp = fd;
-  return 0;
-}
-
-clib_error_t *
-clib_mem_create_hugetlb_fd (char *name, int *fdp)
-{
-  clib_error_t *err = 0;
-  int fd = -1;
-  static int memfd_hugetlb_supported = 1;
-  char *mount_dir;
-  char template[] = "/tmp/hugepage_mount.XXXXXX";
-  u8 *filename;
-
-  ASSERT (name);
-
-  if (memfd_hugetlb_supported)
-    {
-      if ((fd = memfd_create (name, MFD_HUGETLB)) != -1)
-        goto done;
-
-      /* avoid further tries if memfd MFD_HUGETLB is not supported */
-      if (errno == EINVAL && strnlen (name, 256) <= 249)
-        memfd_hugetlb_supported = 0;
-    }
-
-  mount_dir = mkdtemp (template);
-  if (mount_dir == 0)
-    return clib_error_return_unix (0, "mkdtemp \'%s\'", template);
-
-  if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL))
-    {
-      rmdir ((char *) mount_dir);
-      err = clib_error_return_unix (0, "mount hugetlb directory '%s'",
-                                    mount_dir);
-    }
-
-  filename = format (0, "%s/%s%c", mount_dir, name, 0);
-  fd = open ((char *) filename, O_CREAT | O_RDWR, 0755);
-  umount2 ((char *) mount_dir, MNT_DETACH);
-  rmdir ((char *) mount_dir);
-
-  if (fd == -1)
-    err = clib_error_return_unix (0, "open");
-
-done:
-  if (fd != -1)
-    fdp[0] = fd;
-  return err;
-}
-
 clib_error_t *
 clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a)
 {
+  clib_mem_main_t *mm = &clib_mem_main;
   int fd = -1;
   clib_error_t *err = 0;
   void *addr = 0;
@@ -301,15 +239,16 @@ clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a)
       /* if hugepages are needed we need to create mount point */
       if (a->flags & CLIB_MEM_VM_F_HUGETLB)
         {
-          if ((err = clib_mem_create_hugetlb_fd (a->name, &fd)))
-            goto error;
-
+          log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT_HUGE;
           mmap_flags |= MAP_LOCKED;
         }
       else
+        log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT;
+
+      if ((fd = clib_mem_vm_create_fd (log2_page_size, "%s", a->name)) == -1)
         {
-          if ((err = clib_mem_create_fd (a->name, &fd)))
-            goto error;
+          err = clib_error_return (0, "%U", format_clib_error, mm->error);
+          goto error;
         }
 
       log2_page_size = clib_mem_get_fd_log2_page_size (fd);
@@ -414,6 +353,128 @@ clib_mem_vm_ext_free (clib_mem_vm_alloc_t * a)
     }
 }
 
+/*
+ * Fallback used when memfd_create () rejects MFD_HUGETLB: mount a hugetlbfs
+ * instance on a temporary directory, create the file inside it, then detach
+ * the mount and remove the directory again.
+ * Returns the open descriptor; on failure mm->error is set and either
+ * CLIB_MEM_ERROR or the -1 returned by open () is handed back.
+ */
+static int
+legacy_memfd_create (u8 * name)
+{
+  clib_mem_main_t *mm = &clib_mem_main;
+  int fd = -1;
+  char *mount_dir;
+  /* mkdtemp () rewrites the XXXXXX suffix in place, so the template must
+     live in writable storage and not in a string literal */
+  char template[] = "/tmp/hugepage_mount.XXXXXX";
+  u8 *filename;
+
+  /* create mount directory */
+  if ((mount_dir = mkdtemp (template)) == 0)
+    {
+      vec_reset_length (mm->error);
+      mm->error = clib_error_return_unix (mm->error, "mkdtemp");
+      return CLIB_MEM_ERROR;
+    }
+
+  if (mount ("none", mount_dir, "hugetlbfs", 0, NULL))
+    {
+      /* record the error first: rmdir () may overwrite errno (presumably
+         what clib_error_return_unix reports - confirm) */
+      vec_reset_length (mm->error);
+      mm->error = clib_error_return_unix (mm->error, "mount");
+      rmdir ((char *) mount_dir);
+      return CLIB_MEM_ERROR;
+    }
+
+  filename = format (0, "%s/%s%c", mount_dir, name, 0);
+
+  if ((fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1)
+    {
+      vec_reset_length (mm->error);
+      mm->error = clib_error_return_unix (mm->error, "open");
+    }
+
+  umount2 ((char *) mount_dir, MNT_DETACH);
+  rmdir ((char *) mount_dir);
+  vec_free (filename);
+
+  return fd;
+}
+
+/*
+ * Create a memory-backed file descriptor whose page size is selected by
+ * log2_page_size.  The name is formatted from fmt with va_format () and cut
+ * to 249 characters.  Returns the descriptor, or CLIB_MEM_ERROR; mm->error
+ * is set for every failure except the CLIB_MEM_PAGE_SZ_UNKNOWN early return.
+ */
+int
+clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...)
+{
+  clib_mem_main_t *mm = &clib_mem_main;
+  int fd;
+  unsigned int memfd_flags;
+  va_list va;
+  u8 *s = 0;
+
+  if (log2_page_size == mm->log2_page_sz)
+    log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT;
+
+  switch (log2_page_size)
+    {
+    case CLIB_MEM_PAGE_SZ_UNKNOWN:
+      return CLIB_MEM_ERROR;
+    case CLIB_MEM_PAGE_SZ_DEFAULT:
+      memfd_flags = MFD_ALLOW_SEALING;
+      break;
+    case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
+      memfd_flags = MFD_HUGETLB;
+      break;
+    default:
+      memfd_flags = MFD_HUGETLB | log2_page_size << MFD_HUGE_SHIFT;
+    }
+
+  va_start (va, fmt);
+  s = va_format (0, fmt, &va);
+  va_end (va);
+
+  /* memfd_create maximum string size is 249 chars without trailing zero */
+  if (vec_len (s) > 249)
+    _vec_len (s) = 249;
+  vec_add1 (s, 0);
+
+  /* memfd_create introduced in kernel 3.17, we don't support older kernels */
+  fd = memfd_create ((char *) s, memfd_flags);
+
+  /* kernel versions < 4.14 does not support memfd_create for huge pages */
+  if (fd == -1 && errno == EINVAL &&
+      log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT_HUGE)
+    {
+      fd = legacy_memfd_create (s);
+    }
+  else if (fd == -1)
+    {
+      vec_reset_length (mm->error);
+      mm->error = clib_error_return_unix (mm->error, "memfd_create");
+      vec_free (s);
+      return CLIB_MEM_ERROR;
+    }
+
+  vec_free (s);
+
+  if ((memfd_flags & MFD_ALLOW_SEALING) &&
+      ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1))
+    {
+      vec_reset_length (mm->error);
+      mm->error = clib_error_return_unix (mm->error, "fcntl (F_ADD_SEALS)");
+      close (fd);
+      return CLIB_MEM_ERROR;
+    }
+
+  return fd;
+}
+
 uword
 clib_mem_vm_reserve (uword start, uword size, clib_mem_page_sz_t log2_page_sz)
 {
@@ -579,6 +640,7 @@ clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
   hdr->base_addr = (uword) base;
   hdr->log2_page_sz = log2_page_sz;
   hdr->num_pages = size >> log2_page_sz;
+  hdr->fd = fd;
   snprintf (hdr->name, CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1, "%s", (char *) name);
   hdr->name[CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1] = 0;
   mprotect (hdr, sys_page_sz, PROT_NONE);
@@ -595,11 +657,11 @@ clib_mem_vm_unmap (void *base)
   clib_mem_vm_map_hdr_t *hdr = base - sys_page_sz;;
 
   if (mprotect (hdr, sys_page_sz, PROT_READ | PROT_WRITE) != 0)
-    return -1;
+    return CLIB_MEM_ERROR;
 
   size = hdr->num_pages << hdr->log2_page_sz;
   if (munmap ((void *) hdr->base_addr, size) != 0)
-    return -1;
+    return CLIB_MEM_ERROR;
 
   if (hdr->next)
     {
@@ -620,7 +682,7 @@ clib_mem_vm_unmap (void *base)
     mm->first_map = hdr->next;
 
   if (munmap (hdr, sys_page_sz) != 0)
-    return -1;
+    return CLIB_MEM_ERROR;
 
   return 0;
 }
@@ -705,62 +767,65 @@ done:
   return r;
 }
 
-clib_error_t *
-clib_mem_vm_ext_map (clib_mem_vm_map_t * a)
+/*
+ * Apply a NUMA memory policy for numa_node via set_mempolicy (): MPOL_BIND
+ * when force is non-zero, MPOL_PREFERRED otherwise.
+ * Returns 0 on success, CLIB_MEM_ERROR with mm->error set on failure.
+ */
+int
+clib_mem_set_numa_affinity (u8 numa_node, int force)
 {
-  long unsigned int old_mask[16] = { 0 };
-  int mmap_flags = MAP_SHARED;
-  clib_error_t *err = 0;
-  int old_mpol = -1;
-  void *addr;
-  int rv;
+  clib_mem_main_t *mm = &clib_mem_main;
+  long unsigned int mask[16] = { 0 };
+  int mask_len = sizeof (mask) * 8 + 1;
 
-  if (a->numa_node)
+  /* no numa support */
+  if (mm->numa_node_bitmap == 0)
     {
-      rv = get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1, 0,
-                          0);
-
-      if (rv == -1)
+      if (numa_node)
         {
-          err = clib_error_return_unix (0, "get_mempolicy");
-          goto done;
+          vec_reset_length (mm->error);
+          mm->error = clib_error_return (mm->error, "%s: numa not supported",
+                                         (char *) __func__);
+          return CLIB_MEM_ERROR;
         }
+      else
+        return 0;
     }
 
-  if (a->requested_va)
-    mmap_flags |= MAP_FIXED;
+  /* set the node's bit in whichever mask word holds it; the shift is done
+     on an unsigned long because (int) 1 << numa_node overflows past bit 30 */
+  mask[numa_node / (8 * sizeof (mask[0]))] =
+    1UL << (numa_node % (8 * sizeof (mask[0])));
 
-  if (old_mpol != -1)
-    {
-      long unsigned int mask[16] = { 0 };
-      mask[0] = 1 << a->numa_node;
-      rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1);
-      if (rv == -1)
-        {
-          err = clib_error_return_unix (0, "set_mempolicy");
-          goto done;
-        }
-    }
+  if (set_mempolicy (force ? MPOL_BIND : MPOL_PREFERRED, mask, mask_len))
+    goto error;
 
-  addr = (void *) mmap (uword_to_pointer (a->requested_va, void *), a->size,
-                        PROT_READ | PROT_WRITE, mmap_flags, a->fd, 0);
+  vec_reset_length (mm->error);
+  return 0;
 
-  if (addr == MAP_FAILED)
-    return clib_error_return_unix (0, "mmap");
+error:
+  vec_reset_length (mm->error);
+  mm->error = clib_error_return_unix (mm->error, (char *) __func__);
+  return CLIB_MEM_ERROR;
+}
 
-  /* re-apply old numa memory policy */
-  if (old_mpol != -1 &&
-      set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1) == -1)
+/*
+ * Select MPOL_DEFAULT via set_mempolicy ().
+ * Returns 0 on success, CLIB_MEM_ERROR with mm->error set on failure.
+ */
+int
+clib_mem_set_default_numa_affinity ()
+{
+  clib_mem_main_t *mm = &clib_mem_main;
+
+  if (set_mempolicy (MPOL_DEFAULT, 0, 0))
     {
-      err = clib_error_return_unix (0, "set_mempolicy");
-      goto done;
+      vec_reset_length (mm->error);
+      mm->error = clib_error_return_unix (mm->error, (char *) __func__);
+      return CLIB_MEM_ERROR;
     }
-
-  a->addr = addr;
-  CLIB_MEM_UNPOISON (addr, a->size);
-
-done:
-  return err;
+  return 0;
 }
 
 /*