X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Flinux%2Fphysmem.c;h=6c6b072f70a287afa676beb6a0675b175e0e499c;hb=6a5adc369591fcac2447e9809deaa22f56b53911;hp=d8c5dc9b2f2044d67ee5a0b1a599ae12bc4f6768;hpb=d9226b25f145c64e5bc4a38c3fee7e9b2eaac2de;p=vpp.git diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c old mode 100644 new mode 100755 index d8c5dc9b2f2..6c6b072f70a --- a/src/vlib/linux/physmem.c +++ b/src/vlib/linux/physmem.c @@ -43,14 +43,15 @@ #include #include #include -#include -#include +#include +#include +#include #include #include #include -#include -#include +#include +#include static void * unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx, @@ -68,23 +69,28 @@ unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx, while (1) { +#if USE_DLMALLOC == 0 + mheap_get_aligned (pr->heap, n_bytes, /* align */ alignment, /* align offset */ 0, &lo_offset); +#else + lo_offset = (uword) mspace_get_aligned (pr->heap, n_bytes, + alignment, ~0ULL /* offset */ ); + if (lo_offset == 0) + lo_offset = ~0ULL; +#endif /* Allocation failed? */ if (lo_offset == ~0) break; - if (pr->flags & VLIB_PHYSMEM_F_FAKE) - break; - /* Make sure allocation does not span DMA physical chunk boundary. */ hi_offset = lo_offset + n_bytes - 1; - if ((lo_offset >> pr->log2_page_size) == - (hi_offset >> pr->log2_page_size)) + if (((pointer_to_uword (pr->heap) + lo_offset) >> pr->log2_page_size) == + ((pointer_to_uword (pr->heap) + hi_offset) >> pr->log2_page_size)) break; /* Allocation would span chunk boundary, queue it to be freed as soon as @@ -96,7 +102,13 @@ unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx, { uword i; for (i = 0; i < vec_len (to_free); i++) - mheap_put (pr->heap, to_free[i]); + { +#if USE_DLMALLOC == 0 + mheap_put (pr->heap, to_free[i]); +#else + mspace_put_no_offset (pr->heap, (void *) to_free[i]); +#endif + } vec_free (to_free); } @@ -108,32 +120,11 @@ unix_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx, void *x) { vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); /* Return object to region's heap. */ +#if USE_DLMALLOC == 0 mheap_put (pr->heap, x - pr->heap); -} - -static u64 -get_page_paddr (int fd, uword addr) -{ - int pagesize = sysconf (_SC_PAGESIZE); - u64 seek, pagemap = 0; - - seek = ((u64) addr / pagesize) * sizeof (u64); - if (lseek (fd, seek, SEEK_SET) != seek) - { - clib_unix_warning ("lseek to 0x%llx", seek); - return 0; - } - if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap))) - { - clib_unix_warning ("read ptbits"); - return 0; - } - if ((pagemap & (1ULL << 63)) == 0) - return 0; - - pagemap &= pow2_mask (55); - - return pagemap * pagesize; +#else + mspace_put_no_offset (pr->heap, x); +#endif } static clib_error_t * @@ -141,19 +132,11 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, u8 numa_node, u32 flags, vlib_physmem_region_index_t * idx) { - vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_physmem_main_t *vpm = &physmem_main; vlib_physmem_region_t *pr; clib_error_t *error = 0; - int pagemap_fd = -1; - u8 *mount_dir = 0; - u8 *filename = 0; - struct stat st; - int old_mpol; - int mmap_flags; - struct bitmask *old_mask = numa_allocate_nodemask (); - - if (geteuid () != 0 && (flags & VLIB_PHYSMEM_F_FAKE) == 0) - return clib_error_return (0, "not allowed"); + clib_mem_vm_alloc_t alloc = { 0 }; + int i; pool_get (vpm->regions, pr); @@ -163,194 +146,86 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, goto error; } - pr->index = pr - vpm->regions; - pr->fd = -1; - pr->flags = flags; + alloc.name = name; + alloc.size = size; + alloc.numa_node = numa_node; - if (get_mempolicy (&old_mpol, old_mask->maskp, old_mask->size + 1, NULL, 0) - == -1) - { - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - error = clib_error_return_unix (0, "get_mempolicy"); - goto error; - } - else - old_mpol = -1; - } + alloc.flags = (flags & VLIB_PHYSMEM_F_SHARED) ? + CLIB_MEM_VM_F_SHARED : CLIB_MEM_VM_F_LOCKED; - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + if ((flags & VLIB_PHYSMEM_F_HUGETLB)) { - if ((pagemap_fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) - { - error = clib_error_return_unix (0, "open '/proc/self/pagemap'"); - goto error; - } - - mount_dir = format (0, "%s/physmem_region%d%c", - vlib_unix_get_runtime_dir (), pr->index, 0); - filename = format (0, "%s/mem%c", mount_dir, 0); - - unlink ((char *) mount_dir); - - error = vlib_unix_recursive_mkdir ((char *) mount_dir); - if (error) - goto error; - - if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) - { - error = clib_error_return_unix (0, "mount hugetlb directory '%s'", - mount_dir); - goto error; - } - - if ((pr->fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) - { - error = clib_error_return_unix (0, "open"); - goto error; - } - - mmap_flags = MAP_SHARED | MAP_HUGETLB | MAP_LOCKED; + alloc.flags |= CLIB_MEM_VM_F_HUGETLB; + alloc.flags |= CLIB_MEM_VM_F_HUGETLB_PREALLOC; + alloc.flags |= CLIB_MEM_VM_F_NUMA_FORCE; } else { - if ((pr->fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1) - return clib_error_return_unix (0, "memfd_create"); - - if ((fcntl (pr->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) - { - error = - clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); - goto error; - } - mmap_flags = MAP_SHARED; + alloc.flags |= CLIB_MEM_VM_F_NUMA_PREFER; } - if (fstat (pr->fd, &st)) - { - error = clib_error_return_unix (0, "fstat"); - goto error; - } - - pr->log2_page_size = min_log2 (st.st_blksize); - pr->n_pages = ((size - 1) >> pr->log2_page_size) + 1; - size = pr->n_pages * (1 << pr->log2_page_size); - - if ((ftruncate (pr->fd, size)) == -1) - { - error = clib_error_return_unix (0, "ftruncate length: %d", size); - goto error; - } - - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - error = vlib_sysfs_prealloc_hugepages (numa_node, - 1 << (pr->log2_page_size - 10), - pr->n_pages); - if (error) - goto error; - } - - if (old_mpol != -1) - numa_set_preferred (numa_node); - - pr->mem = mmap (0, size, (PROT_READ | PROT_WRITE), mmap_flags, pr->fd, 0); - - if (pr->mem == MAP_FAILED) - { - pr->mem = 0; - error = clib_error_return_unix (0, "mmap"); - goto error; - } - - if (old_mpol != -1 && - set_mempolicy (old_mpol, old_mask->maskp, old_mask->size + 1) == -1) - { - error = clib_error_return_unix (0, "set_mempolicy"); - goto error; - } + error = clib_mem_vm_ext_alloc (&alloc); + if (error) + goto error; - pr->size = pr->n_pages << pr->log2_page_size; + pr->index = pr - vpm->regions; + pr->flags = flags; + pr->fd = alloc.fd; + pr->mem = alloc.addr; + pr->log2_page_size = alloc.log2_page_size; + pr->n_pages = alloc.n_pages; + pr->size = (u64) pr->n_pages << (u64) pr->log2_page_size; pr->page_mask = (1 << pr->log2_page_size) - 1; pr->numa_node = numa_node; - pr->name = format (0, "%s", name); + pr->name = format (0, "%s%c", name, 0); - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + for (i = 0; i < pr->n_pages; i++) { - int i; - for (i = 0; i < pr->n_pages; i++) + void *ptr = pr->mem + ((u64) i << pr->log2_page_size); + int node; + if ((move_pages (0, 1, &ptr, 0, &node, 0) == 0) && (numa_node != node)) { - void *ptr = pr->mem + (i << pr->log2_page_size); - int node; - move_pages (0, 1, &ptr, 0, &node, 0); - if (numa_node != node) - { - clib_warning - ("physmem page for region \'%s\' allocated on the wrong" - " numa node (requested %u actual %u)", pr->name, - pr->numa_node, node, i); - break; - } + clib_warning ("physmem page for region \'%s\' allocated on the" + " wrong numa node (requested %u actual %u)", + pr->name, pr->numa_node, node, i); + break; } } + pr->page_table = clib_mem_vm_get_paddr (pr->mem, pr->log2_page_size, + pr->n_pages); + + linux_vfio_dma_map_regions (vm); + if (flags & VLIB_PHYSMEM_F_INIT_MHEAP) { +#if USE_DLMALLOC == 0 pr->heap = mheap_alloc_with_flags (pr->mem, pr->size, /* Don't want mheap mmap/munmap with IO memory. */ MHEAP_FLAG_DISABLE_VM | MHEAP_FLAG_THREAD_SAFE); - } - - if (flags & VLIB_PHYSMEM_F_HAVE_BUFFERS) - { - vlib_buffer_add_mem_range (vm, pointer_to_uword (pr->mem), pr->size); +#else + pr->heap = create_mspace_with_base (pr->mem, pr->size, 1 /* locked */ ); + mspace_disable_expand (pr->heap); +#endif } *idx = pr->index; - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - int i; - for (i = 0; i < pr->n_pages; i++) - { - uword vaddr = - pointer_to_uword (pr->mem) + (((u64) i) << pr->log2_page_size); - u64 page_paddr = get_page_paddr (pagemap_fd, vaddr); - vec_add1 (pr->page_table, page_paddr); - } - } - goto done; error: - if (pr->fd > -1) - close (pr->fd); - - if (pr->mem) - munmap (pr->mem, size); - memset (pr, 0, sizeof (*pr)); pool_put (vpm->regions, pr); done: - if (mount_dir) - { - umount2 ((char *) mount_dir, MNT_DETACH); - rmdir ((char *) mount_dir); - vec_free (mount_dir); - } - numa_free_cpumask (old_mask); - vec_free (filename); - if (pagemap_fd > -1) - close (pagemap_fd); return error; } static void unix_physmem_region_free (vlib_main_t * vm, vlib_physmem_region_index_t idx) { - vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_physmem_main_t *vpm = &physmem_main; vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); if (pr->fd > 0) @@ -363,12 +238,23 @@ unix_physmem_region_free (vlib_main_t * vm, vlib_physmem_region_index_t idx) clib_error_t * unix_physmem_init (vlib_main_t * vm) { + vlib_physmem_main_t *vpm = &physmem_main; clib_error_t *error = 0; + u64 *pt = 0; /* Avoid multiple calls. */ if (vm->os_physmem_alloc_aligned) return error; + /* check if pagemap is accessible */ + pt = clib_mem_vm_get_paddr (&pt, min_log2 (sysconf (_SC_PAGESIZE)), 1); + if (pt[0]) + vpm->flags |= VLIB_PHYSMEM_MAIN_F_HAVE_PAGEMAP; + vec_free (pt); + + if ((error = linux_vfio_init (vm))) + return error; + vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned; vm->os_physmem_free = unix_physmem_free; vm->os_physmem_region_alloc = unix_physmem_region_alloc; @@ -381,7 +267,7 @@ static clib_error_t * show_physmem (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_physmem_main_t *vpm = &physmem_main; vlib_physmem_region_t *pr; /* *INDENT-OFF* */