X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=lib%2Flibrte_eal%2Flinuxapp%2Feal%2Feal_memory.c;h=bac969a1285a1cb6f51b45a79fb26a9ab2583b02;hb=6e7cbd63706f3435b9d9a2057a37db1da01db9a7;hp=52791282f383db0a5ec0c2b22fe13b9eb01aa973;hpb=f239aed5e674965691846e8ce3f187dd47523689;p=deb_dpdk.git

diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 52791282..bac969a1 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -59,7 +59,6 @@
 #include
 #include
-#include
 #include
 #include
 #include
@@ -75,13 +74,6 @@
 
 #define PFN_MASK_SIZE	8
 
-#ifdef RTE_LIBRTE_XEN_DOM0
-int rte_xen_dom0_supported(void)
-{
-	return internal_config.xen_dom0_support;
-}
-#endif
-
 /**
  * @file
  * Huge page mapping under linux
@@ -96,6 +88,23 @@ int rte_xen_dom0_supported(void)
 
 static uint64_t baseaddr_offset;
 
+#ifdef RTE_ARCH_64
+/*
+ * The Linux kernel uses a very high address as the starting address for
+ * serving mmap calls. If there are addressing limitations and IOVA mode
+ * is VA, this starting address is likely too high for those devices.
+ * However, it is possible to use a lower address in the process virtual
+ * address space, as with 64 bits there is a lot of available space.
+ *
+ * Currently known limitations are 39 or 40 bits. Setting the starting
+ * address at 4GB implies there are 508GB or 1020GB available for mapping
+ * the hugepages. This is likely enough for most systems, although a device
+ * with addressing limitations should call rte_dev_check_dma_mask to ensure
+ * all memory is within the supported range.
+ */
+static uint64_t baseaddr = 0x100000000;
+#endif
+
 static bool phys_addrs_available = true;
 
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
@@ -103,13 +112,9 @@ static bool phys_addrs_available = true;
 static void
 test_phys_addrs_available(void)
 {
-	uint64_t tmp;
+	uint64_t tmp = 0;
 	phys_addr_t physaddr;
 
-	/* For dom0, phys addresses can always be available */
-	if (rte_xen_dom0_supported())
-		return;
-
 	if (!rte_eal_has_hugepages()) {
 		RTE_LOG(ERR, EAL,
 			"Started without hugepages support, physical addresses not available\n");
@@ -119,10 +124,11 @@ test_phys_addrs_available(void)
 
 	physaddr = rte_mem_virt2phy(&tmp);
 	if (physaddr == RTE_BAD_PHYS_ADDR) {
-		RTE_LOG(ERR, EAL,
-			"Cannot obtain physical addresses: %s. "
-			"Only vfio will function.\n",
-			strerror(errno));
+		if (rte_eal_iova_mode() == RTE_IOVA_PA)
+			RTE_LOG(ERR, EAL,
+				"Cannot obtain physical addresses: %s. "
+				"Only vfio will function.\n",
+				strerror(errno));
 		phys_addrs_available = false;
 	}
 }
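/*
 * A self-contained sketch (not part of this patch) of the
 * /proc/self/pagemap technique that test_phys_addrs_available() probes
 * above and that rte_mem_virt2phy() implements below; lookup_phys() is an
 * illustrative name, not a DPDK symbol. Each pagemap entry is 8 bytes;
 * bits 0-54 hold the page frame number when the page is present, and
 * since Linux 4.0 the PFN reads as 0 without CAP_SYS_ADMIN, which is
 * exactly the failure mode the probe above detects.
 */
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

static uint64_t
lookup_phys(const void *virt)
{
	long page_size = sysconf(_SC_PAGESIZE);
	uint64_t entry = 0;
	/* one 8-byte entry per page, indexed by virtual page number */
	off_t offset = ((uintptr_t)virt / page_size) * sizeof(entry);
	int fd = open("/proc/self/pagemap", O_RDONLY);

	if (fd < 0)
		return UINT64_MAX;
	if (pread(fd, &entry, sizeof(entry), offset) != (ssize_t)sizeof(entry)) {
		close(fd);
		return UINT64_MAX;
	}
	close(fd);
	/* a PFN of 0 means the page is absent or its PFN is hidden */
	if ((entry & 0x7fffffffffffffULL) == 0)
		return UINT64_MAX;
	return (entry & 0x7fffffffffffffULL) * page_size
		+ (uintptr_t)virt % page_size;
}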
" + "Only vfio will function.\n", + strerror(errno)); phys_addrs_available = false; } } @@ -139,32 +145,9 @@ rte_mem_virt2phy(const void *virtaddr) int page_size; off_t offset; - /* when using dom0, /proc/self/pagemap always returns 0, check in - * dpdk memory by browsing the memsegs */ - if (rte_xen_dom0_supported()) { - struct rte_mem_config *mcfg; - struct rte_memseg *memseg; - unsigned i; - - mcfg = rte_eal_get_configuration()->mem_config; - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - memseg = &mcfg->memseg[i]; - if (memseg->addr == NULL) - break; - if (virtaddr > memseg->addr && - virtaddr < RTE_PTR_ADD(memseg->addr, - memseg->len)) { - return memseg->phys_addr + - RTE_PTR_DIFF(virtaddr, memseg->addr); - } - } - - return RTE_BAD_PHYS_ADDR; - } - /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ if (!phys_addrs_available) - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; /* standard page size */ page_size = getpagesize(); @@ -173,7 +156,7 @@ rte_mem_virt2phy(const void *virtaddr) if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", __func__, strerror(errno)); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } virt_pfn = (unsigned long)virtaddr / page_size; @@ -182,7 +165,7 @@ rte_mem_virt2phy(const void *virtaddr) RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", __func__, strerror(errno)); close(fd); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } retval = read(fd, &page, PFN_MASK_SIZE); @@ -190,12 +173,12 @@ rte_mem_virt2phy(const void *virtaddr) if (retval < 0) { RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", __func__, strerror(errno)); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } else if (retval != PFN_MASK_SIZE) { RTE_LOG(ERR, EAL, "%s(): read %d bytes from /proc/self/pagemap " "but expected %d:\n", __func__, retval, PFN_MASK_SIZE); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } /* @@ -203,7 +186,7 @@ rte_mem_virt2phy(const void *virtaddr) * pagemap.txt in linux Documentation) */ if ((page & 0x7fffffffffffffULL) == 0) - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; physaddr = ((page & 0x7fffffffffffffULL) * page_size) + ((unsigned long)virtaddr % page_size); @@ -211,6 +194,14 @@ rte_mem_virt2phy(const void *virtaddr) return physaddr; } +rte_iova_t +rte_mem_virt2iova(const void *virtaddr) +{ + if (rte_eal_iova_mode() == RTE_IOVA_VA) + return (uintptr_t)virtaddr; + return rte_mem_virt2phy(virtaddr); +} + /* * For each hugepage in hugepg_tbl, fill the physaddr value. We find * it by browsing the /proc/self/pagemap special file. @@ -276,6 +267,23 @@ aslr_enabled(void) } } +static void * +get_addr_hint(void) +{ + if (internal_config.base_virtaddr != 0) { + return (void *) (uintptr_t) + (internal_config.base_virtaddr + + baseaddr_offset); + } else { +#ifdef RTE_ARCH_64 + return (void *) (uintptr_t) (baseaddr + + baseaddr_offset); +#else + return NULL; +#endif + } +} + /* * Try to mmap *size bytes in /dev/zero. If it is successful, return the * pointer to the mmap'd area and keep *size unmodified. 
@@ -286,16 +294,10 @@ aslr_enabled(void)
 static void *
 get_virtual_area(size_t *size, size_t hugepage_sz)
 {
-	void *addr;
+	void *addr, *addr_hint;
 	int fd;
 	long aligned_addr;
 
-	if (internal_config.base_virtaddr != 0) {
-		addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
-				baseaddr_offset);
-	}
-	else addr = NULL;
-
 	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
 
 	fd = open("/dev/zero", O_RDONLY);
@@ -304,7 +306,9 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
 		return NULL;
 	}
 	do {
-		addr = mmap(addr,
+		addr_hint = get_addr_hint();
+
+		addr = mmap(addr_hint,
 			(*size) + hugepage_sz, PROT_READ,
 #ifdef RTE_ARCH_PPC_64
 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
@@ -312,8 +316,15 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
 			MAP_PRIVATE,
 #endif
 			fd, 0);
-		if (addr == MAP_FAILED)
+		if (addr == MAP_FAILED) {
+			/* map failed. Let's try with less memory */
 			*size -= hugepage_sz;
+		} else if (addr_hint && addr != addr_hint) {
+			/* hint was not used. Try with another offset */
+			munmap(addr, (*size) + hugepage_sz);
+			addr = MAP_FAILED;
+			baseaddr_offset += 0x100000000;
+		}
 	} while (addr == MAP_FAILED && *size > 0);
 
 	if (addr == MAP_FAILED) {
@@ -370,7 +381,7 @@ void numa_error(char *where)
  * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
  * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
  * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
- * map continguous physical blocks in contiguous virtual blocks.
+ * map contiguous physical blocks in contiguous virtual blocks.
  */
 static unsigned
 map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
@@ -517,6 +528,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
 			hugepg_tbl[i].orig_va = virtaddr;
 		}
 		else {
+			/* rewrite physical addresses in IOVA as VA mode */
+			if (rte_eal_iova_mode() == RTE_IOVA_VA)
+				hugepg_tbl[i].physaddr = (uintptr_t)virtaddr;
 			hugepg_tbl[i].final_va = virtaddr;
 		}
 
@@ -716,6 +730,8 @@ create_shared_memory(const char *filename, const size_t mem_size)
 	}
 	retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 	close(fd);
+	if (retval == MAP_FAILED)
+		return NULL;
 	return retval;
 }
 
@@ -1059,7 +1075,10 @@ rte_eal_hugepage_init(void)
 				strerror(errno));
 			return -1;
 		}
-		mcfg->memseg[0].phys_addr = RTE_BAD_PHYS_ADDR;
+		if (rte_eal_iova_mode() == RTE_IOVA_VA)
+			mcfg->memseg[0].iova = (uintptr_t)addr;
+		else
+			mcfg->memseg[0].iova = RTE_BAD_IOVA;
 		mcfg->memseg[0].addr = addr;
 		mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
 		mcfg->memseg[0].len = internal_config.memory;
@@ -1067,17 +1086,6 @@ rte_eal_hugepage_init(void)
 		return 0;
 	}
 
-	/* check if app runs on Xen Dom0 */
-	if (internal_config.xen_dom0_support) {
-#ifdef RTE_LIBRTE_XEN_DOM0
-		/* use dom0_mm kernel driver to init memory */
-		if (rte_xen_dom0_memory_init() < 0)
-			return -1;
-		else
-			return 0;
-#endif
-	}
-
 	/* calculate total number of hugepages available. at this point we haven't
 	 * yet started sorting them so they all are on socket 0 */
 	for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
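/*
 * A sketch (not part of this patch) of the pitfall fixed in
 * create_shared_memory() above: on failure mmap() returns MAP_FAILED,
 * which is (void *)-1 rather than NULL, so passing its result straight
 * through lets a failed mapping masquerade as a valid pointer.
 * Normalizing to NULL, as the hunk above does, keeps callers' NULL
 * checks honest. mmap_or_null() is an illustrative name, not a DPDK
 * symbol.
 */
#include <stddef.h>
#include <sys/mman.h>

static void *
mmap_or_null(void *hint, size_t len, int prot, int flags, int fd)
{
	void *addr = mmap(hint, len, prot, flags, fd, 0);

	return addr == MAP_FAILED ? NULL : addr;
}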
@@ -1141,7 +1149,8 @@ rte_eal_hugepage_init(void)
 			continue;
 		}
 
-		if (phys_addrs_available) {
+		if (phys_addrs_available &&
+				rte_eal_iova_mode() != RTE_IOVA_VA) {
 			/* find physical addresses for each hugepage */
 			if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
 				RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
@@ -1319,7 +1328,7 @@ rte_eal_hugepage_init(void)
 		if (j == RTE_MAX_MEMSEG)
 			break;
 
-		mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+		mcfg->memseg[j].iova = hugepage[i].physaddr;
 		mcfg->memseg[j].addr = hugepage[i].final_va;
 		mcfg->memseg[j].len = hugepage[i].size;
 		mcfg->memseg[j].socket_id = hugepage[i].socket_id;
@@ -1330,7 +1339,7 @@ rte_eal_hugepage_init(void)
 #ifdef RTE_ARCH_PPC_64
 			/* Use the phy and virt address of the last page as segment
 			 * address for IBM Power architecture */
-			mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+			mcfg->memseg[j].iova = hugepage[i].physaddr;
 			mcfg->memseg[j].addr = hugepage[i].final_va;
 #endif
 			mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz;
@@ -1400,17 +1409,6 @@ rte_eal_hugepage_attach(void)
 
 	test_phys_addrs_available();
 
-	if (internal_config.xen_dom0_support) {
-#ifdef RTE_LIBRTE_XEN_DOM0
-		if (rte_xen_dom0_memory_attach() < 0) {
-			RTE_LOG(ERR, EAL, "Failed to attach memory segments of primary "
-					"process\n");
-			return -1;
-		}
-		return 0;
-#endif
-	}
-
 	fd_zero = open("/dev/zero", O_RDONLY);
 	if (fd_zero < 0) {
 		RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
@@ -1542,7 +1540,7 @@ error:
 	return -1;
 }
 
-bool
+int
 rte_eal_using_phys_addrs(void)
 {
 	return phys_addrs_available;
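/*
 * A usage sketch (not part of this patch), assuming a DPDK release that
 * carries these changes (17.11 or later): rte_mem_virt2iova() returns the
 * virtual address itself in IOVA-as-VA mode and falls back to the
 * pagemap-based rte_mem_virt2phy() in PA mode, so the same call works in
 * both configurations.
 */
#include <inttypes.h>
#include <stdio.h>

#include <rte_eal.h>
#include <rte_memory.h>

int
main(int argc, char **argv)
{
	static uint64_t probe; /* any mapped process memory will do */

	if (rte_eal_init(argc, argv) < 0)
		return 1;

	rte_iova_t iova = rte_mem_virt2iova(&probe);

	printf("iova mode: %s, iova of probe: 0x%" PRIx64 "\n",
	       rte_eal_iova_mode() == RTE_IOVA_VA ? "VA" : "PA",
	       (uint64_t)iova);
	return iova == RTE_BAD_IOVA;
}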