X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=lib%2Flibrte_eal%2Fcommon%2Finclude%2Frte_memory.h;h=ce9370582c1c83ebb4e724c2cd9bb357509211f6;hb=8d01b9cd70a67cdafd5b965a70420c3bd7fb3f82;hp=f8dbece0045ca6d7b1cb1374eebbb6387805ca59;hpb=97f17497d162afdb82c8704bf097f0fee3724b2e;p=deb_dpdk.git diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index f8dbece0..ce937058 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -1,34 +1,5 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation */ #ifndef _RTE_MEMORY_H_ @@ -44,16 +15,18 @@ #include #include -#ifdef RTE_EXEC_ENV_LINUXAPP -#include -#endif - #ifdef __cplusplus extern "C" { #endif #include +#include +#include + +/* forward declaration for pointers */ +struct rte_memseg_list; +__extension__ enum rte_page_sizes { RTE_PGSIZE_4K = 1ULL << 12, RTE_PGSIZE_64K = 1ULL << 16, @@ -95,30 +68,40 @@ enum rte_page_sizes { */ #define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE) -typedef uint64_t phys_addr_t; /**< Physical address definition. */ +typedef uint64_t phys_addr_t; /**< Physical address. */ #define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1) +/** + * IO virtual address type. + * When the physical addressing mode (IOVA as PA) is in use, + * the translation from an IO virtual address (IOVA) to a physical address + * is a direct mapping, i.e. the same value. + * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation. + */ +typedef uint64_t rte_iova_t; +#define RTE_BAD_IOVA ((rte_iova_t)-1) /** * Physical memory segment descriptor. */ +#define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0) +/**< Prevent this segment from being freed back to the OS. */ struct rte_memseg { - phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 + union { + phys_addr_t phys_addr; /**< deprecated - Start physical address. */ + rte_iova_t iova; /**< Start IO address. */ + }; + RTE_STD_C11 union { void *addr; /**< Start virtual address. */ uint64_t addr_64; /**< Makes sure addr is always 64 bits */ }; -#ifdef RTE_LIBRTE_IVSHMEM - phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ -#endif size_t len; /**< Length of the segment. */ uint64_t hugepage_sz; /**< The pagesize of underlying memory */ int32_t socket_id; /**< NUMA socket ID. */ uint32_t nchannel; /**< Number of channels. */ uint32_t nrank; /**< Number of ranks. */ -#ifdef RTE_LIBRTE_XEN_DOM0 - /**< store segment MFNs */ - uint64_t mfn[DOM0_NUM_MEMBLOCK]; -#endif + uint32_t flags; /**< Memseg-specific flags */ } __rte_packed; /** @@ -139,29 +122,312 @@ int rte_mem_lock_page(const void *virt); * @param virt * The virtual address. * @return - * The physical address or RTE_BAD_PHYS_ADDR on error. + * The physical address or RTE_BAD_IOVA on error. */ phys_addr_t rte_mem_virt2phy(const void *virt); /** - * Get the layout of the available physical memory. + * Get IO virtual address of any mapped virtual address in the current process. + * + * @param virt + * The virtual address. + * @return + * The IO address or RTE_BAD_IOVA on error. + */ +rte_iova_t rte_mem_virt2iova(const void *virt); + +/** + * Get virtual memory address corresponding to iova address. + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * + * @param iova + * The iova address. + * @return + * Virtual address corresponding to iova address (or NULL if address does not + * exist within DPDK memory map). + */ +__rte_experimental void * +rte_mem_iova2virt(rte_iova_t iova); + +/** + * Get memseg to which a particular virtual address belongs. + * + * @param virt + * The virtual address. + * @param msl + * The memseg list in which to look up based on ``virt`` address + * (can be NULL). + * @return + * Memseg pointer on success, or NULL on error. + */ +__rte_experimental struct rte_memseg * +rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl); + +/** + * Get memseg list corresponding to virtual memory address. + * + * @param virt + * The virtual address. + * @return + * Memseg list to which this virtual address belongs to. + */ +__rte_experimental struct rte_memseg_list * +rte_mem_virt2memseg_list(const void *virt); + +/** + * Memseg walk function prototype. + * + * Returning 0 will continue walk + * Returning 1 will stop the walk + * Returning -1 will stop the walk and report error + */ +typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl, + const struct rte_memseg *ms, void *arg); + +/** + * Memseg contig walk function prototype. This will trigger a callback on every + * VA-contiguous are starting at memseg ``ms``, so total valid VA space at each + * callback call will be [``ms->addr``, ``ms->addr + len``). + * + * Returning 0 will continue walk + * Returning 1 will stop the walk + * Returning -1 will stop the walk and report error + */ +typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl, + const struct rte_memseg *ms, size_t len, void *arg); + +/** + * Memseg list walk function prototype. This will trigger a callback on every + * allocated memseg list. + * + * Returning 0 will continue walk + * Returning 1 will stop the walk + * Returning -1 will stop the walk and report error + */ +typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl, + void *arg); + +/** + * Walk list of all memsegs. + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * + * @note This function will also walk through externally allocated segments. It + * is up to the user to decide whether to skip through these segments. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_walk(rte_memseg_walk_t func, void *arg); + +/** + * Walk each VA-contiguous area. + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * + * @note This function will also walk through externally allocated segments. It + * is up to the user to decide whether to skip through these segments. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg); + +/** + * Walk each allocated memseg list. + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * + * @note This function will also walk through externally allocated segments. It + * is up to the user to decide whether to skip through these segments. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg); + +/** + * Walk list of all memsegs without performing any locking. + * + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg); + +/** + * Walk each VA-contiguous area without performing any locking. + * + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg); + +/** + * Walk each allocated memseg list without performing any locking. + * + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg); + +/** + * Return file descriptor associated with a particular memseg (if available). + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * + * @note This returns an internal file descriptor. Performing any operations on + * this file descriptor is inherently dangerous, so it should be treated + * as read-only for all intents and purposes. + * + * @param ms + * A pointer to memseg for which to get file descriptor. + * + * @return + * Valid file descriptor in case of success. + * -1 in case of error, with ``rte_errno`` set to the following values: + * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg + * - ENODEV - ``ms`` fd is not available + * - ENOENT - ``ms`` is an unused segment + * - ENOTSUP - segment fd's are not supported + */ +int __rte_experimental +rte_memseg_get_fd(const struct rte_memseg *ms); + +/** + * Return file descriptor associated with a particular memseg (if available). * - * It can be useful for an application to have the full physical - * memory layout to decide the size of a memory zone to reserve. This - * table is stored in rte_config (see rte_eal_get_configuration()). + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. + * + * @note This returns an internal file descriptor. Performing any operations on + * this file descriptor is inherently dangerous, so it should be treated + * as read-only for all intents and purposes. + * + * @param ms + * A pointer to memseg for which to get file descriptor. + * + * @return + * Valid file descriptor in case of success. + * -1 in case of error, with ``rte_errno`` set to the following values: + * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg + * - ENODEV - ``ms`` fd is not available + * - ENOENT - ``ms`` is an unused segment + * - ENOTSUP - segment fd's are not supported + */ +int __rte_experimental +rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms); + +/** + * Get offset into segment file descriptor associated with a particular memseg + * (if available). + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * + * @param ms + * A pointer to memseg for which to get file descriptor. + * @param offset + * A pointer to offset value where the result will be stored. * * @return - * - On success, return a pointer to a read-only table of struct - * rte_physmem_desc elements, containing the layout of all - * addressable physical memory. The last element of the table - * contains a NULL address. - * - On error, return NULL. This should not happen since it is a fatal - * error that will probably cause the entire system to panic. + * Valid file descriptor in case of success. + * -1 in case of error, with ``rte_errno`` set to the following values: + * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg + * - EINVAL - ``offset`` pointer was NULL + * - ENODEV - ``ms`` fd is not available + * - ENOENT - ``ms`` is an unused segment + * - ENOTSUP - segment fd's are not supported */ -const struct rte_memseg *rte_eal_get_physmem_layout(void); +int __rte_experimental +rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset); /** - * Dump the physical memory layout to the console. + * Get offset into segment file descriptor associated with a particular memseg + * (if available). + * + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. + * + * @param ms + * A pointer to memseg for which to get file descriptor. + * @param offset + * A pointer to offset value where the result will be stored. + * + * @return + * Valid file descriptor in case of success. + * -1 in case of error, with ``rte_errno`` set to the following values: + * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg + * - EINVAL - ``offset`` pointer was NULL + * - ENODEV - ``ms`` fd is not available + * - ENOENT - ``ms`` is an unused segment + * - ENOTSUP - segment fd's are not supported + */ +int __rte_experimental +rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms, + size_t *offset); + +/** + * Dump the physical memory layout to a file. + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. * * @param f * A pointer to a file for output @@ -171,6 +437,9 @@ void rte_dump_physmem_layout(FILE *f); /** * Get the total amount of available physical memory. * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * * @return * The total amount of available physical memory in bytes. */ @@ -194,67 +463,150 @@ unsigned rte_memory_get_nchannel(void); */ unsigned rte_memory_get_nrank(void); -#ifdef RTE_LIBRTE_XEN_DOM0 +/* check memsegs iovas are within a range based on dma mask */ +int __rte_experimental rte_eal_check_dma_mask(uint8_t maskbits); + +/** + * Drivers based on uio will not load unless physical + * addresses are obtainable. It is only possible to get + * physical addresses when running as a privileged user. + * + * @return + * 1 if the system is able to obtain physical addresses. + * 0 if using DMA addresses through an IOMMU. + */ +int rte_eal_using_phys_addrs(void); + -/**< Internal use only - should DOM0 memory mapping be used */ -int rte_xen_dom0_supported(void); +/** + * Enum indicating which kind of memory event has happened. Used by callbacks to + * distinguish between memory allocations and deallocations. + */ +enum rte_mem_event { + RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */ + RTE_MEM_EVENT_FREE, /**< Deallocation event. */ +}; +#define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64 +/**< maximum length of callback name */ -/**< Internal use only - phys to virt mapping for xen */ -phys_addr_t rte_xen_mem_phy2mch(uint32_t, const phys_addr_t); +/** + * Function typedef used to register callbacks for memory events. + */ +typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type, + const void *addr, size_t len, void *arg); /** - * Return the physical address of elt, which is an element of the pool mp. + * Function used to register callbacks for memory events. * - * @param memseg_id - * The mempool is from which memory segment. - * @param phy_addr - * physical address of elt. + * @note callbacks will happen while memory hotplug subsystem is write-locked, + * therefore some functions (e.g. `rte_memseg_walk()`) will cause a + * deadlock when called from within such callbacks. + * + * @note mem event callbacks not being supported is an expected error condition, + * so user code needs to handle this situation. In these cases, return + * value will be -1, and rte_errno will be set to ENOTSUP. + * + * @param name + * Name associated with specified callback to be added to the list. + * + * @param clb + * Callback function pointer. + * + * @param arg + * Argument to pass to the callback. * * @return - * The physical address or error. - */ -static inline phys_addr_t -rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr) -{ - if (rte_xen_dom0_supported()) - return rte_xen_mem_phy2mch(memseg_id, phy_addr); - else - return phy_addr; -} + * 0 on successful callback register + * -1 on unsuccessful callback register, with rte_errno value indicating + * reason for failure. + */ +int __rte_experimental +rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb, + void *arg); /** - * Memory init for supporting application running on Xen domain0. + * Function used to unregister callbacks for memory events. + * + * @param name + * Name associated with specified callback to be removed from the list. * - * @param void + * @param arg + * Argument to look for among callbacks with specified callback name. * * @return - * 0: successfully - * negative: error + * 0 on successful callback unregister + * -1 on unsuccessful callback unregister, with rte_errno value indicating + * reason for failure. + */ +int __rte_experimental +rte_mem_event_callback_unregister(const char *name, void *arg); + + +#define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64 +/**< maximum length of alloc validator name */ +/** + * Function typedef used to register memory allocation validation callbacks. + * + * Returning 0 will allow allocation attempt to continue. Returning -1 will + * prevent allocation from succeeding. */ -int rte_xen_dom0_memory_init(void); +typedef int (*rte_mem_alloc_validator_t)(int socket_id, + size_t cur_limit, size_t new_len); /** - * Attach to memory setments of primary process on Xen domain0. + * @brief Register validator callback for memory allocations. + * + * Callbacks registered by this function will be called right before memory + * allocator is about to trigger allocation of more pages from the system if + * said allocation will bring total memory usage above specified limit on + * specified socket. User will be able to cancel pending allocation if callback + * returns -1. + * + * @note callbacks will happen while memory hotplug subsystem is write-locked, + * therefore some functions (e.g. `rte_memseg_walk()`) will cause a + * deadlock when called from within such callbacks. * - * @param void + * @note validator callbacks not being supported is an expected error condition, + * so user code needs to handle this situation. In these cases, return + * value will be -1, and rte_errno will be set to ENOTSUP. + * + * @param name + * Name associated with specified callback to be added to the list. + * + * @param clb + * Callback function pointer. + * + * @param socket_id + * Socket ID on which to watch for allocations. + * + * @param limit + * Limit above which to trigger callbacks. * * @return - * 0: successfully - * negative: error + * 0 on successful callback register + * -1 on unsuccessful callback register, with rte_errno value indicating + * reason for failure. */ -int rte_xen_dom0_memory_attach(void); -#else -static inline int rte_xen_dom0_supported(void) -{ - return 0; -} +int __rte_experimental +rte_mem_alloc_validator_register(const char *name, + rte_mem_alloc_validator_t clb, int socket_id, size_t limit); -static inline phys_addr_t -rte_mem_phy2mch(uint32_t memseg_id __rte_unused, const phys_addr_t phy_addr) -{ - return phy_addr; -} -#endif +/** + * @brief Unregister validator callback for memory allocations. + * + * @param name + * Name associated with specified callback to be removed from the list. + * + * @param socket_id + * Socket ID on which to watch for allocations. + * + * @return + * 0 on successful callback unregister + * -1 on unsuccessful callback unregister, with rte_errno value indicating + * reason for failure. + */ +int __rte_experimental +rte_mem_alloc_validator_unregister(const char *name, int socket_id); #ifdef __cplusplus }