New upstream version 18.11-rc1
[deb_dpdk.git] / drivers / common / dpaax / dpaax_iova_table.c
diff --git a/drivers/common/dpaax/dpaax_iova_table.c b/drivers/common/dpaax/dpaax_iova_table.c
new file mode 100644 (file)
index 0000000..2dd38a9
--- /dev/null
@@ -0,0 +1,465 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 NXP
+ */
+
+#include <rte_memory.h>
+
+#include "dpaax_iova_table.h"
+#include "dpaax_logs.h"
+
+/* Global dpaax logger identifier; assigned from rte_log_register() in the
+ * RTE_INIT(dpaax_log) constructor at the end of this file.
+ */
+int dpaax_logger;
+
+/* Global table reference. NULL until dpaax_iova_table_populate() allocates
+ * the table; reset to NULL again by dpaax_iova_table_depopulate().
+ */
+struct dpaax_iova_table *dpaax_iova_table_p;
+
+/* Forward declaration: defined near the end of this file, but called from
+ * dpaax_iova_table_populate().
+ */
+static int dpaax_handle_memevents(void);
+
+/* A structure representing one (start address, length) pair read from the
+ * device-tree memory node available in /proc/device-tree.
+ * Filled in by read_memory_node() and consumed by
+ * dpaax_iova_table_populate().
+ */
+struct reg_node {
+       phys_addr_t addr; /* start address of the memory region */
+       size_t len;       /* length of the memory region */
+};
+
+/* A ntohll equivalent routine: convert the 8 bytes at 'arr' from big-endian
+ * (network) byte order to host byte order, in place.
+ *
+ * The value is assembled byte by byte, so the result is correct on both
+ * little- and big-endian hosts and 'arr' needs no particular alignment.
+ *
+ * @param arr
+ *    IN/OUT pointer to 8 bytes holding a big-endian 64 bit value; overwritten
+ *    with the same value in host byte order.
+ */
+static void
+rotate_8(unsigned char *arr)
+{
+       uint64_t host_val = 0;
+       unsigned int i;
+
+       /* arr[0] is the most significant byte of the big-endian input */
+       for (i = 0; i < sizeof(host_val); i++)
+               host_val = (host_val << 8) | arr[i];
+
+       /* memcpy() rather than a pointer cast: makes no aliasing or
+        * alignment assumption about 'arr'.
+        */
+       memcpy(arr, &host_val, sizeof(host_val));
+}
+
+/* read_memory_node
+ * Memory layout for DPAAx platforms (LS1043, LS1046, LS1088, LS2088, LX2160)
+ * is populated by Uboot and available in device tree:
+ * /proc/device-tree/memory@<address>/reg <= register.
+ * Entries are of the form:
+ *  (<8 byte start addr><8 byte length>)(..more similar blocks of start,len>)..
+ * Each 8 byte field is passed through rotate_8() (ntohll equivalent) before
+ * being stored.
+ *
+ * Only the first MEM_NODE_FILE_LEN bytes of the node are read; entries beyond
+ * that are not reported.
+ *
+ * @param count
+ *    OUT populate number of entries found in memory node; 0 on failure
+ * @return
+ *    Pointer to array of reg_node elements, count size, allocated with
+ *    calloc() - the caller releases it with free(). NULL on any failure.
+ */
+static struct reg_node *
+read_memory_node(unsigned int *count)
+{
+       int fd = -1, ret;
+       unsigned int j, entries;
+       glob_t result = {0};
+       struct stat statbuf = {0};
+       char file_data[MEM_NODE_FILE_LEN];
+       struct reg_node *nodes = NULL;
+
+       *count = 0;
+
+       ret = glob(MEM_NODE_PATH_GLOB, 0, NULL, &result);
+       if (ret != 0) {
+               DPAAX_DEBUG("Unable to glob device-tree memory node: (%s)(%d)",
+                           MEM_NODE_PATH_GLOB, ret);
+               goto cleanup;
+       }
+
+       if (result.gl_pathc != 1) {
+               /* Either more than one memory@<addr> node found, or none.
+                * In either case, cannot work ahead.
+                */
+               DPAAX_DEBUG("Found (%zu) entries in device-tree. Not supported!",
+                           result.gl_pathc);
+               goto cleanup;
+       }
+
+       DPAAX_DEBUG("Opening and parsing device-tree node: (%s)",
+                   result.gl_pathv[0]);
+       fd = open(result.gl_pathv[0], O_RDONLY);
+       if (fd < 0) {
+               DPAAX_DEBUG("Unable to open the device-tree node: (%s)(fd=%d)",
+                           MEM_NODE_PATH_GLOB, fd);
+               goto cleanup;
+       }
+
+       /* Stat to get the file size */
+       ret = fstat(fd, &statbuf);
+       if (ret != 0) {
+               DPAAX_DEBUG("Unable to get device-tree memory node size.");
+               goto cleanup;
+       }
+
+       DPAAX_DEBUG("Size of device-tree mem node: %lu",
+                   (unsigned long)statbuf.st_size);
+       if (statbuf.st_size > MEM_NODE_FILE_LEN) {
+               DPAAX_DEBUG("More memory nodes available than assumed.");
+               DPAAX_DEBUG("System may not work properly!");
+       }
+
+       ret = read(fd, file_data, statbuf.st_size > MEM_NODE_FILE_LEN ?
+                                 MEM_NODE_FILE_LEN : statbuf.st_size);
+       if (ret <= 0) {
+               DPAAX_DEBUG("Unable to read device-tree memory node: (%d)",
+                           ret);
+               goto cleanup;
+       }
+
+       /* The reg node should be multiple of 16 bytes, 8 bytes each for addr
+        * and len. The entry count is derived from the bytes actually read
+        * into file_data (not from the file size), so a truncated or short
+        * read can never make the parse loop below run past valid data.
+        */
+       entries = (unsigned int)ret / 16;
+       if (entries == 0 || (statbuf.st_size % 16 != 0)) {
+               DPAAX_DEBUG("Invalid memory node values or count. (size=%lu)",
+                           (unsigned long)statbuf.st_size);
+               goto cleanup;
+       }
+
+       /* each entry is of 16 bytes; calloc() hands back zeroed memory */
+       nodes = calloc(entries, sizeof(*nodes));
+       if (!nodes) {
+               DPAAX_DEBUG("Failure in allocating working memory.");
+               goto cleanup;
+       }
+
+       for (j = 0; j < entries; j++) {
+               uint64_t raw_addr, raw_len;
+
+               /* Go through 64 bit temporaries so that nothing depends on
+                * the in-memory layout of struct reg_node or on the width of
+                * size_t.
+                */
+               memcpy(&raw_addr, file_data + (j * 16), sizeof(raw_addr));
+               memcpy(&raw_len, file_data + (j * 16) + 8, sizeof(raw_len));
+               /* Rotate (ntohl) each 8 byte entry */
+               rotate_8((unsigned char *)&raw_addr);
+               rotate_8((unsigned char *)&raw_len);
+               nodes[j].addr = raw_addr;
+               nodes[j].len = raw_len;
+       }
+
+       DPAAX_DEBUG("Device-tree memory node data:");
+       for (j = 0; j < entries; j++) {
+               DPAAX_DEBUG("\n    %08" PRIx64 " %08zu",
+                           nodes[j].addr, nodes[j].len);
+       }
+
+       /* Publish the count only once the array is complete */
+       *count = entries;
+
+cleanup:
+       if (fd >= 0)
+               close(fd);
+       /* 'result' is zero-initialised, so this is also fine when glob()
+        * failed; it releases any partial result glob() may have left.
+        */
+       globfree(&result);
+       return nodes;
+}
+
+/* dpaax_iova_table_populate
+ * Build the global PA->VA table (dpaax_iova_table_p) from the memory regions
+ * reported by read_memory_node(), then attach it to memory events through
+ * dpaax_handle_memevents().
+ *
+ * @return
+ *    0 if the table already exists or has been created (a failure to add the
+ *    mem-event handler is only logged); -1 if the memory node could not be
+ *    read or the table could not be allocated.
+ */
+int
+dpaax_iova_table_populate(void)
+{
+       struct dpaax_iovat_element *elems;
+       struct reg_node *regions;
+       unsigned int idx, nb_regions;
+       size_t mem_total = 0, table_bytes;
+       uint64_t *next_pages;
+
+       /* The table is a singleton. A non-NULL pointer is taken to mean the
+        * table is already allocated - although it could also point to some
+        * path that did not clean up.
+        */
+       if (dpaax_iova_table_p != NULL) {
+               DPAAX_DEBUG("Multiple allocation attempt for IOVA Table (%p)",
+                           dpaax_iova_table_p);
+               return 0;
+       }
+
+       regions = read_memory_node(&nb_regions);
+       if (!regions) {
+               DPAAX_WARN("PA->VA translation not available;");
+               DPAAX_WARN("Expect performance impact.");
+               return -1;
+       }
+
+       for (idx = 0; idx < nb_regions; idx++)
+               mem_total += regions[idx].len;
+
+       DPAAX_DEBUG("Total available PA memory size: %zu", mem_total);
+
+       /* One allocation holds, back to back: the table header, one element
+        * per region, and one uint64_t slot per DPAAX_MEM_SPLIT sized chunk
+        * of memory.
+        */
+       table_bytes = sizeof(struct dpaax_iova_table);
+       table_bytes += sizeof(struct dpaax_iovat_element) * nb_regions;
+       table_bytes += (mem_total / DPAAX_MEM_SPLIT) * sizeof(uint64_t);
+
+       /* TODO: This memory doesn't need to be shared but needs to be always
+        * pinned to RAM (no swap out) - using hugepage rather than malloc
+        */
+       dpaax_iova_table_p = rte_zmalloc(NULL, table_bytes, 0);
+       if (!dpaax_iova_table_p) {
+               DPAAX_WARN("Unable to allocate memory for PA->VA Table;");
+               DPAAX_WARN("PA->VA translation not available;");
+               DPAAX_WARN("Expect performance impact.");
+               free(regions);
+               return -1;
+       }
+
+       dpaax_iova_table_p->count = nb_regions;
+       elems = dpaax_iova_table_p->entries;
+
+       DPAAX_DEBUG("IOVA Table entries: (entry start = %p)", (void *)elems);
+       DPAAX_DEBUG("\t(entry),(start),(len),(next)");
+
+       /* The slot area starts right behind the last element. Each element
+        * gets len / DPAAX_MEM_SPLIT consecutive slots, so the slots of
+        * element N start where those of element N-1 end.
+        */
+       next_pages = (uint64_t *)((unsigned char *)elems +
+                    (sizeof(struct dpaax_iovat_element) * nb_regions));
+
+       for (idx = 0; idx < nb_regions; idx++) {
+               elems[idx].start = regions[idx].addr;
+               elems[idx].len = regions[idx].len;
+               elems[idx].pages = next_pages;
+               next_pages += elems[idx].len / DPAAX_MEM_SPLIT;
+
+               DPAAX_DEBUG("\t(%u),(%8"PRIx64"),(%8zu),(%8p)",
+                           idx, elems[idx].start, elems[idx].len,
+                           elems[idx].pages);
+       }
+
+       /* The region array has been copied into the table; drop it */
+       free(regions);
+
+       DPAAX_DEBUG("Adding mem-event handler\n");
+       if (dpaax_handle_memevents() != 0) {
+               DPAAX_ERR("Unable to add mem-event handler");
+               DPAAX_WARN("Cases with non-buffer pool mem won't work!");
+       }
+
+       return 0;
+}
+
+/* dpaax_iova_table_depopulate
+ * Release the global PA->VA table, if one was allocated, and reset
+ * dpaax_iova_table_p to NULL so that the table can be populated again.
+ * Does nothing when no table exists.
+ */
+void
+dpaax_iova_table_depopulate(void)
+{
+       if (dpaax_iova_table_p == NULL)
+               return;
+
+       /* The table header, its entries[] and every pages[] array live in
+        * the single rte_zmalloc() block made by
+        * dpaax_iova_table_populate(). That block starts at
+        * dpaax_iova_table_p, so this is the pointer to hand back - not the
+        * address of the 'entries' member inside it.
+        */
+       rte_free(dpaax_iova_table_p);
+       dpaax_iova_table_p = NULL;
+
+       DPAAX_DEBUG("IOVA Table cleanedup");
+}
+
+/* dpaax_iova_table_update
+ * Record, or clear, the virtual address backing a physical range in the
+ * PA->VA table. Both addresses are aligned down with DPAAX_MEM_SPLIT_MASK
+ * and one pages[] slot is written per DPAAX_MEM_SPLIT sized chunk of the
+ * request. Slots that would fall outside the matching table entry are not
+ * written.
+ *
+ * @param paddr
+ *    Physical address of the start of the range.
+ * @param vaddr
+ *    Virtual address mapped at paddr; NULL clears the slots (writes 0).
+ * @param length
+ *    Length of the range.
+ * @return
+ *    0 when a table entry covering paddr was found; -1 when there is no
+ *    table or no entry covers paddr.
+ */
+int
+dpaax_iova_table_update(phys_addr_t paddr, void *vaddr, size_t length)
+{
+       int found = 0;
+       unsigned int i;
+       size_t req_length = length, e_offset = 0, e_slots;
+       struct dpaax_iovat_element *entry;
+       uintptr_t align_vaddr;
+       phys_addr_t align_paddr;
+
+       if (unlikely(dpaax_iova_table_p == NULL))
+               return -1;
+
+       align_paddr = paddr & DPAAX_MEM_SPLIT_MASK;
+       align_vaddr = ((uintptr_t)vaddr & DPAAX_MEM_SPLIT_MASK);
+
+       /* Check if paddr is available in table */
+       entry = dpaax_iova_table_p->entries;
+       for (i = 0; i < dpaax_iova_table_p->count; i++) {
+               if (align_paddr < entry[i].start) {
+                       /* Address lower than start, but not found in previous
+                        * iteration shouldn't exist.
+                        */
+                       DPAAX_ERR("Add: Incorrect entry for PA->VA Table"
+                                 "(%"PRIu64")", paddr);
+                       DPAAX_ERR("Add: Lowest address: %"PRIu64"",
+                                 entry[i].start);
+                       return -1;
+               }
+
+               /* (start + len) itself is the first address past this entry,
+                * hence '>=' and not '>'.
+                */
+               if (align_paddr >= (entry[i].start + entry[i].len))
+                       continue;
+
+               /* align_paddr >= start && align_paddr < (start + len) */
+               found = 1;
+               /* Number of pages[] slots this entry owns */
+               e_slots = entry[i].len / DPAAX_MEM_SPLIT;
+
+               do {
+                       e_offset = ((align_paddr - entry[i].start) / DPAAX_MEM_SPLIT);
+                       if (e_offset >= e_slots) {
+                               /* The request runs past this entry; writing
+                                * on would land in memory that does not
+                                * belong to entry[i].pages[].
+                                */
+                               DPAAX_DEBUG("Add: Request exceeds entry at"
+                                           " (%"PRIu64"); %zu bytes not"
+                                           " recorded", entry[i].start,
+                                           req_length);
+                               break;
+                       }
+                       /* TODO: Whatif something already exists at this
+                        * location - is that an error? For now, ignoring the
+                        * case.
+                        */
+                       entry[i].pages[e_offset] = align_vaddr;
+                       DPAAX_DEBUG("Added: vaddr=%zu for Phy:%"PRIu64" at %zu"
+                                   " remaining len %zu", (size_t)align_vaddr,
+                                   align_paddr, e_offset, req_length);
+
+                       /* Incoming request can be larger than the
+                        * DPAAX_MEM_SPLIT size - in which case, multiple
+                        * entries in entry->pages[] are filled up.
+                        */
+                       if (req_length <= DPAAX_MEM_SPLIT)
+                               break;
+                       align_paddr += DPAAX_MEM_SPLIT;
+                       /* A NULL vaddr clears the slots: every one of them
+                        * must get 0, not a running offset.
+                        */
+                       if (vaddr != NULL)
+                               align_vaddr += DPAAX_MEM_SPLIT;
+                       req_length -= DPAAX_MEM_SPLIT;
+               } while (1);
+
+               break;
+       }
+
+       if (!found) {
+               /* There might be case where the incoming physical address is
+                * beyond the address discovered in the memory node of
+                * device-tree. Specially if some malloc'd area is used by EAL
+                * and the memevent handlers passes that across. But, this is
+                * not necessarily an error.
+                */
+               DPAAX_DEBUG("Add: Unable to find slot for vaddr:(%p),"
+                           " phy(%"PRIu64")",
+                           vaddr, paddr);
+               return -1;
+       }
+
+       DPAAX_DEBUG("Add: Found slot at (%"PRIu64")[(%zu)] for vaddr:(%p),"
+                   " phy(%"PRIu64"), len(%zu)", entry[i].start, e_offset,
+                   vaddr, paddr, length);
+       return 0;
+}
+
+/* dpaax_iova_table_dump
+ * Dump the table, with its entries, on screen. Only works in Debug Mode
+ * Not for weak hearted - the tables can get quite large
+ *
+ * For every table entry the header line (index, start, len, pages pointer)
+ * is printed, followed by one (PA),(VA) line for each pages[] slot that
+ * holds a non-zero value. Slot j of an entry describes the physical address
+ * start + j * DPAAX_MEM_SPLIT.
+ */
+void
+dpaax_iova_table_dump(void)
+{
+       unsigned int i, j;
+       size_t n_slots;
+       struct dpaax_iovat_element *entry;
+
+       /* In case DEBUG is not enabled, some 'if' conditions might misbehave
+        * as they have nothing else in them  except a DPAAX_DEBUG() which if
+        * tuned out would leave 'if' naked.
+        */
+       if (rte_log_get_global_level() < RTE_LOG_DEBUG) {
+               DPAAX_ERR("Set log level to Debug for PA->Table dump!");
+               return;
+       }
+
+       DPAAX_DEBUG(" === Start of PA->VA Translation Table ===");
+       if (dpaax_iova_table_p == NULL) {
+               /* No table: report it and stop before dereferencing */
+               DPAAX_DEBUG("\tNULL");
+               DPAAX_DEBUG(" === End of PA->VA Translation Table ===");
+               return;
+       }
+
+       entry = dpaax_iova_table_p->entries;
+       for (i = 0; i < dpaax_iova_table_p->count; i++) {
+               DPAAX_DEBUG("\t(%16i),(%16"PRIu64"),(%16zu),(%16p)",
+                           i, entry[i].start, entry[i].len,
+                           (void *)entry[i].pages);
+               DPAAX_DEBUG("\t\t          (PA),          (VA)");
+               /* Slot count of THIS entry, not of the first one */
+               n_slots = entry[i].len / DPAAX_MEM_SPLIT;
+               for (j = 0; j < n_slots; j++) {
+                       if (entry[i].pages[j] == 0)
+                               continue;
+                       DPAAX_DEBUG("\t\t(%16"PRIx64"),(%16"PRIx64")",
+                                   (uint64_t)(entry[i].start +
+                                   ((uint64_t)j * DPAAX_MEM_SPLIT)),
+                                   entry[i].pages[j]);
+               }
+       }
+       DPAAX_DEBUG(" === End of PA->VA Translation Table ===");
+}
+
+/* dpaax_memevent_cb
+ * Memory event callback, registered by dpaax_handle_memevents(). Walks the
+ * range [addr, addr + len) one memseg at a time and calls
+ * dpaax_iova_table_update() for each memseg: with the memseg's virtual
+ * address on RTE_MEM_EVENT_ALLOC, with a NULL virtual address on any other
+ * event. The walk stops at the first memseg that cannot be resolved or whose
+ * table update fails.
+ *
+ * @param type
+ *    Kind of memory event.
+ * @param addr
+ *    Virtual address of the start of the affected range.
+ * @param len
+ *    Length of the affected range.
+ * @param arg
+ *    Unused.
+ */
+static void
+dpaax_memevent_cb(enum rte_mem_event type, const void *addr, size_t len,
+                 void *arg __rte_unused)
+{
+       struct rte_memseg_list *msl;
+       struct rte_memseg *ms;
+       size_t cur_len = 0, map_len = 0;
+       phys_addr_t phys_addr;
+       void *virt_addr;
+       int ret;
+
+       DPAAX_DEBUG("Called with addr=%p, len=%zu", addr, len);
+
+       msl = rte_mem_virt2memseg_list(addr);
+
+       while (cur_len < len) {
+               const void *va = RTE_PTR_ADD(addr, cur_len);
+
+               ms = rte_mem_virt2memseg(va, msl);
+               /* Without a memseg there is nothing to translate, and a zero
+                * length one would never advance cur_len.
+                */
+               if (ms == NULL || ms->len == 0) {
+                       DPAAX_DEBUG("No usable memseg for va=%p; stopping at"
+                                   " offset %zu of %zu", va, cur_len, len);
+                       return;
+               }
+
+               phys_addr = rte_mem_virt2phy(ms->addr);
+               virt_addr = ms->addr;
+               map_len = ms->len;
+
+               DPAAX_DEBUG("Request for %s, va=%p, virt_addr=%p,"
+                           "iova=%"PRIu64", map_len=%zu",
+                           type == RTE_MEM_EVENT_ALLOC ?
+                           "alloc" : "dealloc",
+                           va, virt_addr, phys_addr, map_len);
+
+               if (type == RTE_MEM_EVENT_ALLOC)
+                       ret = dpaax_iova_table_update(phys_addr, virt_addr,
+                                                     map_len);
+               else
+                       /* In case of mem_events for MEM_EVENT_FREE, complete
+                        * hugepage is released and its PA entry is set to 0.
+                        */
+                       ret = dpaax_iova_table_update(phys_addr, NULL,
+                                                     map_len);
+
+               if (ret != 0) {
+                       DPAAX_DEBUG("PA-Table entry update failed. "
+                                   "Map=%d, addr=%p, len=%zu, err:(%d)",
+                                   type, va, map_len, ret);
+                       return;
+               }
+
+               cur_len += map_len;
+       }
+}
+
+/* dpaax_memevent_walk_memsegs
+ * Walk callback handed to rte_memseg_contig_walk_thread_unsafe() by
+ * dpaax_handle_memevents(): enters the reported area (ms->addr, len) into
+ * the PA->VA table. The outcome of the table update is not checked.
+ *
+ * @return
+ *    Always 0.
+ */
+static int
+dpaax_memevent_walk_memsegs(const struct rte_memseg_list *msl __rte_unused,
+                           const struct rte_memseg *ms, size_t len,
+                           void *arg __rte_unused)
+{
+       void *seg_va = ms->addr;
+
+       DPAAX_DEBUG("Walking for %p (pa=%"PRIu64") and len %zu",
+                   seg_va, ms->phys_addr, len);
+
+       /* The physical address is looked up afresh from the virtual one */
+       dpaax_iova_table_update(rte_mem_virt2phy(seg_va), seg_va, len);
+
+       return 0;
+}
+
+/* dpaax_handle_memevents
+ * Enter the memsegs that exist right now into the PA->VA table, then
+ * register dpaax_memevent_cb() for memory events.
+ *
+ * @return
+ *    Whatever rte_mem_event_callback_register() returns.
+ */
+static int
+dpaax_handle_memevents(void)
+{
+       int rc;
+
+       /* Walk before installing the handler, so that every memseg EAL has
+        * already identified/allocated is part of the table - e.g. when the
+        * application allocated memory before the EAL, or when externally
+        * allocated memory was passed to EAL.
+        */
+       rte_memseg_contig_walk_thread_unsafe(dpaax_memevent_walk_memsegs, NULL);
+
+       rc = rte_mem_event_callback_register("dpaax_memevents_cb",
+                                            dpaax_memevent_cb, NULL);
+       return rc;
+}
+
+/* Register the "pmd.common.dpaax" log type and keep its identifier in
+ * dpaax_logger. When registration succeeds, the type's level is set to
+ * RTE_LOG_ERR.
+ */
+RTE_INIT(dpaax_log)
+{
+       dpaax_logger = rte_log_register("pmd.common.dpaax");
+       if (dpaax_logger < 0)
+               return;
+
+       rte_log_set_level(dpaax_logger, RTE_LOG_ERR);
+}