/* deb_dpdk.git: lib/librte_eal/bsdapp/eal/eal_memory.c (new upstream version 18.08) */
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <sys/mman.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <inttypes.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>

#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"

#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))

/*
 * Get physical address of any mapped virtual address in the current process.
 */
phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
        /* XXX not implemented. This function is only used by
         * rte_mempool_virt2iova() when hugepages are disabled. */
        (void)virtaddr;
        return RTE_BAD_IOVA;
}
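
/*
 * IOVA resolution defers to rte_mem_virt2phy(), which is not implemented on
 * FreeBSD, so this always returns RTE_BAD_IOVA.
 */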
rte_iova_t
rte_mem_virt2iova(const void *virtaddr)
{
        return rte_mem_virt2phy(virtaddr);
}

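/*
 * Primary-process memory initialization: either back DPDK memory with
 * anonymous 4K pages (--no-huge), or map each contigmem buffer, query its
 * physical address via the hw.contigmem.physaddr.<N> sysctl and register it
 * as a memseg, leaving holes between segments that are not IOVA-contiguous.
 */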
int
rte_eal_hugepage_init(void)
{
        struct rte_mem_config *mcfg;
        uint64_t total_mem = 0;
        void *addr;
        unsigned int i, j, seg_idx = 0;

        /* get pointer to global configuration */
        mcfg = rte_eal_get_configuration()->mem_config;

        /* for debug purposes, hugetlbfs can be disabled */
        if (internal_config.no_hugetlbfs) {
                struct rte_memseg_list *msl;
                struct rte_fbarray *arr;
                struct rte_memseg *ms;
                uint64_t page_sz;
                int n_segs, cur_seg;

                /* create a memseg list */
                msl = &mcfg->memsegs[0];

                page_sz = RTE_PGSIZE_4K;
                n_segs = internal_config.memory / page_sz;

                if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs,
                                sizeof(struct rte_memseg))) {
                        RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
                        return -1;
                }

                addr = mmap(NULL, internal_config.memory,
                                PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                if (addr == MAP_FAILED) {
                        RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
                                        strerror(errno));
                        return -1;
                }
                msl->base_va = addr;
                msl->page_sz = page_sz;
                msl->socket_id = 0;

                /* populate memsegs. each memseg is 1 page long */
                for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
                        arr = &msl->memseg_arr;

                        ms = rte_fbarray_get(arr, cur_seg);
                        if (rte_eal_iova_mode() == RTE_IOVA_VA)
                                ms->iova = (uintptr_t)addr;
                        else
                                ms->iova = RTE_BAD_IOVA;
                        ms->addr = addr;
                        ms->hugepage_sz = page_sz;
                        ms->len = page_sz;
                        ms->socket_id = 0;

                        rte_fbarray_set_used(arr, cur_seg);

                        addr = RTE_PTR_ADD(addr, page_sz);
                }
                return 0;
        }

        /* map all hugepages and register them as memsegs */
        for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
                struct hugepage_info *hpi;
                rte_iova_t prev_end = 0;
                int prev_ms_idx = -1;
                uint64_t page_sz, mem_needed;
                unsigned int n_pages, max_pages;

                hpi = &internal_config.hugepage_info[i];
                page_sz = hpi->hugepage_sz;
                max_pages = hpi->num_pages[0];
                mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem,
                                page_sz);

                n_pages = RTE_MIN(mem_needed / page_sz, max_pages);

                for (j = 0; j < n_pages; j++) {
                        struct rte_memseg_list *msl;
                        struct rte_fbarray *arr;
                        struct rte_memseg *seg;
                        int msl_idx, ms_idx;
                        rte_iova_t physaddr;
                        int error;
                        size_t sysctl_size = sizeof(physaddr);
                        char physaddr_str[64];
                        bool is_adjacent;

                        /* first, check if this segment is IOVA-adjacent to
                         * the previous one.
                         */
                        snprintf(physaddr_str, sizeof(physaddr_str),
                                        "hw.contigmem.physaddr.%d", j);
                        error = sysctlbyname(physaddr_str, &physaddr,
                                        &sysctl_size, NULL, 0);
                        if (error < 0) {
                                RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
                                                "from %s\n", j, hpi->hugedir);
                                return -1;
                        }

                        is_adjacent = prev_end != 0 && physaddr == prev_end;
                        prev_end = physaddr + hpi->hugepage_sz;

                        for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
                                        msl_idx++) {
                                bool empty, need_hole;
                                msl = &mcfg->memsegs[msl_idx];
                                arr = &msl->memseg_arr;

                                if (msl->page_sz != page_sz)
                                        continue;

                                empty = arr->count == 0;

                                /* we need a hole if this isn't an empty memseg
                                 * list, and if previous segment was not
                                 * adjacent to current one.
                                 */
                                need_hole = !empty && !is_adjacent;

                                /* we need 1, plus hole if not adjacent */
                                ms_idx = rte_fbarray_find_next_n_free(arr,
                                                0, 1 + (need_hole ? 1 : 0));

                                /* memseg list is full? */
                                if (ms_idx < 0)
                                        continue;

                                if (need_hole && prev_ms_idx == ms_idx - 1)
                                        ms_idx++;
                                prev_ms_idx = ms_idx;

                                break;
                        }
                        if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
                                RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
                                        RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
                                        RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE));
                                return -1;
                        }
                        arr = &msl->memseg_arr;
                        seg = rte_fbarray_get(arr, ms_idx);

                        addr = RTE_PTR_ADD(msl->base_va,
                                        (size_t)msl->page_sz * ms_idx);

                        /* address is already mapped in memseg list, so using
                         * MAP_FIXED here is safe.
                         */
                        addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
                                        MAP_SHARED | MAP_FIXED,
                                        hpi->lock_descriptor,
                                        j * EAL_PAGE_SIZE);
                        if (addr == MAP_FAILED) {
                                RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
                                                j, hpi->hugedir);
                                return -1;
                        }

                        seg->addr = addr;
                        seg->iova = physaddr;
                        seg->hugepage_sz = page_sz;
                        seg->len = page_sz;
                        seg->nchannel = mcfg->nchannel;
                        seg->nrank = mcfg->nrank;
                        seg->socket_id = 0;

                        rte_fbarray_set_used(arr, ms_idx);

                        RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
                                        PRIx64", len %zu\n",
                                        seg_idx++, addr, physaddr, page_sz);

                        total_mem += seg->len;
                }
                if (total_mem >= internal_config.memory)
                        break;
        }
        if (total_mem < internal_config.memory) {
                RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
                                "requested: %" PRIu64 "M "
                                "available: %" PRIu64 "M\n",
                                internal_config.memory >> 20, total_mem >> 20);
                return -1;
        }
        return 0;
}

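/* State carried across the memseg walk done in rte_eal_hugepage_attach(). */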
struct attach_walk_args {
        int fd_hugepage;
        int seg_idx;
};
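
/*
 * rte_memseg_walk() callback: map the segment's slice of the contigmem
 * device file at the virtual address recorded in the memseg (MAP_FIXED),
 * and fail if the mapping does not land at exactly that address.
 */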
static int
attach_segment(const struct rte_memseg_list *msl __rte_unused,
                const struct rte_memseg *ms, void *arg)
{
        struct attach_walk_args *wa = arg;
        void *addr;

        addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
                        wa->seg_idx * EAL_PAGE_SIZE);
        if (addr == MAP_FAILED || addr != ms->addr)
                return -1;
        wa->seg_idx++;

        return 0;
}

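/*
 * Secondary-process attach: re-open each contigmem device and map its
 * buffers at the same virtual addresses the primary process recorded in the
 * shared memseg lists.
 */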
int
rte_eal_hugepage_attach(void)
{
        const struct hugepage_info *hpi;
        int fd_hugepage = -1;
        unsigned int i;

        hpi = &internal_config.hugepage_info[0];

        for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
                const struct hugepage_info *cur_hpi = &hpi[i];
                struct attach_walk_args wa;

                memset(&wa, 0, sizeof(wa));

                /* Obtain a file descriptor for contiguous memory */
                fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
                if (fd_hugepage < 0) {
                        RTE_LOG(ERR, EAL, "Could not open %s\n",
                                        cur_hpi->hugedir);
                        goto error;
                }
                wa.fd_hugepage = fd_hugepage;
                wa.seg_idx = 0;

                /* Map the contiguous memory into each memory segment */
                if (rte_memseg_walk(attach_segment, &wa) < 0) {
                        RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
                                wa.seg_idx, cur_hpi->hugedir);
                        goto error;
                }

                close(fd_hugepage);
                fd_hugepage = -1;
        }

        /* hugepage_info is no longer required */
        return 0;

error:
        if (fd_hugepage >= 0)
                close(fd_hugepage);
        return -1;
}

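/* Physical addresses are never used directly on FreeBSD. */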
int
rte_eal_using_phys_addrs(void)
{
        return 0;
}

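/*
 * Compute how much address space a single memseg list may cover, given the
 * page size and the remaining memory budget, rounded to a page boundary.
 */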
static uint64_t
get_mem_amount(uint64_t page_sz, uint64_t max_mem)
{
        uint64_t area_sz, max_pages;

        /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
        max_pages = RTE_MAX_MEMSEG_PER_LIST;
        max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);

        area_sz = RTE_MIN(page_sz * max_pages, max_mem);

        /* make sure the list isn't smaller than the page size */
        area_sz = RTE_MAX(area_sz, page_sz);

        return RTE_ALIGN(area_sz, page_sz);
}

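/*
 * Initialize the fbarray backing a memseg list for the given page size,
 * socket and per-type list index; VA space is reserved separately by
 * alloc_va_space().
 */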
#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
static int
alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
                int n_segs, int socket_id, int type_msl_idx)
{
        char name[RTE_FBARRAY_NAME_LEN];

        snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
                 type_msl_idx);
        if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
                        sizeof(struct rte_memseg))) {
                RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
                        rte_strerror(rte_errno));
                return -1;
        }

        msl->page_sz = page_sz;
        msl->socket_id = socket_id;
        msl->base_va = NULL;

        RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
                        (size_t)page_sz >> 10, socket_id);

        return 0;
}

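/*
 * Reserve contiguous virtual address space large enough to hold every
 * segment in the list, and record the resulting base address in the list.
 */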
static int
alloc_va_space(struct rte_memseg_list *msl)
{
        uint64_t page_sz;
        size_t mem_sz;
        void *addr;
        int flags = 0;

#ifdef RTE_ARCH_PPC_64
        flags |= MAP_HUGETLB;
#endif

        page_sz = msl->page_sz;
        mem_sz = page_sz * msl->memseg_arr.len;

        addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
        if (addr == NULL) {
                if (rte_errno == EADDRNOTAVAIL)
                        RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
                                (unsigned long long)mem_sz, msl->base_va);
                else
                        RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
                return -1;
        }
        msl->base_va = addr;

        return 0;
}

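/*
 * Pre-create memseg lists for each hugepage size in the primary process,
 * sized to what the contigmem driver can actually provide (plus room for
 * holes between non-contiguous segments), and reserve VA space for them.
 */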
static int
memseg_primary_init(void)
{
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        int hpi_idx, msl_idx = 0;
        struct rte_memseg_list *msl;
        uint64_t max_mem, total_mem;

        /* no-huge does not need this at all */
        if (internal_config.no_hugetlbfs)
                return 0;

        /* FreeBSD has an issue where core dump will dump the entire memory
         * contents, including anonymous zero-page memory. Therefore, while we
         * will be limiting total amount of memory to RTE_MAX_MEM_MB, we will
         * also be further limiting total memory amount to whatever memory is
         * available to us through contigmem driver (plus spacing blocks).
         *
         * So, at each stage, we will be checking how much memory we are
         * preallocating, and adjusting all the values accordingly.
         */

        max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
        total_mem = 0;

        /* create memseg lists */
        for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
                        hpi_idx++) {
                uint64_t max_type_mem, total_type_mem = 0;
                uint64_t avail_mem;
                int type_msl_idx, max_segs, avail_segs, total_segs = 0;
                struct hugepage_info *hpi;
                uint64_t hugepage_sz;

                hpi = &internal_config.hugepage_info[hpi_idx];
                hugepage_sz = hpi->hugepage_sz;

                /* no NUMA support on FreeBSD */

                /* check if we've already exceeded total memory amount */
                if (total_mem >= max_mem)
                        break;

                /* first, calculate theoretical limits according to config */
                max_type_mem = RTE_MIN(max_mem - total_mem,
                        (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
                max_segs = RTE_MAX_MEMSEG_PER_TYPE;

                /* now, limit all of that to whatever will actually be
                 * available to us, because without dynamic allocation support,
                 * all of that extra memory will be sitting there being useless
                 * and slowing down core dumps in case of a crash.
                 *
                 * we need (N*2)-1 segments because we cannot guarantee that
                 * each segment will be IOVA-contiguous with the previous one,
                 * so we will allocate more and put spaces in between segments
                 * that are non-contiguous.
                 */
                avail_segs = (hpi->num_pages[0] * 2) - 1;
                avail_mem = avail_segs * hugepage_sz;

                max_type_mem = RTE_MIN(avail_mem, max_type_mem);
                max_segs = RTE_MIN(avail_segs, max_segs);

                type_msl_idx = 0;
                while (total_type_mem < max_type_mem &&
                                total_segs < max_segs) {
                        uint64_t cur_max_mem, cur_mem;
                        unsigned int n_segs;

                        if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
                                RTE_LOG(ERR, EAL,
                                        "No more space in memseg lists, please increase %s\n",
                                        RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
                                return -1;
                        }

                        msl = &mcfg->memsegs[msl_idx++];

                        cur_max_mem = max_type_mem - total_type_mem;

                        cur_mem = get_mem_amount(hugepage_sz,
                                        cur_max_mem);
                        n_segs = cur_mem / hugepage_sz;

                        if (alloc_memseg_list(msl, hugepage_sz, n_segs,
                                        0, type_msl_idx))
                                return -1;

                        total_segs += msl->memseg_arr.len;
                        total_type_mem = total_segs * hugepage_sz;
                        type_msl_idx++;

                        if (alloc_va_space(msl)) {
                                RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
                                return -1;
                        }
                }
                total_mem += total_type_mem;
        }
        return 0;
}

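/*
 * In a secondary process, attach to the fbarrays created by the primary and
 * reserve matching VA space for each non-empty memseg list.
 */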
static int
memseg_secondary_init(void)
{
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        int msl_idx = 0;
        struct rte_memseg_list *msl;

        for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {

                msl = &mcfg->memsegs[msl_idx];

                /* skip empty memseg lists */
                if (msl->memseg_arr.len == 0)
                        continue;

                if (rte_fbarray_attach(&msl->memseg_arr)) {
                        RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
                        return -1;
                }

                /* preallocate VA space */
                if (alloc_va_space(msl)) {
                        RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
                        return -1;
                }
        }

        return 0;
}

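/* Entry point: dispatch to the primary or secondary initialization path. */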
int
rte_eal_memseg_init(void)
{
        return rte_eal_process_type() == RTE_PROC_PRIMARY ?
                        memseg_primary_init() :
                        memseg_secondary_init();
}