2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * physmem.c: Unix physical memory
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vlib/unix/physmem.h>
/* Single file-scope instance of the Unix physmem state.  The functions in
   this file reach it through a local pointer named pm. */
42 static physmem_main_t physmem_main;
/* Allocate n_bytes from pm->heap, aligned to at least CLIB_CACHE_LINE_BYTES,
   checking that the first and last byte of the block fall in the same page
   of size 2^vpm->log2_n_bytes_per_page.

   Returns pm->heap + lo_offset, or 0 when lo_offset is ~0.

   NOTE(review): this listing omits some lines of the function (the return
   type, the declarations of to_free and i, the tail of the
   mheap_get_aligned call, and the statements controlled by the checks
   below), so these comments describe only what the visible lines show. */
45 unix_physmem_alloc_aligned (vlib_physmem_main_t * vpm, uword n_bytes, uword alignment)
47 physmem_main_t * pm = &physmem_main;
48 uword lo_offset, hi_offset;
/* NOTE(review): whatever condition guards this warning is not visible here. */
52 clib_warning ("unsafe alloc!");
55 /* IO memory is always at least cache aligned. */
56 alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES);
/* Ask the heap for an aligned block; the remaining arguments of this call
   are on lines not shown in this listing. */
60 mheap_get_aligned (pm->heap, n_bytes,
61 /* align */ alignment,
65 /* Allocation failed? */
69 /* Make sure allocation does not span DMA physical chunk boundary. */
70 hi_offset = lo_offset + n_bytes - 1;
/* Equal page indices for the first and last byte mean the block stays
   inside a single page. */
72 if ((lo_offset >> vpm->log2_n_bytes_per_page) ==
73 (hi_offset >> vpm->log2_n_bytes_per_page))
76 /* Allocation would span chunk boundary, queue it to be freed as soon as
77 we find suitable chunk. */
78 vec_add1 (to_free, lo_offset);
/* Hand every queued (boundary-spanning) block back to the heap. */
84 for (i = 0; i < vec_len (to_free); i++)
85 mheap_put (pm->heap, to_free[i]);
/* ~0 in lo_offset yields a null return; any other value is used as a byte
   offset from pm->heap. */
89 return lo_offset != ~0 ? pm->heap + lo_offset : 0;
/* Free hook installed as vm->os_physmem_free by unix_physmem_init.
   Converts the pointer x into an offset from pm->heap and returns that
   offset to the heap with mheap_put.

   NOTE(review): x is a void *, so the subtraction below relies on the
   compiler accepting arithmetic on void pointers (a GNU C extension) --
   confirm against the declared type of pm->heap. */
92 static void unix_physmem_free (void * x)
94 physmem_main_t * pm = &physmem_main;
96 /* Return object to region's heap. */
97 mheap_put (pm->heap, x - pm->heap);
/* Exit hook (registered with atexit by htlb_init): asks the kernel to
   remove the shared memory segment identified by pm->shmid.

   NOTE(review): the lines between the declaration and the shmctl call are
   not visible in this listing, so any guard on pm->shmid cannot be
   confirmed from here. */
100 static void htlb_shutdown(void)
102 physmem_main_t * pm = &physmem_main;
106 shmctl (pm->shmid, IPC_RMID, 0);
/* Set up physmem backed by a huge-page System V shared memory segment.

   Visible steps: create a SHM_HUGETLB segment of pm->mem_size bytes, attach
   it, zero it, record the page geometry in vm->physmem_main, build an mheap
   over the region, then read /proc/self/pagemap to fill vpm->page_table
   with one physical address per huge page.  htlb_shutdown is registered
   with atexit at the end.

   The result is tested as a boolean by unix_physmem_init, which prints
   "use huge pages" when it is non-zero.

   NOTE(review): the return statements, the declarations of fd, i, pfn and
   seek_loc, the assignment of pagesize, the advance of cur, and the
   conditions guarding the error paths are on lines not visible in this
   listing. */
110 /* try to use huge TLB pgs if possible */
111 static int htlb_init (vlib_main_t * vm)
113 vlib_physmem_main_t * vpm = &vm->physmem_main;
114 physmem_main_t * pm = &physmem_main;
115 u64 hugepagesize, pagesize;
117 u64 cur, physaddr, ptbits;
/* Fixed IPC key (11); the segment is requested with huge-page backing and
   read/write permission. */
120 pm->shmid = shmget (11 /* key, my amp goes to 11 */, pm->mem_size,
121 IPC_CREAT | SHM_HUGETLB | SHM_R | SHM_W);
124 clib_unix_warning ("shmget");
/* NOTE(review): shmat reports failure as (void *) -1; the check that
   follows this call is not visible here -- confirm it tests for that. */
128 pm->mem = shmat (pm->shmid, NULL, 0 /* flags */);
/* Removes the segment again; presumably the shmat failure path. */
131 shmctl (pm->shmid, IPC_RMID, 0);
135 memset (pm->mem, 0, pm->mem_size);
137 /* $$$ get page size info from /proc/meminfo */
/* Huge page size is hard-coded to 2 MiB rather than queried. */
138 hugepagesize = 2<<20;
140 vpm->log2_n_bytes_per_page = min_log2 (hugepagesize);
/* One page_table slot per huge page in the region. */
141 vec_resize (vpm->page_table, pm->mem_size / hugepagesize);
143 vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page);
144 vpm->virtual.start = pointer_to_uword (pm->mem);
145 vpm->virtual.size = pm->mem_size;
146 vpm->virtual.end = vpm->virtual.start + vpm->virtual.size;
148 fd = open("/proc/self/pagemap", O_RDONLY);
/* Detaches the segment; presumably the open() failure path. */
152 (void) shmdt (pm->mem);
156 pm->heap = mheap_alloc_with_flags
157 (pm->mem, pm->mem_size,
158 /* Don't want mheap mmap/munmap with IO memory. */
159 MHEAP_FLAG_DISABLE_VM);
161 cur = pointer_to_uword(pm->mem);
/* Walk the virtual range of the region, translating addresses through the
   pagemap file. */
164 while (cur < pointer_to_uword(pm->mem) + pm->mem_size)
/* cur is a virtual address, so pfn here is the virtual page index; it
   selects the 8-byte pagemap entry at offset pfn * sizeof (u64). */
166 pfn = (u64) cur / pagesize;
167 seek_loc = pfn * sizeof (u64);
168 if (lseek (fd, seek_loc, SEEK_SET) != seek_loc)
170 clib_unix_warning ("lseek to 0x%llx", seek_loc);
171 shmctl (pm->shmid, IPC_RMID, 0);
175 if (read (fd, &ptbits, sizeof (ptbits)) != (sizeof(ptbits)))
177 clib_unix_warning ("read ptbits");
178 shmctl (pm->shmid, IPC_RMID, 0);
/* NOTE(review): whether fd is closed on the two error paths above is not
   visible in this listing. */
183 /* bits 0-54 are the physical page number */
184 physaddr = (ptbits & 0x7fffffffffffffULL) * pagesize;
186 fformat(stderr, "pm: virtual 0x%llx physical 0x%llx\n",
188 vpm->page_table[i++] = physaddr;
/* Make sure the segment is removed when the process exits. */
193 atexit (htlb_shutdown);
/* Application hook for physmem setup.  The declaration carries the weak
   attribute, so a definition supplied elsewhere can take precedence over
   the one that starts below.  unix_physmem_init calls this hook with
   (vm, pm, physical_memory_required) and tests the result for non-zero.

   NOTE(review): the body of the default definition is not visible in this
   listing. */
197 int vlib_app_physmem_init (vlib_main_t * vm,
198 physmem_main_t * pm, int) __attribute__ ((weak));
199 int vlib_app_physmem_init (vlib_main_t * vm, physmem_main_t * pm, int x)
/* Initialise Unix physmem support for vm.

   Installs unix_physmem_alloc_aligned / unix_physmem_free as the vm's
   physmem hooks, defaults pm->mem_size to 16 MiB when it is 0, and then
   tries, in the order the visible lines appear: the application hook
   (vlib_app_physmem_init), huge pages (htlb_init, unless pm->no_hugepages is
   set), and finally /dev/uio-dma.  When that device cannot be opened and
   physical memory is not required, an anonymous private mapping is used as
   "fake" DMA memory.

   physical_memory_required: forwarded to the application hook and consulted
   before reporting the /dev/uio-dma open failure as an error.

   Returns a clib_error_t * (initialised to 0 below).

   NOTE(review): the return statements, goto labels and several branch
   bodies are on lines not visible in this listing. */
204 clib_error_t * unix_physmem_init (vlib_main_t * vm, int physical_memory_required)
206 vlib_physmem_main_t * vpm = &vm->physmem_main;
207 physmem_main_t * pm = &physmem_main;
208 clib_error_t * error = 0;
209 char * dev_uio_dma_file = "/dev/uio-dma";
210 int using_fake_memory = 0;
212 /* Avoid multiple calls. */
/* A hook that is already installed means initialisation already ran. */
213 if (vm->os_physmem_alloc_aligned)
216 vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned;
217 vm->os_physmem_free = unix_physmem_free;
/* Sentinel so the cleanup code below can tell whether munmap is needed. */
218 pm->mem = MAP_FAILED;
/* Default region size: 16 MiB. */
220 if (pm->mem_size == 0)
221 pm->mem_size = 16 << 20;
223 /* OK, Mr. App, you tell us */
224 if (vlib_app_physmem_init (vm, pm, physical_memory_required))
227 if (physical_memory_required)
229 if (!pm->no_hugepages && htlb_init(vm))
231 fformat(stderr, "%s: use huge pages\n", __FUNCTION__);
234 pm->uio_dma_fd = open (dev_uio_dma_file, O_RDWR);
239 if (pm->uio_dma_fd < 0)
241 if (physical_memory_required)
243 error = clib_error_return_unix (0, "open `%s'", dev_uio_dma_file);
/* Fallback: ordinary anonymous memory stands in for DMA memory. */
247 using_fake_memory = 1;
248 pm->mem = mmap (0, pm->mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
249 if (pm->mem == MAP_FAILED)
251 error = clib_error_return_unix (0, "mmap");
255 pm->heap = mheap_alloc (pm->mem, pm->mem_size);
257 /* Identity map with a single page. */
/* The single page_table entry is the region's own virtual address.
   NOTE(review): presumably min_log2 rounds down, in which case a mem_size
   that is not a power of two gives a "page" smaller than the region --
   confirm. */
258 vpm->log2_n_bytes_per_page = min_log2 (pm->mem_size);
259 vec_add1 (vpm->page_table, pointer_to_uword (pm->mem));
/* Presumably the branch taken when /dev/uio-dma did open; the controlling
   else is not visible here. */
262 error = clib_error_return (0, "uio_dma deprecated");
264 vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page);
265 vpm->virtual.start = pointer_to_uword (pm->mem);
266 vpm->virtual.size = pm->mem_size;
267 vpm->virtual.end = vpm->virtual.start + vpm->virtual.size;
269 if (using_fake_memory)
270 fformat(stderr, "%s: use fake dma pages\n", __FUNCTION__);
272 fformat(stderr, "%s: use uio dma pages\n", __FUNCTION__);
/* Cleanup: unmap the region if one was mapped and close the device if it
   was opened.  Presumably reached only on error; the label or condition is
   not visible here. */
277 if (pm->mem != MAP_FAILED)
278 munmap (pm->mem, pm->mem_size);
279 if (pm->uio_dma_fd >= 0)
281 close (pm->uio_dma_fd);
/* CLI handler for "show physmem".  The visible lines produce one of three
   outputs: a notice that the command is not supported with DPDK drivers, a
   non-verbose format_mheap dump of pm->heap, or "No physmem allocated.".

   NOTE(review): the conditions selecting between these outputs and the
   return statement are on lines not visible in this listing. */
288 static clib_error_t *
289 show_physmem (vlib_main_t * vm,
290 unformat_input_t * input,
291 vlib_cli_command_t * cmd)
294 vlib_cli_output (vm, "Not supported with DPDK drivers.");
296 physmem_main_t * pm = &physmem_main;
299 vlib_cli_output (vm, "%U", format_mheap, pm->heap, /* verbose */ 0);
301 vlib_cli_output (vm, "No physmem allocated.");
/* Registers the "show physmem" CLI path with show_physmem as its handler. */
306 VLIB_CLI_COMMAND (show_physmem_command, static) = {
307 .path = "show physmem",
308 .short_help = "Show physical memory allocation",
309 .function = show_physmem,
/* CLI handler for "show affinity".  Reads this process's CPU affinity mask
   with sched_getaffinity, scans CPUs 0..63, and builds a string s that
   describes the set CPUs as runs ("first-last"), printed as
   "Process runs on: ...".

   first_set_bit_in_run / last_set_bit_in_run track the run currently being
   scanned; -1 means no run is open.

   NOTE(review): the declarations of set, i, rv and s, the error-branch
   condition, any separator handling between runs, and the return statement
   are on lines not visible in this listing. */
312 static clib_error_t *
313 show_affinity (vlib_main_t * vm,
314 unformat_input_t * input,
315 vlib_cli_command_t * cmd)
318 cpu_set_t *setp = &set;
321 int first_set_bit_in_run = -1;
322 int last_set_bit_in_run = -1;
325 rv = sched_getaffinity (0 /* pid, 0 = this proc */,
326 sizeof (*setp), setp);
329 vlib_cli_output (vm, "Couldn't get affinity mask: %s\n",
/* Only CPU indices 0..63 are examined. */
334 for (i = 0; i < 64; i++)
336 if (CPU_ISSET(i, setp))
338 if (first_set_bit_in_run == -1)
/* A new run starts: remember it and emit "<first>-" optimistically. */
340 first_set_bit_in_run = i;
341 last_set_bit_in_run = i;
344 s = format (s, "%d-", i);
349 if (i == (last_set_bit_in_run+1))
350 last_set_bit_in_run = i;
/* CPU i is not set: close the run that was open, if any. */
355 if (first_set_bit_in_run != -1)
/* The run consisted of a single CPU, so the "<first>-" text emitted above
   is trimmed again before the number is re-appended below. */
357 if (first_set_bit_in_run == (i-1))
/* NOTE(review): "%d-" is 2 characters for CPUs 0..9 and 3 for CPUs 10..63,
   but this expression yields 4 or more once first_set_bit_in_run >= 20, so
   it appears to trim more than was appended for those CPUs -- confirm. */
359 _vec_len (s) -= 2 + ((first_set_bit_in_run/10));
361 s = format (s, "%d", last_set_bit_in_run);
362 first_set_bit_in_run = -1;
363 last_set_bit_in_run = -1;
/* A run still open after the loop reached CPU 63.
   NOTE(review): this appends first_set_bit_in_run after the "<first>-"
   already emitted; last_set_bit_in_run looks like the intended value --
   confirm. */
368 if (first_set_bit_in_run != -1)
369 s = format (s, "%d", first_set_bit_in_run);
371 vlib_cli_output (vm, "Process runs on: %v", s);
/* Registers the "show affinity" CLI path with show_affinity as its handler. */
375 VLIB_CLI_COMMAND (show_affinity_command, static) = {
376 .path = "show affinity",
377 .short_help = "Show process cpu affinity",
378 .function = show_affinity,
/* CLI handler for "set affinity".  Clears a cpu_set_t, fills it from the
   command input, applies it to this process with sched_setaffinity, and
   finishes by calling show_affinity to display the resulting mask.

   Accepted input forms, tried in this order on each pass of the do/while
   loop: "a-b,", "a-b", "a,", "a".

   NOTE(review): the declarations of set, first, last, i, rv and
   another_round, the CPU_SET calls inside the range loops, the updates of
   another_round, and the error-branch conditions are on lines not visible
   in this listing. */
381 static clib_error_t *
382 set_affinity (vlib_main_t * vm,
383 unformat_input_t * input,
384 vlib_cli_command_t * cmd)
387 cpu_set_t *setp = &set;
/* Start from an empty CPU set. */
392 memset (setp, 0, sizeof (*setp));
396 if (unformat (input, "%d-%d,", &first, &last))
/* NOTE(review): this bound accepts 64, whereas show_affinity only scans
   CPUs 0..63; no lower-bound check is visible either -- confirm the
   intended range. */
398 if (first > 64 || last > 64)
401 vlib_cli_output (vm, "range %d-%d invalid", first, last);
405 for (i = first; i <= last; i++)
409 else if (unformat (input, "%d-%d", &first, &last))
411 if (first > 64 || last > 64)
414 for (i = first; i <= last; i++)
417 else if (unformat (input, "%d,", &first))
422 vlib_cli_output (vm, "cpu %d invalid", first);
425 CPU_SET(first, setp);
428 else if (unformat (input, "%d", &first))
433 CPU_SET(first, setp);
435 } while (another_round);
437 rv = sched_setaffinity (0 /* pid, 0 = this proc */,
438 sizeof (*setp), setp);
/* NOTE(review): this message says "get" although the call above is
   sched_setaffinity; it matches the text used in show_affinity. */
442 vlib_cli_output (vm, "Couldn't get affinity mask: %s\n",
/* Report the mask now in effect. */
446 return show_affinity (vm, input, cmd);
/* Registers the "set affinity" CLI path with set_affinity as its handler. */
449 VLIB_CLI_COMMAND (set_affinity_command, static) = {
450 .path = "set affinity",
451 .short_help = "Set process cpu affinity",
452 .function = set_affinity,
/* Configuration parser for the "physmem" section.

   Recognised tokens:
     no-huge | no-huge-pages      sets pm->no_hugepages
     size-in-mb <n> | size <n>    sets pm->mem_size to n megabytes

   Any other input results in unformat_parse_error being returned.

   NOTE(review): the declaration of size_in_mb and the final return
   statement are on lines not visible in this listing. */
455 static clib_error_t *
456 vlib_physmem_configure (vlib_main_t * vm, unformat_input_t * input)
458 physmem_main_t * pm = &physmem_main;
461 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
463 if (unformat (input, "no-huge") || unformat (input, "no-huge-pages"))
464 pm->no_hugepages = 1;
466 else if (unformat(input, "size-in-mb %d", &size_in_mb) ||
467 unformat(input, "size %d", &size_in_mb))
/* Megabytes to bytes.
   NOTE(review): if size_in_mb is a 32-bit integer this shift overflows for
   sizes of a few thousand megabytes -- confirm against its declaration. */
468 pm->mem_size = size_in_mb << 20;
470 return unformat_parse_error (input);
/* Release the input once every token has been consumed. */
473 unformat_free (input);
/* Registers vlib_physmem_configure as the early config handler for the
   "physmem" section name. */
477 VLIB_EARLY_CONFIG_FUNCTION (vlib_physmem_configure, "physmem");