2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
18 Permission is hereby granted, free of charge, to any person obtaining
19 a copy of this software and associated documentation files (the
20 "Software"), to deal in the Software without restriction, including
21 without limitation the rights to use, copy, modify, merge, publish,
22 distribute, sublicense, and/or sell copies of the Software, and to
23 permit persons to whom the Software is furnished to do so, subject to
24 the following conditions:
26 The above copyright notice and this permission notice shall be
27 included in all copies or substantial portions of the Software.
29 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 #include <vppinfra/bitops.h>
39 #include <vppinfra/hash.h>
40 #include <vppinfra/format.h>
41 #include <vppinfra/mheap.h>
42 #include <vppinfra/os.h>
43 #include <vppinfra/time.h>
46 #include <vppinfra/elf_clib.h>
/* Forward declarations for the allocation-trace helpers; their bodies are
   defined later in the file (not visible in this chunk). */
49 static void mheap_get_trace (void *v, uword offset, uword size);
50 static void mheap_put_trace (void *v, uword offset, uword size);
51 static int mheap_trace_sort (const void *t1, const void *t2);
/* Acquire the heap lock iff the heap was created thread-safe.
   Recursive acquisition by the owning thread is supported via
   recursion_count.
   NOTE(review): this extraction is missing interior lines (braces, the
   recursive-entry increment/early-return path) — comments describe intent
   only; confirm against the full source. */
54 mheap_maybe_lock (void *v)
56 mheap_t *h = mheap_header (v);
57 if (v && (h->flags & MHEAP_FLAG_THREAD_SAFE))
59 u32 my_cpu = os_get_thread_index ();
/* Re-entrant call by current owner: presumably bumps recursion_count and
   returns without spinning — TODO confirm (lines missing here). */
60 if (h->owner_cpu == my_cpu)
/* Spin until the test-and-set wins the lock. */
66 while (__sync_lock_test_and_set (&h->lock, 1))
69 h->owner_cpu = my_cpu;
70 h->recursion_count = 1;
/* Release the heap lock iff the heap is thread-safe; only the owning
   thread may unlock, and the lock is released only when the recursion
   count drops to zero.
   NOTE(review): lines missing in this extraction (braces, the store that
   clears h->lock after the barrier) — confirm against full source. */
75 mheap_maybe_unlock (void *v)
77 mheap_t *h = mheap_header (v);
78 if (v && h->flags & MHEAP_FLAG_THREAD_SAFE)
80 ASSERT (os_get_thread_index () == h->owner_cpu);
81 if (--h->recursion_count == 0)
/* Barrier orders prior heap writes before the (missing) lock release. */
84 CLIB_MEMORY_BARRIER ();
90 /* Find bin for objects with size at least n_user_data_bytes. */
/* Small sizes map linearly to bins; larger sizes map logarithmically
   (via max_log2). NOTE(review): lines assigning n_user_data_words,
   small_bin and large_bin are partially missing in this extraction. */
92 user_data_size_to_bin_index (uword n_user_data_bytes)
94 uword n_user_data_words;
95 word small_bin, large_bin;
97 /* User size must be at least big enough to hold free elt. */
98 n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
100 /* Round to words. */
102 (round_pow2 (n_user_data_bytes, MHEAP_USER_DATA_WORD_BYTES) /
103 MHEAP_USER_DATA_WORD_BYTES);
105 ASSERT (n_user_data_words > 0);
108 (MHEAP_MIN_USER_DATA_BYTES / MHEAP_USER_DATA_WORD_BYTES);
109 ASSERT (small_bin >= 0);
112 MHEAP_N_SMALL_OBJECT_BINS + max_log2 (n_user_data_bytes) -
113 MHEAP_LOG2_N_SMALL_OBJECT_BINS;
/* Choose the linear (small) bin when it fits, else the log (large) bin. */
115 return small_bin < MHEAP_N_SMALL_OBJECT_BINS ? small_bin : large_bin;
/* Convert total element size (header + user data) to user-data bytes. */
119 mheap_elt_size_to_user_n_bytes (uword n_bytes)
121 ASSERT (n_bytes >= sizeof (mheap_elt_t));
122 return (n_bytes - STRUCT_OFFSET_OF (mheap_elt_t, user_data));
/* Same conversion expressed in user-data words; marked unused to silence
   the compiler when no caller exists in a given build. */
125 always_inline uword __attribute__ ((unused))
126 mheap_elt_size_to_user_n_words (uword n_bytes)
128 ASSERT (n_bytes % MHEAP_USER_DATA_WORD_BYTES == 0);
129 return mheap_elt_size_to_user_n_bytes (n_bytes) /
130 MHEAP_USER_DATA_WORD_BYTES;
/* Set the size and free/used flag of the element at uoffset, and mirror
   both into the *next* element's back-pointer fields so the doubly-linked
   boundary-tag invariants hold. */
134 mheap_elt_set_size (void *v,
135 uword uoffset, uword n_user_data_bytes, uword is_free)
139 e = mheap_elt_at_uoffset (v, uoffset);
141 ASSERT (n_user_data_bytes % MHEAP_USER_DATA_WORD_BYTES == 0);
143 e->n_user_data = n_user_data_bytes / MHEAP_USER_DATA_WORD_BYTES;
144 e->is_free = is_free;
145 ASSERT (e->prev_n_user_data * sizeof (e->user_data[0]) >=
146 MHEAP_MIN_USER_DATA_BYTES);
/* Keep the following element's backward view consistent. */
148 n = mheap_next_elt (e);
149 n->prev_n_user_data = e->n_user_data;
150 n->prev_is_free = is_free;
/* Set the head of free-list `bin` and keep the non-empty-bin bitmap in
   sync: clear the bin's bit when the list becomes empty (MHEAP_GROUNDED),
   set it otherwise. */
154 set_first_free_elt_offset (mheap_t * h, uword bin, uword uoffset)
158 h->first_free_elt_uoffset_by_bin[bin] = uoffset;
/* i0 = word index into bitmap, i1 = bit mask within that word. */
160 i0 = bin / BITS (h->non_empty_free_elt_heads[0]);
161 i1 = (uword) 1 << (uword) (bin % BITS (h->non_empty_free_elt_heads[0]));
163 ASSERT (i0 < ARRAY_LEN (h->non_empty_free_elt_heads));
164 if (h->first_free_elt_uoffset_by_bin[bin] == MHEAP_GROUNDED)
165 h->non_empty_free_elt_heads[i0] &= ~i1;
167 h->non_empty_free_elt_heads[i0] |= i1;
/* Push the element at uoffset onto the front of the free list for the
   bin matching n_user_data_bytes, fixing up the doubly-linked free-list
   pointers. Caller must already have marked the element free. */
171 set_free_elt (void *v, uword uoffset, uword n_user_data_bytes)
173 mheap_t *h = mheap_header (v);
174 mheap_elt_t *e = mheap_elt_at_uoffset (v, uoffset);
175 mheap_elt_t *n = mheap_next_elt (e);
176 uword bin = user_data_size_to_bin_index (n_user_data_bytes);
178 ASSERT (n->prev_is_free);
/* New head: no predecessor; old head becomes successor. */
181 e->free_elt.prev_uoffset = MHEAP_GROUNDED;
182 e->free_elt.next_uoffset = h->first_free_elt_uoffset_by_bin[bin];
184 /* Fill in next free elt's previous pointer. */
185 if (e->free_elt.next_uoffset != MHEAP_GROUNDED)
187 mheap_elt_t *nf = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
188 ASSERT (nf->is_free);
189 nf->free_elt.prev_uoffset = uoffset;
192 set_first_free_elt_offset (h, bin, uoffset);
/* Create a fresh free element: stamp its size/free flag then link it into
   the appropriate free list. */
196 new_free_elt (void *v, uword uoffset, uword n_user_data_bytes)
198 mheap_elt_set_size (v, uoffset, n_user_data_bytes, /* is_free */ 1);
199 set_free_elt (v, uoffset, n_user_data_bytes);
/* Unlink element e from free list `bin`, splicing its neighbours together.
   NOTE(review): the conditionals guarding the three link-fixups (head vs.
   middle of list) are missing lines in this extraction. */
203 remove_free_elt (void *v, mheap_elt_t * e, uword bin)
205 mheap_t *h = mheap_header (v);
/* no/po: offsets of next/prev free elements (MHEAP_GROUNDED if none). */
213 no = e->free_elt.next_uoffset;
215 n = no != MHEAP_GROUNDED ? mheap_elt_at_uoffset (v, no) : 0;
216 po = e->free_elt.prev_uoffset;
217 p = po != MHEAP_GROUNDED ? mheap_elt_at_uoffset (v, po) : 0;
/* Removing the head: advance the bin's head pointer. */
220 set_first_free_elt_offset (h, bin, no);
222 p->free_elt.next_uoffset = no;
225 n->free_elt.prev_uoffset = po;
/* Convenience wrapper: derive the bin from the element's own size, then
   unlink it from that free list. */
229 remove_free_elt2 (void *v, mheap_elt_t * e)
232 bin = user_data_size_to_bin_index (mheap_elt_data_bytes (e));
233 remove_free_elt (v, e, bin);
/* Flag bits controlling mheap_vm(): map vs. unmap vs. neither, and whether
   the address range is rounded up or truncated to page boundaries. */
236 #define MHEAP_VM_MAP (1 << 0)
237 #define MHEAP_VM_UNMAP (1 << 1)
238 #define MHEAP_VM_NOMAP (0 << 1)
239 #define MHEAP_VM_ROUND (1 << 2)
240 #define MHEAP_VM_ROUND_UP MHEAP_VM_ROUND
241 #define MHEAP_VM_ROUND_DOWN (0 << 2)
/* Cached OS page size; initialized lazily in mheap_alloc_with_flags(). */
243 static uword mheap_page_size;
/* Round addr up to the next page boundary (assumes mheap_page_size is a
   power of two). */
245 static_always_inline uword
246 mheap_page_round (uword addr)
248 return (addr + mheap_page_size - 1) & ~(mheap_page_size - 1);
/* Truncate addr down to the containing page boundary. */
251 static_always_inline uword
252 mheap_page_truncate (uword addr)
254 return addr & ~(mheap_page_size - 1);
/* Map or unmap the page-aligned portion of [start_addr, start_addr+size)
   according to `flags`; returns the number of bytes actually (un)mapped.
   NOTE(review): the declaration/return of mapped_bytes and closing braces
   are missing lines in this extraction. */
257 static_always_inline uword
258 mheap_vm (void *v, uword flags, clib_address_t start_addr, uword size)
260 mheap_t *h = mheap_header (v);
261 clib_address_t start_page, end_page, end_addr;
264 ASSERT (!(h->flags & MHEAP_FLAG_DISABLE_VM));
266 end_addr = start_addr + size;
268 /* Round start/end address up to page boundary. */
269 start_page = mheap_page_round (start_addr);
271 if ((flags & MHEAP_VM_ROUND) == MHEAP_VM_ROUND_UP)
272 end_page = mheap_page_round (end_addr);
274 end_page = mheap_page_truncate (end_addr);
/* Only touch VM when at least one whole page is covered. */
277 if (end_page > start_page)
279 mapped_bytes = end_page - start_page;
280 if (flags & MHEAP_VM_MAP)
281 clib_mem_vm_map ((void *) start_page, end_page - start_page);
282 else if (flags & MHEAP_VM_UNMAP)
283 clib_mem_vm_unmap ((void *) start_page, end_page - start_page);
/* Apply mheap_vm() to the user-data span of the element at `offset`
   (from its user_data up to the start of the next element). */
289 static_always_inline uword
290 mheap_vm_elt (void *v, uword flags, uword offset)
293 clib_address_t start_addr, end_addr;
295 e = mheap_elt_at_uoffset (v, offset);
296 start_addr = (clib_address_t) ((void *) e->user_data);
297 end_addr = (clib_address_t) mheap_next_elt (e);
298 return mheap_vm (v, flags, start_addr, end_addr - start_addr);
/* Build a bitmask of small-object-cache slots whose bin byte equals `bin`,
   using 128-bit SIMD compares where available.
   NOTE(review): this extraction is missing most of the function (the
   scalar fallback, the mask assembly, the return) — do not rely on the
   fragment below for behavior. */
302 mheap_small_object_cache_mask (mheap_small_object_cache_t * c, uword bin)
306 /* $$$$ ELIOT FIXME: add Altivec version of this routine */
307 #if !defined (CLIB_HAVE_VEC128) || defined (__ALTIVEC__) || defined (__i386__)
310 u8x16 b = u8x16_splat (bin);
314 #define _(i) ((uword) u8x16_compare_byte_mask ((b == c->bins.as_u8x16[i])) << (uword) ((i)*16))
316 if (BITS (uword) > 32)
/* Try to satisfy an allocation from the small-object cache: find a cached
   slot for this bin (stored as bin+1 since 0 means empty), pop it, and
   return its offset; returns MHEAP_GROUNDED on miss.
   NOTE(review): the mask-nonzero guard, offset assignment and return are
   missing lines in this extraction. */
325 mheap_get_small_object (mheap_t * h, uword bin)
327 mheap_small_object_cache_t *c = &h->small_object_cache;
328 uword mask = mheap_small_object_cache_mask (c, bin + 1);
329 uword offset = MHEAP_GROUNDED;
333 uword i = min_log2 (mask);
334 uword o = c->offsets[i];
335 ASSERT (o != MHEAP_GROUNDED);
/* Mark slot empty (bin byte 0 == free slot). */
336 c->bins.as_u8[i] = 0;
/* Insert a freed small object into the cache. Prefer an empty slot
   (bin byte 0); otherwise cyclically evict an occupied slot and return
   the evicted offset so the caller can free it for real.
   NOTE(review): declarations of i/b/old_offset, the replacement-index
   wraparound, and the returns are missing lines in this extraction —
   presumably b = bin + 1 and MHEAP_GROUNDED is returned on the empty-slot
   path; confirm against full source. */
344 mheap_put_small_object (mheap_t * h, uword bin, uword offset)
346 mheap_small_object_cache_t *c = &h->small_object_cache;
347 uword free_mask = mheap_small_object_cache_mask (c, 0);
353 i = min_log2 (free_mask);
354 c->bins.as_u8[i] = b;
355 c->offsets[i] = offset;
359 /* Nothing free with right size: cyclic replacement. */
363 i = c->replacement_index++;
365 c->bins.as_u8[i] = b;
366 old_offset = c->offsets[i];
367 c->offsets[i] = offset;
369 /* Return old offset so it can be freed. */
/* Walk free list `bin` looking for an element large enough to hold
   *n_user_data_bytes_arg at the requested alignment/align_offset.
   On success: carves the free element into (optional) low free fragment,
   the allocated object, and (optional) high free fragment; maps any
   needed pages; updates *n_user_data_bytes_arg to the actual size; and
   presumably returns the object's offset (return line missing in this
   extraction). On failure returns MHEAP_GROUNDED.
   NOTE(review): the enclosing search loop header, several braces and the
   `bin` parameter's declaration line are missing from this extraction. */
375 mheap_get_search_free_bin (void *v,
377 uword * n_user_data_bytes_arg,
378 uword align, uword align_offset)
380 mheap_t *h = mheap_header (v);
383 /* Free object is at offset f0 ... f1;
384 Allocatted object is at offset o0 ... o1. */
385 word o0, o1, f0, f1, search_n_user_data_bytes;
386 word lo_free_usize, hi_free_usize;
388 ASSERT (h->first_free_elt_uoffset_by_bin[bin] != MHEAP_GROUNDED);
389 e = mheap_elt_at_uoffset (v, h->first_free_elt_uoffset_by_bin[bin]);
391 search_n_user_data_bytes = *n_user_data_bytes_arg;
393 /* Silence compiler warning. */
394 o0 = o1 = f0 = f1 = 0;
396 h->stats.free_list.n_search_attempts += 1;
398 /* Find an object that is large enough with correct alignment at given alignment offset. */
401 uword this_object_n_user_data_bytes = mheap_elt_data_bytes (e);
/* In small bins every element is the same size class, so it must fit. */
404 if (bin < MHEAP_N_SMALL_OBJECT_BINS)
405 ASSERT (this_object_n_user_data_bytes >= search_n_user_data_bytes);
407 h->stats.free_list.n_objects_searched += 1;
409 if (this_object_n_user_data_bytes < search_n_user_data_bytes)
412 /* Bounds of free object: from f0 to f1. */
413 f0 = ((void *) e->user_data - v);
414 f1 = f0 + this_object_n_user_data_bytes;
416 /* Place candidate object at end of free block and align as requested. */
417 o0 = ((f1 - search_n_user_data_bytes) & ~(align - 1)) - align_offset;
421 /* Make sure that first free fragment is either empty or
422 large enough to be valid. */
425 lo_free_usize = o0 != f0 ? o0 - f0 - MHEAP_ELT_OVERHEAD_BYTES : 0;
426 if (o0 <= f0 || lo_free_usize >= (word) MHEAP_MIN_USER_DATA_BYTES)
431 o1 = o0 + search_n_user_data_bytes;
/* Candidate fits entirely inside the free element: done searching. */
434 if (o0 >= f0 && o1 <= f1)
438 /* Reached end of free list without finding large enough object. */
439 if (e->free_elt.next_uoffset == MHEAP_GROUNDED)
440 return MHEAP_GROUNDED;
442 /* Otherwise keep searching for large enough object. */
443 e = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
447 /* Free fragment at end. */
448 hi_free_usize = f1 != o1 ? f1 - o1 - MHEAP_ELT_OVERHEAD_BYTES : 0;
450 /* If fragment at end is too small to be a new object,
451 give user's object a bit more space than requested. */
452 if (hi_free_usize < (word) MHEAP_MIN_USER_DATA_BYTES)
454 search_n_user_data_bytes += f1 - o1;
459 /* Need to make sure that relevant memory areas are mapped. */
460 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
462 mheap_elt_t *f0_elt = mheap_elt_at_uoffset (v, f0);
463 mheap_elt_t *f1_elt = mheap_elt_at_uoffset (v, f1);
464 mheap_elt_t *o0_elt = mheap_elt_at_uoffset (v, o0);
465 mheap_elt_t *o1_elt = mheap_elt_at_uoffset (v, o1);
467 uword f0_page_start, f0_page_end;
468 uword o0_page_start, o0_page_end;
470 /* Free elt is mapped. Addresses after that may not be mapped. */
471 f0_page_start = mheap_page_round (pointer_to_uword (f0_elt->user_data));
472 f0_page_end = mheap_page_truncate (pointer_to_uword (f1_elt));
474 o0_page_start = mheap_page_truncate (pointer_to_uword (o0_elt));
475 o0_page_end = mheap_page_round (pointer_to_uword (o1_elt->user_data));
/* Clamp the object's page range to the free element's mapped range. */
477 if (o0_page_start < f0_page_start)
478 o0_page_start = f0_page_start;
479 if (o0_page_end > f0_page_end)
480 o0_page_end = f0_page_end;
482 if (o0_page_end > o0_page_start)
483 clib_mem_vm_map (uword_to_pointer (o0_page_start, void *),
484 o0_page_end - o0_page_start);
487 /* Remove free object from free list. */
488 remove_free_elt (v, e, bin);
490 /* Free fragment at begining. */
491 if (lo_free_usize > 0)
493 ASSERT (lo_free_usize >= (word) MHEAP_MIN_USER_DATA_BYTES);
494 mheap_elt_set_size (v, f0, lo_free_usize, /* is_free */ 1);
495 new_free_elt (v, f0, lo_free_usize);
498 mheap_elt_set_size (v, o0, search_n_user_data_bytes, /* is_free */ 0);
500 if (hi_free_usize > 0)
502 uword uo = o1 + MHEAP_ELT_OVERHEAD_BYTES;
503 mheap_elt_set_size (v, uo, hi_free_usize, /* is_free */ 1);
504 new_free_elt (v, uo, hi_free_usize);
507 /* Return actual size of block. */
508 *n_user_data_bytes_arg = search_n_user_data_bytes;
510 h->stats.free_list.n_objects_found += 1;
515 /* Search free lists for object with given size and alignment. */
/* Top-level free-list search: first try the small-object cache (for
   trivially-aligned small requests), then scan the non-empty-bin bitmap
   from the request's bin upward, delegating to mheap_get_search_free_bin.
   Returns the object's offset, or MHEAP_GROUNDED if nothing fits.
   NOTE(review): the small-object size cutoff, the return of the cache hit,
   the foreach_set_bit body's assignment to r, and the loop increment are
   missing lines in this extraction. */
517 mheap_get_search_free_list (void *v,
518 uword * n_user_bytes_arg,
519 uword align, uword align_offset)
521 mheap_t *h = mheap_header (v);
522 uword bin, n_user_bytes, i, bi;
524 n_user_bytes = *n_user_bytes_arg;
525 bin = user_data_size_to_bin_index (n_user_bytes);
527 if (MHEAP_HAVE_SMALL_OBJECT_CACHE
528 && (h->flags & MHEAP_FLAG_SMALL_OBJECT_CACHE)
530 && align == STRUCT_SIZE_OF (mheap_elt_t, user_data[0])
531 && align_offset == 0)
533 uword r = mheap_get_small_object (h, bin);
534 h->stats.n_small_object_cache_attempts += 1;
535 if (r != MHEAP_GROUNDED)
537 h->stats.n_small_object_cache_hits += 1;
542 for (i = bin / BITS (uword); i < ARRAY_LEN (h->non_empty_free_elt_heads);
545 uword non_empty_bin_mask = h->non_empty_free_elt_heads[i];
547 /* No need to search smaller bins. */
548 if (i == bin / BITS (uword))
549 non_empty_bin_mask &= ~pow2_mask (bin % BITS (uword));
551 /* Search each occupied free bin which is large enough. */
553 foreach_set_bit (bi, non_empty_bin_mask,
556 mheap_get_search_free_bin (v, bi + i * BITS (uword),
560 if (r != MHEAP_GROUNDED) return r;
565 return MHEAP_GROUNDED;
/* Allocate by growing the heap vector: place the new object at the
   current end of the vector (aligning as requested, possibly leaving a
   small free fragment before it), fail with MHEAP_GROUNDED if that would
   exceed max_size, map pages as needed, and stamp the new terminal
   sentinel element.
   NOTE(review): the `align` parameter line, the assignment of f0, the
   vec-resize call, the guard around new_free_elt, and the final
   *offset_return/return lines are missing in this extraction. */
568 static never_inline void *
569 mheap_get_extend_vector (void *v,
570 uword n_user_data_bytes,
572 uword align_offset, uword * offset_return)
574 /* Bounds of free and allocated objects (as above). */
575 uword f0, f1, o0, o1;
577 mheap_t *h = mheap_header (v);
/* Empty heap: reserve room for the first element header. */
580 if (_vec_len (v) == 0)
582 _vec_len (v) = MHEAP_ELT_OVERHEAD_BYTES;
584 /* Create first element of heap. */
585 e = mheap_elt_at_uoffset (v, _vec_len (v));
586 e->prev_n_user_data = MHEAP_N_USER_DATA_INVALID;
591 o0 = round_pow2 (f0, align) - align_offset;
/* If alignment left a gap, it must be large enough to be a free elt. */
594 free_size = o0 - f0 - MHEAP_ELT_OVERHEAD_BYTES;
595 if (o0 == f0 || free_size >= (word) sizeof (mheap_elt_t))
601 o1 = o0 + n_user_data_bytes;
602 f1 = o1 + MHEAP_ELT_OVERHEAD_BYTES;
605 h = mheap_header (v);
607 /* Make sure we have space for object plus overhead. */
608 if (f1 > h->max_size)
610 *offset_return = MHEAP_GROUNDED;
616 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
618 mheap_elt_t *f0_elt = mheap_elt_at_uoffset (v, f0);
619 mheap_elt_t *f1_elt = mheap_elt_at_uoffset (v, f1);
621 uword f0_page = mheap_page_round (pointer_to_uword (f0_elt->user_data));
622 uword f1_page = mheap_page_round (pointer_to_uword (f1_elt->user_data));
624 if (f1_page > f0_page)
625 mheap_vm (v, MHEAP_VM_MAP, f0_page, f1_page - f0_page);
629 new_free_elt (v, f0, free_size);
631 mheap_elt_set_size (v, o0, n_user_data_bytes, /* is_free */ 0);
633 /* Mark last element. */
634 e = mheap_elt_at_uoffset (v, f1);
635 e->n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Public aligned-allocation entry point. Normalizes align/align_offset,
   rounds the request (see step comments below), lazily creates the heap
   if v is NULL, searches free lists, and falls back to extending the
   vector. Writes the object offset (or MHEAP_GROUNDED) to *offset_return
   and presumably returns the (possibly reallocated) vector pointer —
   return lines missing in this extraction.
   NOTE(review): declarations (h, offset, cpu_times), the `if (v == 0)`
   guard before the mheap_alloc fallback, validation calls, and several
   braces are missing lines here. */
643 mheap_get_aligned (void *v,
644 uword n_user_data_bytes,
645 uword align, uword align_offset, uword * offset_return)
651 cpu_times[0] = clib_cpu_time_now ();
/* Alignment is at least one user-data word and a power of two. */
653 align = clib_max (align, STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
654 align = max_pow2 (align);
656 /* Correct align offset to be smaller than alignment. */
657 align_offset &= (align - 1);
659 /* Align offset must be multiple of minimum object size. */
660 if (align_offset % STRUCT_SIZE_OF (mheap_elt_t, user_data[0]) != 0)
662 *offset_return = MHEAP_GROUNDED;
667 * Round requested size.
669 * Step 1: round up to the minimum object size.
670 * Step 2: round up to a multiple of the user data size (e.g. 4)
671 * Step 3: if non-trivial alignment requested, round up
672 * so that the object precisely fills a chunk
673 * as big as the alignment request.
675 * Step 3 prevents the code from going into "bin search hyperspace":
676 * looking at a huge number of fractional remainder chunks, none of which
677 * will satisfy the alignment constraint. This fixes an allocator
678 * performance issue when one requests a large number of 16 byte objects
679 * aligned to 64 bytes, to name one variation on the theme.
681 n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
683 round_pow2 (n_user_data_bytes,
684 STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
685 if (align > MHEAP_ELT_OVERHEAD_BYTES)
686 n_user_data_bytes = clib_max (n_user_data_bytes,
687 align - MHEAP_ELT_OVERHEAD_BYTES);
/* No heap yet: create a default 64 MB heap (guard line missing here). */
689 v = mheap_alloc (0, 64 << 20);
691 mheap_maybe_lock (v);
693 h = mheap_header (v);
695 if (h->flags & MHEAP_FLAG_VALIDATE)
698 /* First search free lists for object. */
700 mheap_get_search_free_list (v, &n_user_data_bytes, align, align_offset);
702 h = mheap_header (v);
704 /* If that fails allocate object at end of heap by extending vector. */
705 if (offset == MHEAP_GROUNDED && _vec_len (v) < h->max_size)
708 mheap_get_extend_vector (v, n_user_data_bytes, align, align_offset,
710 h = mheap_header (v);
711 h->stats.n_vector_expands += offset != MHEAP_GROUNDED;
714 *offset_return = offset;
715 if (offset != MHEAP_GROUNDED)
719 if (h->flags & MHEAP_FLAG_TRACE)
721 /* Recursion block for case when we are traceing main clib heap. */
722 h->flags &= ~MHEAP_FLAG_TRACE;
724 mheap_get_trace (v, offset, n_user_data_bytes);
726 h->flags |= MHEAP_FLAG_TRACE;
730 if (h->flags & MHEAP_FLAG_VALIDATE)
733 mheap_maybe_unlock (v);
735 cpu_times[1] = clib_cpu_time_now ();
736 h->stats.n_clocks_get += cpu_times[1] - cpu_times[0];
737 h->stats.n_gets += 1;
/* Free the last element in the heap by shrinking the vector, coalescing
   with an immediately preceding free element if present; unmaps the
   reclaimed pages unless VM is disabled.
   NOTE(review): the prev_is_free guard, the vec-length adjustments, and
   closing braces are missing lines in this extraction. */
743 free_last_elt (void *v, mheap_elt_t * e)
745 mheap_t *h = mheap_header (v);
747 /* Possibly delete preceeding free element also. */
750 e = mheap_prev_elt (e);
751 remove_free_elt2 (v, e);
/* Element is now the heap's first: heap becomes empty. */
754 if (e->prev_n_user_data == MHEAP_N_USER_DATA_INVALID)
756 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
757 mheap_vm_elt (v, MHEAP_VM_UNMAP, mheap_elt_uoffset (v, e));
762 uword uo = mheap_elt_uoffset (v, e);
763 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
764 mheap_vm_elt (v, MHEAP_VM_UNMAP, uo);
/* This element becomes the new terminal sentinel. */
765 e->n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Free the object at uoffset: optionally park it in the small-object
   cache (possibly receiving an evicted offset to free instead), validate
   boundary tags, then either delete the last element or mark the object
   free and coalesce with free neighbours; finally trace/validate/unlock
   and update put statistics.
   NOTE(review): declarations (h, e, n, cpu_times), the os_panic/error
   paths for corrupt tags and double free, the early-exit when the cache
   absorbed the object, h->n_elts accounting, and several braces are
   missing lines in this extraction. */
771 mheap_put (void *v, uword uoffset)
774 uword n_user_data_bytes, bin;
776 uword trace_uoffset, trace_n_user_data_bytes;
779 cpu_times[0] = clib_cpu_time_now ();
781 h = mheap_header (v);
783 mheap_maybe_lock (v);
785 if (h->flags & MHEAP_FLAG_VALIDATE)
788 ASSERT (h->n_elts > 0);
790 h->stats.n_puts += 1;
792 e = mheap_elt_at_uoffset (v, uoffset);
793 n = mheap_next_elt (e);
794 n_user_data_bytes = mheap_elt_data_bytes (e);
796 trace_uoffset = uoffset;
797 trace_n_user_data_bytes = n_user_data_bytes;
799 bin = user_data_size_to_bin_index (n_user_data_bytes);
/* bin < 255: bin must fit in the cache's u8 bin bytes (0 = empty). */
800 if (MHEAP_HAVE_SMALL_OBJECT_CACHE
801 && bin < 255 && (h->flags & MHEAP_FLAG_SMALL_OBJECT_CACHE))
/* Cache may return a different (evicted) offset to actually free. */
803 uoffset = mheap_put_small_object (h, bin, uoffset);
807 e = mheap_elt_at_uoffset (v, uoffset);
808 n = mheap_next_elt (e);
809 n_user_data_bytes = mheap_elt_data_bytes (e);
812 /* Assert that forward and back pointers are equal. */
813 if (e->n_user_data != n->prev_n_user_data)
816 /* Forward and backwards is_free must agree. */
817 if (e->is_free != n->prev_is_free)
820 /* Object was already freed. */
824 /* Special case: delete last element in heap. */
825 if (n->n_user_data == MHEAP_N_USER_DATA_INVALID)
826 free_last_elt (v, e)
830 uword f0, f1, n_combine;
833 f1 = f0 + n_user_data_bytes;
/* Coalesce with previous free neighbour. */
838 mheap_elt_t *p = mheap_prev_elt (e);
839 f0 = mheap_elt_uoffset (v, p);
840 remove_free_elt2 (v, p);
/* Coalesce with next free neighbour. */
846 mheap_elt_t *m = mheap_next_elt (n);
848 remove_free_elt2 (v, n);
853 mheap_elt_set_size (v, f0, f1 - f0, /* is_free */ 1);
855 e->is_free = n->prev_is_free = 1;
856 set_free_elt (v, f0, f1 - f0);
858 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
859 mheap_vm_elt (v, MHEAP_VM_UNMAP, f0);
863 h = mheap_header (v);
865 if (h->flags & MHEAP_FLAG_TRACE)
867 /* Recursion block for case when we are traceing main clib heap. */
868 h->flags &= ~MHEAP_FLAG_TRACE;
870 mheap_put_trace (v, trace_uoffset, trace_n_user_data_bytes);
872 h->flags |= MHEAP_FLAG_TRACE;
875 if (h->flags & MHEAP_FLAG_VALIDATE)
878 mheap_maybe_unlock (v);
880 cpu_times[1] = clib_cpu_time_now ();
881 h->stats.n_clocks_put += cpu_times[1] - cpu_times[0];
/* Create a new mheap inside `memory` (or VM-allocate memory_size bytes
   when memory is NULL). Page-aligns the header/vector, initializes the
   header, optionally unmaps the unused remainder, and grounds all free
   lists. Presumably returns the heap vector pointer (return lines missing
   in this extraction).
   NOTE(review): declarations (h, v, am/av/ah, size), the alignment test
   that decides whether to bump ah by a page, max_size assignment, the
   non-VM branch setting vm_alloc_* fields, and the failure return after
   clib_mem_vm_free are missing lines in this extraction. */
885 mheap_alloc_with_flags (void *memory, uword memory_size, uword flags)
/* Lazily cache the OS page size on first heap creation. */
891 if (!mheap_page_size)
892 mheap_page_size = clib_mem_get_page_size ();
896 /* No memory given, try to VM allocate some. */
897 memory = clib_mem_vm_alloc (memory_size);
901 /* No memory region implies we have virtual memory. */
902 flags &= ~MHEAP_FLAG_DISABLE_VM;
905 /* Make sure that given memory is page aligned. */
909 am = pointer_to_uword (memory);
910 av = mheap_page_round (am);
911 v = uword_to_pointer (av, void *);
912 h = mheap_header (v);
913 ah = pointer_to_uword (h);
/* Header not page aligned: shift up one page and recompute vector. */
915 ah += mheap_page_size;
917 h = uword_to_pointer (ah, void *);
918 v = mheap_vector (h);
920 if (PREDICT_FALSE (memory + memory_size < v))
923 * This will happen when the requested memory_size is too
924 * small to cope with the heap header and/or memory alignment.
926 clib_mem_vm_free (memory, memory_size);
930 size = memory + memory_size - v;
933 /* VM map header so we can use memory. */
934 if (!(flags & MHEAP_FLAG_DISABLE_VM))
935 clib_mem_vm_map (h, sizeof (h[0]));
937 /* Zero vector header: both heap header and vector length. */
938 memset (h, 0, sizeof (h[0]));
941 h->vm_alloc_offset_from_header = (void *) h - memory;
942 h->vm_alloc_size = memory_size;
947 /* Set flags based on those given less builtin-flags. */
948 h->flags |= (flags & ~MHEAP_FLAG_TRACE);
950 /* Unmap remainder of heap until we will be ready to use it. */
951 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
952 mheap_vm (v, MHEAP_VM_UNMAP | MHEAP_VM_ROUND_UP,
953 (clib_address_t) v, h->max_size);
955 /* Initialize free list heads to empty. */
956 memset (h->first_free_elt_uoffset_by_bin, 0xFF,
957 sizeof (h->first_free_elt_uoffset_by_bin));
/* Convenience constructors. Both set DISABLE_VM when caller supplied its
   own memory (guard lines missing in this extraction) and enable the
   small-object cache when 128-bit vectors are available. */
963 mheap_alloc (void *memory, uword size)
968 flags |= MHEAP_FLAG_DISABLE_VM;
970 #ifdef CLIB_HAVE_VEC128
971 flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
974 return mheap_alloc_with_flags (memory, size, flags);
/* As above, but optionally marks the heap thread-safe when `locked`. */
978 mheap_alloc_with_lock (void *memory, uword size, int locked)
984 flags |= MHEAP_FLAG_DISABLE_VM;
986 #ifdef CLIB_HAVE_VEC128
987 flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
990 rv = mheap_alloc_with_flags (memory, size, flags);
/* `locked` guard line missing here — presumably if (locked). */
994 mheap_t *h = mheap_header (rv);
995 h->flags |= MHEAP_FLAG_THREAD_SAFE;
/* Destroy a heap: release the underlying VM allocation using the recorded
   offset back to the original allocation base.
   NOTE(review): the v==0 guard and return are missing lines here. */
1001 _mheap_free (void *v)
1003 mheap_t *h = mheap_header (v);
1006 clib_mem_vm_free ((void *) h - h->vm_alloc_offset_from_header,
1012 /* Call user's function with each object in heap. */
/* Iterates over every element; when walking the current CLIB heap,
   temporarily switches allocation to a small stack heap so the callback
   can allocate without disturbing the heap being walked. Stops early if
   the callback returns nonzero.
   NOTE(review): the loop header's initialization of e, `break`, and
   closing braces are missing lines in this extraction. */
1014 mheap_foreach (void *v,
1015 uword (*func) (void *arg, void *v, void *elt_data,
1016 uword elt_size), void *arg)
1019 u8 *stack_heap, *clib_mem_mheap_save;
1020 u8 tmp_heap_memory[16 * 1024];
1022 mheap_maybe_lock (v);
1024 if (vec_len (v) == 0)
1027 clib_mem_mheap_save = 0;
1030 /* Allocate a new temporary heap on the stack.
1031 This is so that our hash table & user's callback function can
1032 themselves allocate memory somewhere without getting in the way
1033 of the heap we are looking at. */
1034 if (v == clib_mem_get_heap ())
1036 stack_heap = mheap_alloc (tmp_heap_memory, sizeof (tmp_heap_memory));
1037 clib_mem_mheap_save = v;
1038 clib_mem_set_heap (stack_heap);
1042 e->n_user_data != MHEAP_N_USER_DATA_INVALID; e = mheap_next_elt (e))
1044 void *p = mheap_elt_data (v, e);
/* Nonzero callback return terminates the walk. */
1047 if ((*func) (arg, v, p, mheap_elt_data_bytes (e)))
1051 /* Restore main CLIB heap. */
1052 if (clib_mem_mheap_save)
1053 clib_mem_set_heap (clib_mem_mheap_save);
1056 mheap_maybe_unlock (v);
1059 /* Bytes in mheap header overhead not including data bytes. */
1061 mheap_bytes_overhead (void *v)
1063 mheap_t *h = mheap_header (v);
1064 return v ? sizeof (h[0]) + h->n_elts * sizeof (mheap_elt_t) : 0;
1067 /* Total number of bytes including both data and overhead. */
1069 mheap_bytes (void *v)
1071 return mheap_bytes_overhead (v) + vec_bytes (v);
/* Fill *usage with used/free/reclaimed byte counts by walking every
   element. For free elements (accounting lines partially missing in this
   extraction) mheap_vm_elt with NOMAP is presumably used to measure how
   many free bytes are currently VM-unmapped — confirm against full source. */
1075 mheap_usage_no_lock (void *v, clib_mem_usage_t * usage)
1077 mheap_t *h = mheap_header (v);
1078 uword used = 0, free = 0, free_vm_unmapped = 0;
1080 if (vec_len (v) > 0)
1085 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1086 e = mheap_next_elt (e))
1088 uword size = mheap_elt_data_bytes (e);
1092 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
1094 mheap_vm_elt (v, MHEAP_VM_NOMAP, mheap_elt_uoffset (v, e));
1101 usage->object_count = mheap_elts (v);
1102 usage->bytes_total = mheap_bytes (v);
1103 usage->bytes_overhead = mheap_bytes_overhead (v);
1104 usage->bytes_max = mheap_max_size (v);
1105 usage->bytes_used = used;
1106 usage->bytes_free = free;
1107 usage->bytes_free_reclaimed = free_vm_unmapped;
/* Locked wrapper around the walk above. */
1111 mheap_usage (void *v, clib_mem_usage_t * usage)
1113 mheap_maybe_lock (v);
1114 mheap_usage_no_lock (v, usage);
1115 mheap_maybe_unlock (v);
/* Format a byte count plainly, or in kilobytes for large values
   (the threshold test line is missing in this extraction). */
1119 format_mheap_byte_count (u8 * s, va_list * va)
1121 uword n_bytes = va_arg (*va, uword);
1123 return format (s, "%wd", n_bytes);
1125 return format (s, "%wdk", n_bytes / 1024);
1128 /* Returns first corrupt heap element. */
/* Walks all elements checking the boundary-tag invariants (size and
   free-flag mirrored into the next element); returns the offending
   element, or 0 when the heap is consistent.
   NOTE(review): the loop header, the `return e` statements and the final
   `return 0` are missing lines in this extraction. */
1129 static mheap_elt_t *
1130 mheap_first_corrupt (void *v)
1134 if (vec_len (v) == 0)
1140 if (e->n_user_data == MHEAP_N_USER_DATA_INVALID)
1143 n = mheap_next_elt (e);
1145 if (e->n_user_data != n->prev_n_user_data)
1148 if (e->is_free != n->prev_is_free)
/* Format allocator statistics: small-object-cache hit rate, free-list
   search effectiveness, vector expansions, and clocks-per-call for
   gets/puts. NOTE(review): the initial `s = format (s, ...` lines and the
   return are missing in this extraction. */
1158 format_mheap_stats (u8 * s, va_list * va)
1160 mheap_t *h = va_arg (*va, mheap_t *);
1161 mheap_stats_t *st = &h->stats;
1162 u32 indent = format_get_indent (s);
1166 "alloc. from small object cache: %Ld hits %Ld attempts (%.2f%%) replacements %d",
1167 st->n_small_object_cache_hits, st->n_small_object_cache_attempts,
1168 (st->n_small_object_cache_attempts !=
1169 0 ? 100. * (f64) st->n_small_object_cache_hits /
1170 (f64) st->n_small_object_cache_attempts : 0.),
1171 h->small_object_cache.replacement_index);
1175 "\n%Ualloc. from free-list: %Ld attempts, %Ld hits (%.2f%%), %Ld considered (per-attempt %.2f)",
1176 format_white_space, indent, st->free_list.n_search_attempts,
1177 st->free_list.n_objects_found,
1178 (st->free_list.n_search_attempts !=
1179 0 ? 100. * (f64) st->free_list.n_objects_found /
1180 (f64) st->free_list.n_search_attempts : 0.),
1181 st->free_list.n_objects_searched,
1182 (st->free_list.n_search_attempts !=
1183 0 ? (f64) st->free_list.n_objects_searched /
1184 (f64) st->free_list.n_search_attempts : 0.));
1186 s = format (s, "\n%Ualloc. from vector-expand: %Ld",
1187 format_white_space, indent, st->n_vector_expands);
1189 s = format (s, "\n%Uallocs: %Ld %.2f clocks/call",
1190 format_white_space, indent,
1191 st->n_gets, (f64) st->n_clocks_get / (f64) st->n_gets);
1193 s = format (s, "\n%Ufrees: %Ld %.2f clocks/call",
1194 format_white_space, indent,
1195 st->n_puts, (f64) st->n_clocks_put / (f64) st->n_puts);
/* Full heap pretty-printer: summary line, then (verbose) a size
   histogram, allocator statistics, sorted allocation traces, the first
   corrupt element if any, and a per-element offset dump.
   NOTE(review): this extraction is missing declarations (h, i, size,
   indent, e, o), several loop headers, `continue`s, the `if (verbose)`
   guards and many braces — treat comments as intent only. */
1201 format_mheap (u8 * s, va_list * va)
1203 void *v = va_arg (*va, u8 *);
1204 int verbose = va_arg (*va, int);
1209 clib_mem_usage_t usage;
1210 mheap_elt_t *first_corrupt;
1212 mheap_maybe_lock (v);
1214 h = mheap_header (v);
1216 mheap_usage_no_lock (v, &usage);
1218 indent = format_get_indent (s);
1222 "%d objects, %U of %U used, %U free, %U reclaimed, %U overhead",
1223 usage.object_count, format_mheap_byte_count, usage.bytes_used,
1224 format_mheap_byte_count, usage.bytes_total,
1225 format_mheap_byte_count, usage.bytes_free,
1226 format_mheap_byte_count, usage.bytes_free_reclaimed,
1227 format_mheap_byte_count, usage.bytes_overhead;
1229 if (usage.bytes_max != ~0)
1230 s = format (s, ", %U capacity", format_mheap_byte_count, usage.bytes_max);
1232 /* Show histogram of sizes. */
1235 uword hist[MHEAP_N_BINS];
1239 memset (hist, 0, sizeof (hist));
1243 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1244 e = mheap_next_elt (e))
1246 uword n_user_data_bytes = mheap_elt_data_bytes (e);
1247 uword bin = user_data_size_to_bin_index (n_user_data_bytes);
1255 s = format (s, "\n%U%=12s%=12s%=16s",
1256 format_white_space, indent + 2,
1257 "Size", "Count", "Fraction");
1259 for (i = 0; i < ARRAY_LEN (hist); i++)
1263 s = format (s, "\n%U%12d%12wd%16.4f",
1264 format_white_space, indent + 2,
1265 MHEAP_MIN_USER_DATA_BYTES +
1266 i * MHEAP_USER_DATA_WORD_BYTES, hist[i],
1267 (f64) hist[i] / (f64) n_hist);
1272 s = format (s, "\n%U%U",
1273 format_white_space, indent + 2, format_mheap_stats, h);
1275 if ((h->flags & MHEAP_FLAG_TRACE) && vec_len (h->trace_main.traces) > 0)
1277 /* Make a copy of traces since we'll be sorting them. */
1278 mheap_trace_t *t, *traces_copy;
1279 u32 indent, total_objects_traced;
1281 traces_copy = vec_dup (h->trace_main.traces);
1282 qsort (traces_copy, vec_len (traces_copy), sizeof (traces_copy[0]),
1285 total_objects_traced = 0;
1286 s = format (s, "\n");
1287 vec_foreach (t, traces_copy)
1289 /* Skip over free elements. */
1290 if (t->n_allocations == 0)
1293 total_objects_traced += t->n_allocations;
1295 /* When not verbose only report allocations of more than 1k. */
1296 if (!verbose && t->n_bytes < 1024)
1299 if (t == traces_copy)
1300 s = format (s, "%=9s%=9s %=10s Traceback\n", "Bytes", "Count",
1302 s = format (s, "%9d%9d %p", t->n_bytes, t->n_allocations,
1304 indent = format_get_indent (s);
1305 for (i = 0; i < ARRAY_LEN (t->callers) && t->callers[i]; i++)
1308 s = format (s, "%U", format_white_space, indent);
/* With ELF symbol support, print a symbolized backtrace frame;
   otherwise fall back to a raw pointer. */
1311 format (s, " %U\n", format_clib_elf_symbol_with_address,
1314 s = format (s, " %p\n", t->callers[i]);
1319 s = format (s, "%d total traced objects\n", total_objects_traced);
1321 vec_free (traces_copy);
1324 first_corrupt = mheap_first_corrupt (v);
1327 size = mheap_elt_data_bytes (first_corrupt);
1328 s = format (s, "\n first corrupt object: %p, size %wd\n %U",
1329 first_corrupt, size, format_hex_bytes, first_corrupt, size);
1332 /* FIXME. This output could be wrong in the unlikely case that format
1333 uses the same mheap as we are currently inspecting. */
1339 s = format (s, "\n");
1341 e = mheap_elt_at_uoffset (v, 0);
1346 s = format (s, "%8d: ", i);
1348 o = mheap_elt_uoffset (v, e);
/* Free elements shown in parentheses, used ones plain. */
1351 s = format (s, "(%8d) ", o);
1353 s = format (s, " %8d ", o);
1355 if ((i % 8) == 7 || (i + 1) >= h->n_elts)
1356 s = format (s, "\n");
1360 mheap_maybe_unlock (v);
/* NOTE(review): fragments only — this line belongs to a dump helper
   whose surrounding definition is missing from this extraction. */
1368 fformat (stderr, "%U", format_mheap, v, 1);
/* Empty hook for setting a debugger breakpoint when validation fails
   (body missing in this extraction). */
1372 mheap_validate_breakpoint ()
/* Consistency checker: verifies free-list/bitmap agreement, free-list
   linkage, small-object-cache entries, boundary tags of every element,
   and that per-list and per-walk counts/sizes reconcile. Panics via the
   CHECK macro on any violation.
   NOTE(review): declarations (i, s, e, n), free-list loop bodies, the
   used/free accumulation in the element walk, and several braces/returns
   are missing lines in this extraction. */
1378 mheap_validate (void *v)
1380 mheap_t *h = mheap_header (v);
1383 uword elt_count, elt_size;
1384 uword free_count_from_free_lists, free_size_from_free_lists;
1385 uword small_elt_free_count, small_elt_free_size;
1387 #define CHECK(x) if (! (x)) { mheap_validate_breakpoint (); os_panic (); }
1389 if (vec_len (v) == 0)
1392 mheap_maybe_lock (v);
1394 /* Validate number of elements and size. */
1395 free_size_from_free_lists = free_count_from_free_lists = 0;
1396 for (i = 0; i < ARRAY_LEN (h->first_free_elt_uoffset_by_bin); i++)
/* Bitmap bit must agree with whether the bin's free list is empty. */
1401 CHECK ((h->first_free_elt_uoffset_by_bin[i] != MHEAP_GROUNDED)
1403 ((h->non_empty_free_elt_heads[i /
1404 BITS (uword)] & ((uword) 1 <<
1410 if (h->first_free_elt_uoffset_by_bin[i] == MHEAP_GROUNDED)
1413 e = mheap_elt_at_uoffset (v, h->first_free_elt_uoffset_by_bin[i]);
1419 n = mheap_next_elt (e);
1421 /* Object must be marked free. */
1424 /* Next object's previous free bit must also be set. */
1425 CHECK (n->prev_is_free);
/* List head has no predecessor. */
1428 CHECK (e->free_elt.prev_uoffset == MHEAP_GROUNDED);
/* Element must live in the bin matching its size. */
1431 s = mheap_elt_data_bytes (e);
1432 CHECK (user_data_size_to_bin_index (s) == i);
1434 free_count_from_free_lists += 1;
1435 free_size_from_free_lists += s;
1437 if (e->free_elt.next_uoffset == MHEAP_GROUNDED)
1440 n = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
1442 /* Check free element linkages. */
1443 CHECK (n->free_elt.prev_uoffset == mheap_elt_uoffset (v, e));
1449 /* Go through small object cache. */
1450 small_elt_free_count = small_elt_free_size = 0;
1451 for (i = 0; i < ARRAY_LEN (h->small_object_cache.bins.as_u8); i++)
1453 if (h->small_object_cache.bins.as_u8[i] != 0)
/* Bin bytes are stored offset by one (0 = empty slot). */
1456 uword b = h->small_object_cache.bins.as_u8[i] - 1;
1457 uword o = h->small_object_cache.offsets[i];
1460 e = mheap_elt_at_uoffset (v, o);
1462 /* Object must be allocated. */
1463 CHECK (!e->is_free);
1465 s = mheap_elt_data_bytes (e);
1466 CHECK (user_data_size_to_bin_index (s) == b);
1468 small_elt_free_count += 1;
1469 small_elt_free_size += s;
1475 uword elt_free_size, elt_free_count;
1477 elt_count = elt_size = elt_free_size = elt_free_count = 0;
1478 for (e = v; e->n_user_data != MHEAP_N_USER_DATA_INVALID; e = n)
1480 if (e->prev_n_user_data != MHEAP_N_USER_DATA_INVALID)
1481 CHECK (e->prev_n_user_data * sizeof (e->user_data[0]) >=
1482 MHEAP_MIN_USER_DATA_BYTES);
1484 CHECK (e->n_user_data * sizeof (e->user_data[0]) >=
1485 MHEAP_MIN_USER_DATA_BYTES);
1487 n = mheap_next_elt (e);
1489 CHECK (e->is_free == n->prev_is_free);
1492 s = mheap_elt_data_bytes (e);
1501 /* Consecutive free objects should have been combined. */
1502 CHECK (!(e->prev_is_free && n->prev_is_free));
/* Reconcile free-list totals against the full element walk. */
1505 CHECK (free_count_from_free_lists == elt_free_count);
1506 CHECK (free_size_from_free_lists == elt_free_size);
1507 CHECK (elt_count == h->n_elts + elt_free_count + small_elt_free_count);
1508 CHECK (elt_size + (elt_count + 1) * MHEAP_ELT_OVERHEAD_BYTES ==
/* NOTE(review): this trailing loop's condition uses == on the sentinel,
   so presumably it only visits the terminal sentinel chain — confirm
   against full source (surrounding lines missing). */
1515 for (e = v; e->n_user_data == MHEAP_N_USER_DATA_INVALID; e = n)
1517 n = mheap_next_elt (e);
1518 CHECK (e->n_user_data == n->prev_n_user_data);
1524 mheap_maybe_unlock (v);
1526 h->validate_serial += 1;
/* Record an allocation of `size` bytes at heap offset `offset` for the
   allocation tracer: capture the caller backtrace, find-or-create the
   mheap_trace_t keyed by that backtrace, bump its counters, and index
   the trace by offset so mheap_put_trace() can find it on free.
   NOTE(review): excerpt is line-sampled; braces and some statements
   (e.g. n_bytes accounting) are elided between the numbered lines. */
1530 mheap_get_trace (void *v, uword offset, uword size)
1533 mheap_trace_main_t *tm;
1535 uword i, n_callers, trace_index, *p;
1536 mheap_trace_t trace;
1538 /* Spurious Coverity warnings be gone. */
1539 memset (&trace, 0, sizeof (trace));
/* Capture the call stack into trace.callers, skipping our caller's
   frame so the trace starts at the user allocation site. */
1541 n_callers = clib_backtrace (trace.callers, ARRAY_LEN (trace.callers),
1542 /* Skip mheap_get_aligned's frame */ 1);
/* Zero the unused tail so the callers array hashes/compares stably. */
1546 for (i = n_callers; i < ARRAY_LEN (trace.callers); i++)
1547 trace.callers[i] = 0;
1549 h = mheap_header (v);
1550 tm = &h->trace_main;
/* Lazily create the backtrace -> trace-index hash on first use. */
1552 if (!tm->trace_by_callers)
1553 tm->trace_by_callers =
1554 hash_create_shmem (0, sizeof (trace.callers), sizeof (uword));
1556 p = hash_get_mem (tm->trace_by_callers, &trace.callers);
/* Existing trace for this backtrace: reuse it (guard elided). */
1560 t = tm->traces + trace_index;
/* No existing trace: prefer recycling an index from the free list. */
1564 i = vec_len (tm->trace_free_list);
1567 trace_index = tm->trace_free_list[i - 1];
1568 _vec_len (tm->trace_free_list) = i - 1;
/* Free list empty: grow the traces vector.  vec_add2 may reallocate,
   so remember the old extent to detect a move. */
1572 mheap_trace_t *old_start = tm->traces;
1573 mheap_trace_t *old_end = vec_end (tm->traces);
1575 vec_add2 (tm->traces, t, 1);
1577 if (tm->traces != old_start)
/* The vector moved: hash keys point into the old storage, so rewrite
   every key to the corresponding address in the new storage. */
1582 hash_foreach_pair (p, tm->trace_by_callers,
1584 q = uword_to_pointer (p->key, mheap_trace_t *);
1585 ASSERT (q >= old_start && q < old_end);
1586 p->key = pointer_to_uword (tm->traces + (q - old_start));
1590 trace_index = t - tm->traces;
1593 t = tm->traces + trace_index;
/* Fresh trace: reset counters and register its backtrace key. */
1595 t->n_allocations = 0;
1597 hash_set_mem (tm->trace_by_callers, t->callers, trace_index);
1600 t->n_allocations += 1;
1602 t->offset = offset; /* keep a sample to autopsy */
/* Map this object's offset to its trace for mheap_put_trace(). */
1603 hash_set (tm->trace_index_by_offset, offset, t - tm->traces);
/* Undo the tracer bookkeeping for a free of `size` bytes at heap
   offset `offset`: look up the trace recorded by mheap_get_trace(),
   decrement its counters, and recycle the trace record once its last
   allocation is released.
   NOTE(review): excerpt is line-sampled; guards/braces are elided. */
1607 mheap_put_trace (void *v, uword offset, uword size)
1610 mheap_trace_main_t *tm;
1612 uword trace_index, *p;
1614 h = mheap_header (v);
1615 tm = &h->trace_main;
/* Find which trace this offset was charged to (miss handling elided). */
1616 p = hash_get (tm->trace_index_by_offset, offset);
1621 hash_unset (tm->trace_index_by_offset, offset);
1622 ASSERT (trace_index < vec_len (tm->traces));
1624 t = tm->traces + trace_index;
/* Counters must still cover this object before we subtract it. */
1625 ASSERT (t->n_allocations > 0);
1626 ASSERT (t->n_bytes >= size);
1627 t->n_allocations -= 1;
/* Last outstanding allocation gone: retire the trace record — drop
   its backtrace key, recycle its index, and scrub the record. */
1629 if (t->n_allocations == 0)
1631 hash_unset_mem (tm->trace_by_callers, t->callers);
1632 vec_add1 (tm->trace_free_list, trace_index);
1633 memset (t, 0, sizeof (t[0]));
/* qsort comparator for mheap_trace_t: descending by total bytes,
   then descending by allocation count (t2 minus t1 yields the
   biggest-first ordering).  NOTE(review): the lines returning `cmp`
   and the tie-break guard are elided in this excerpt. */
1638 mheap_trace_sort (const void *_t1, const void *_t2)
1640 const mheap_trace_t *t1 = _t1;
1641 const mheap_trace_t *t2 = _t2;
1644 cmp = (word) t2->n_bytes - (word) t1->n_bytes;
1646 cmp = (word) t2->n_allocations - (word) t1->n_allocations;
/* Release every container owned by the trace state: the trace vector,
   the free-index list, and both lookup hashes.  vec_free/hash_free
   accept empty/null containers, so this is safe on a fresh tm. */
1651 mheap_trace_main_free (mheap_trace_main_t * tm)
1653 vec_free (tm->traces);
1654 vec_free (tm->trace_free_list);
1655 hash_free (tm->trace_by_callers);
1656 hash_free (tm->trace_index_by_offset);
/* Turn allocation tracing for heap `v` on or off.  Enabling sets
   MHEAP_FLAG_TRACE; disabling frees all accumulated trace state and
   clears the flag.  NOTE(review): the enable/disable branch structure
   and any early-outs are elided in this line-sampled excerpt. */
1660 mheap_trace (void *v, int enable)
1664 h = mheap_header (v);
1668 h->flags |= MHEAP_FLAG_TRACE;
/* Disable path: discard traces before clearing the flag. */
1672 mheap_trace_main_free (&h->trace_main);
1673 h->flags &= ~MHEAP_FLAG_TRACE;
1678 * fd.io coding-style-patch-verification: ON
1681 * eval: (c-set-style "gnu")