2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
18 Permission is hereby granted, free of charge, to any person obtaining
19 a copy of this software and associated documentation files (the
20 "Software"), to deal in the Software without restriction, including
21 without limitation the rights to use, copy, modify, merge, publish,
22 distribute, sublicense, and/or sell copies of the Software, and to
23 permit persons to whom the Software is furnished to do so, subject to
24 the following conditions:
26 The above copyright notice and this permission notice shall be
27 included in all copies or substantial portions of the Software.
29 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 #include <vppinfra/bitops.h>
39 #include <vppinfra/hash.h>
40 #include <vppinfra/format.h>
41 #include <vppinfra/mheap.h>
42 #include <vppinfra/os.h>
43 #include <vppinfra/time.h>
46 #include <vppinfra/elf_clib.h>
/* Forward declarations for the allocation-tracing helpers defined
   near the bottom of this file. */
49 static void mheap_get_trace (void * v, uword offset, uword size);
50 static void mheap_put_trace (void * v, uword offset, uword size);
51 static int mheap_trace_sort (const void * t1, const void * t2);
/* Take the heap spin lock when MHEAP_FLAG_THREAD_SAFE is set.
   Re-acquisition by the CPU that already owns the lock is tracked via
   recursion_count.  NOTE(review): this source appears gap-sampled;
   some statements (braces, the recursive-path body) are not visible. */
53 always_inline void mheap_maybe_lock (void * v)
55 mheap_t * h = mheap_header (v);
56 if (v && (h->flags & MHEAP_FLAG_THREAD_SAFE))
58 u32 my_cpu = os_get_cpu_number();
59 if (h->owner_cpu == my_cpu)
/* Spin until the atomic test-and-set wins the lock. */
65 while (__sync_lock_test_and_set (&h->lock, 1))
68 h->owner_cpu = my_cpu;
69 h->recursion_count = 1;
/* Release the lock taken by mheap_maybe_lock.  Only the outermost
   unlock of a recursive sequence (recursion_count reaching 0) performs
   the actual release; the memory barrier orders prior heap writes
   before it.  NOTE(review): the lock-clearing statement itself is not
   visible in this gap-sampled source. */
73 always_inline void mheap_maybe_unlock (void * v)
75 mheap_t * h = mheap_header (v);
76 if (v && h->flags & MHEAP_FLAG_THREAD_SAFE)
/* Unlocks must come from the CPU that currently owns the lock. */
78 ASSERT(os_get_cpu_number() == h->owner_cpu);
79 if (--h->recursion_count == 0)
82 CLIB_MEMORY_BARRIER();
88 /* Find bin for objects with size at least n_user_data_bytes. */
/* Small sizes map linearly (one bin per user-data word); larger
   sizes fall into logarithmically-spaced bins via max_log2. */
90 user_data_size_to_bin_index (uword n_user_data_bytes)
92 uword n_user_data_words;
93 word small_bin, large_bin;
95 /* User size must be at least big enough to hold free elt. */
96 n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
/* Round up to a whole number of user-data words. */
99 n_user_data_words = (round_pow2 (n_user_data_bytes, MHEAP_USER_DATA_WORD_BYTES)
100 / MHEAP_USER_DATA_WORD_BYTES);
102 ASSERT (n_user_data_words > 0);
103 small_bin = n_user_data_words - (MHEAP_MIN_USER_DATA_BYTES / MHEAP_USER_DATA_WORD_BYTES);
104 ASSERT (small_bin >= 0);
106 large_bin = MHEAP_N_SMALL_OBJECT_BINS + max_log2 (n_user_data_bytes) - MHEAP_LOG2_N_SMALL_OBJECT_BINS;
108 return small_bin < MHEAP_N_SMALL_OBJECT_BINS ? small_bin : large_bin;
/* Convert a total element size (header overhead + data) into the
   number of user-data bytes it carries. */
112 mheap_elt_size_to_user_n_bytes (uword n_bytes)
114 ASSERT (n_bytes >= sizeof (mheap_elt_t));
115 return (n_bytes - STRUCT_OFFSET_OF (mheap_elt_t, user_data));
/* As above, but expressed in user-data words; n_bytes must be
   word-aligned.  Marked unused to silence warnings when no caller
   is compiled in. */
118 always_inline uword __attribute__((unused))
119 mheap_elt_size_to_user_n_words (uword n_bytes)
121 ASSERT (n_bytes % MHEAP_USER_DATA_WORD_BYTES == 0);
122 return mheap_elt_size_to_user_n_bytes (n_bytes) / MHEAP_USER_DATA_WORD_BYTES;
/* Set the size and free/allocated state of the element at uoffset,
   and mirror those values into the following element's prev_* fields
   so forward and backward traversal agree. */
126 mheap_elt_set_size (void * v,
128 uword n_user_data_bytes,
131 mheap_elt_t * e, * n;
133 e = mheap_elt_at_uoffset (v, uoffset);
135 ASSERT (n_user_data_bytes % MHEAP_USER_DATA_WORD_BYTES == 0);
136 e->n_user_data = n_user_data_bytes / MHEAP_USER_DATA_WORD_BYTES;
137 e->is_free = is_free;
138 ASSERT (e->prev_n_user_data * sizeof (e->user_data[0]) >= MHEAP_MIN_USER_DATA_BYTES);
/* Keep the next element's back-pointers consistent. */
140 n = mheap_next_elt (e);
141 n->prev_n_user_data = e->n_user_data;
142 n->prev_is_free = is_free;
/* Set the head of free-list `bin` to uoffset (~0 means empty) and
   keep the non_empty_free_elt_heads bitmap in sync so occupied bins
   can be found quickly. */
145 always_inline void set_first_free_elt_offset (mheap_t * h, uword bin, uword uoffset)
149 h->first_free_elt_uoffset_by_bin[bin] = uoffset;
/* i0 = word index, i1 = bit mask for this bin in the bitmap. */
151 i0 = bin / BITS (h->non_empty_free_elt_heads[0]);
152 i1 = (uword) 1 << (uword) (bin % BITS (h->non_empty_free_elt_heads[0]));
154 ASSERT (i0 < ARRAY_LEN (h->non_empty_free_elt_heads));
155 if (h->first_free_elt_uoffset_by_bin[bin] == ~0)
156 h->non_empty_free_elt_heads[i0] &= ~i1;
158 h->non_empty_free_elt_heads[i0] |= i1;
/* Link the element at uoffset onto the head of the free list for the
   bin matching n_user_data_bytes (doubly-linked via uoffsets; ~0 is
   the list terminator). */
162 set_free_elt (void * v, uword uoffset, uword n_user_data_bytes)
164 mheap_t * h = mheap_header (v);
165 mheap_elt_t * e = mheap_elt_at_uoffset (v, uoffset);
166 mheap_elt_t * n = mheap_next_elt (e);
167 uword bin = user_data_size_to_bin_index (n_user_data_bytes);
169 ASSERT (n->prev_is_free);
/* New head: no predecessor; old head becomes successor. */
172 e->free_elt.prev_uoffset = ~0;
173 e->free_elt.next_uoffset = h->first_free_elt_uoffset_by_bin[bin];
175 /* Fill in next free elt's previous pointer. */
176 if (e->free_elt.next_uoffset != ~0)
178 mheap_elt_t * nf = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
179 ASSERT (nf->is_free);
180 nf->free_elt.prev_uoffset = uoffset;
183 set_first_free_elt_offset (h, bin, uoffset);
/* Create a free element of the given size at uoffset: write the size
   and free flag, then thread it onto the appropriate free list. */
187 new_free_elt (void * v, uword uoffset, uword n_user_data_bytes)
189 mheap_elt_set_size (v, uoffset, n_user_data_bytes, /* is_free */ 1);
190 set_free_elt (v, uoffset, n_user_data_bytes);
/* Unlink element e from the free list for `bin`, patching its
   neighbors' prev/next uoffsets (and the bin head when e was first).
   NOTE(review): gap-sampled; the branch selecting head-vs-interior
   update is only partially visible. */
194 remove_free_elt (void * v, mheap_elt_t * e, uword bin)
196 mheap_t * h = mheap_header (v);
197 mheap_elt_t * p, * n;
/* Resolve successor/predecessor (~0 means none). */
200 no = e->free_elt.next_uoffset;
201 n = no != ~0 ? mheap_elt_at_uoffset (v, no) : 0;
202 po = e->free_elt.prev_uoffset;
203 p = po != ~0 ? mheap_elt_at_uoffset (v, po) : 0;
206 set_first_free_elt_offset (h, bin, no);
208 p->free_elt.next_uoffset = no;
211 n->free_elt.prev_uoffset = po;
/* Convenience wrapper: compute e's bin from its size, then unlink it
   from that free list. */
215 remove_free_elt2 (void * v, mheap_elt_t * e)
218 bin = user_data_size_to_bin_index (mheap_elt_data_bytes (e));
219 remove_free_elt (v, e, bin);
/* Flag bits for mheap_vm(): whether to map or unmap the address
   range, and whether the range's end is rounded up or truncated to a
   page boundary. */
222 #define MHEAP_VM_MAP (1 << 0)
223 #define MHEAP_VM_UNMAP (1 << 1)
224 #define MHEAP_VM_NOMAP (0 << 1)
225 #define MHEAP_VM_ROUND (1 << 2)
226 #define MHEAP_VM_ROUND_UP MHEAP_VM_ROUND
227 #define MHEAP_VM_ROUND_DOWN (0 << 2)
/* System page size, captured once in mheap_alloc_with_flags. */
229 static uword mheap_page_size;
/* Round an address up to the next page boundary (page size must be a
   power of two). */
231 static_always_inline uword mheap_page_round (uword addr)
232 { return (addr + mheap_page_size - 1) &~ (mheap_page_size - 1); }
/* Truncate an address down to its page boundary. */
234 static_always_inline uword mheap_page_truncate (uword addr)
235 { return addr &~ (mheap_page_size - 1); }
/* Map or unmap the whole pages covered by [start_addr, start_addr +
   size), per `flags` (MHEAP_VM_MAP/UNMAP and the ROUND_UP/DOWN end
   rounding).  Returns the number of bytes affected (NOTE(review):
   return statement not visible in this gap-sampled source). */
237 static_always_inline uword
240 clib_address_t start_addr,
243 mheap_t * h = mheap_header (v);
244 clib_address_t start_page, end_page, end_addr;
/* Caller must not ask for VM ops on a VM-disabled heap. */
247 ASSERT (! (h->flags & MHEAP_FLAG_DISABLE_VM));
249 end_addr = start_addr + size;
251 /* Round start/end address up to page boundary. */
252 start_page = mheap_page_round (start_addr);
254 if ((flags & MHEAP_VM_ROUND) == MHEAP_VM_ROUND_UP)
255 end_page = mheap_page_round (end_addr);
257 end_page = mheap_page_truncate (end_addr);
260 if (end_page > start_page)
262 mapped_bytes = end_page - start_page;
263 if (flags & MHEAP_VM_MAP)
264 clib_mem_vm_map ((void *) start_page, end_page - start_page);
265 else if (flags & MHEAP_VM_UNMAP)
266 clib_mem_vm_unmap ((void *) start_page, end_page - start_page);
/* Apply mheap_vm to the user-data span of the element at `offset`
   (from its user_data up to the next element header). */
272 static_always_inline uword
273 mheap_vm_elt (void * v, uword flags, uword offset)
276 clib_address_t start_addr, end_addr;
278 e = mheap_elt_at_uoffset (v, offset);
279 start_addr = (clib_address_t) ((void *) e->user_data);
280 end_addr = (clib_address_t) mheap_next_elt (e);
281 return mheap_vm (v, flags, start_addr, end_addr - start_addr);
/* Return a bitmask of small-object-cache slots whose bin byte equals
   `bin`, computed with SSE byte-compare when CLIB_HAVE_VEC128 is
   available (scalar fallback not visible in this gap-sampled source).
   $$$$ ELIOT FIXME note below is original. */
285 mheap_small_object_cache_mask (mheap_small_object_cache_t * c, uword bin)
289 /* $$$$ ELIOT FIXME: add Altivec version of this routine */
290 #if !defined (CLIB_HAVE_VEC128) || defined (__ALTIVEC__)
293 u8x16 b = u8x16_splat (bin);
/* Compare 16 bins at a time; shift each 16-bit compare mask into
   position within the result word. */
297 #define _(i) ((uword) u8x16_compare_byte_mask (u8x16_is_equal (b, c->bins.as_u8x16[i])) << (uword) ((i)*16))
298 mask = _ (0) | _ (1);
299 if (BITS (uword) > 32)
300 mask |= _ (2) | _ (3);
/* Try to pop a cached object for `bin` from the small object cache.
   Bins are stored offset by one so 0 can mean "slot empty".
   NOTE(review): gap-sampled; the empty-mask path and return are not
   visible. */
308 mheap_get_small_object (mheap_t * h, uword bin)
310 mheap_small_object_cache_t * c = &h->small_object_cache;
311 uword mask = mheap_small_object_cache_mask (c, bin + 1);
/* Take the first matching slot and mark it empty. */
316 uword i = min_log2 (mask);
317 uword o = c->offsets[i];
319 c->bins.as_u8[i] = 0;
/* Stash a freed object (bin, offset) into the small object cache.
   Prefers an empty slot (bin byte 0); otherwise cyclically replaces
   a slot and returns the evicted offset so the caller can really
   free it.  NOTE(review): gap-sampled; returns not visible. */
327 mheap_put_small_object (mheap_t * h, uword bin, uword offset)
329 mheap_small_object_cache_t * c = &h->small_object_cache;
330 uword free_mask = mheap_small_object_cache_mask (c, 0);
/* Empty slot available: use it. */
336 i = min_log2 (free_mask);
337 c->bins.as_u8[i] = b;
338 c->offsets[i] = offset;
342 /* Nothing free with right size: cyclic replacement. */
346 i = c->replacement_index++;
348 c->bins.as_u8[i] = b;
349 old_offset = c->offsets[i];
350 c->offsets[i] = offset;
352 /* Return old offset so it can be freed. */
/* Walk free list `bin` looking for an object large enough to hold
   *n_user_data_bytes_arg at the requested alignment/align_offset.
   On success: carves the allocation out of the free block (possibly
   creating lo/hi free fragments), VM-maps the needed pages, updates
   *n_user_data_bytes_arg to the actual size, and returns the object
   offset.  NOTE(review): gap-sampled; loop braces, failure returns
   and some statements are not visible. */
358 mheap_get_search_free_bin (void * v,
360 uword * n_user_data_bytes_arg,
364 mheap_t * h = mheap_header (v);
367 /* Free object is at offset f0 ... f1;
368 Allocatted object is at offset o0 ... o1. */
369 word o0, o1, f0, f1, search_n_user_data_bytes;
370 word lo_free_usize, hi_free_usize;
372 ASSERT (h->first_free_elt_uoffset_by_bin[bin] != ~0);
373 e = mheap_elt_at_uoffset (v, h->first_free_elt_uoffset_by_bin[bin]);
375 search_n_user_data_bytes = *n_user_data_bytes_arg;
377 /* Silence compiler warning. */
378 o0 = o1 = f0 = f1 = 0;
380 h->stats.free_list.n_search_attempts += 1;
382 /* Find an object that is large enough with correct alignment at given alignment offset. */
385 uword this_object_n_user_data_bytes = mheap_elt_data_bytes (e);
/* Small bins hold a single size class, so the head always fits. */
388 if (bin < MHEAP_N_SMALL_OBJECT_BINS)
389 ASSERT (this_object_n_user_data_bytes >= search_n_user_data_bytes);
391 h->stats.free_list.n_objects_searched += 1;
393 if (this_object_n_user_data_bytes < search_n_user_data_bytes)
396 /* Bounds of free object: from f0 to f1. */
397 f0 = ((void *) e->user_data - v);
398 f1 = f0 + this_object_n_user_data_bytes;
400 /* Place candidate object at end of free block and align as requested. */
401 o0 = ((f1 - search_n_user_data_bytes) &~ (align - 1)) - align_offset;
405 /* Make sure that first free fragment is either empty or
406 large enough to be valid. */
409 lo_free_usize = o0 != f0 ? o0 - f0 - MHEAP_ELT_OVERHEAD_BYTES : 0;
410 if (o0 <= f0 || lo_free_usize >= (word) MHEAP_MIN_USER_DATA_BYTES)
415 o1 = o0 + search_n_user_data_bytes;
/* Candidate fits entirely inside the free block: done searching. */
418 if (o0 >= f0 && o1 <= f1)
422 /* Reached end of free list without finding large enough object. */
423 if (e->free_elt.next_uoffset == ~0)
426 /* Otherwise keep searching for large enough object. */
427 e = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
431 /* Free fragment at end. */
432 hi_free_usize = f1 != o1 ? f1 - o1 - MHEAP_ELT_OVERHEAD_BYTES : 0;
434 /* If fragment at end is too small to be a new object,
435 give user's object a bit more space than requested. */
436 if (hi_free_usize < (word) MHEAP_MIN_USER_DATA_BYTES)
438 search_n_user_data_bytes += f1 - o1;
443 /* Need to make sure that relevant memory areas are mapped. */
444 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
446 mheap_elt_t * f0_elt = mheap_elt_at_uoffset (v, f0);
447 mheap_elt_t * f1_elt = mheap_elt_at_uoffset (v, f1);
448 mheap_elt_t * o0_elt = mheap_elt_at_uoffset (v, o0);
449 mheap_elt_t * o1_elt = mheap_elt_at_uoffset (v, o1);
451 uword f0_page_start, f0_page_end;
452 uword o0_page_start, o0_page_end;
454 /* Free elt is mapped. Addresses after that may not be mapped. */
455 f0_page_start = mheap_page_round (pointer_to_uword (f0_elt->user_data));
456 f0_page_end = mheap_page_truncate (pointer_to_uword (f1_elt));
458 o0_page_start = mheap_page_truncate (pointer_to_uword (o0_elt));
459 o0_page_end = mheap_page_round (pointer_to_uword (o1_elt->user_data));
/* Clip the object's page range to the free block's page range. */
461 if (o0_page_start < f0_page_start)
462 o0_page_start = f0_page_start;
463 if (o0_page_end > f0_page_end)
464 o0_page_end = f0_page_end;
466 if (o0_page_end > o0_page_start)
467 clib_mem_vm_map (uword_to_pointer (o0_page_start, void *),
468 o0_page_end - o0_page_start);
471 /* Remove free object from free list. */
472 remove_free_elt (v, e, bin);
474 /* Free fragment at begining. */
475 if (lo_free_usize > 0)
477 ASSERT (lo_free_usize >= (word) MHEAP_MIN_USER_DATA_BYTES);
478 mheap_elt_set_size (v, f0, lo_free_usize, /* is_free */ 1);
479 new_free_elt (v, f0, lo_free_usize);
482 mheap_elt_set_size (v, o0, search_n_user_data_bytes, /* is_free */ 0);
/* Free fragment at the end, if large enough to stand alone. */
484 if (hi_free_usize > 0)
486 uword uo = o1 + MHEAP_ELT_OVERHEAD_BYTES;
487 mheap_elt_set_size (v, uo, hi_free_usize, /* is_free */ 1);
488 new_free_elt (v, uo, hi_free_usize);
491 /* Return actual size of block. */
492 *n_user_data_bytes_arg = search_n_user_data_bytes;
494 h->stats.free_list.n_objects_found += 1;
499 /* Search free lists for object with given size and alignment. */
/* First tries the small object cache (when enabled and alignment is
   the default), then scans the non-empty-bin bitmap from the target
   bin upward, searching each candidate bin.  NOTE(review):
   gap-sampled; returns and loop braces not visible. */
501 mheap_get_search_free_list (void * v,
502 uword * n_user_bytes_arg,
506 mheap_t * h = mheap_header (v);
507 uword bin, n_user_bytes, i, bi;
509 n_user_bytes = *n_user_bytes_arg;
510 bin = user_data_size_to_bin_index (n_user_bytes);
/* Fast path: default-aligned small objects from the cache. */
512 if (MHEAP_HAVE_SMALL_OBJECT_CACHE
513 && (h->flags & MHEAP_FLAG_SMALL_OBJECT_CACHE)
515 && align == STRUCT_SIZE_OF (mheap_elt_t, user_data[0])
516 && align_offset == 0)
518 uword r = mheap_get_small_object (h, bin);
519 h->stats.n_small_object_cache_attempts += 1;
522 h->stats.n_small_object_cache_hits += 1;
/* Scan bitmap words covering bin .. max bin. */
527 for (i = bin / BITS (uword); i < ARRAY_LEN (h->non_empty_free_elt_heads); i++)
529 uword non_empty_bin_mask = h->non_empty_free_elt_heads[i];
531 /* No need to search smaller bins. */
532 if (i == bin / BITS (uword))
533 non_empty_bin_mask &= ~pow2_mask (bin % BITS (uword));
535 /* Search each occupied free bin which is large enough. */
536 foreach_set_bit (bi, non_empty_bin_mask, ({
537 uword r = mheap_get_search_free_bin (v, bi + i * BITS (uword), n_user_bytes_arg, align, align_offset);
546 static never_inline void *
/* Allocate an object at the end of the heap by growing the underlying
   vector: possibly creates a leading free fragment for alignment,
   checks against h->max_size, VM-maps new pages, and writes the
   end-of-heap sentinel element.  *offset_return receives the object
   offset (~0 presumably on failure — NOTE(review): gap-sampled;
   failure path and vector resize statements not visible). */
547 mheap_get_extend_vector (void * v,
548 uword n_user_data_bytes,
551 uword * offset_return)
553 /* Bounds of free and allocated objects (as above). */
554 uword f0, f1, o0, o1;
556 mheap_t * h = mheap_header (v);
/* Empty heap: reserve space for the first element header. */
559 if (_vec_len (v) == 0)
561 _vec_len (v) = MHEAP_ELT_OVERHEAD_BYTES;
563 /* Create first element of heap. */
564 e = mheap_elt_at_uoffset (v, _vec_len (v));
565 e->prev_n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Align the object start; any gap becomes a free fragment. */
570 o0 = round_pow2 (f0, align) - align_offset;
573 free_size = o0 - f0 - MHEAP_ELT_OVERHEAD_BYTES;
574 if (o0 == f0 || free_size >= (word) sizeof (mheap_elt_t))
580 o1 = o0 + n_user_data_bytes;
581 f1 = o1 + MHEAP_ELT_OVERHEAD_BYTES;
584 h = mheap_header (v);
586 /* Make sure we have space for object plus overhead. */
587 if (f1 > h->max_size)
595 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
597 mheap_elt_t * f0_elt = mheap_elt_at_uoffset (v, f0);
598 mheap_elt_t * f1_elt = mheap_elt_at_uoffset (v, f1);
600 uword f0_page = mheap_page_round (pointer_to_uword (f0_elt->user_data));
601 uword f1_page = mheap_page_round (pointer_to_uword (f1_elt->user_data));
603 if (f1_page > f0_page)
604 mheap_vm (v, MHEAP_VM_MAP, f0_page, f1_page - f0_page);
608 new_free_elt (v, f0, free_size);
610 mheap_elt_set_size (v, o0, n_user_data_bytes, /* is_free */ 0);
612 /* Mark last element. */
613 e = mheap_elt_at_uoffset (v, f1);
614 e->n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Public allocation entry point: allocate n_user_data_bytes with the
   given alignment and align_offset from heap v, storing the object
   offset in *offset_return (~0 presumably on failure) and returning
   the (possibly reallocated) heap vector.  Takes the heap lock,
   searches free lists, then falls back to extending the vector.
   NOTE(review): gap-sampled; braces, early-return paths and the final
   return are not visible. */
621 void * mheap_get_aligned (void * v,
622 uword n_user_data_bytes,
625 uword * offset_return)
631 cpu_times[0] = clib_cpu_time_now ();
/* Alignment is at least one user-data word and a power of two. */
633 align = clib_max (align, STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
634 align = max_pow2 (align);
636 /* Correct align offset to be smaller than alignment. */
637 align_offset &= (align - 1);
639 /* Align offset must be multiple of minimum object size. */
640 if (align_offset % STRUCT_SIZE_OF (mheap_elt_t, user_data[0]) != 0)
646 /* Round requested size. */
647 n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
648 n_user_data_bytes = round_pow2 (n_user_data_bytes, STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
/* No heap given: create a default 64MB one. */
651 v = mheap_alloc (0, 64 << 20);
653 mheap_maybe_lock (v);
655 h = mheap_header (v);
657 if (h->flags & MHEAP_FLAG_VALIDATE)
660 /* First search free lists for object. */
661 offset = mheap_get_search_free_list (v, &n_user_data_bytes, align, align_offset);
663 h = mheap_header (v);
665 /* If that fails allocate object at end of heap by extending vector. */
666 if (offset == ~0 && _vec_len (v) < h->max_size)
668 v = mheap_get_extend_vector (v, n_user_data_bytes, align, align_offset, &offset);
669 h = mheap_header (v);
670 h->stats.n_vector_expands += offset != ~0;
673 *offset_return = offset;
678 if (h->flags & MHEAP_FLAG_TRACE)
680 /* Recursion block for case when we are traceing main clib heap. */
681 h->flags &= ~MHEAP_FLAG_TRACE;
683 mheap_get_trace (v, offset, n_user_data_bytes);
685 h->flags |= MHEAP_FLAG_TRACE;
689 if (h->flags & MHEAP_FLAG_VALIDATE)
692 mheap_maybe_unlock (v);
694 cpu_times[1] = clib_cpu_time_now ();
695 h->stats.n_clocks_get += cpu_times[1] - cpu_times[0];
696 h->stats.n_gets += 1;
/* Free the last element of the heap by shrinking the vector, merging
   with a preceding free element when present, and VM-unmapping the
   reclaimed pages.  NOTE(review): gap-sampled; the vector-length
   adjustments are not visible. */
701 static void free_last_elt (void * v, mheap_elt_t * e)
703 mheap_t * h = mheap_header (v);
705 /* Possibly delete preceeding free element also. */
708 e = mheap_prev_elt (e);
709 remove_free_elt2 (v, e);
/* Element is now first in heap: heap becomes empty. */
712 if (e->prev_n_user_data == MHEAP_N_USER_DATA_INVALID)
714 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
715 mheap_vm_elt (v, MHEAP_VM_UNMAP, mheap_elt_uoffset (v, e));
/* Otherwise this element becomes the new end-of-heap sentinel. */
720 uword uo = mheap_elt_uoffset (v, e);
721 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
722 mheap_vm_elt (v, MHEAP_VM_UNMAP, uo);
723 e->n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Public free entry point: return the object at uoffset to heap v.
   May stash the object in the small object cache, coalesce with
   adjacent free neighbors, shrink the heap when freeing the last
   element, and record a put trace.  NOTE(review): gap-sampled;
   braces, error-report calls and some statements are not visible. */
728 void mheap_put (void * v, uword uoffset)
731 uword n_user_data_bytes, bin;
732 mheap_elt_t * e, * n;
733 uword trace_uoffset, trace_n_user_data_bytes;
736 cpu_times[0] = clib_cpu_time_now ();
738 h = mheap_header (v);
740 mheap_maybe_lock (v);
742 if (h->flags & MHEAP_FLAG_VALIDATE)
745 ASSERT (h->n_elts > 0);
747 h->stats.n_puts += 1;
749 e = mheap_elt_at_uoffset (v, uoffset);
750 n = mheap_next_elt (e);
751 n_user_data_bytes = mheap_elt_data_bytes (e);
/* Remember original object for tracing, since the small object
   cache may substitute an evicted object below. */
753 trace_uoffset = uoffset;
754 trace_n_user_data_bytes = n_user_data_bytes;
756 bin = user_data_size_to_bin_index (n_user_data_bytes);
757 if (MHEAP_HAVE_SMALL_OBJECT_CACHE
759 && (h->flags & MHEAP_FLAG_SMALL_OBJECT_CACHE))
761 uoffset = mheap_put_small_object (h, bin, uoffset);
/* Re-resolve element: cache may have returned an evicted one. */
765 e = mheap_elt_at_uoffset (v, uoffset);
766 n = mheap_next_elt (e);
767 n_user_data_bytes = mheap_elt_data_bytes (e);
770 /* Assert that forward and back pointers are equal. */
771 if (e->n_user_data != n->prev_n_user_data)
774 /* Forward and backwards is_free must agree. */
775 if (e->is_free != n->prev_is_free)
778 /* Object was already freed. */
782 /* Special case: delete last element in heap. */
783 if (n->n_user_data == MHEAP_N_USER_DATA_INVALID)
784 free_last_elt (v, e);
788 uword f0, f1, n_combine;
791 f1 = f0 + n_user_data_bytes;
/* Coalesce with previous free neighbor. */
796 mheap_elt_t * p = mheap_prev_elt (e);
797 f0 = mheap_elt_uoffset (v, p);
798 remove_free_elt2 (v, p);
/* Coalesce with next free neighbor. */
804 mheap_elt_t * m = mheap_next_elt (n);
806 remove_free_elt2 (v, n);
811 mheap_elt_set_size (v, f0, f1 - f0, /* is_free */ 1);
813 e->is_free = n->prev_is_free = 1;
814 set_free_elt (v, f0, f1 - f0);
816 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
817 mheap_vm_elt (v, MHEAP_VM_UNMAP, f0);
821 h = mheap_header (v);
823 if (h->flags & MHEAP_FLAG_TRACE)
825 /* Recursion block for case when we are traceing main clib heap. */
826 h->flags &= ~MHEAP_FLAG_TRACE;
828 mheap_put_trace (v, trace_uoffset, trace_n_user_data_bytes);
830 h->flags |= MHEAP_FLAG_TRACE;
833 if (h->flags & MHEAP_FLAG_VALIDATE)
836 mheap_maybe_unlock (v);
838 cpu_times[1] = clib_cpu_time_now ();
839 h->stats.n_clocks_put += cpu_times[1] - cpu_times[0];
/* Create a heap in the given memory region (or VM-allocate one when
   memory is NULL), page-align the header+vector layout, initialize
   flags, stats and free lists, and return the heap vector.
   NOTE(review): gap-sampled; several declarations, branches and the
   return are not visible. */
842 void * mheap_alloc_with_flags (void * memory, uword memory_size, uword flags)
/* Lazily capture the system page size. */
848 if (! mheap_page_size)
849 mheap_page_size = clib_mem_get_page_size ();
853 /* No memory given, try to VM allocate some. */
854 memory = clib_mem_vm_alloc (memory_size);
858 /* No memory region implies we have virtual memory. */
859 flags &= ~MHEAP_FLAG_DISABLE_VM;
862 /* Make sure that given memory is page aligned. */
866 am = pointer_to_uword (memory);
867 av = mheap_page_round (am);
868 v = uword_to_pointer (av, void *);
869 h = mheap_header (v);
870 ah = pointer_to_uword (h);
/* Header would fall before the region: shift up a page. */
872 ah += mheap_page_size;
874 h = uword_to_pointer (ah, void *);
875 v = mheap_vector (h);
877 size = memory + memory_size - v;
880 /* VM map header so we can use memory. */
881 if (! (flags & MHEAP_FLAG_DISABLE_VM))
882 clib_mem_vm_map (h, sizeof (h[0]));
884 /* Zero vector header: both heap header and vector length. */
885 memset (h, 0, sizeof (h[0]));
/* Record the VM allocation so _mheap_free can release it. */
888 h->vm_alloc_offset_from_header = (void *) h - memory;
889 h->vm_alloc_size = memory_size;
894 /* Set flags based on those given less builtin-flags. */
895 h->flags |= (flags &~ MHEAP_FLAG_TRACE);
897 /* Unmap remainder of heap until we will be ready to use it. */
898 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
899 mheap_vm (v, MHEAP_VM_UNMAP | MHEAP_VM_ROUND_UP,
900 (clib_address_t) v, h->max_size);
902 /* Initialize free list heads to empty. */
903 memset (h->first_free_elt_uoffset_by_bin, ~0, sizeof (h->first_free_elt_uoffset_by_bin));
/* Convenience wrapper: build default flags (VM disabled for
   caller-supplied memory — NOTE(review): condition not visible in
   this gap-sampled source; small object cache when vector ops are
   available) and delegate to mheap_alloc_with_flags. */
908 void * mheap_alloc (void * memory, uword size)
913 flags |= MHEAP_FLAG_DISABLE_VM;
915 #ifdef CLIB_HAVE_VEC128
916 flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
919 return mheap_alloc_with_flags (memory, size, flags);
/* Destroy a heap: release the VM region recorded at allocation time
   (offset back from the header to the original allocation base). */
922 void * _mheap_free (void * v)
924 mheap_t * h = mheap_header (v);
927 clib_mem_vm_free ((void *) h - h->vm_alloc_offset_from_header, h->vm_alloc_size);
932 /* Call user's function with each object in heap. */
/* Iterates elements under the heap lock.  If v is the current CLIB
   heap, temporarily switches to a small stack-based heap so the
   callback's own allocations don't mutate the heap being walked.
   NOTE(review): gap-sampled; loop header and early-exit handling are
   only partially visible. */
933 void mheap_foreach (void * v,
934 uword (* func) (void * arg, void * v, void * elt_data, uword elt_size),
938 u8 * stack_heap, * clib_mem_mheap_save;
939 u8 tmp_heap_memory[16*1024];
941 mheap_maybe_lock (v);
943 if (vec_len (v) == 0)
946 clib_mem_mheap_save = 0;
949 /* Allocate a new temporary heap on the stack.
950 This is so that our hash table & user's callback function can
951 themselves allocate memory somewhere without getting in the way
952 of the heap we are looking at. */
953 if (v == clib_mem_get_heap ())
955 stack_heap = mheap_alloc (tmp_heap_memory, sizeof (tmp_heap_memory));
956 clib_mem_mheap_save = v;
957 clib_mem_set_heap (stack_heap);
/* Walk elements until the end-of-heap sentinel. */
961 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
962 e = mheap_next_elt (e))
964 void * p = mheap_elt_data (v, e);
/* Non-zero return from the callback stops the walk. */
967 if ((* func) (arg, v, p, mheap_elt_data_bytes (e)))
971 /* Restore main CLIB heap. */
972 if (clib_mem_mheap_save)
973 clib_mem_set_heap (clib_mem_mheap_save);
976 mheap_maybe_unlock (v);
979 /* Bytes in mheap header overhead not including data bytes. */
/* Header struct plus one element header per allocated object;
   0 for a NULL heap. */
981 mheap_bytes_overhead (void * v)
983 mheap_t * h = mheap_header (v);
984 return v ? sizeof (h[0]) + h->n_elts * sizeof (mheap_elt_t) : 0;
987 /* Total number of bytes including both data and overhead. */
988 uword mheap_bytes (void * v)
989 { return mheap_bytes_overhead (v) + vec_bytes (v); }
/* Fill *usage with heap statistics (used/free/unmapped byte totals,
   object counts) by walking all elements.  Caller must hold the heap
   lock; mheap_usage() is the locking wrapper.  NOTE(review):
   gap-sampled; the used/free accumulation statements are not
   visible. */
991 static void mheap_usage_no_lock (void * v, clib_mem_usage_t * usage)
993 mheap_t * h = mheap_header (v);
994 uword used = 0, free = 0, free_vm_unmapped = 0;
/* Walk to the end-of-heap sentinel. */
1001 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1002 e = mheap_next_elt (e))
1004 uword size = mheap_elt_data_bytes (e);
/* For free elements, count how much could be VM-unmapped
   (MHEAP_VM_NOMAP computes without mapping/unmapping). */
1008 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
1010 mheap_vm_elt (v, MHEAP_VM_NOMAP, mheap_elt_uoffset (v, e));
1017 usage->object_count = mheap_elts (v);
1018 usage->bytes_total = mheap_bytes (v);
1019 usage->bytes_overhead = mheap_bytes_overhead (v);
1020 usage->bytes_max = mheap_max_size (v);
1021 usage->bytes_used = used;
1022 usage->bytes_free = free;
1023 usage->bytes_free_reclaimed = free_vm_unmapped;
/* Public wrapper: compute heap usage under the heap lock. */
1026 void mheap_usage (void * v, clib_mem_usage_t * usage)
1028 mheap_maybe_lock (v);
1029 mheap_usage_no_lock (v, usage);
1030 mheap_maybe_unlock (v);
/* Format a byte count, switching to a "k" (kilobyte) suffix for
   larger values.  NOTE(review): the threshold test is not visible in
   this gap-sampled source. */
1033 static u8 * format_mheap_byte_count (u8 * s, va_list * va)
1035 uword n_bytes = va_arg (*va, uword);
1037 return format (s, "%wd", n_bytes);
1039 return format (s, "%wdk", n_bytes / 1024);
1042 /* Returns first corrupt heap element. */
/* Walks all elements checking that each element's size and free flag
   agree with the next element's back-pointers; returns the first
   mismatching element, or presumably 0 when the heap is clean
   (NOTE(review): gap-sampled; returns not visible). */
1043 static mheap_elt_t * mheap_first_corrupt (void * v)
1045 mheap_elt_t * e, * n;
1047 if (vec_len (v) == 0)
1053 if (e->n_user_data == MHEAP_N_USER_DATA_INVALID)
1056 n = mheap_next_elt (e);
/* Forward size must equal next element's recorded previous size. */
1058 if (e->n_user_data != n->prev_n_user_data)
1061 if (e->is_free != n->prev_is_free)
/* Format the heap's allocation statistics: small-object-cache hit
   rate, free-list search effectiveness, vector expansions, and
   average clocks per get/put. */
1070 static u8 * format_mheap_stats (u8 * s, va_list * va)
1072 mheap_t * h = va_arg (*va, mheap_t *);
1073 mheap_stats_t * st = &h->stats;
1074 uword indent = format_get_indent (s);
1076 s = format (s, "alloc. from small object cache: %Ld hits %Ld attempts (%.2f%%) replacements %d",
1077 st->n_small_object_cache_hits,
1078 st->n_small_object_cache_attempts,
1079 (st->n_small_object_cache_attempts != 0
1080 ? 100. * (f64) st->n_small_object_cache_hits / (f64) st->n_small_object_cache_attempts
1082 h->small_object_cache.replacement_index);
1084 s = format (s, "\n%Ualloc. from free-list: %Ld attempts, %Ld hits (%.2f%%), %Ld considered (per-attempt %.2f)",
1085 format_white_space, indent,
1086 st->free_list.n_search_attempts,
1087 st->free_list.n_objects_found,
1088 (st->free_list.n_search_attempts != 0
1089 ? 100. * (f64) st->free_list.n_objects_found / (f64) st->free_list.n_search_attempts
1091 st->free_list.n_objects_searched,
1092 (st->free_list.n_search_attempts != 0
1093 ? (f64) st->free_list.n_objects_searched / (f64) st->free_list.n_search_attempts
1096 s = format (s, "\n%Ualloc. from vector-expand: %Ld",
1097 format_white_space, indent,
1098 st->n_vector_expands);
1100 s = format (s, "\n%Uallocs: %Ld %.2f clocks/call",
1101 format_white_space, indent,
1103 (f64) st->n_clocks_get / (f64) st->n_gets);
1105 s = format (s, "\n%Ufrees: %Ld %.2f clocks/call",
1106 format_white_space, indent,
1108 (f64) st->n_clocks_put / (f64) st->n_puts);
/* Top-level heap formatter: summary line, then (verbose) a size
   histogram, allocation stats, sorted trace report, first-corrupt
   diagnostic, and an element offset dump.  NOTE(review): gap-sampled;
   many braces, loop headers and conditions are not visible. */
1113 u8 * format_mheap (u8 * s, va_list * va)
1115 void * v = va_arg (*va, u8 *);
1116 int verbose = va_arg (*va, int);
1119 uword i, size, indent;
1120 clib_mem_usage_t usage;
1121 mheap_elt_t * first_corrupt;
1123 mheap_maybe_lock (v);
1125 h = mheap_header (v);
1127 mheap_usage_no_lock (v, &usage);
1129 indent = format_get_indent (s);
1131 s = format (s, "%d objects, %U of %U used, %U free, %U reclaimed, %U overhead",
1133 format_mheap_byte_count, usage.bytes_used,
1134 format_mheap_byte_count, usage.bytes_total,
1135 format_mheap_byte_count, usage.bytes_free,
1136 format_mheap_byte_count, usage.bytes_free_reclaimed,
1137 format_mheap_byte_count, usage.bytes_overhead);
1139 if (usage.bytes_max != ~0)
1140 s = format (s, ", %U capacity", format_mheap_byte_count, usage.bytes_max);
1142 /* Show histogram of sizes. */
1145 uword hist[MHEAP_N_BINS];
1149 memset (hist, 0, sizeof (hist));
/* Count objects per size bin. */
1153 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1154 e = mheap_next_elt (e))
1156 uword n_user_data_bytes = mheap_elt_data_bytes (e);
1157 uword bin = user_data_size_to_bin_index (n_user_data_bytes);
1165 s = format (s, "\n%U%=12s%=12s%=16s",
1166 format_white_space, indent + 2,
1167 "Size", "Count", "Fraction");
1169 for (i = 0; i < ARRAY_LEN (hist); i++)
1173 s = format (s, "\n%U%12d%12wd%16.4f",
1174 format_white_space, indent + 2,
1175 MHEAP_MIN_USER_DATA_BYTES + i * MHEAP_USER_DATA_WORD_BYTES,
1177 (f64) hist[i] / (f64) n_hist);
1182 s = format (s, "\n%U%U",
1183 format_white_space, indent + 2,
1184 format_mheap_stats, h);
1186 if ((h->flags & MHEAP_FLAG_TRACE) && vec_len (h->trace_main.traces) > 0)
1188 /* Make a copy of traces since we'll be sorting them. */
1189 mheap_trace_t * t, * traces_copy;
1190 uword indent, total_objects_traced;
1192 traces_copy = vec_dup (h->trace_main.traces);
1193 qsort (traces_copy, vec_len (traces_copy), sizeof (traces_copy[0]),
1196 total_objects_traced = 0;
1197 s = format (s, "\n");
1198 vec_foreach (t, traces_copy) {
1199 /* Skip over free elements. */
1200 if (t->n_allocations == 0)
1203 total_objects_traced += t->n_allocations;
1205 /* When not verbose only report allocations of more than 1k. */
1206 if (! verbose && t->n_bytes < 1024)
/* Print the column header before the first row only. */
1209 if (t == traces_copy)
1210 s = format (s, "%=9s%=9s %=10s Traceback\n", "Bytes", "Count",
1212 s = format (s, "%9d%9d %p", t->n_bytes, t->n_allocations,
1214 indent = format_get_indent (s);
1215 for (i = 0; i < ARRAY_LEN (t->callers) && t->callers[i]; i++)
1218 s = format (s, "%U", format_white_space, indent);
/* Symbolize callers when ELF symbol support is compiled in. */
1220 s = format (s, " %U\n", format_clib_elf_symbol_with_address, t->callers[i]);
1222 s = format (s, " %p\n", t->callers[i]);
1227 s = format (s, "%d total traced objects\n", total_objects_traced);
1229 vec_free (traces_copy);
1232 first_corrupt = mheap_first_corrupt (v);
1235 size = mheap_elt_data_bytes (first_corrupt);
1236 s = format (s, "\n first corrupt object: %p, size %wd\n %U",
1239 format_hex_bytes, first_corrupt, size);
1242 /* FIXME. This output could be wrong in the unlikely case that format
1243 uses the same mheap as we are currently inspecting. */
1249 s = format (s, "\n");
1251 e = mheap_elt_at_uoffset (v, 0);
1256 s = format (s, "%8d: ", i);
1258 o = mheap_elt_uoffset (v, e);
/* Parenthesized offsets appear to mark free elements. */
1261 s = format (s, "(%8d) ", o);
1263 s = format (s, " %8d ", o);
1265 if ((i % 8) == 7 || (i + 1) >= h->n_elts)
1266 s = format (s, "\n");
1270 mheap_maybe_unlock (v);
1276 { fformat (stderr, "%U", format_mheap, v, 1); }
/* Empty-bodied hook: set a debugger breakpoint here to stop at the
   exact point a validation CHECK fails (called by the CHECK macro in
   mheap_validate before os_panic). */
1278 static void mheap_validate_breakpoint ()
/* Exhaustive consistency check of heap v: free-list/bitmap agreement,
   free-list linkage and bin membership, small-object-cache entries,
   and a full element walk verifying forward/backward agreement,
   coalescing, and element/size totals.  Panics via CHECK on any
   inconsistency.  NOTE(review): gap-sampled; braces, loop headers and
   some statements are not visible. */
1281 void mheap_validate (void * v)
1283 mheap_t * h = mheap_header (v);
1286 uword elt_count, elt_size;
1287 uword free_count_from_free_lists, free_size_from_free_lists;
1288 uword small_elt_free_count, small_elt_free_size;
/* CHECK: call the breakpoint hook then abort on failure. */
1290 #define CHECK(x) if (! (x)) { mheap_validate_breakpoint (); os_panic (); }
1292 if (vec_len (v) == 0)
1295 mheap_maybe_lock (v);
1297 /* Validate number of elements and size. */
1298 free_size_from_free_lists = free_count_from_free_lists = 0;
1299 for (i = 0; i < ARRAY_LEN (h->first_free_elt_uoffset_by_bin); i++)
1301 mheap_elt_t * e, * n;
/* Bitmap bit must mirror whether the bin head is non-empty. */
1304 CHECK ((h->first_free_elt_uoffset_by_bin[i] != ~0)
1305 == ((h->non_empty_free_elt_heads[i / BITS (uword)] & ((uword) 1 << (uword) (i % BITS (uword)))) != 0));
1307 if (h->first_free_elt_uoffset_by_bin[i] == ~0)
1310 e = mheap_elt_at_uoffset (v, h->first_free_elt_uoffset_by_bin[i]);
1316 n = mheap_next_elt (e);
1318 /* Object must be marked free. */
1321 /* Next object's previous free bit must also be set. */
1322 CHECK (n->prev_is_free);
/* The list head has no predecessor. */
1325 CHECK (e->free_elt.prev_uoffset == ~0);
/* Element's size must map back to this bin. */
1328 s = mheap_elt_data_bytes (e);
1329 CHECK (user_data_size_to_bin_index (s) == i);
1331 free_count_from_free_lists += 1;
1332 free_size_from_free_lists += s;
1334 if (e->free_elt.next_uoffset == ~0)
1337 n = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
1339 /* Check free element linkages. */
1340 CHECK (n->free_elt.prev_uoffset == mheap_elt_uoffset (v, e));
1346 /* Go through small object cache. */
1347 small_elt_free_count = small_elt_free_size = 0;
1348 for (i = 0; i < ARRAY_LEN (h->small_object_cache.bins.as_u8); i++)
1350 if (h->small_object_cache.bins.as_u8[i] != 0)
/* Cache bins are stored offset by one; 0 means empty slot. */
1353 uword b = h->small_object_cache.bins.as_u8[i] - 1;
1354 uword o = h->small_object_cache.offsets[i];
1357 e = mheap_elt_at_uoffset (v, o);
1359 /* Object must be allocated. */
1360 CHECK (! e->is_free);
1362 s = mheap_elt_data_bytes (e);
1363 CHECK (user_data_size_to_bin_index (s) == b);
1365 small_elt_free_count += 1;
1366 small_elt_free_size += s;
/* Full element walk: totals and pairwise consistency. */
1371 mheap_elt_t * e, * n;
1372 uword elt_free_size, elt_free_count;
1374 elt_count = elt_size = elt_free_size = elt_free_count = 0;
1376 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1379 if (e->prev_n_user_data != MHEAP_N_USER_DATA_INVALID)
1380 CHECK (e->prev_n_user_data * sizeof (e->user_data[0]) >= MHEAP_MIN_USER_DATA_BYTES);
1382 CHECK (e->n_user_data * sizeof (e->user_data[0]) >= MHEAP_MIN_USER_DATA_BYTES);
1384 n = mheap_next_elt (e);
1386 CHECK (e->is_free == n->prev_is_free);
1389 s = mheap_elt_data_bytes (e);
1398 /* Consecutive free objects should have been combined. */
1399 CHECK (! (e->prev_is_free && n->prev_is_free));
/* Cross-check walk totals against free-list and header totals. */
1402 CHECK (free_count_from_free_lists == elt_free_count);
1403 CHECK (free_size_from_free_lists == elt_free_size);
1404 CHECK (elt_count == h->n_elts + elt_free_count + small_elt_free_count);
1405 CHECK (elt_size + (elt_count + 1) * MHEAP_ELT_OVERHEAD_BYTES == vec_len (v));
/* Second walk checking header/back-pointer agreement. */
1409 mheap_elt_t * e, * n;
1412 e->n_user_data == MHEAP_N_USER_DATA_INVALID;
1415 n = mheap_next_elt (e);
1416 CHECK (e->n_user_data == n->prev_n_user_data);
1422 mheap_maybe_unlock (v);
/* Serial number lets callers see a validation pass completed. */
1424 h->validate_serial += 1;
/* Record an allocation (offset, size) against its call stack.
   Backtraces are interned in trace_by_callers; each unique stack gets
   a mheap_trace_t whose counters are bumped, and trace_index_by_offset
   maps live objects back to their trace.  Called with MHEAP_FLAG_TRACE
   temporarily cleared to avoid recursing on the traced heap.
   NOTE(review): gap-sampled; braces and some statements not visible. */
1427 static void mheap_get_trace (void * v, uword offset, uword size)
1430 mheap_trace_main_t * tm;
1432 uword i, n_callers, trace_index, * p;
1433 mheap_trace_t trace;
1435 /* Spurious Coverity warnings be gone. */
1436 memset(&trace, 0, sizeof(trace));
1438 n_callers = clib_backtrace (trace.callers, ARRAY_LEN (trace.callers),
1439 /* Skip mheap_get_aligned's frame */ 1);
/* Zero the tail so the callers array is a stable hash key. */
1443 for (i = n_callers; i < ARRAY_LEN (trace.callers); i++)
1444 trace.callers[i] = 0;
1446 h = mheap_header (v);
1447 tm = &h->trace_main;
/* Lazily create the callers -> trace-index hash. */
1449 if (! tm->trace_by_callers)
1450 tm->trace_by_callers = hash_create_mem (0, sizeof (trace.callers), sizeof (uword));
1452 p = hash_get_mem (tm->trace_by_callers, &trace.callers);
/* Known stack: reuse its trace record. */
1456 t = tm->traces + trace_index;
/* New stack: recycle a free trace slot if available... */
1460 i = vec_len (tm->trace_free_list);
1463 trace_index = tm->trace_free_list[i - 1];
1464 _vec_len (tm->trace_free_list) = i - 1;
/* ...otherwise grow the trace vector; if it moved, rewrite the
   hash keys that pointed into the old vector storage. */
1468 mheap_trace_t * old_start = tm->traces;
1469 mheap_trace_t * old_end = vec_end (tm->traces);
1471 vec_add2 (tm->traces, t, 1);
1473 if (tm->traces != old_start) {
1476 hash_foreach_pair (p, tm->trace_by_callers, ({
1477 q = uword_to_pointer (p->key, mheap_trace_t *);
1478 ASSERT (q >= old_start && q < old_end);
1479 p->key = pointer_to_uword (tm->traces + (q - old_start));
1482 trace_index = t - tm->traces;
1485 t = tm->traces + trace_index;
1487 t->n_allocations = 0;
1489 hash_set_mem (tm->trace_by_callers, t->callers, trace_index);
1492 t->n_allocations += 1;
1494 t->offset = offset; /* keep a sample to autopsy */
1495 hash_set (tm->trace_index_by_offset, offset, t - tm->traces);
/* Record a free against the trace that allocated `offset`: decrement
   its counters and, when it reaches zero allocations, retire the
   trace record to the free list.  No-op presumably when the offset
   was never traced (NOTE(review): the lookup-miss branch is not
   visible in this gap-sampled source). */
1498 static void mheap_put_trace (void * v, uword offset, uword size)
1501 mheap_trace_main_t * tm;
1503 uword trace_index, * p;
1505 h = mheap_header (v);
1506 tm = &h->trace_main;
1507 p = hash_get (tm->trace_index_by_offset, offset);
1512 hash_unset (tm->trace_index_by_offset, offset);
1513 ASSERT (trace_index < vec_len (tm->traces));
1515 t = tm->traces + trace_index;
1516 ASSERT (t->n_allocations > 0);
1517 ASSERT (t->n_bytes >= size);
1518 t->n_allocations -= 1;
/* Last allocation gone: recycle the trace slot. */
1520 if (t->n_allocations == 0)
1522 hash_unset_mem (tm->trace_by_callers, t->callers);
1523 vec_add1 (tm->trace_free_list, trace_index);
1524 memset (t, 0, sizeof (t[0]));
/* qsort comparator for trace reports: descending by bytes, then
   descending by allocation count.  NOTE(review): the final return is
   not visible in this gap-sampled source. */
1528 static int mheap_trace_sort (const void * _t1, const void * _t2)
1530 const mheap_trace_t * t1 = _t1;
1531 const mheap_trace_t * t2 = _t2;
1534 cmp = (word) t2->n_bytes - (word) t1->n_bytes;
1536 cmp = (word) t2->n_allocations - (word) t1->n_allocations;
/* Release all tracing state: trace records, the free-slot list, and
   both lookup hashes. */
1541 mheap_trace_main_free (mheap_trace_main_t * tm)
1543 vec_free (tm->traces);
1544 vec_free (tm->trace_free_list);
1545 hash_free (tm->trace_by_callers);
1546 hash_free (tm->trace_index_by_offset);
/* Enable or disable allocation tracing for heap v.  Disabling also
   frees all accumulated trace state. */
1549 void mheap_trace (void * v, int enable)
1553 h = mheap_header (v);
1557 h->flags |= MHEAP_FLAG_TRACE;
1561 mheap_trace_main_free (&h->trace_main);
1562 h->flags &= ~MHEAP_FLAG_TRACE;