2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
18 Permission is hereby granted, free of charge, to any person obtaining
19 a copy of this software and associated documentation files (the
20 "Software"), to deal in the Software without restriction, including
21 without limitation the rights to use, copy, modify, merge, publish,
22 distribute, sublicense, and/or sell copies of the Software, and to
23 permit persons to whom the Software is furnished to do so, subject to
24 the following conditions:
26 The above copyright notice and this permission notice shall be
27 included in all copies or substantial portions of the Software.
29 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 #include <vppinfra/bitops.h>
39 #include <vppinfra/hash.h>
40 #include <vppinfra/format.h>
41 #include <vppinfra/mheap.h>
42 #include <vppinfra/os.h>
43 #include <vppinfra/time.h>
46 #include <vppinfra/elf_clib.h>
/* Forward declarations for the allocation-tracing helpers defined at the
   bottom of this file (used by mheap_get_aligned / mheap_put when
   MHEAP_FLAG_TRACE is set). */
49 static void mheap_get_trace (void * v, uword offset, uword size);
50 static void mheap_put_trace (void * v, uword offset, uword size);
51 static int mheap_trace_sort (const void * t1, const void * t2);
/* Acquire the heap spin lock if the heap was created MHEAP_FLAG_THREAD_SAFE.
   Re-acquisition by the CPU that already owns the lock is tracked through
   owner_cpu / recursion_count (the recursive-entry branch body is elided in
   this listing — original line numbers skip; verify against upstream).
   No-op for a NULL heap or a heap without the thread-safe flag. */
53 always_inline void mheap_maybe_lock (void * v)
55 mheap_t * h = mheap_header (v);
56 if (v && (h->flags & MHEAP_FLAG_THREAD_SAFE))
58 u32 my_cpu = os_get_cpu_number();
59 if (h->owner_cpu == my_cpu)
/* Spin until the lock word is acquired (test-and-set returns prior value). */
65 while (__sync_lock_test_and_set (&h->lock, 1))
68 h->owner_cpu = my_cpu;
69 h->recursion_count = 1;
/* Release the heap spin lock taken by mheap_maybe_lock.  Only the owning
   CPU may unlock; the lock is actually dropped when the recursion count
   reaches zero (the store clearing h->lock is elided in this listing). */
73 always_inline void mheap_maybe_unlock (void * v)
75 mheap_t * h = mheap_header (v);
76 if (v && h->flags & MHEAP_FLAG_THREAD_SAFE)
78 ASSERT(os_get_cpu_number() == h->owner_cpu);
79 if (--h->recursion_count == 0)
/* Ensure all prior writes are visible before the lock is released. */
82 CLIB_MEMORY_BARRIER();
88 /* Find bin for objects with size at least n_user_data_bytes. */
/* Small sizes map linearly to one bin per data word; sizes past the small
   range map logarithmically (one bin per power of two). */
90 user_data_size_to_bin_index (uword n_user_data_bytes)
92 uword n_user_data_words;
93 word small_bin, large_bin;
95 /* User size must be at least big enough to hold free elt. */
96 n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
/* Round up to a whole number of user-data words. */
99 n_user_data_words = (round_pow2 (n_user_data_bytes, MHEAP_USER_DATA_WORD_BYTES)
100 / MHEAP_USER_DATA_WORD_BYTES);
102 ASSERT (n_user_data_words > 0);
103 small_bin = n_user_data_words - (MHEAP_MIN_USER_DATA_BYTES / MHEAP_USER_DATA_WORD_BYTES);
104 ASSERT (small_bin >= 0);
106 large_bin = MHEAP_N_SMALL_OBJECT_BINS + max_log2 (n_user_data_bytes) - MHEAP_LOG2_N_SMALL_OBJECT_BINS;
108 return small_bin < MHEAP_N_SMALL_OBJECT_BINS ? small_bin : large_bin;
/* Convert total element size (header + data) to user-data byte count by
   subtracting the element-header overhead.  Return-type line is elided in
   this listing. */
112 mheap_elt_size_to_user_n_bytes (uword n_bytes)
114 ASSERT (n_bytes >= sizeof (mheap_elt_t));
115 return (n_bytes - STRUCT_OFFSET_OF (mheap_elt_t, user_data));
/* As mheap_elt_size_to_user_n_bytes but expressed in user-data words;
   input must be word-aligned.  Marked unused to silence warnings when no
   caller exists in a given build. */
118 always_inline uword __attribute__((unused))
119 mheap_elt_size_to_user_n_words (uword n_bytes)
121 ASSERT (n_bytes % MHEAP_USER_DATA_WORD_BYTES == 0);
122 return mheap_elt_size_to_user_n_bytes (n_bytes) / MHEAP_USER_DATA_WORD_BYTES;
/* Set an element's size and free/allocated state at heap offset `uoffset`,
   and mirror both into the *next* element's prev_* fields so forward and
   backward walks agree.  (The `uoffset` and `is_free` parameter lines are
   elided in this listing.) */
126 mheap_elt_set_size (void * v,
128 uword n_user_data_bytes,
131 mheap_elt_t * e, * n;
133 e = mheap_elt_at_uoffset (v, uoffset);
135 ASSERT (n_user_data_bytes % MHEAP_USER_DATA_WORD_BYTES == 0);
136 e->n_user_data = n_user_data_bytes / MHEAP_USER_DATA_WORD_BYTES;
137 e->is_free = is_free;
138 ASSERT (e->prev_n_user_data * sizeof (e->user_data[0]) >= MHEAP_MIN_USER_DATA_BYTES);
/* Keep the doubly-linked size/free bookkeeping consistent. */
140 n = mheap_next_elt (e);
141 n->prev_n_user_data = e->n_user_data;
142 n->prev_is_free = is_free;
/* Set the head of free-list `bin` to `uoffset` (~0 means empty) and update
   the corresponding bit in the non-empty-bin bitmap so searches can skip
   empty bins. */
145 always_inline void set_first_free_elt_offset (mheap_t * h, uword bin, uword uoffset)
149 h->first_free_elt_uoffset_by_bin[bin] = uoffset;
/* i0/i1: word index and bit mask of this bin within the bitmap. */
151 i0 = bin / BITS (h->non_empty_free_elt_heads[0]);
152 i1 = (uword) 1 << (uword) (bin % BITS (h->non_empty_free_elt_heads[0]));
154 ASSERT (i0 < ARRAY_LEN (h->non_empty_free_elt_heads));
155 if (h->first_free_elt_uoffset_by_bin[bin] == ~0)
156 h->non_empty_free_elt_heads[i0] &= ~i1;
158 h->non_empty_free_elt_heads[i0] |= i1;
/* Insert the element at `uoffset` at the head of the free list for the bin
   matching `n_user_data_bytes`.  Element must already be marked free
   (see new_free_elt). */
162 set_free_elt (void * v, uword uoffset, uword n_user_data_bytes)
164 mheap_t * h = mheap_header (v);
165 mheap_elt_t * e = mheap_elt_at_uoffset (v, uoffset);
166 mheap_elt_t * n = mheap_next_elt (e);
167 uword bin = user_data_size_to_bin_index (n_user_data_bytes);
169 ASSERT (n->prev_is_free);
/* Link at head: no predecessor; old head becomes successor. */
172 e->free_elt.prev_uoffset = ~0;
173 e->free_elt.next_uoffset = h->first_free_elt_uoffset_by_bin[bin];
175 /* Fill in next free elt's previous pointer. */
176 if (e->free_elt.next_uoffset != ~0)
178 mheap_elt_t * nf = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
179 ASSERT (nf->is_free);
180 nf->free_elt.prev_uoffset = uoffset;
183 set_first_free_elt_offset (h, bin, uoffset);
/* Create a free element of the given size at `uoffset` and push it onto the
   appropriate free list.  (Return-type line elided in this listing.) */
187 new_free_elt (void * v, uword uoffset, uword n_user_data_bytes)
189 mheap_elt_set_size (v, uoffset, n_user_data_bytes, /* is_free */ 1);
190 set_free_elt (v, uoffset, n_user_data_bytes);
/* Unlink element `e` from the doubly-linked free list of `bin`, fixing up
   neighbor pointers and, when `e` was the list head, the bin head/bitmap.
   (The branch structure around the head-update is elided in this listing.) */
194 remove_free_elt (void * v, mheap_elt_t * e, uword bin)
196 mheap_t * h = mheap_header (v);
197 mheap_elt_t * p, * n;
/* no/po: offsets of next/previous free elements (~0 means none). */
200 no = e->free_elt.next_uoffset;
201 n = no != ~0 ? mheap_elt_at_uoffset (v, no) : 0;
202 po = e->free_elt.prev_uoffset;
203 p = po != ~0 ? mheap_elt_at_uoffset (v, po) : 0;
206 set_first_free_elt_offset (h, bin, no);
208 p->free_elt.next_uoffset = no;
211 n->free_elt.prev_uoffset = po;
/* Convenience wrapper: derive the bin from the element's own size, then
   unlink it from that free list. */
215 remove_free_elt2 (void * v, mheap_elt_t * e)
218 bin = user_data_size_to_bin_index (mheap_elt_data_bytes (e));
219 remove_free_elt (v, e, bin);
/* Flag bits for mheap_vm()/mheap_vm_elt(): map or unmap the pages covering
   a byte range, and whether to round the range's end up or down to a page
   boundary. */
222 #define MHEAP_VM_MAP (1 << 0)
223 #define MHEAP_VM_UNMAP (1 << 1)
224 #define MHEAP_VM_NOMAP (0 << 1)
225 #define MHEAP_VM_ROUND (1 << 2)
226 #define MHEAP_VM_ROUND_UP MHEAP_VM_ROUND
227 #define MHEAP_VM_ROUND_DOWN (0 << 2)
/* System page size, cached on first mheap_alloc_with_flags() call. */
229 static uword mheap_page_size;
231 static_always_inline uword mheap_page_round (uword addr)
232 { return (addr + mheap_page_size - 1) &~ (mheap_page_size - 1); }
234 static_always_inline uword mheap_page_truncate (uword addr)
235 { return addr &~ (mheap_page_size - 1); }
/* Map or unmap (per `flags`) the whole pages covered by [start_addr,
   start_addr + size).  Returns the number of bytes mapped/unmapped.
   NOTE(review): the signature line carrying the function name and the
   `flags`/`size` parameters is elided in this listing — verify against
   upstream vppinfra/mheap.c. */
237 static_always_inline uword
240 clib_address_t start_addr,
243 mheap_t * h = mheap_header (v);
244 clib_address_t start_page, end_page, end_addr;
247 ASSERT (! (h->flags & MHEAP_FLAG_DISABLE_VM));
249 end_addr = start_addr + size;
251 /* Round start/end address up to page boundary. */
252 start_page = mheap_page_round (start_addr);
254 if ((flags & MHEAP_VM_ROUND) == MHEAP_VM_ROUND_UP)
255 end_page = mheap_page_round (end_addr);
257 end_page = mheap_page_truncate (end_addr);
260 if (end_page > start_page)
262 mapped_bytes = end_page - start_page;
263 if (flags & MHEAP_VM_MAP)
264 clib_mem_vm_map ((void *) start_page, end_page - start_page);
265 else if (flags & MHEAP_VM_UNMAP)
266 clib_mem_vm_unmap ((void *) start_page, end_page - start_page);
/* Apply mheap_vm() to the user-data region of the element at `offset`
   (from the start of its user data to the start of the next element). */
272 static_always_inline uword
273 mheap_vm_elt (void * v, uword flags, uword offset)
276 clib_address_t start_addr, end_addr;
278 e = mheap_elt_at_uoffset (v, offset);
279 start_addr = (clib_address_t) ((void *) e->user_data);
280 end_addr = (clib_address_t) mheap_next_elt (e);
281 return mheap_vm (v, flags, start_addr, end_addr - start_addr);
/* Return a bitmask with one bit set per small-object-cache slot whose bin
   byte equals `bin` (SSE byte-compare across the 32/64 cache slots).  The
   non-vector fallback body and the return are elided in this listing. */
285 mheap_small_object_cache_mask (mheap_small_object_cache_t * c, uword bin)
289 /* $$$$ ELIOT FIXME: add Altivec version of this routine */
290 #if !defined (CLIB_HAVE_VEC128) || defined (__ALTIVEC__)
293 u8x16 b = u8x16_splat (bin);
/* Compare 16 cache bin bytes at a time; shift each 16-bit byte-mask into
   its position within the result word. */
297 #define _(i) ((uword) u8x16_compare_byte_mask (u8x16_is_equal (b, c->bins.as_u8x16[i])) << (uword) ((i)*16))
298 mask = _ (0) | _ (1);
299 if (BITS (uword) > 32)
300 mask |= _ (2) | _ (3);
/* Try to pop a cached free object of bin `bin` from the small-object cache.
   Cache stores bin+1 so that 0 means "slot empty".  Presumably returns the
   cached offset on hit and ~0 on miss — the return paths are elided in this
   listing; verify against upstream. */
308 mheap_get_small_object (mheap_t * h, uword bin)
310 mheap_small_object_cache_t * c = &h->small_object_cache;
311 uword mask = mheap_small_object_cache_mask (c, bin + 1);
316 uword i = min_log2 (mask);
317 uword o = c->offsets[i];
/* Mark the slot empty again. */
319 c->bins.as_u8[i] = 0;
/* Stash a freed small object (bin, offset) in the small-object cache.
   If a free slot exists, use it; otherwise cyclically evict one slot and
   hand the evicted offset back to the caller to be freed for real.
   (Branch structure and returns are elided in this listing.) */
327 mheap_put_small_object (mheap_t * h, uword bin, uword offset)
329 mheap_small_object_cache_t * c = &h->small_object_cache;
/* Mask of empty slots (bin byte 0 means empty). */
330 uword free_mask = mheap_small_object_cache_mask (c, 0);
336 i = min_log2 (free_mask);
337 c->bins.as_u8[i] = b;
338 c->offsets[i] = offset;
342 /* Nothing free with right size: cyclic replacement. */
346 i = c->replacement_index++;
348 c->bins.as_u8[i] = b;
349 old_offset = c->offsets[i];
350 c->offsets[i] = offset;
352 /* Return old offset so it can be freed. */
/* Walk free-list `bin` looking for an element large enough to satisfy
   *n_user_data_bytes_arg at the requested alignment/align-offset; carve the
   allocation out of the free block (leaving lo/hi free fragments when big
   enough), map any needed pages, and update *n_user_data_bytes_arg with the
   size actually given.  Several lines (return type, `bin`/`align` params,
   loop braces, returns) are elided in this listing. */
358 mheap_get_search_free_bin (void * v,
360 uword * n_user_data_bytes_arg,
364 mheap_t * h = mheap_header (v);
367 /* Free object is at offset f0 ... f1;
368 Allocated object is at offset o0 ... o1. */
369 word o0, o1, f0, f1, search_n_user_data_bytes;
370 word lo_free_usize, hi_free_usize;
372 ASSERT (h->first_free_elt_uoffset_by_bin[bin] != ~0);
373 e = mheap_elt_at_uoffset (v, h->first_free_elt_uoffset_by_bin[bin]);
375 search_n_user_data_bytes = *n_user_data_bytes_arg;
377 /* Silence compiler warning. */
378 o0 = o1 = f0 = f1 = 0;
380 h->stats.free_list.n_search_attempts += 1;
382 /* Find an object that is large enough with correct alignment at given alignment offset. */
385 uword this_object_n_user_data_bytes = mheap_elt_data_bytes (e);
/* Small bins hold exactly one size, so the first element must fit. */
388 if (bin < MHEAP_N_SMALL_OBJECT_BINS)
389 ASSERT (this_object_n_user_data_bytes >= search_n_user_data_bytes);
391 h->stats.free_list.n_objects_searched += 1;
393 if (this_object_n_user_data_bytes < search_n_user_data_bytes)
396 /* Bounds of free object: from f0 to f1. */
397 f0 = ((void *) e->user_data - v);
398 f1 = f0 + this_object_n_user_data_bytes;
400 /* Place candidate object at end of free block and align as requested. */
401 o0 = ((f1 - search_n_user_data_bytes) &~ (align - 1)) - align_offset;
405 /* Make sure that first free fragment is either empty or
406 large enough to be valid. */
409 lo_free_usize = o0 != f0 ? o0 - f0 - MHEAP_ELT_OVERHEAD_BYTES : 0;
410 if (o0 <= f0 || lo_free_usize >= (word) MHEAP_MIN_USER_DATA_BYTES)
415 o1 = o0 + search_n_user_data_bytes;
/* Found: candidate fits entirely within the free block. */
418 if (o0 >= f0 && o1 <= f1)
422 /* Reached end of free list without finding large enough object. */
423 if (e->free_elt.next_uoffset == ~0)
426 /* Otherwise keep searching for large enough object. */
427 e = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
431 /* Free fragment at end. */
432 hi_free_usize = f1 != o1 ? f1 - o1 - MHEAP_ELT_OVERHEAD_BYTES : 0;
434 /* If fragment at end is too small to be a new object,
435 give user's object a bit more space than requested. */
436 if (hi_free_usize < (word) MHEAP_MIN_USER_DATA_BYTES)
438 search_n_user_data_bytes += f1 - o1;
443 /* Need to make sure that relevant memory areas are mapped. */
444 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
446 mheap_elt_t * f0_elt = mheap_elt_at_uoffset (v, f0);
447 mheap_elt_t * f1_elt = mheap_elt_at_uoffset (v, f1);
448 mheap_elt_t * o0_elt = mheap_elt_at_uoffset (v, o0);
449 mheap_elt_t * o1_elt = mheap_elt_at_uoffset (v, o1);
451 uword f0_page_start, f0_page_end;
452 uword o0_page_start, o0_page_end;
454 /* Free elt is mapped. Addresses after that may not be mapped. */
455 f0_page_start = mheap_page_round (pointer_to_uword (f0_elt->user_data));
456 f0_page_end = mheap_page_truncate (pointer_to_uword (f1_elt));
458 o0_page_start = mheap_page_truncate (pointer_to_uword (o0_elt));
459 o0_page_end = mheap_page_round (pointer_to_uword (o1_elt->user_data));
/* Clamp the range to map to what the free block doesn't already cover. */
461 if (o0_page_start < f0_page_start)
462 o0_page_start = f0_page_start;
463 if (o0_page_end > f0_page_end)
464 o0_page_end = f0_page_end;
466 if (o0_page_end > o0_page_start)
467 clib_mem_vm_map (uword_to_pointer (o0_page_start, void *),
468 o0_page_end - o0_page_start);
471 /* Remove free object from free list. */
472 remove_free_elt (v, e, bin);
474 /* Free fragment at beginning. */
475 if (lo_free_usize > 0)
477 ASSERT (lo_free_usize >= (word) MHEAP_MIN_USER_DATA_BYTES);
478 mheap_elt_set_size (v, f0, lo_free_usize, /* is_free */ 1);
479 new_free_elt (v, f0, lo_free_usize);
482 mheap_elt_set_size (v, o0, search_n_user_data_bytes, /* is_free */ 0);
484 if (hi_free_usize > 0)
486 uword uo = o1 + MHEAP_ELT_OVERHEAD_BYTES;
487 mheap_elt_set_size (v, uo, hi_free_usize, /* is_free */ 1);
488 new_free_elt (v, uo, hi_free_usize);
491 /* Return actual size of block. */
492 *n_user_data_bytes_arg = search_n_user_data_bytes;
494 h->stats.free_list.n_objects_found += 1;
499 /* Search free lists for object with given size and alignment. */
/* Try the small-object cache first (only for default alignment), then scan
   the non-empty-bin bitmap from the requested bin upward, delegating each
   candidate bin to mheap_get_search_free_bin.  Return paths and the final
   "not found" return are elided in this listing. */
501 mheap_get_search_free_list (void * v,
502 uword * n_user_bytes_arg,
506 mheap_t * h = mheap_header (v);
507 uword bin, n_user_bytes, i, bi;
509 n_user_bytes = *n_user_bytes_arg;
510 bin = user_data_size_to_bin_index (n_user_bytes);
512 if (MHEAP_HAVE_SMALL_OBJECT_CACHE
513 && (h->flags & MHEAP_FLAG_SMALL_OBJECT_CACHE)
515 && align == STRUCT_SIZE_OF (mheap_elt_t, user_data[0])
516 && align_offset == 0)
518 uword r = mheap_get_small_object (h, bin);
519 h->stats.n_small_object_cache_attempts += 1;
522 h->stats.n_small_object_cache_hits += 1;
527 for (i = bin / BITS (uword); i < ARRAY_LEN (h->non_empty_free_elt_heads); i++)
529 uword non_empty_bin_mask = h->non_empty_free_elt_heads[i];
531 /* No need to search smaller bins. */
532 if (i == bin / BITS (uword))
533 non_empty_bin_mask &= ~pow2_mask (bin % BITS (uword));
535 /* Search each occupied free bin which is large enough. */
536 foreach_set_bit (bi, non_empty_bin_mask, ({
537 uword r = mheap_get_search_free_bin (v, bi + i * BITS (uword), n_user_bytes_arg, align, align_offset);
/* Allocate by growing the heap vector: place the new object at the current
   end of the heap, inserting a small free element when alignment forces a
   gap, map the newly used pages, and mark the new end-of-heap sentinel.
   Writes the object's offset to *offset_return.  Several lines (the `align`
   and `align_offset` params, f0 initialization, vector-resize call, failure
   return) are elided in this listing — verify against upstream. */
546 static never_inline void *
547 mheap_get_extend_vector (void * v,
548 uword n_user_data_bytes,
551 uword * offset_return)
553 /* Bounds of free and allocated objects (as above). */
554 uword f0, f1, o0, o1;
556 mheap_t * h = mheap_header (v);
559 if (_vec_len (v) == 0)
561 _vec_len (v) = MHEAP_ELT_OVERHEAD_BYTES;
563 /* Create first element of heap. */
564 e = mheap_elt_at_uoffset (v, _vec_len (v));
565 e->prev_n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Candidate object start, aligned as requested. */
570 o0 = round_pow2 (f0, align) - align_offset;
573 free_size = o0 - f0 - MHEAP_ELT_OVERHEAD_BYTES;
574 if (o0 == f0 || free_size >= (word) sizeof (mheap_elt_t))
580 o1 = o0 + n_user_data_bytes;
581 f1 = o1 + MHEAP_ELT_OVERHEAD_BYTES;
/* Re-fetch header: the vector may have been reallocated/moved. */
584 h = mheap_header (v);
586 /* Make sure we have space for object plus overhead. */
587 if (f1 > h->max_size)
595 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
597 mheap_elt_t * f0_elt = mheap_elt_at_uoffset (v, f0);
598 mheap_elt_t * f1_elt = mheap_elt_at_uoffset (v, f1);
600 uword f0_page = mheap_page_round (pointer_to_uword (f0_elt->user_data));
601 uword f1_page = mheap_page_round (pointer_to_uword (f1_elt->user_data));
603 if (f1_page > f0_page)
604 mheap_vm (v, MHEAP_VM_MAP, f0_page, f1_page - f0_page);
/* Alignment gap (if any) becomes a proper free element. */
608 new_free_elt (v, f0, free_size);
610 mheap_elt_set_size (v, o0, n_user_data_bytes, /* is_free */ 0);
612 /* Mark last element. */
613 e = mheap_elt_at_uoffset (v, f1);
614 e->n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Public allocation entry point: allocate n_user_data_bytes from heap `v`
   with the given alignment/align-offset, writing the object's heap offset
   to *offset_return (~0 on failure) and returning the (possibly moved)
   heap vector.  Locks the heap, tries the free lists, then extends the
   vector; optionally validates and records an allocation trace.  Local
   declarations, the align/align_offset params, and the final return are
   elided in this listing. */
621 void * mheap_get_aligned (void * v,
622 uword n_user_data_bytes,
625 uword * offset_return)
631 cpu_times[0] = clib_cpu_time_now ();
633 align = clib_max (align, STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
634 align = max_pow2 (align);
636 /* Correct align offset to be smaller than alignment. */
637 align_offset &= (align - 1);
639 /* Align offset must be multiple of minimum object size. */
640 if (align_offset % STRUCT_SIZE_OF (mheap_elt_t, user_data[0]) != 0)
646 /* Round requested size. */
647 n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
648 n_user_data_bytes = round_pow2 (n_user_data_bytes, STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
/* No heap yet: create a default 64MB one. */
651 v = mheap_alloc (0, 64 << 20);
653 mheap_maybe_lock (v);
655 h = mheap_header (v);
657 if (h->flags & MHEAP_FLAG_VALIDATE)
660 /* First search free lists for object. */
661 offset = mheap_get_search_free_list (v, &n_user_data_bytes, align, align_offset);
663 h = mheap_header (v);
665 /* If that fails allocate object at end of heap by extending vector. */
666 if (offset == ~0 && _vec_len (v) < h->max_size)
668 v = mheap_get_extend_vector (v, n_user_data_bytes, align, align_offset, &offset);
669 h = mheap_header (v);
670 h->stats.n_vector_expands += offset != ~0;
673 *offset_return = offset;
678 if (h->flags & MHEAP_FLAG_TRACE)
680 /* Recursion block for case when we are tracing main clib heap. */
681 h->flags &= ~MHEAP_FLAG_TRACE;
683 mheap_get_trace (v, offset, n_user_data_bytes);
685 h->flags |= MHEAP_FLAG_TRACE;
689 if (h->flags & MHEAP_FLAG_VALIDATE)
692 mheap_maybe_unlock (v);
694 cpu_times[1] = clib_cpu_time_now ();
695 h->stats.n_clocks_get += cpu_times[1] - cpu_times[0];
696 h->stats.n_gets += 1;
/* Free the last element of the heap: coalesce with a preceding free
   element, shrink the heap vector (either to empty if nothing remains, or
   to the new end — the _vec_len updates are elided in this listing), and
   unmap the pages no longer in use. */
701 static void free_last_elt (void * v, mheap_elt_t * e)
703 mheap_t * h = mheap_header (v);
705 /* Possibly delete preceding free element also. */
708 e = mheap_prev_elt (e);
709 remove_free_elt2 (v, e);
/* Heap is now entirely empty. */
712 if (e->prev_n_user_data == MHEAP_N_USER_DATA_INVALID)
714 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
715 mheap_vm_elt (v, MHEAP_VM_UNMAP, mheap_elt_uoffset (v, e));
720 uword uo = mheap_elt_uoffset (v, e);
721 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
722 mheap_vm_elt (v, MHEAP_VM_UNMAP, uo);
/* This element becomes the new end-of-heap sentinel. */
723 e->n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Free the object at heap offset `uoffset`: optionally divert it to the
   small-object cache, detect double frees and pointer corruption, coalesce
   with free neighbors, and unmap freed pages.  Several lines (braces, the
   early-out when the small-object cache absorbs the object, f0 init, and
   error returns) are elided in this listing. */
728 void mheap_put (void * v, uword uoffset)
731 uword n_user_data_bytes, bin;
732 mheap_elt_t * e, * n;
733 uword trace_uoffset, trace_n_user_data_bytes;
736 cpu_times[0] = clib_cpu_time_now ();
738 h = mheap_header (v);
740 mheap_maybe_lock (v);
742 if (h->flags & MHEAP_FLAG_VALIDATE)
745 ASSERT (h->n_elts > 0);
747 h->stats.n_puts += 1;
749 e = mheap_elt_at_uoffset (v, uoffset);
750 n = mheap_next_elt (e);
751 n_user_data_bytes = mheap_elt_data_bytes (e);
/* Remember the original object for tracing before the cache may swap it. */
753 trace_uoffset = uoffset;
754 trace_n_user_data_bytes = n_user_data_bytes;
756 bin = user_data_size_to_bin_index (n_user_data_bytes);
757 if (MHEAP_HAVE_SMALL_OBJECT_CACHE
759 && (h->flags & MHEAP_FLAG_SMALL_OBJECT_CACHE))
/* The cache may return a different (evicted) offset to actually free. */
761 uoffset = mheap_put_small_object (h, bin, uoffset);
765 e = mheap_elt_at_uoffset (v, uoffset);
766 n = mheap_next_elt (e);
767 n_user_data_bytes = mheap_elt_data_bytes (e);
770 /* Assert that forward and back pointers are equal. */
771 if (e->n_user_data != n->prev_n_user_data)
774 /* Forward and backwards is_free must agree. */
775 if (e->is_free != n->prev_is_free)
778 /* Object was already freed. */
782 /* Special case: delete last element in heap. */
783 if (n->n_user_data == MHEAP_N_USER_DATA_INVALID)
784 free_last_elt (v, e);
788 uword f0, f1, n_combine;
791 f1 = f0 + n_user_data_bytes;
/* Coalesce with a free predecessor. */
796 mheap_elt_t * p = mheap_prev_elt (e);
797 f0 = mheap_elt_uoffset (v, p);
798 remove_free_elt2 (v, p);
/* Coalesce with a free successor. */
804 mheap_elt_t * m = mheap_next_elt (n);
806 remove_free_elt2 (v, n);
811 mheap_elt_set_size (v, f0, f1 - f0, /* is_free */ 1);
813 e->is_free = n->prev_is_free = 1;
814 set_free_elt (v, f0, f1 - f0);
816 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
817 mheap_vm_elt (v, MHEAP_VM_UNMAP, f0);
821 h = mheap_header (v);
823 if (h->flags & MHEAP_FLAG_TRACE)
825 /* Recursion block for case when we are tracing main clib heap. */
826 h->flags &= ~MHEAP_FLAG_TRACE;
828 mheap_put_trace (v, trace_uoffset, trace_n_user_data_bytes);
830 h->flags |= MHEAP_FLAG_TRACE;
833 if (h->flags & MHEAP_FLAG_VALIDATE)
836 mheap_maybe_unlock (v);
838 cpu_times[1] = clib_cpu_time_now ();
839 h->stats.n_clocks_put += cpu_times[1] - cpu_times[0];
/* Create a new mheap in `memory` (or VM-allocate a region when memory is
   NULL), page-align the header and vector, initialize header fields and
   empty free lists, and return the heap vector.  Local declarations, the
   max_size assignment, alignment branch structure, and the final return are
   elided in this listing — verify against upstream. */
842 void * mheap_alloc_with_flags (void * memory, uword memory_size, uword flags)
/* Lazily discover the system page size on first use. */
848 if (! mheap_page_size)
849 mheap_page_size = clib_mem_get_page_size ();
853 /* No memory given, try to VM allocate some. */
854 memory = clib_mem_vm_alloc (memory_size);
858 /* No memory region implies we have virtual memory. */
859 flags &= ~MHEAP_FLAG_DISABLE_VM;
862 /* Make sure that given memory is page aligned. */
866 am = pointer_to_uword (memory);
867 av = mheap_page_round (am);
868 v = uword_to_pointer (av, void *);
869 h = mheap_header (v);
870 ah = pointer_to_uword (h);
/* Header would start before the region: shift everything up one page. */
872 ah += mheap_page_size;
874 h = uword_to_pointer (ah, void *);
875 v = mheap_vector (h);
877 if (PREDICT_FALSE(memory + memory_size < v)) {
879 * This will happen when the requested memory_size is too
880 * small to cope with the heap header and/or memory alignment.
882 clib_mem_vm_free(memory, memory_size);
886 size = memory + memory_size - v;
889 /* VM map header so we can use memory. */
890 if (! (flags & MHEAP_FLAG_DISABLE_VM))
891 clib_mem_vm_map (h, sizeof (h[0]));
893 /* Zero vector header: both heap header and vector length. */
894 memset (h, 0, sizeof (h[0]));
/* Remember the original allocation so _mheap_free can release it. */
897 h->vm_alloc_offset_from_header = (void *) h - memory;
898 h->vm_alloc_size = memory_size;
903 /* Set flags based on those given less builtin-flags. */
904 h->flags |= (flags &~ MHEAP_FLAG_TRACE);
906 /* Unmap remainder of heap until we will be ready to use it. */
907 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
908 mheap_vm (v, MHEAP_VM_UNMAP | MHEAP_VM_ROUND_UP,
909 (clib_address_t) v, h->max_size);
911 /* Initialize free list heads to empty. */
912 memset (h->first_free_elt_uoffset_by_bin, ~0, sizeof (h->first_free_elt_uoffset_by_bin));
/* Convenience constructor: build flag set (VM disabled when caller supplies
   its own memory — the `memory != 0` condition line is elided in this
   listing) and delegate to mheap_alloc_with_flags. */
917 void * mheap_alloc (void * memory, uword size)
922 flags |= MHEAP_FLAG_DISABLE_VM;
/* Small-object cache needs 128-bit vector ops. */
924 #ifdef CLIB_HAVE_VEC128
925 flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
928 return mheap_alloc_with_flags (memory, size, flags);
/* Destroy a heap, releasing the VM region recorded at creation time
   (vm_alloc_offset_from_header / vm_alloc_size).  The NULL-heap guard and
   return are elided in this listing. */
931 void * _mheap_free (void * v)
933 mheap_t * h = mheap_header (v);
936 clib_mem_vm_free ((void *) h - h->vm_alloc_offset_from_header, h->vm_alloc_size);
941 /* Call user's function with each object in heap. */
/* Iterates all elements, invoking func(arg, v, elt_data, elt_size) for each
   and stopping early on a non-zero return.  While iterating the heap that
   is the current clib heap, a temporary stack heap is installed so callback
   allocations don't mutate the heap being walked.  The `arg` parameter, the
   loop header's initializer, and some braces are elided in this listing. */
942 void mheap_foreach (void * v,
943 uword (* func) (void * arg, void * v, void * elt_data, uword elt_size),
947 u8 * stack_heap, * clib_mem_mheap_save;
948 u8 tmp_heap_memory[16*1024];
950 mheap_maybe_lock (v);
952 if (vec_len (v) == 0)
955 clib_mem_mheap_save = 0;
958 /* Allocate a new temporary heap on the stack.
959 This is so that our hash table & user's callback function can
960 themselves allocate memory somewhere without getting in the way
961 of the heap we are looking at. */
962 if (v == clib_mem_get_heap ())
964 stack_heap = mheap_alloc (tmp_heap_memory, sizeof (tmp_heap_memory));
965 clib_mem_mheap_save = v;
966 clib_mem_set_heap (stack_heap);
970 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
971 e = mheap_next_elt (e))
973 void * p = mheap_elt_data (v, e);
/* Non-zero return from the callback stops the walk. */
976 if ((* func) (arg, v, p, mheap_elt_data_bytes (e)))
980 /* Restore main CLIB heap. */
981 if (clib_mem_mheap_save)
982 clib_mem_set_heap (clib_mem_mheap_save);
985 mheap_maybe_unlock (v);
988 /* Bytes in mheap header overhead not including data bytes. */
/* Header size plus one element header per live object; 0 for a NULL heap.
   (Return-type line is elided in this listing.) */
990 mheap_bytes_overhead (void * v)
992 mheap_t * h = mheap_header (v);
993 return v ? sizeof (h[0]) + h->n_elts * sizeof (mheap_elt_t) : 0;
996 /* Total number of bytes including both data and overhead. */
997 uword mheap_bytes (void * v)
998 { return mheap_bytes_overhead (v) + vec_bytes (v); }
/* Fill in a clib_mem_usage_t for the heap without taking the lock: walk all
   elements accumulating used/free byte counts (and, with VM enabled, bytes
   whose pages have been unmapped).  The loop initializer, the used/free
   accumulation branches, and some braces are elided in this listing. */
1000 static void mheap_usage_no_lock (void * v, clib_mem_usage_t * usage)
1002 mheap_t * h = mheap_header (v);
1003 uword used = 0, free = 0, free_vm_unmapped = 0;
1005 if (vec_len (v) > 0)
1010 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1011 e = mheap_next_elt (e))
1013 uword size = mheap_elt_data_bytes (e);
1017 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
/* NOMAP: only computes how many bytes would be unmapped, maps nothing. */
1019 mheap_vm_elt (v, MHEAP_VM_NOMAP, mheap_elt_uoffset (v, e));
1026 usage->object_count = mheap_elts (v);
1027 usage->bytes_total = mheap_bytes (v);
1028 usage->bytes_overhead = mheap_bytes_overhead (v);
1029 usage->bytes_max = mheap_max_size (v);
1030 usage->bytes_used = used;
1031 usage->bytes_free = free;
1032 usage->bytes_free_reclaimed = free_vm_unmapped;
/* Thread-safe wrapper around mheap_usage_no_lock. */
1035 void mheap_usage (void * v, clib_mem_usage_t * usage)
1037 mheap_maybe_lock (v);
1038 mheap_usage_no_lock (v, usage);
1039 mheap_maybe_unlock (v);
/* format() helper: print a byte count, switching to a "k" (kilobyte) suffix
   for large values.  The size-threshold condition between the two returns
   is elided in this listing. */
1042 static u8 * format_mheap_byte_count (u8 * s, va_list * va)
1044 uword n_bytes = va_arg (*va, uword);
1046 return format (s, "%wd", n_bytes);
1048 return format (s, "%wdk", n_bytes / 1024);
1051 /* Returns first corrupt heap element. */
/* Walk elements checking that each element's size/free flag matches the
   next element's prev_* mirror; presumably returns the offending element or
   0 when clean — the loop header and returns are elided in this listing. */
1052 static mheap_elt_t * mheap_first_corrupt (void * v)
1054 mheap_elt_t * e, * n;
1056 if (vec_len (v) == 0)
1062 if (e->n_user_data == MHEAP_N_USER_DATA_INVALID)
1065 n = mheap_next_elt (e);
1067 if (e->n_user_data != n->prev_n_user_data)
1070 if (e->is_free != n->prev_is_free)
/* format() helper: render heap statistics (small-object-cache hit rate,
   free-list search efficiency, vector expands, per-call clock averages).
   Some zero-division fallbacks and the final return are elided in this
   listing. */
1079 static u8 * format_mheap_stats (u8 * s, va_list * va)
1081 mheap_t * h = va_arg (*va, mheap_t *);
1082 mheap_stats_t * st = &h->stats;
1083 uword indent = format_get_indent (s);
1085 s = format (s, "alloc. from small object cache: %Ld hits %Ld attempts (%.2f%%) replacements %d",
1086 st->n_small_object_cache_hits,
1087 st->n_small_object_cache_attempts,
1088 (st->n_small_object_cache_attempts != 0
1089 ? 100. * (f64) st->n_small_object_cache_hits / (f64) st->n_small_object_cache_attempts
1091 h->small_object_cache.replacement_index);
1093 s = format (s, "\n%Ualloc. from free-list: %Ld attempts, %Ld hits (%.2f%%), %Ld considered (per-attempt %.2f)",
1094 format_white_space, indent,
1095 st->free_list.n_search_attempts,
1096 st->free_list.n_objects_found,
1097 (st->free_list.n_search_attempts != 0
1098 ? 100. * (f64) st->free_list.n_objects_found / (f64) st->free_list.n_search_attempts
1100 st->free_list.n_objects_searched,
1101 (st->free_list.n_search_attempts != 0
1102 ? (f64) st->free_list.n_objects_searched / (f64) st->free_list.n_search_attempts
1105 s = format (s, "\n%Ualloc. from vector-expand: %Ld",
1106 format_white_space, indent,
1107 st->n_vector_expands);
1109 s = format (s, "\n%Uallocs: %Ld %.2f clocks/call",
1110 format_white_space, indent,
1112 (f64) st->n_clocks_get / (f64) st->n_gets);
1114 s = format (s, "\n%Ufrees: %Ld %.2f clocks/call",
1115 format_white_space, indent,
1117 (f64) st->n_clocks_put / (f64) st->n_puts);
/* format() entry point for a whole heap: summary line of usage counters,
   then (verbose) a size histogram, internal statistics, trace report sorted
   by mheap_trace_sort, first corrupt element if any, and a raw offset dump.
   Many lines (verbose guards, histogram accumulation, loop headers, final
   return) are elided in this listing — verify against upstream. */
1122 u8 * format_mheap (u8 * s, va_list * va)
1124 void * v = va_arg (*va, u8 *);
1125 int verbose = va_arg (*va, int);
1128 uword i, size, indent;
1129 clib_mem_usage_t usage;
1130 mheap_elt_t * first_corrupt;
1132 mheap_maybe_lock (v);
1134 h = mheap_header (v);
1136 mheap_usage_no_lock (v, &usage);
1138 indent = format_get_indent (s);
1140 s = format (s, "%d objects, %U of %U used, %U free, %U reclaimed, %U overhead",
1142 format_mheap_byte_count, usage.bytes_used,
1143 format_mheap_byte_count, usage.bytes_total,
1144 format_mheap_byte_count, usage.bytes_free,
1145 format_mheap_byte_count, usage.bytes_free_reclaimed,
1146 format_mheap_byte_count, usage.bytes_overhead,
1148 if (usage.bytes_max != ~0)
1149 s = format (s, ", %U capacity", format_mheap_byte_count, usage.bytes_max);
1151 /* Show histogram of sizes. */
1154 uword hist[MHEAP_N_BINS];
1158 memset (hist, 0, sizeof (hist));
1162 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1163 e = mheap_next_elt (e))
1165 uword n_user_data_bytes = mheap_elt_data_bytes (e);
1166 uword bin = user_data_size_to_bin_index (n_user_data_bytes);
1174 s = format (s, "\n%U%=12s%=12s%=16s",
1175 format_white_space, indent + 2,
1176 "Size", "Count", "Fraction");
1178 for (i = 0; i < ARRAY_LEN (hist); i++)
1182 s = format (s, "\n%U%12d%12wd%16.4f",
1183 format_white_space, indent + 2,
1184 MHEAP_MIN_USER_DATA_BYTES + i * MHEAP_USER_DATA_WORD_BYTES,
1186 (f64) hist[i] / (f64) n_hist);
1191 s = format (s, "\n%U%U",
1192 format_white_space, indent + 2,
1193 format_mheap_stats, h);
1195 if ((h->flags & MHEAP_FLAG_TRACE) && vec_len (h->trace_main.traces) > 0)
1197 /* Make a copy of traces since we'll be sorting them. */
1198 mheap_trace_t * t, * traces_copy;
1199 uword indent, total_objects_traced;
1201 traces_copy = vec_dup (h->trace_main.traces);
1202 qsort (traces_copy, vec_len (traces_copy), sizeof (traces_copy[0]),
1205 total_objects_traced = 0;
1206 s = format (s, "\n");
1207 vec_foreach (t, traces_copy) {
1208 /* Skip over free elements. */
1209 if (t->n_allocations == 0)
1212 total_objects_traced += t->n_allocations;
1214 /* When not verbose only report allocations of more than 1k. */
1215 if (! verbose && t->n_bytes < 1024)
/* Print the column header once, before the first row. */
1218 if (t == traces_copy)
1219 s = format (s, "%=9s%=9s %=10s Traceback\n", "Bytes", "Count",
1221 s = format (s, "%9d%9d %p", t->n_bytes, t->n_allocations,
1223 indent = format_get_indent (s);
1224 for (i = 0; i < ARRAY_LEN (t->callers) && t->callers[i]; i++)
1227 s = format (s, "%U", format_white_space, indent);
/* Prefer symbolized frames when ELF symbol support is compiled in. */
1229 s = format (s, " %U\n", format_clib_elf_symbol_with_address, t->callers[i]);
1231 s = format (s, " %p\n", t->callers[i]);
1236 s = format (s, "%d total traced objects\n", total_objects_traced);
1238 vec_free (traces_copy);
1241 first_corrupt = mheap_first_corrupt (v);
1244 size = mheap_elt_data_bytes (first_corrupt);
1245 s = format (s, "\n first corrupt object: %p, size %wd\n %U",
1248 format_hex_bytes, first_corrupt, size);
1251 /* FIXME. This output could be wrong in the unlikely case that format
1252 uses the same mheap as we are currently inspecting. */
1258 s = format (s, "\n");
1260 e = mheap_elt_at_uoffset (v, 0);
1265 s = format (s, "%8d: ", i);
1267 o = mheap_elt_uoffset (v, e);
/* Parenthesize offsets of free elements. */
1270 s = format (s, "(%8d) ", o);
1272 s = format (s, " %8d ", o);
1274 if ((i % 8) == 7 || (i + 1) >= h->n_elts)
1275 s = format (s, "\n");
1279 mheap_maybe_unlock (v);
/* Body of a small debug-dump helper: prints the heap verbosely to stderr.
   NOTE(review): its signature line is elided in this listing — confirm the
   function's name and prototype against upstream vppinfra/mheap.c. */
1285 { fformat (stderr, "%U", format_mheap, v, 1); }
/* Empty-bodied hook invoked by the CHECK macro in mheap_validate just
   before os_panic — a convenient place to set a debugger breakpoint when a
   validation check fails.  (The `{}` body line is elided in this listing.) */
1287 static void mheap_validate_breakpoint ()
/* Exhaustive heap consistency check: verifies the free-list/bitmap
   agreement, per-bin free-list linkage, the small-object cache, and every
   element's size/free mirrors, then cross-checks the counts and total size
   against the header.  Panics via CHECK/os_panic on any failure.  Many
   braces, loop headers, and accumulation lines are elided in this listing. */
1290 void mheap_validate (void * v)
1292 mheap_t * h = mheap_header (v);
1295 uword elt_count, elt_size;
1296 uword free_count_from_free_lists, free_size_from_free_lists;
1297 uword small_elt_free_count, small_elt_free_size;
/* On failure: stop in mheap_validate_breakpoint, then abort. */
1299 #define CHECK(x) if (! (x)) { mheap_validate_breakpoint (); os_panic (); }
1301 if (vec_len (v) == 0)
1304 mheap_maybe_lock (v);
1306 /* Validate number of elements and size. */
1307 free_size_from_free_lists = free_count_from_free_lists = 0;
1308 for (i = 0; i < ARRAY_LEN (h->first_free_elt_uoffset_by_bin); i++)
1310 mheap_elt_t * e, * n;
/* Bin-head and non-empty bitmap must agree for every bin. */
1313 CHECK ((h->first_free_elt_uoffset_by_bin[i] != ~0)
1314 == ((h->non_empty_free_elt_heads[i / BITS (uword)] & ((uword) 1 << (uword) (i % BITS (uword)))) != 0));
1316 if (h->first_free_elt_uoffset_by_bin[i] == ~0)
1319 e = mheap_elt_at_uoffset (v, h->first_free_elt_uoffset_by_bin[i]);
1325 n = mheap_next_elt (e);
1327 /* Object must be marked free. */
1330 /* Next object's previous free bit must also be set. */
1331 CHECK (n->prev_is_free);
/* List head has no predecessor. */
1334 CHECK (e->free_elt.prev_uoffset == ~0);
1337 s = mheap_elt_data_bytes (e);
1338 CHECK (user_data_size_to_bin_index (s) == i);
1340 free_count_from_free_lists += 1;
1341 free_size_from_free_lists += s;
1343 if (e->free_elt.next_uoffset == ~0)
1346 n = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
1348 /* Check free element linkages. */
1349 CHECK (n->free_elt.prev_uoffset == mheap_elt_uoffset (v, e));
1355 /* Go through small object cache. */
1356 small_elt_free_count = small_elt_free_size = 0;
1357 for (i = 0; i < ARRAY_LEN (h->small_object_cache.bins.as_u8); i++)
1359 if (h->small_object_cache.bins.as_u8[i] != 0)
/* Cache stores bin + 1; 0 means the slot is empty. */
1362 uword b = h->small_object_cache.bins.as_u8[i] - 1;
1363 uword o = h->small_object_cache.offsets[i];
1366 e = mheap_elt_at_uoffset (v, o);
1368 /* Object must be allocated. */
1369 CHECK (! e->is_free);
1371 s = mheap_elt_data_bytes (e);
1372 CHECK (user_data_size_to_bin_index (s) == b);
1374 small_elt_free_count += 1;
1375 small_elt_free_size += s;
1380 mheap_elt_t * e, * n;
1381 uword elt_free_size, elt_free_count;
1383 elt_count = elt_size = elt_free_size = elt_free_count = 0;
1385 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1388 if (e->prev_n_user_data != MHEAP_N_USER_DATA_INVALID)
1389 CHECK (e->prev_n_user_data * sizeof (e->user_data[0]) >= MHEAP_MIN_USER_DATA_BYTES);
1391 CHECK (e->n_user_data * sizeof (e->user_data[0]) >= MHEAP_MIN_USER_DATA_BYTES);
1393 n = mheap_next_elt (e);
1395 CHECK (e->is_free == n->prev_is_free);
1398 s = mheap_elt_data_bytes (e);
1407 /* Consecutive free objects should have been combined. */
1408 CHECK (! (e->prev_is_free && n->prev_is_free));
1411 CHECK (free_count_from_free_lists == elt_free_count);
1412 CHECK (free_size_from_free_lists == elt_free_size);
1413 CHECK (elt_count == h->n_elts + elt_free_count + small_elt_free_count);
1414 CHECK (elt_size + (elt_count + 1) * MHEAP_ELT_OVERHEAD_BYTES == vec_len (v));
1418 mheap_elt_t * e, * n;
/* Re-walk checking the size mirrors pairwise. */
1421 e->n_user_data == MHEAP_N_USER_DATA_INVALID;
1424 n = mheap_next_elt (e);
1425 CHECK (e->n_user_data == n->prev_n_user_data);
1431 mheap_maybe_unlock (v);
1433 h->validate_serial += 1;
/* Record an allocation trace: capture the caller backtrace, find or create
   the per-callstack trace record (reusing the free list or growing the
   traces vector — rekeying the callers hash if the vector moved), bump its
   counters, and index the trace by object offset.  Some lines (hash-hit
   branch, n_bytes accumulation, braces) are elided in this listing. */
1436 static void mheap_get_trace (void * v, uword offset, uword size)
1439 mheap_trace_main_t * tm;
1441 uword i, n_callers, trace_index, * p;
1442 mheap_trace_t trace;
1444 /* Spurious Coverity warnings be gone. */
1445 memset(&trace, 0, sizeof(trace));
1447 n_callers = clib_backtrace (trace.callers, ARRAY_LEN (trace.callers),
1448 /* Skip mheap_get_aligned's frame */ 1);
/* Zero-fill unused caller slots so the hash key is deterministic. */
1452 for (i = n_callers; i < ARRAY_LEN (trace.callers); i++)
1453 trace.callers[i] = 0;
1455 h = mheap_header (v);
1456 tm = &h->trace_main;
1458 if (! tm->trace_by_callers)
1459 tm->trace_by_callers = hash_create_mem (0, sizeof (trace.callers), sizeof (uword));
1461 p = hash_get_mem (tm->trace_by_callers, &trace.callers);
1465 t = tm->traces + trace_index;
/* No existing trace for this callstack: reuse a freed slot if any. */
1469 i = vec_len (tm->trace_free_list);
1472 trace_index = tm->trace_free_list[i - 1];
1473 _vec_len (tm->trace_free_list) = i - 1;
1477 mheap_trace_t * old_start = tm->traces;
1478 mheap_trace_t * old_end = vec_end (tm->traces);
1480 vec_add2 (tm->traces, t, 1);
/* Vector moved: rewrite hash keys that pointed into the old storage. */
1482 if (tm->traces != old_start) {
1485 hash_foreach_pair (p, tm->trace_by_callers, ({
1486 q = uword_to_pointer (p->key, mheap_trace_t *);
1487 ASSERT (q >= old_start && q < old_end);
1488 p->key = pointer_to_uword (tm->traces + (q - old_start));
1491 trace_index = t - tm->traces;
1494 t = tm->traces + trace_index;
1496 t->n_allocations = 0;
1498 hash_set_mem (tm->trace_by_callers, t->callers, trace_index);
1501 t->n_allocations += 1;
1503 t->offset = offset; /* keep a sample to autopsy */
1504 hash_set (tm->trace_index_by_offset, offset, t - tm->traces);
/* Undo a trace recorded by mheap_get_trace when the object at `offset` is
   freed; when the trace's allocation count drops to zero, recycle its slot.
   (The missing-entry early return and n_bytes decrement are elided in this
   listing.) */
1507 static void mheap_put_trace (void * v, uword offset, uword size)
1510 mheap_trace_main_t * tm;
1512 uword trace_index, * p;
1514 h = mheap_header (v);
1515 tm = &h->trace_main;
1516 p = hash_get (tm->trace_index_by_offset, offset);
1521 hash_unset (tm->trace_index_by_offset, offset);
1522 ASSERT (trace_index < vec_len (tm->traces));
1524 t = tm->traces + trace_index;
1525 ASSERT (t->n_allocations > 0);
1526 ASSERT (t->n_bytes >= size);
1527 t->n_allocations -= 1;
1529 if (t->n_allocations == 0)
/* Last allocation for this callstack gone: retire the trace record. */
1531 hash_unset_mem (tm->trace_by_callers, t->callers);
1532 vec_add1 (tm->trace_free_list, trace_index);
1533 memset (t, 0, sizeof (t[0]));
/* qsort comparator for trace records: descending by bytes, then descending
   by allocation count.  (The `cmp` declaration, the tie-break condition,
   and the return are elided in this listing.) */
1537 static int mheap_trace_sort (const void * _t1, const void * _t2)
1539 const mheap_trace_t * t1 = _t1;
1540 const mheap_trace_t * t2 = _t2;
1543 cmp = (word) t2->n_bytes - (word) t1->n_bytes;
1545 cmp = (word) t2->n_allocations - (word) t1->n_allocations;
/* Release all tracing state: trace records, the free-slot list, and both
   lookup hashes.  (Return-type line is elided in this listing.) */
1550 mheap_trace_main_free (mheap_trace_main_t * tm)
1552 vec_free (tm->traces);
1553 vec_free (tm->trace_free_list);
1554 hash_free (tm->trace_by_callers);
1555 hash_free (tm->trace_index_by_offset);
/* Enable or disable allocation tracing on the heap: set MHEAP_FLAG_TRACE,
   or clear it and free all accumulated trace state.  (The enable/disable
   branch structure is elided in this listing.) */
1558 void mheap_trace (void * v, int enable)
1562 h = mheap_header (v);
1566 h->flags |= MHEAP_FLAG_TRACE;
1570 mheap_trace_main_free (&h->trace_main);
1571 h->flags &= ~MHEAP_FLAG_TRACE;