2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
18 Permission is hereby granted, free of charge, to any person obtaining
19 a copy of this software and associated documentation files (the
20 "Software"), to deal in the Software without restriction, including
21 without limitation the rights to use, copy, modify, merge, publish,
22 distribute, sublicense, and/or sell copies of the Software, and to
23 permit persons to whom the Software is furnished to do so, subject to
24 the following conditions:
26 The above copyright notice and this permission notice shall be
27 included in all copies or substantial portions of the Software.
29 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 #include <vppinfra/bitops.h>
39 #include <vppinfra/hash.h>
40 #include <vppinfra/format.h>
41 #include <vppinfra/mheap.h>
42 #include <vppinfra/os.h>
43 #include <vppinfra/time.h>
46 #include <vppinfra/elf_clib.h>
/* Forward declarations for the allocation-trace helpers defined later in
   this file (used by mheap_get_aligned / mheap_put when MHEAP_FLAG_TRACE
   is set, and by format_mheap for sorted trace output). */
49 static void mheap_get_trace (void * v, uword offset, uword size);
50 static void mheap_put_trace (void * v, uword offset, uword size);
51 static int mheap_trace_sort (const void * t1, const void * t2);
/* Acquire the heap spin lock, but only when the heap vector v is non-NULL
   and was created with MHEAP_FLAG_THREAD_SAFE.  The lock is recursive:
   if the calling CPU already owns it, only the recursion count changes
   (recursion handling lines appear to be missing from this copy).
   NOTE(review): original line numbers skip here — body is truncated;
   confirm against upstream vppinfra/mheap.c before modifying. */
53 always_inline void mheap_maybe_lock (void * v)
55 mheap_t * h = mheap_header (v);
56 if (v && (h->flags & MHEAP_FLAG_THREAD_SAFE))
58 u32 my_cpu = os_get_cpu_number();
59 if (h->owner_cpu == my_cpu)
/* Spin until the test-and-set wins the lock (GCC __sync builtin). */
65 while (__sync_lock_test_and_set (&h->lock, 1))
68 h->owner_cpu = my_cpu;
69 h->recursion_count = 1;
/* Release the heap lock taken by mheap_maybe_lock.  Asserts the caller is
   the owning CPU; the lock is actually dropped only when the recursion
   count reaches zero.  A memory barrier orders heap writes before the
   release (the store that clears h->lock is missing from this copy).
   NOTE(review): body truncated in this copy — confirm upstream. */
73 always_inline void mheap_maybe_unlock (void * v)
75 mheap_t * h = mheap_header (v);
76 if (v && h->flags & MHEAP_FLAG_THREAD_SAFE)
78 ASSERT(os_get_cpu_number() == h->owner_cpu);
79 if (--h->recursion_count == 0)
82 CLIB_MEMORY_BARRIER();
88 /* Find bin for objects with size at least n_user_data_bytes. */
/* Small sizes map linearly (one bin per data word above the minimum);
   larger sizes map logarithmically via max_log2.  Returns the small bin
   when it is in range, otherwise the large bin. */
90 user_data_size_to_bin_index (uword n_user_data_bytes)
92 uword n_user_data_words;
93 word small_bin, large_bin;
95 /* User size must be at least big enough to hold free elt. */
96 n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
/* Round up to a whole number of user-data words. */
99 n_user_data_words = (round_pow2 (n_user_data_bytes, MHEAP_USER_DATA_WORD_BYTES)
100 / MHEAP_USER_DATA_WORD_BYTES);
102 ASSERT (n_user_data_words > 0);
103 small_bin = n_user_data_words - (MHEAP_MIN_USER_DATA_BYTES / MHEAP_USER_DATA_WORD_BYTES);
104 ASSERT (small_bin >= 0);
106 large_bin = MHEAP_N_SMALL_OBJECT_BINS + max_log2 (n_user_data_bytes) - MHEAP_LOG2_N_SMALL_OBJECT_BINS;
108 return small_bin < MHEAP_N_SMALL_OBJECT_BINS ? small_bin : large_bin;
/* Convert total element size (header + data) to user-data bytes by
   subtracting the offset of the user_data field.
   NOTE(review): the return-type line is missing from this copy. */
112 mheap_elt_size_to_user_n_bytes (uword n_bytes)
114 ASSERT (n_bytes >= sizeof (mheap_elt_t));
115 return (n_bytes - STRUCT_OFFSET_OF (mheap_elt_t, user_data));
/* As above, but in units of user-data words.  Input must already be
   word-aligned.  Marked unused to silence warnings when not referenced. */
118 always_inline uword __attribute__((unused))
119 mheap_elt_size_to_user_n_words (uword n_bytes)
121 ASSERT (n_bytes % MHEAP_USER_DATA_WORD_BYTES == 0);
122 return mheap_elt_size_to_user_n_bytes (n_bytes) / MHEAP_USER_DATA_WORD_BYTES;
/* Set an element's size and free/allocated state at heap offset uoffset,
   and mirror both into the NEXT element's prev_* fields so forward and
   backward walks agree.  n_user_data_bytes must be word-aligned.
   NOTE(review): signature lines are partially missing in this copy. */
126 mheap_elt_set_size (void * v,
128 uword n_user_data_bytes,
131 mheap_elt_t * e, * n;
133 e = mheap_elt_at_uoffset (v, uoffset);
135 ASSERT (n_user_data_bytes % MHEAP_USER_DATA_WORD_BYTES == 0);
137 e->n_user_data = n_user_data_bytes / MHEAP_USER_DATA_WORD_BYTES;
138 e->is_free = is_free;
139 ASSERT (e->prev_n_user_data * sizeof (e->user_data[0]) >= MHEAP_MIN_USER_DATA_BYTES);
/* Keep the doubly-linked size chain consistent: next element records
   this element's size and free bit. */
141 n = mheap_next_elt (e);
142 n->prev_n_user_data = e->n_user_data;
143 n->prev_is_free = is_free;
/* Set the head of the free list for a bin and update the corresponding
   bit in the non-empty-bin bitmap: clear the bit when the list becomes
   empty (MHEAP_GROUNDED), set it otherwise. */
146 always_inline void set_first_free_elt_offset (mheap_t * h, uword bin, uword uoffset)
150 h->first_free_elt_uoffset_by_bin[bin] = uoffset;
/* i0 = word index into the bitmap; i1 = bit mask within that word. */
152 i0 = bin / BITS (h->non_empty_free_elt_heads[0]);
153 i1 = (uword) 1 << (uword) (bin % BITS (h->non_empty_free_elt_heads[0]));
155 ASSERT (i0 < ARRAY_LEN (h->non_empty_free_elt_heads));
156 if (h->first_free_elt_uoffset_by_bin[bin] == MHEAP_GROUNDED)
157 h->non_empty_free_elt_heads[i0] &= ~i1;
159 h->non_empty_free_elt_heads[i0] |= i1;
/* Push the element at uoffset onto the front of its size bin's free list
   and fix up the doubly-linked free-list pointers.  The element's free
   bit is expected to already be set (asserted via the next element).
   NOTE(review): return-type line missing in this copy. */
163 set_free_elt (void * v, uword uoffset, uword n_user_data_bytes)
165 mheap_t * h = mheap_header (v);
166 mheap_elt_t * e = mheap_elt_at_uoffset (v, uoffset);
167 mheap_elt_t * n = mheap_next_elt (e);
168 uword bin = user_data_size_to_bin_index (n_user_data_bytes);
170 ASSERT (n->prev_is_free);
/* New head: no previous, old head becomes next. */
173 e->free_elt.prev_uoffset = MHEAP_GROUNDED;
174 e->free_elt.next_uoffset = h->first_free_elt_uoffset_by_bin[bin];
176 /* Fill in next free elt's previous pointer. */
177 if (e->free_elt.next_uoffset != MHEAP_GROUNDED)
179 mheap_elt_t * nf = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
180 ASSERT (nf->is_free);
181 nf->free_elt.prev_uoffset = uoffset;
184 set_first_free_elt_offset (h, bin, uoffset);
/* Create a free element at uoffset: write its size/free markers, then
   link it into the appropriate free-list bin. */
188 new_free_elt (void * v, uword uoffset, uword n_user_data_bytes)
190 mheap_elt_set_size (v, uoffset, n_user_data_bytes, /* is_free */ 1);
191 set_free_elt (v, uoffset, n_user_data_bytes);
/* Unlink element e from the free list of the given bin, updating the
   bin head (when e is the head) and the neighbors' prev/next offsets.
   NOTE(review): the branch structure around lines 210-217 is truncated
   in this copy — confirm against upstream before editing. */
195 remove_free_elt (void * v, mheap_elt_t * e, uword bin)
197 mheap_t * h = mheap_header (v);
198 mheap_elt_t * p, * n;
205 no = e->free_elt.next_uoffset;
207 n = no != MHEAP_GROUNDED ? mheap_elt_at_uoffset (v, no) : 0;
208 po = e->free_elt.prev_uoffset;
209 p = po != MHEAP_GROUNDED ? mheap_elt_at_uoffset (v, po) : 0;
212 set_first_free_elt_offset (h, bin, no);
214 p->free_elt.next_uoffset = no;
217 n->free_elt.prev_uoffset = po;
/* Convenience wrapper: compute the bin from the element's own size, then
   remove it from that free list. */
221 remove_free_elt2 (void * v, mheap_elt_t * e)
224 bin = user_data_size_to_bin_index (mheap_elt_data_bytes (e));
225 remove_free_elt (v, e, bin);
/* Flag bits for the mheap_vm* helpers: map or unmap the covered pages,
   and whether to round the range up (grow) or truncate it (shrink). */
228 #define MHEAP_VM_MAP (1 << 0)
229 #define MHEAP_VM_UNMAP (1 << 1)
230 #define MHEAP_VM_NOMAP (0 << 1)
231 #define MHEAP_VM_ROUND (1 << 2)
232 #define MHEAP_VM_ROUND_UP MHEAP_VM_ROUND
233 #define MHEAP_VM_ROUND_DOWN (0 << 2)
/* System page size, initialized lazily in mheap_alloc_with_flags. */
235 static uword mheap_page_size;
/* Round addr up to the next page boundary (page size must be a power
   of two; result equals addr when already aligned). */
237 static_always_inline uword mheap_page_round (uword addr)
238 { return (addr + mheap_page_size - 1) &~ (mheap_page_size - 1); }
/* Round addr down to the containing page boundary. */
240 static_always_inline uword mheap_page_truncate (uword addr)
241 { return addr &~ (mheap_page_size - 1); }
/* Map or unmap the whole pages covered by [start_addr, start_addr+size)
   according to flags (MHEAP_VM_MAP / MHEAP_VM_UNMAP, with ROUND_UP or
   truncate semantics for the end address).  Returns the number of bytes
   actually mapped/unmapped (per the mapped_bytes variable).
   NOTE(review): the function-name line and several others are missing
   from this copy — confirm against upstream vppinfra/mheap.c. */
243 static_always_inline uword
246 clib_address_t start_addr,
249 mheap_t * h = mheap_header (v);
250 clib_address_t start_page, end_page, end_addr;
/* Caller must not use VM operations on a heap with VM disabled. */
253 ASSERT (! (h->flags & MHEAP_FLAG_DISABLE_VM));
255 end_addr = start_addr + size;
257 /* Round start/end address up to page boundary. */
258 start_page = mheap_page_round (start_addr);
260 if ((flags & MHEAP_VM_ROUND) == MHEAP_VM_ROUND_UP)
261 end_page = mheap_page_round (end_addr);
263 end_page = mheap_page_truncate (end_addr);
266 if (end_page > start_page)
268 mapped_bytes = end_page - start_page;
269 if (flags & MHEAP_VM_MAP)
270 clib_mem_vm_map ((void *) start_page, end_page - start_page);
271 else if (flags & MHEAP_VM_UNMAP)
272 clib_mem_vm_unmap ((void *) start_page, end_page - start_page);
/* Apply mheap_vm to the byte range of a single heap element: from the
   start of its user data to the start of the next element. */
278 static_always_inline uword
279 mheap_vm_elt (void * v, uword flags, uword offset)
282 clib_address_t start_addr, end_addr;
284 e = mheap_elt_at_uoffset (v, offset);
285 start_addr = (clib_address_t) ((void *) e->user_data);
286 end_addr = (clib_address_t) mheap_next_elt (e);
287 return mheap_vm (v, flags, start_addr, end_addr - start_addr);
/* Return a bitmask of small-object-cache slots whose stored bin byte
   equals `bin`, using 128-bit SIMD byte compares when available.
   NOTE(review): return type, non-SIMD fallback, and closing code are
   missing from this copy — confirm upstream before editing. */
291 mheap_small_object_cache_mask (mheap_small_object_cache_t * c, uword bin)
295 /* $$$$ ELIOT FIXME: add Altivec version of this routine */
296 #if !defined (CLIB_HAVE_VEC128) || defined (__ALTIVEC__)
299 u8x16 b = u8x16_splat (bin);
/* Compare 16 cache bytes at a time; shift each 16-bit compare mask into
   its slot position within the result word. */
303 #define _(i) ((uword) u8x16_compare_byte_mask (u8x16_is_equal (b, c->bins.as_u8x16[i])) << (uword) ((i)*16))
304 mask = _ (0) | _ (1);
305 if (BITS (uword) > 32)
306 mask |= _ (2) | _ (3);
/* Try to satisfy an allocation from the per-heap small object cache.
   Cache bin bytes are stored offset by one (0 means empty slot), hence
   the bin + 1 in the mask lookup.  On a hit the slot is cleared and its
   offset returned; MHEAP_GROUNDED indicates a miss.
   NOTE(review): tail of the function is missing from this copy. */
314 mheap_get_small_object (mheap_t * h, uword bin)
316 mheap_small_object_cache_t * c = &h->small_object_cache;
317 uword mask = mheap_small_object_cache_mask (c, bin + 1);
318 uword offset = MHEAP_GROUNDED;
322 uword i = min_log2 (mask);
323 uword o = c->offsets[i];
324 ASSERT (o != MHEAP_GROUNDED);
325 c->bins.as_u8[i] = 0;
/* Stash a freed small object in the cache.  Prefer an empty slot (bin
   byte 0); otherwise cyclically evict a slot via replacement_index and
   return the evicted offset so the caller can free it for real.
   NOTE(review): body is truncated in this copy — several lines between
   the visible ones are missing. */
333 mheap_put_small_object (mheap_t * h, uword bin, uword offset)
335 mheap_small_object_cache_t * c = &h->small_object_cache;
336 uword free_mask = mheap_small_object_cache_mask (c, 0);
342 i = min_log2 (free_mask);
343 c->bins.as_u8[i] = b;
344 c->offsets[i] = offset;
348 /* Nothing free with right size: cyclic replacement. */
352 i = c->replacement_index++;
354 c->bins.as_u8[i] = b;
355 old_offset = c->offsets[i];
356 c->offsets[i] = offset;
358 /* Return old offset so it can be freed. */
/* Walk one free-list bin looking for an element large enough to hold the
   requested size at the requested alignment/alignment-offset.  On success:
   splits off lo/hi free fragments when they are big enough to stand alone,
   maps any needed pages, removes the chosen element from the free list,
   and returns the allocated object's offset with *n_user_data_bytes_arg
   updated to the actual granted size.  Returns MHEAP_GROUNDED on failure.
   NOTE(review): many lines (loop braces, some statements, the final
   return) are missing from this copy — confirm upstream before editing. */
364 mheap_get_search_free_bin (void * v,
366 uword * n_user_data_bytes_arg,
370 mheap_t * h = mheap_header (v);
373 /* Free object is at offset f0 ... f1;
374 Allocatted object is at offset o0 ... o1. */
375 word o0, o1, f0, f1, search_n_user_data_bytes;
376 word lo_free_usize, hi_free_usize;
378 ASSERT (h->first_free_elt_uoffset_by_bin[bin] != MHEAP_GROUNDED);
379 e = mheap_elt_at_uoffset (v, h->first_free_elt_uoffset_by_bin[bin]);
381 search_n_user_data_bytes = *n_user_data_bytes_arg;
383 /* Silence compiler warning. */
384 o0 = o1 = f0 = f1 = 0;
386 h->stats.free_list.n_search_attempts += 1;
388 /* Find an object that is large enough with correct alignment at given alignment offset. */
391 uword this_object_n_user_data_bytes = mheap_elt_data_bytes (e);
/* Small bins hold a single size, so any element must already fit. */
394 if (bin < MHEAP_N_SMALL_OBJECT_BINS)
395 ASSERT (this_object_n_user_data_bytes >= search_n_user_data_bytes);
397 h->stats.free_list.n_objects_searched += 1;
399 if (this_object_n_user_data_bytes < search_n_user_data_bytes)
402 /* Bounds of free object: from f0 to f1. */
403 f0 = ((void *) e->user_data - v);
404 f1 = f0 + this_object_n_user_data_bytes;
406 /* Place candidate object at end of free block and align as requested. */
407 o0 = ((f1 - search_n_user_data_bytes) &~ (align - 1)) - align_offset;
411 /* Make sure that first free fragment is either empty or
412 large enough to be valid. */
415 lo_free_usize = o0 != f0 ? o0 - f0 - MHEAP_ELT_OVERHEAD_BYTES : 0;
416 if (o0 <= f0 || lo_free_usize >= (word) MHEAP_MIN_USER_DATA_BYTES)
421 o1 = o0 + search_n_user_data_bytes;
/* Candidate fits entirely inside the free block: accept it. */
424 if (o0 >= f0 && o1 <= f1)
428 /* Reached end of free list without finding large enough object. */
429 if (e->free_elt.next_uoffset == MHEAP_GROUNDED)
430 return MHEAP_GROUNDED;
432 /* Otherwise keep searching for large enough object. */
433 e = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
437 /* Free fragment at end. */
438 hi_free_usize = f1 != o1 ? f1 - o1 - MHEAP_ELT_OVERHEAD_BYTES : 0;
440 /* If fragment at end is too small to be a new object,
441 give user's object a bit more space than requested. */
442 if (hi_free_usize < (word) MHEAP_MIN_USER_DATA_BYTES)
444 search_n_user_data_bytes += f1 - o1;
449 /* Need to make sure that relevant memory areas are mapped. */
450 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
452 mheap_elt_t * f0_elt = mheap_elt_at_uoffset (v, f0);
453 mheap_elt_t * f1_elt = mheap_elt_at_uoffset (v, f1);
454 mheap_elt_t * o0_elt = mheap_elt_at_uoffset (v, o0);
455 mheap_elt_t * o1_elt = mheap_elt_at_uoffset (v, o1);
457 uword f0_page_start, f0_page_end;
458 uword o0_page_start, o0_page_end;
460 /* Free elt is mapped. Addresses after that may not be mapped. */
461 f0_page_start = mheap_page_round (pointer_to_uword (f0_elt->user_data));
462 f0_page_end = mheap_page_truncate (pointer_to_uword (f1_elt));
464 o0_page_start = mheap_page_truncate (pointer_to_uword (o0_elt));
465 o0_page_end = mheap_page_round (pointer_to_uword (o1_elt->user_data));
/* Clamp the object's page range to what the free element spans. */
467 if (o0_page_start < f0_page_start)
468 o0_page_start = f0_page_start;
469 if (o0_page_end > f0_page_end)
470 o0_page_end = f0_page_end;
472 if (o0_page_end > o0_page_start)
473 clib_mem_vm_map (uword_to_pointer (o0_page_start, void *),
474 o0_page_end - o0_page_start);
477 /* Remove free object from free list. */
478 remove_free_elt (v, e, bin);
480 /* Free fragment at begining. */
481 if (lo_free_usize > 0)
483 ASSERT (lo_free_usize >= (word) MHEAP_MIN_USER_DATA_BYTES);
484 mheap_elt_set_size (v, f0, lo_free_usize, /* is_free */ 1);
485 new_free_elt (v, f0, lo_free_usize);
488 mheap_elt_set_size (v, o0, search_n_user_data_bytes, /* is_free */ 0);
/* Free fragment at end, if one was kept. */
490 if (hi_free_usize > 0)
492 uword uo = o1 + MHEAP_ELT_OVERHEAD_BYTES;
493 mheap_elt_set_size (v, uo, hi_free_usize, /* is_free */ 1);
494 new_free_elt (v, uo, hi_free_usize);
497 /* Return actual size of block. */
498 *n_user_data_bytes_arg = search_n_user_data_bytes;
500 h->stats.free_list.n_objects_found += 1;
505 /* Search free lists for object with given size and alignment. */
/* First tries the small object cache (only for default alignment and no
   alignment offset), then scans the non-empty-bin bitmap from the
   requested bin upward, probing each occupied bin via
   mheap_get_search_free_bin.  Returns the object's offset or
   MHEAP_GROUNDED when nothing fits.
   NOTE(review): some lines are missing from this copy. */
507 mheap_get_search_free_list (void * v,
508 uword * n_user_bytes_arg,
512 mheap_t * h = mheap_header (v);
513 uword bin, n_user_bytes, i, bi;
515 n_user_bytes = *n_user_bytes_arg;
516 bin = user_data_size_to_bin_index (n_user_bytes);
518 if (MHEAP_HAVE_SMALL_OBJECT_CACHE
519 && (h->flags & MHEAP_FLAG_SMALL_OBJECT_CACHE)
521 && align == STRUCT_SIZE_OF (mheap_elt_t, user_data[0])
522 && align_offset == 0)
524 uword r = mheap_get_small_object (h, bin);
525 h->stats.n_small_object_cache_attempts += 1;
526 if (r != MHEAP_GROUNDED)
528 h->stats.n_small_object_cache_hits += 1;
/* Scan bitmap words from the word containing `bin` upward. */
533 for (i = bin / BITS (uword); i < ARRAY_LEN (h->non_empty_free_elt_heads); i++)
535 uword non_empty_bin_mask = h->non_empty_free_elt_heads[i];
537 /* No need to search smaller bins. */
538 if (i == bin / BITS (uword))
539 non_empty_bin_mask &= ~pow2_mask (bin % BITS (uword));
541 /* Search each occupied free bin which is large enough. */
542 foreach_set_bit (bi, non_empty_bin_mask, ({
543 uword r = mheap_get_search_free_bin (v, bi + i * BITS (uword), n_user_bytes_arg, align, align_offset);
544 if (r != MHEAP_GROUNDED)
549 return MHEAP_GROUNDED;
/* Allocate an object by growing the heap vector at its end.  Creates the
   heap's first element on an empty heap, places the object at the
   requested alignment (inserting a free filler element if the alignment
   gap is large enough), checks against h->max_size, maps any new pages,
   and writes the end-of-heap sentinel element.  On failure sets
   *offset_return to MHEAP_GROUNDED.  Returns the (possibly moved) vector.
   NOTE(review): body is truncated in this copy (vector resize and the
   success-path offset store are not visible) — confirm upstream. */
552 static never_inline void *
553 mheap_get_extend_vector (void * v,
554 uword n_user_data_bytes,
557 uword * offset_return)
559 /* Bounds of free and allocated objects (as above). */
560 uword f0, f1, o0, o1;
562 mheap_t * h = mheap_header (v);
565 if (_vec_len (v) == 0)
567 _vec_len (v) = MHEAP_ELT_OVERHEAD_BYTES;
569 /* Create first element of heap. */
570 e = mheap_elt_at_uoffset (v, _vec_len (v));
571 e->prev_n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Align the object's start; any gap becomes a leading free element. */
576 o0 = round_pow2 (f0, align) - align_offset;
579 free_size = o0 - f0 - MHEAP_ELT_OVERHEAD_BYTES;
580 if (o0 == f0 || free_size >= (word) sizeof (mheap_elt_t))
586 o1 = o0 + n_user_data_bytes;
587 f1 = o1 + MHEAP_ELT_OVERHEAD_BYTES;
590 h = mheap_header (v);
592 /* Make sure we have space for object plus overhead. */
593 if (f1 > h->max_size)
595 *offset_return = MHEAP_GROUNDED;
601 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
603 mheap_elt_t * f0_elt = mheap_elt_at_uoffset (v, f0);
604 mheap_elt_t * f1_elt = mheap_elt_at_uoffset (v, f1);
606 uword f0_page = mheap_page_round (pointer_to_uword (f0_elt->user_data));
607 uword f1_page = mheap_page_round (pointer_to_uword (f1_elt->user_data));
609 if (f1_page > f0_page)
610 mheap_vm (v, MHEAP_VM_MAP, f0_page, f1_page - f0_page);
614 new_free_elt (v, f0, free_size);
616 mheap_elt_set_size (v, o0, n_user_data_bytes, /* is_free */ 0);
618 /* Mark last element. */
619 e = mheap_elt_at_uoffset (v, f1);
620 e->n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Public allocation entry point.  Normalizes align/align_offset, rounds
   the size up, creates a default heap when v is NULL, then tries the
   free lists and finally extends the vector.  Stores the object offset
   in *offset_return (MHEAP_GROUNDED on failure) and returns the
   (possibly moved) heap vector.  Also handles locking, optional
   validation, allocation tracing, and get-path statistics.
   NOTE(review): several lines are missing from this copy (e.g. the
   h->n_elts increment on success) — confirm upstream before editing. */
627 void * mheap_get_aligned (void * v,
628 uword n_user_data_bytes,
631 uword * offset_return)
637 cpu_times[0] = clib_cpu_time_now ();
/* Alignment must be at least one user-data word and a power of two. */
639 align = clib_max (align, STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
640 align = max_pow2 (align);
642 /* Correct align offset to be smaller than alignment. */
643 align_offset &= (align - 1);
645 /* Align offset must be multiple of minimum object size. */
646 if (align_offset % STRUCT_SIZE_OF (mheap_elt_t, user_data[0]) != 0)
648 *offset_return = MHEAP_GROUNDED;
652 /* Round requested size. */
653 n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
654 n_user_data_bytes = round_pow2 (n_user_data_bytes, STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
/* No heap yet: create a default 64 MB heap. */
657 v = mheap_alloc (0, 64 << 20);
659 mheap_maybe_lock (v);
661 h = mheap_header (v);
663 if (h->flags & MHEAP_FLAG_VALIDATE)
666 /* First search free lists for object. */
667 offset = mheap_get_search_free_list (v, &n_user_data_bytes, align, align_offset);
669 h = mheap_header (v);
671 /* If that fails allocate object at end of heap by extending vector. */
672 if (offset == MHEAP_GROUNDED && _vec_len (v) < h->max_size)
674 v = mheap_get_extend_vector (v, n_user_data_bytes, align, align_offset, &offset);
675 h = mheap_header (v);
676 h->stats.n_vector_expands += offset != MHEAP_GROUNDED;
679 *offset_return = offset;
680 if (offset != MHEAP_GROUNDED)
684 if (h->flags & MHEAP_FLAG_TRACE)
686 /* Recursion block for case when we are traceing main clib heap. */
687 h->flags &= ~MHEAP_FLAG_TRACE;
689 mheap_get_trace (v, offset, n_user_data_bytes);
691 h->flags |= MHEAP_FLAG_TRACE;
695 if (h->flags & MHEAP_FLAG_VALIDATE)
698 mheap_maybe_unlock (v);
700 cpu_times[1] = clib_cpu_time_now ();
701 h->stats.n_clocks_get += cpu_times[1] - cpu_times[0];
702 h->stats.n_gets += 1;
/* Free the last element of the heap by shrinking the vector instead of
   adding to a free list.  Merges a preceding free element first; when
   that empties the heap entirely, resets it; otherwise rewrites the
   end-of-heap sentinel.  Pages covered by the removed element(s) are
   unmapped unless VM is disabled.
   NOTE(review): vector-length updates are missing from this copy. */
707 static void free_last_elt (void * v, mheap_elt_t * e)
709 mheap_t * h = mheap_header (v);
711 /* Possibly delete preceeding free element also. */
714 e = mheap_prev_elt (e);
715 remove_free_elt2 (v, e);
/* e is now the heap's first element: heap becomes empty. */
718 if (e->prev_n_user_data == MHEAP_N_USER_DATA_INVALID)
720 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
721 mheap_vm_elt (v, MHEAP_VM_UNMAP, mheap_elt_uoffset (v, e));
726 uword uo = mheap_elt_uoffset (v, e);
727 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
728 mheap_vm_elt (v, MHEAP_VM_UNMAP, uo);
/* e becomes the new end-of-heap sentinel. */
729 e->n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Public free entry point for the object at uoffset.  May divert small
   objects into the small-object cache (freeing an evicted offset
   instead), validates forward/backward element consistency, coalesces
   with adjacent free neighbors, and special-cases the last element so
   the heap vector can shrink.  Handles locking, optional validation,
   trace recording, and put-path statistics.
   NOTE(review): several lines (panic/assert bodies, early-exit paths,
   coalescing conditions) are missing from this copy — confirm upstream
   before editing. */
734 void mheap_put (void * v, uword uoffset)
737 uword n_user_data_bytes, bin;
738 mheap_elt_t * e, * n;
739 uword trace_uoffset, trace_n_user_data_bytes;
742 cpu_times[0] = clib_cpu_time_now ();
744 h = mheap_header (v);
746 mheap_maybe_lock (v);
748 if (h->flags & MHEAP_FLAG_VALIDATE)
751 ASSERT (h->n_elts > 0);
753 h->stats.n_puts += 1;
755 e = mheap_elt_at_uoffset (v, uoffset);
756 n = mheap_next_elt (e);
757 n_user_data_bytes = mheap_elt_data_bytes (e);
/* Remember original object for trace recording: the cache may swap in
   a different (evicted) object below. */
759 trace_uoffset = uoffset;
760 trace_n_user_data_bytes = n_user_data_bytes;
762 bin = user_data_size_to_bin_index (n_user_data_bytes);
763 if (MHEAP_HAVE_SMALL_OBJECT_CACHE
765 && (h->flags & MHEAP_FLAG_SMALL_OBJECT_CACHE))
767 uoffset = mheap_put_small_object (h, bin, uoffset);
/* Re-derive element info: uoffset may now be an evicted object. */
771 e = mheap_elt_at_uoffset (v, uoffset);
772 n = mheap_next_elt (e);
773 n_user_data_bytes = mheap_elt_data_bytes (e);
776 /* Assert that forward and back pointers are equal. */
777 if (e->n_user_data != n->prev_n_user_data)
780 /* Forward and backwards is_free must agree. */
781 if (e->is_free != n->prev_is_free)
784 /* Object was already freed. */
788 /* Special case: delete last element in heap. */
789 if (n->n_user_data == MHEAP_N_USER_DATA_INVALID)
790 free_last_elt (v, e)
794 uword f0, f1, n_combine;
797 f1 = f0 + n_user_data_bytes;
/* Coalesce with a free previous neighbor. */
802 mheap_elt_t * p = mheap_prev_elt (e);
803 f0 = mheap_elt_uoffset (v, p);
804 remove_free_elt2 (v, p);
/* Coalesce with a free next neighbor. */
810 mheap_elt_t * m = mheap_next_elt (n);
812 remove_free_elt2 (v, n);
817 mheap_elt_set_size (v, f0, f1 - f0, /* is_free */ 1);
819 e->is_free = n->prev_is_free = 1;
820 set_free_elt (v, f0, f1 - f0);
822 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
823 mheap_vm_elt (v, MHEAP_VM_UNMAP, f0);
827 h = mheap_header (v);
829 if (h->flags & MHEAP_FLAG_TRACE)
831 /* Recursion block for case when we are traceing main clib heap. */
832 h->flags &= ~MHEAP_FLAG_TRACE;
834 mheap_put_trace (v, trace_uoffset, trace_n_user_data_bytes);
836 h->flags |= MHEAP_FLAG_TRACE;
839 if (h->flags & MHEAP_FLAG_VALIDATE)
842 mheap_maybe_unlock (v);
844 cpu_times[1] = clib_cpu_time_now ();
845 h->stats.n_clocks_put += cpu_times[1] - cpu_times[0];
/* Create a heap inside the given memory region (or VM-allocate one when
   memory is NULL).  Page-aligns the heap header and vector within the
   region, rejects regions too small to hold them, zeroes the header,
   records the VM allocation geometry for later freeing, unmaps the
   unused remainder (when VM is enabled), and grounds every free-list
   head (memset 0xFF == MHEAP_GROUNDED in every slot).
   NOTE(review): several lines (max_size computation, final return) are
   missing from this copy — confirm upstream before editing. */
848 void * mheap_alloc_with_flags (void * memory, uword memory_size, uword flags)
854 if (! mheap_page_size)
855 mheap_page_size = clib_mem_get_page_size ();
859 /* No memory given, try to VM allocate some. */
860 memory = clib_mem_vm_alloc (memory_size);
864 /* No memory region implies we have virtual memory. */
865 flags &= ~MHEAP_FLAG_DISABLE_VM;
868 /* Make sure that given memory is page aligned. */
872 am = pointer_to_uword (memory);
873 av = mheap_page_round (am);
874 v = uword_to_pointer (av, void *);
875 h = mheap_header (v);
876 ah = pointer_to_uword (h);
/* Header landed before the region start: push it up one page. */
878 ah += mheap_page_size;
880 h = uword_to_pointer (ah, void *);
881 v = mheap_vector (h);
883 if (PREDICT_FALSE(memory + memory_size < v)) {
885 * This will happen when the requested memory_size is too
886 * small to cope with the heap header and/or memory alignment.
888 clib_mem_vm_free(memory, memory_size);
892 size = memory + memory_size - v;
895 /* VM map header so we can use memory. */
896 if (! (flags & MHEAP_FLAG_DISABLE_VM))
897 clib_mem_vm_map (h, sizeof (h[0]));
899 /* Zero vector header: both heap header and vector length. */
900 memset (h, 0, sizeof (h[0]));
/* Remember VM allocation so _mheap_free can release it. */
903 h->vm_alloc_offset_from_header = (void *) h - memory;
904 h->vm_alloc_size = memory_size;
909 /* Set flags based on those given less builtin-flags. */
910 h->flags |= (flags &~ MHEAP_FLAG_TRACE);
912 /* Unmap remainder of heap until we will be ready to use it. */
913 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
914 mheap_vm (v, MHEAP_VM_UNMAP | MHEAP_VM_ROUND_UP,
915 (clib_address_t) v, h->max_size);
917 /* Initialize free list heads to empty. */
918 memset (h->first_free_elt_uoffset_by_bin, 0xFF,
919 sizeof (h->first_free_elt_uoffset_by_bin));
/* Convenience wrapper around mheap_alloc_with_flags: disables VM (the
   condition for which is missing from this copy) and enables the small
   object cache when 128-bit vector support is compiled in.
   NOTE(review): flag-initialization lines are missing here. */
924 void * mheap_alloc (void * memory, uword size)
929 flags |= MHEAP_FLAG_DISABLE_VM;
931 #ifdef CLIB_HAVE_VEC128
932 flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
935 return mheap_alloc_with_flags (memory, size, flags);
/* Destroy a heap: release the VM region recorded at creation time
   (vm_alloc_offset_from_header / vm_alloc_size set in
   mheap_alloc_with_flags).  NOTE(review): NULL-check and return lines
   appear to be missing from this copy. */
938 void * _mheap_free (void * v)
940 mheap_t * h = mheap_header (v);
943 clib_mem_vm_free ((void *) h - h->vm_alloc_offset_from_header, h->vm_alloc_size);
948 /* Call user's function with each object in heap. */
/* Iteration stops early when func returns non-zero.  If v is the current
   clib heap, a small temporary heap is created on the stack first so the
   callback can allocate without mutating the heap being walked; the
   original heap is restored afterwards. */
949 void mheap_foreach (void * v,
950 uword (* func) (void * arg, void * v, void * elt_data, uword elt_size),
954 u8 * stack_heap, * clib_mem_mheap_save;
955 u8 tmp_heap_memory[16*1024];
957 mheap_maybe_lock (v);
959 if (vec_len (v) == 0)
962 clib_mem_mheap_save = 0;
965 /* Allocate a new temporary heap on the stack.
966 This is so that our hash table & user's callback function can
967 themselves allocate memory somewhere without getting in the way
968 of the heap we are looking at. */
969 if (v == clib_mem_get_heap ())
971 stack_heap = mheap_alloc (tmp_heap_memory, sizeof (tmp_heap_memory));
972 clib_mem_mheap_save = v;
973 clib_mem_set_heap (stack_heap);
/* NOTE(review): the loop-initialization line for e is missing here. */
977 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
978 e = mheap_next_elt (e))
980 void * p = mheap_elt_data (v, e);
/* Non-zero return from the callback terminates the walk. */
983 if ((* func) (arg, v, p, mheap_elt_data_bytes (e)))
987 /* Restore main CLIB heap. */
988 if (clib_mem_mheap_save)
989 clib_mem_set_heap (clib_mem_mheap_save);
992 mheap_maybe_unlock (v);
995 /* Bytes in mheap header overhead not including data bytes. */
/* Header struct plus one element header per allocated object; 0 for a
   NULL heap.  NOTE(review): return-type line missing in this copy. */
997 mheap_bytes_overhead (void * v)
999 mheap_t * h = mheap_header (v);
1000 return v ? sizeof (h[0]) + h->n_elts * sizeof (mheap_elt_t) : 0;
1003 /* Total number of bytes including both data and overhead. */
1004 uword mheap_bytes (void * v)
1005 { return mheap_bytes_overhead (v) + vec_bytes (v); }
/* Fill in a clib_mem_usage_t for the heap without taking the lock
   (callers lock, see mheap_usage).  Walks every element accumulating
   used vs free byte counts, and (when VM is enabled) how many free
   bytes are currently unmapped.
   NOTE(review): loop header and the used/free accumulation lines are
   missing from this copy — confirm upstream. */
1007 static void mheap_usage_no_lock (void * v, clib_mem_usage_t * usage)
1009 mheap_t * h = mheap_header (v);
1010 uword used = 0, free = 0, free_vm_unmapped = 0;
1012 if (vec_len (v) > 0)
1017 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1018 e = mheap_next_elt (e))
1020 uword size = mheap_elt_data_bytes (e);
/* MHEAP_VM_NOMAP: query-only call to count unmapped bytes. */
1024 if (! (h->flags & MHEAP_FLAG_DISABLE_VM))
1026 mheap_vm_elt (v, MHEAP_VM_NOMAP, mheap_elt_uoffset (v, e));
1033 usage->object_count = mheap_elts (v);
1034 usage->bytes_total = mheap_bytes (v);
1035 usage->bytes_overhead = mheap_bytes_overhead (v);
1036 usage->bytes_max = mheap_max_size (v);
1037 usage->bytes_used = used;
1038 usage->bytes_free = free;
1039 usage->bytes_free_reclaimed = free_vm_unmapped;
/* Public, locked wrapper around mheap_usage_no_lock. */
1042 void mheap_usage (void * v, clib_mem_usage_t * usage)
1044 mheap_maybe_lock (v);
1045 mheap_usage_no_lock (v, usage);
1046 mheap_maybe_unlock (v);
/* format() helper: print a byte count, switching to "Nk" form (the
   threshold condition line is missing from this copy). */
1049 static u8 * format_mheap_byte_count (u8 * s, va_list * va)
1051 uword n_bytes = va_arg (*va, uword);
1053 return format (s, "%wd", n_bytes);
1055 return format (s, "%wdk", n_bytes / 1024);
1058 /* Returns first corrupt heap element. */
/* Walks the heap checking that each element's size and free bit agree
   with the next element's prev_* mirror fields; returns the first
   element that disagrees, or (per missing lines) 0 when the heap is
   clean.  NOTE(review): loop header and returns are missing here. */
1059 static mheap_elt_t * mheap_first_corrupt (void * v)
1061 mheap_elt_t * e, * n;
1063 if (vec_len (v) == 0)
1069 if (e->n_user_data == MHEAP_N_USER_DATA_INVALID)
1072 n = mheap_next_elt (e);
1074 if (e->n_user_data != n->prev_n_user_data)
1077 if (e->is_free != n->prev_is_free)
/* format() helper: render the heap's allocation statistics — small
   object cache hit rate, free-list search effectiveness, vector-expand
   count, and clocks per get/put. */
1086 static u8 * format_mheap_stats (u8 * s, va_list * va)
1088 mheap_t * h = va_arg (*va, mheap_t *);
1089 mheap_stats_t * st = &h->stats;
1090 uword indent = format_get_indent (s);
1092 s = format (s, "alloc. from small object cache: %Ld hits %Ld attempts (%.2f%%) replacements %d",
1093 st->n_small_object_cache_hits,
1094 st->n_small_object_cache_attempts,
1095 (st->n_small_object_cache_attempts != 0
1096 ? 100. * (f64) st->n_small_object_cache_hits / (f64) st->n_small_object_cache_attempts
1098 h->small_object_cache.replacement_index);
1100 s = format (s, "\n%Ualloc. from free-list: %Ld attempts, %Ld hits (%.2f%%), %Ld considered (per-attempt %.2f)",
1101 format_white_space, indent,
1102 st->free_list.n_search_attempts,
1103 st->free_list.n_objects_found,
1104 (st->free_list.n_search_attempts != 0
1105 ? 100. * (f64) st->free_list.n_objects_found / (f64) st->free_list.n_search_attempts
1107 st->free_list.n_objects_searched,
1108 (st->free_list.n_search_attempts != 0
1109 ? (f64) st->free_list.n_objects_searched / (f64) st->free_list.n_search_attempts
1112 s = format (s, "\n%Ualloc. from vector-expand: %Ld",
1113 format_white_space, indent,
1114 st->n_vector_expands);
1116 s = format (s, "\n%Uallocs: %Ld %.2f clocks/call",
1117 format_white_space, indent,
1119 (f64) st->n_clocks_get / (f64) st->n_gets);
1121 s = format (s, "\n%Ufrees: %Ld %.2f clocks/call",
1122 format_white_space, indent,
1124 (f64) st->n_clocks_put / (f64) st->n_puts);
/* format() entry point for a whole heap: summary usage line, optional
   size histogram and stats (verbose), sorted allocation traces when
   tracing is on, the first corrupt element if any, and (apparently,
   under a condition missing from this copy) a raw dump of element
   offsets.  Takes the heap lock for the duration.
   NOTE(review): this copy is heavily truncated (loop headers, verbose
   condition, histogram accumulation, final return are missing) —
   confirm against upstream vppinfra/mheap.c before editing. */
1129 u8 * format_mheap (u8 * s, va_list * va)
1131 void * v = va_arg (*va, u8 *);
1132 int verbose = va_arg (*va, int);
1135 uword i, size, indent;
1136 clib_mem_usage_t usage;
1137 mheap_elt_t * first_corrupt;
1139 mheap_maybe_lock (v);
1141 h = mheap_header (v);
1143 mheap_usage_no_lock (v, &usage);
1145 indent = format_get_indent (s);
1147 s = format (s, "%d objects, %U of %U used, %U free, %U reclaimed, %U overhead",
1149 format_mheap_byte_count, usage.bytes_used,
1150 format_mheap_byte_count, usage.bytes_total,
1151 format_mheap_byte_count, usage.bytes_free,
1152 format_mheap_byte_count, usage.bytes_free_reclaimed,
1153 format_mheap_byte_count, usage.bytes_overhead,
1155 if (usage.bytes_max != ~0)
1156 s = format (s, ", %U capacity", format_mheap_byte_count, usage.bytes_max);
1158 /* Show histogram of sizes. */
1161 uword hist[MHEAP_N_BINS];
1165 memset (hist, 0, sizeof (hist));
1169 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1170 e = mheap_next_elt (e))
1172 uword n_user_data_bytes = mheap_elt_data_bytes (e);
1173 uword bin = user_data_size_to_bin_index (n_user_data_bytes);
1181 s = format (s, "\n%U%=12s%=12s%=16s",
1182 format_white_space, indent + 2,
1183 "Size", "Count", "Fraction");
1185 for (i = 0; i < ARRAY_LEN (hist); i++)
1189 s = format (s, "\n%U%12d%12wd%16.4f",
1190 format_white_space, indent + 2,
1191 MHEAP_MIN_USER_DATA_BYTES + i * MHEAP_USER_DATA_WORD_BYTES,
1193 (f64) hist[i] / (f64) n_hist);
1198 s = format (s, "\n%U%U",
1199 format_white_space, indent + 2,
1200 format_mheap_stats, h);
1202 if ((h->flags & MHEAP_FLAG_TRACE) && vec_len (h->trace_main.traces) > 0)
1204 /* Make a copy of traces since we'll be sorting them. */
1205 mheap_trace_t * t, * traces_copy;
1206 uword indent, total_objects_traced;
1208 traces_copy = vec_dup (h->trace_main.traces);
1209 qsort (traces_copy, vec_len (traces_copy), sizeof (traces_copy[0]),
1212 total_objects_traced = 0;
1213 s = format (s, "\n");
1214 vec_foreach (t, traces_copy) {
1215 /* Skip over free elements. */
1216 if (t->n_allocations == 0)
1219 total_objects_traced += t->n_allocations;
1221 /* When not verbose only report allocations of more than 1k. */
1222 if (! verbose && t->n_bytes < 1024)
/* Header row is printed only for the first trace entry. */
1225 if (t == traces_copy)
1226 s = format (s, "%=9s%=9s %=10s Traceback\n", "Bytes", "Count",
1228 s = format (s, "%9d%9d %p", t->n_bytes, t->n_allocations,
1230 indent = format_get_indent (s);
1231 for (i = 0; i < ARRAY_LEN (t->callers) && t->callers[i]; i++)
1234 s = format (s, "%U", format_white_space, indent);
/* Resolve caller addresses to symbols when ELF support is present. */
1236 s = format (s, " %U\n", format_clib_elf_symbol_with_address, t->callers[i]);
1238 s = format (s, " %p\n", t->callers[i]);
1243 s = format (s, "%d total traced objects\n", total_objects_traced);
1245 vec_free (traces_copy);
1248 first_corrupt = mheap_first_corrupt (v);
1251 size = mheap_elt_data_bytes (first_corrupt);
1252 s = format (s, "\n first corrupt object: %p, size %wd\n %U",
1255 format_hex_bytes, first_corrupt, size);
1258 /* FIXME. This output could be wrong in the unlikely case that format
1259 uses the same mheap as we are currently inspecting. */
1265 s = format (s, "\n");
1267 e = mheap_elt_at_uoffset (v, 0);
1272 s = format (s, "%8d: ", i);
1274 o = mheap_elt_uoffset (v, e);
/* Free elements shown in parentheses, allocated ones bare. */
1277 s = format (s, "(%8d) ", o);
1279 s = format (s, " %8d ", o);
1281 if ((i % 8) == 7 || (i + 1) >= h->n_elts)
1282 s = format (s, "\n");
1286 mheap_maybe_unlock (v);
/* NOTE(review): this body's signature line was lost in this copy — it
   formats the heap verbosely to stderr (likely a debug dump helper);
   confirm its name against upstream vppinfra/mheap.c. */
1292 { fformat (stderr, "%U", format_mheap, v, 1); }
/* Empty hook: set a debugger breakpoint here to trap validation
   failures raised by the CHECK macro in mheap_validate below.
   NOTE(review): the body lines are missing from this copy. */
1294 static void mheap_validate_breakpoint ()
/* Exhaustive heap consistency check; calls mheap_validate_breakpoint
   and os_panic on the first violation (via CHECK).  Verifies: free-list
   heads agree with the non-empty bitmap, free-list linkage and bin
   membership, small-object-cache entries point at allocated elements of
   the right bin, per-element forward/backward size+free-bit mirrors,
   no two adjacent free elements, and that element/free counts and sizes
   reconcile with h->n_elts and the vector length.
   NOTE(review): many lines (loop headers, some CHECKs, count
   accumulation) are missing from this copy — confirm upstream. */
1297 void mheap_validate (void * v)
1299 mheap_t * h = mheap_header (v);
1302 uword elt_count, elt_size;
1303 uword free_count_from_free_lists, free_size_from_free_lists;
1304 uword small_elt_free_count, small_elt_free_size;
1306 #define CHECK(x) if (! (x)) { mheap_validate_breakpoint (); os_panic (); }
1308 if (vec_len (v) == 0)
1311 mheap_maybe_lock (v);
1313 /* Validate number of elements and size. */
1314 free_size_from_free_lists = free_count_from_free_lists = 0;
1315 for (i = 0; i < ARRAY_LEN (h->first_free_elt_uoffset_by_bin); i++)
1317 mheap_elt_t * e, * n;
/* Bin-head and bitmap bit must agree on emptiness. */
1320 CHECK ((h->first_free_elt_uoffset_by_bin[i] != MHEAP_GROUNDED)
1321 == ((h->non_empty_free_elt_heads[i / BITS (uword)] & ((uword) 1 << (uword) (i % BITS (uword)))) != 0));
1323 if (h->first_free_elt_uoffset_by_bin[i] == MHEAP_GROUNDED)
1326 e = mheap_elt_at_uoffset (v, h->first_free_elt_uoffset_by_bin[i]);
1332 n = mheap_next_elt (e);
1334 /* Object must be marked free. */
1337 /* Next object's previous free bit must also be set. */
1338 CHECK (n->prev_is_free);
/* Head of the list has no predecessor. */
1341 CHECK (e->free_elt.prev_uoffset == MHEAP_GROUNDED);
1344 s = mheap_elt_data_bytes (e);
1345 CHECK (user_data_size_to_bin_index (s) == i);
1347 free_count_from_free_lists += 1;
1348 free_size_from_free_lists += s;
1350 if (e->free_elt.next_uoffset == MHEAP_GROUNDED)
1353 n = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
1355 /* Check free element linkages. */
1356 CHECK (n->free_elt.prev_uoffset == mheap_elt_uoffset (v, e));
1362 /* Go through small object cache. */
1363 small_elt_free_count = small_elt_free_size = 0;
1364 for (i = 0; i < ARRAY_LEN (h->small_object_cache.bins.as_u8); i++)
1366 if (h->small_object_cache.bins.as_u8[i] != 0)
/* Cache bin bytes are stored offset by one; 0 means empty. */
1369 uword b = h->small_object_cache.bins.as_u8[i] - 1;
1370 uword o = h->small_object_cache.offsets[i];
1373 e = mheap_elt_at_uoffset (v, o);
1375 /* Object must be allocated. */
1376 CHECK (! e->is_free);
1378 s = mheap_elt_data_bytes (e);
1379 CHECK (user_data_size_to_bin_index (s) == b);
1381 small_elt_free_count += 1;
1382 small_elt_free_size += s;
1387 mheap_elt_t * e, * n;
1388 uword elt_free_size, elt_free_count;
1390 elt_count = elt_size = elt_free_size = elt_free_count = 0;
1392 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1395 if (e->prev_n_user_data != MHEAP_N_USER_DATA_INVALID)
1396 CHECK (e->prev_n_user_data * sizeof (e->user_data[0]) >= MHEAP_MIN_USER_DATA_BYTES);
1398 CHECK (e->n_user_data * sizeof (e->user_data[0]) >= MHEAP_MIN_USER_DATA_BYTES);
1400 n = mheap_next_elt (e);
1402 CHECK (e->is_free == n->prev_is_free);
1405 s = mheap_elt_data_bytes (e);
1414 /* Consecutive free objects should have been combined. */
1415 CHECK (! (e->prev_is_free && n->prev_is_free));
/* Reconcile the three independent counts. */
1418 CHECK (free_count_from_free_lists == elt_free_count);
1419 CHECK (free_size_from_free_lists == elt_free_size);
1420 CHECK (elt_count == h->n_elts + elt_free_count + small_elt_free_count);
1421 CHECK (elt_size + (elt_count + 1) * MHEAP_ELT_OVERHEAD_BYTES == vec_len (v));
1425 mheap_elt_t * e, * n;
1428 e->n_user_data == MHEAP_N_USER_DATA_INVALID;
1431 n = mheap_next_elt (e);
1432 CHECK (e->n_user_data == n->prev_n_user_data);
1438 mheap_maybe_unlock (v);
1440 h->validate_serial += 1;
/* Record one allocation of SIZE bytes at OFFSET for heap tracing.
   Captures the caller's backtrace, finds (or allocates) the
   mheap_trace_t for that unique call stack, bumps its counters, and
   indexes the trace by offset so mheap_put_trace () can undo the
   accounting on free.
   NOTE(review): some control-flow lines (braces, the if (p) / else
   arms around the hash lookup) are elided from this extract. */
1443 static void mheap_get_trace (void * v, uword offset, uword size)
1446 mheap_trace_main_t * tm;
1448 uword i, n_callers, trace_index, * p;
1449 mheap_trace_t trace;
1451 /* Spurious Coverity warnings be gone. */
1452 memset(&trace, 0, sizeof(trace));
1454 n_callers = clib_backtrace (trace.callers, ARRAY_LEN (trace.callers),
1455 /* Skip mheap_get_aligned's frame */ 1);
/* Zero-pad unused frames so the whole callers[] array is a stable
   fixed-size hash key. */
1459 for (i = n_callers; i < ARRAY_LEN (trace.callers); i++)
1460 trace.callers[i] = 0;
1462 h = mheap_header (v);
1463 tm = &h->trace_main;
/* Lazily create the callers -> trace-index hash on first use. */
1465 if (! tm->trace_by_callers)
1466 tm->trace_by_callers = hash_create_mem (0, sizeof (trace.callers), sizeof (uword));
1468 p = hash_get_mem (tm->trace_by_callers, &trace.callers);
/* Known call stack: reuse its existing trace slot. */
1472 t = tm->traces + trace_index;
/* New call stack: prefer recycling a slot from the free list... */
1476 i = vec_len (tm->trace_free_list);
1479 trace_index = tm->trace_free_list[i - 1];
1480 _vec_len (tm->trace_free_list) = i - 1;
/* ...otherwise grow the traces vector. */
1484 mheap_trace_t * old_start = tm->traces;
1485 mheap_trace_t * old_end = vec_end (tm->traces);
1487 vec_add2 (tm->traces, t, 1);
/* Growing the vector may move it; hash keys are pointers into the
   old storage, so rebase every key onto the new base address. */
1489 if (tm->traces != old_start) {
1492 hash_foreach_pair (p, tm->trace_by_callers, ({
1493 q = uword_to_pointer (p->key, mheap_trace_t *);
1494 ASSERT (q >= old_start && q < old_end);
1495 p->key = pointer_to_uword (tm->traces + (q - old_start));
1498 trace_index = t - tm->traces;
1501 t = tm->traces + trace_index;
/* Fresh slot: reset counters and register its call stack. */
1503 t->n_allocations = 0;
1505 hash_set_mem (tm->trace_by_callers, t->callers, trace_index);
1508 t->n_allocations += 1;
1510 t->offset = offset; /* keep a sample to autopsy */
/* Remember which trace owns this offset for the matching put. */
1511 hash_set (tm->trace_index_by_offset, offset, t - tm->traces);
/* Undo mheap_get_trace () accounting when the object at OFFSET is
   freed: look the trace up by offset, decrement its counters, and
   recycle the trace slot once its last outstanding allocation is gone.
   NOTE(review): the early-return path for an offset with no recorded
   trace (p == 0) is elided from this extract. */
1514 static void mheap_put_trace (void * v, uword offset, uword size)
1517 mheap_trace_main_t * tm;
1519 uword trace_index, * p;
1521 h = mheap_header (v);
1522 tm = &h->trace_main;
1523 p = hash_get (tm->trace_index_by_offset, offset);
/* The offset is no longer live: drop the offset -> trace mapping. */
1528 hash_unset (tm->trace_index_by_offset, offset);
1529 ASSERT (trace_index < vec_len (tm->traces));
1531 t = tm->traces + trace_index;
1532 ASSERT (t->n_allocations > 0);
1533 ASSERT (t->n_bytes >= size);
1534 t->n_allocations -= 1;
/* Last allocation for this call stack: retire the trace. */
1536 if (t->n_allocations == 0)
/* Unregister the call stack and return the slot to the free list. */
1538 hash_unset_mem (tm->trace_by_callers, t->callers);
1539 vec_add1 (tm->trace_free_list, trace_index);
/* Clear so a recycled slot starts with fresh counters/callers. */
1540 memset (t, 0, sizeof (t[0]));
/* qsort ()-style comparator for mheap_trace_t records: biggest byte
   totals first (t2 - t1 gives descending order), with allocation
   count as the tie-breaker.
   NOTE(review): the declaration of cmp and the final return statement
   are elided from this extract. */
1544 static int mheap_trace_sort (const void * _t1, const void * _t2)
1546 const mheap_trace_t * t1 = _t1;
1547 const mheap_trace_t * t2 = _t2;
/* Signed (word) casts so the subtraction yields a usable sign. */
1550 cmp = (word) t2->n_bytes - (word) t1->n_bytes;
/* Tie on bytes: fall back to allocation counts, also descending. */
1552 cmp = (word) t2->n_allocations - (word) t1->n_allocations;
/* Release everything owned by a trace main: the trace vector, the
   recycled-slot list, and both lookup hashes.
   NOTE(review): the return-type line (presumably void, plus any
   linkage qualifier) precedes this extract's first visible line. */
1557 mheap_trace_main_free (mheap_trace_main_t * tm)
1559 vec_free (tm->traces);
1560 vec_free (tm->trace_free_list);
1561 hash_free (tm->trace_by_callers);
1562 hash_free (tm->trace_index_by_offset);
1565 void mheap_trace (void * v, int enable)
1569 h = mheap_header (v);
1573 h->flags |= MHEAP_FLAG_TRACE;
1577 mheap_trace_main_free (&h->trace_main);
1578 h->flags &= ~MHEAP_FLAG_TRACE;