2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
18 Permission is hereby granted, free of charge, to any person obtaining
19 a copy of this software and associated documentation files (the
20 "Software"), to deal in the Software without restriction, including
21 without limitation the rights to use, copy, modify, merge, publish,
22 distribute, sublicense, and/or sell copies of the Software, and to
23 permit persons to whom the Software is furnished to do so, subject to
24 the following conditions:
26 The above copyright notice and this permission notice shall be
27 included in all copies or substantial portions of the Software.
29 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 #include <vppinfra/bitops.h>
39 #include <vppinfra/hash.h>
40 #include <vppinfra/format.h>
41 #include <vppinfra/mheap.h>
42 #include <vppinfra/os.h>
43 #include <vppinfra/time.h>
44 #include <vppinfra/lock.h>
47 #include <vppinfra/elf_clib.h>
50 static void mheap_get_trace (void *v, uword offset, uword size);
51 static void mheap_put_trace (void *v, uword offset, uword size);
52 static int mheap_trace_sort (const void *t1, const void *t2);
55 mheap_maybe_lock (void *v)
57 mheap_t *h = mheap_header (v);
58 if (v && (h->flags & MHEAP_FLAG_THREAD_SAFE))
60 u32 my_cpu = os_get_thread_index ();
61 if (h->owner_cpu == my_cpu)
67 while (clib_atomic_test_and_set (&h->lock))
70 h->owner_cpu = my_cpu;
71 h->recursion_count = 1;
76 mheap_maybe_unlock (void *v)
78 mheap_t *h = mheap_header (v);
79 if (v && h->flags & MHEAP_FLAG_THREAD_SAFE)
81 ASSERT (os_get_thread_index () == h->owner_cpu);
82 if (--h->recursion_count == 0)
85 CLIB_MEMORY_BARRIER ();
91 /* Find bin for objects with size at least n_user_data_bytes. */
93 user_data_size_to_bin_index (uword n_user_data_bytes)
95 uword n_user_data_words;
96 word small_bin, large_bin;
98 /* User size must be at least big enough to hold free elt. */
99 n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
101 /* Round to words. */
103 (round_pow2 (n_user_data_bytes, MHEAP_USER_DATA_WORD_BYTES) /
104 MHEAP_USER_DATA_WORD_BYTES);
106 ASSERT (n_user_data_words > 0);
109 (MHEAP_MIN_USER_DATA_BYTES / MHEAP_USER_DATA_WORD_BYTES);
110 ASSERT (small_bin >= 0);
113 MHEAP_N_SMALL_OBJECT_BINS + max_log2 (n_user_data_bytes) -
114 MHEAP_LOG2_N_SMALL_OBJECT_BINS;
116 return small_bin < MHEAP_N_SMALL_OBJECT_BINS ? small_bin : large_bin;
120 mheap_elt_size_to_user_n_bytes (uword n_bytes)
122 ASSERT (n_bytes >= sizeof (mheap_elt_t));
123 return (n_bytes - STRUCT_OFFSET_OF (mheap_elt_t, user_data));
126 always_inline uword __attribute__ ((unused))
127 mheap_elt_size_to_user_n_words (uword n_bytes)
129 ASSERT (n_bytes % MHEAP_USER_DATA_WORD_BYTES == 0);
130 return mheap_elt_size_to_user_n_bytes (n_bytes) /
131 MHEAP_USER_DATA_WORD_BYTES;
135 mheap_elt_set_size (void *v,
136 uword uoffset, uword n_user_data_bytes, uword is_free)
140 e = mheap_elt_at_uoffset (v, uoffset);
142 ASSERT (n_user_data_bytes % MHEAP_USER_DATA_WORD_BYTES == 0);
144 e->n_user_data = n_user_data_bytes / MHEAP_USER_DATA_WORD_BYTES;
145 e->is_free = is_free;
146 ASSERT (e->prev_n_user_data * sizeof (e->user_data[0]) >=
147 MHEAP_MIN_USER_DATA_BYTES);
149 n = mheap_next_elt (e);
150 n->prev_n_user_data = e->n_user_data;
151 n->prev_is_free = is_free;
155 set_first_free_elt_offset (mheap_t * h, uword bin, uword uoffset)
159 h->first_free_elt_uoffset_by_bin[bin] = uoffset;
161 i0 = bin / BITS (h->non_empty_free_elt_heads[0]);
162 i1 = (uword) 1 << (uword) (bin % BITS (h->non_empty_free_elt_heads[0]));
164 ASSERT (i0 < ARRAY_LEN (h->non_empty_free_elt_heads));
165 if (h->first_free_elt_uoffset_by_bin[bin] == MHEAP_GROUNDED)
166 h->non_empty_free_elt_heads[i0] &= ~i1;
168 h->non_empty_free_elt_heads[i0] |= i1;
172 set_free_elt (void *v, uword uoffset, uword n_user_data_bytes)
174 mheap_t *h = mheap_header (v);
175 mheap_elt_t *e = mheap_elt_at_uoffset (v, uoffset);
176 mheap_elt_t *n = mheap_next_elt (e);
177 uword bin = user_data_size_to_bin_index (n_user_data_bytes);
179 ASSERT (n->prev_is_free);
182 e->free_elt.prev_uoffset = MHEAP_GROUNDED;
183 e->free_elt.next_uoffset = h->first_free_elt_uoffset_by_bin[bin];
185 /* Fill in next free elt's previous pointer. */
186 if (e->free_elt.next_uoffset != MHEAP_GROUNDED)
188 mheap_elt_t *nf = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
189 ASSERT (nf->is_free);
190 nf->free_elt.prev_uoffset = uoffset;
193 set_first_free_elt_offset (h, bin, uoffset);
197 new_free_elt (void *v, uword uoffset, uword n_user_data_bytes)
199 mheap_elt_set_size (v, uoffset, n_user_data_bytes, /* is_free */ 1);
200 set_free_elt (v, uoffset, n_user_data_bytes);
204 remove_free_elt (void *v, mheap_elt_t * e, uword bin)
206 mheap_t *h = mheap_header (v);
214 no = e->free_elt.next_uoffset;
216 n = no != MHEAP_GROUNDED ? mheap_elt_at_uoffset (v, no) : 0;
217 po = e->free_elt.prev_uoffset;
218 p = po != MHEAP_GROUNDED ? mheap_elt_at_uoffset (v, po) : 0;
221 set_first_free_elt_offset (h, bin, no);
223 p->free_elt.next_uoffset = no;
226 n->free_elt.prev_uoffset = po;
230 remove_free_elt2 (void *v, mheap_elt_t * e)
233 bin = user_data_size_to_bin_index (mheap_elt_data_bytes (e));
234 remove_free_elt (v, e, bin);
237 #define MHEAP_VM_MAP (1 << 0)
238 #define MHEAP_VM_UNMAP (1 << 1)
239 #define MHEAP_VM_NOMAP (0 << 1)
240 #define MHEAP_VM_ROUND (1 << 2)
241 #define MHEAP_VM_ROUND_UP MHEAP_VM_ROUND
242 #define MHEAP_VM_ROUND_DOWN (0 << 2)
244 static uword mheap_page_size;
246 static_always_inline uword
247 mheap_page_round (uword addr)
249 return (addr + mheap_page_size - 1) & ~(mheap_page_size - 1);
252 static_always_inline uword
253 mheap_page_truncate (uword addr)
255 return addr & ~(mheap_page_size - 1);
258 static_always_inline uword
259 mheap_vm (void *v, uword flags, clib_address_t start_addr, uword size)
261 mheap_t *h = mheap_header (v);
262 clib_address_t start_page, end_page, end_addr;
265 ASSERT (!(h->flags & MHEAP_FLAG_DISABLE_VM));
267 end_addr = start_addr + size;
269 /* Round start/end address up to page boundary. */
270 start_page = mheap_page_round (start_addr);
272 if ((flags & MHEAP_VM_ROUND) == MHEAP_VM_ROUND_UP)
273 end_page = mheap_page_round (end_addr);
275 end_page = mheap_page_truncate (end_addr);
278 if (end_page > start_page)
280 mapped_bytes = end_page - start_page;
281 if (flags & MHEAP_VM_MAP)
282 clib_mem_vm_map ((void *) start_page, end_page - start_page);
283 else if (flags & MHEAP_VM_UNMAP)
284 clib_mem_vm_unmap ((void *) start_page, end_page - start_page);
290 static_always_inline uword
291 mheap_vm_elt (void *v, uword flags, uword offset)
294 clib_address_t start_addr, end_addr;
296 e = mheap_elt_at_uoffset (v, offset);
297 start_addr = (clib_address_t) ((void *) e->user_data);
298 end_addr = (clib_address_t) mheap_next_elt (e);
299 return mheap_vm (v, flags, start_addr, end_addr - start_addr);
/* Returns a bit mask of small-object-cache slots whose stored bin byte
   equals 'bin' (SIMD compare over c->bins).
   NOTE(review): this chunk is a lossy extraction — the function header,
   the non-SIMD fallback branch, braces and the return statement are
   missing from this view; visible code preserved verbatim (leading
   numbers are extraction artifacts). */
303 mheap_small_object_cache_mask (mheap_small_object_cache_t * c, uword bin)
307 /* $$$$ ELIOT FIXME: add Altivec version of this routine */
308 #if !defined (CLIB_HAVE_VEC128) || defined (__ALTIVEC__) || defined (__i386__)
311 u8x16 b = u8x16_splat (bin);
315 #define _(i) ((uword) u8x16_compare_byte_mask ((b == c->bins.as_u8x16[i])) << (uword) ((i)*16))
/* Wider uword covers more 16-byte compare groups — TODO confirm against upstream. */
317 if (BITS (uword) > 32)
326 mheap_get_small_object (mheap_t * h, uword bin)
328 mheap_small_object_cache_t *c = &h->small_object_cache;
329 uword mask = mheap_small_object_cache_mask (c, bin + 1);
330 uword offset = MHEAP_GROUNDED;
334 uword i = min_log2 (mask);
335 uword o = c->offsets[i];
336 ASSERT (o != MHEAP_GROUNDED);
337 c->bins.as_u8[i] = 0;
/* Stash a freed small object (bin, offset) into the cache.  If a free
   slot exists it is used; otherwise a victim slot is chosen round-robin
   and that slot's old offset is returned so the caller can really free
   it.
   NOTE(review): lossy extraction — function header, slot-empty guard,
   wrap of replacement_index and both return statements are missing from
   this view; visible code preserved verbatim. */
345 mheap_put_small_object (mheap_t * h, uword bin, uword offset)
347 mheap_small_object_cache_t *c = &h->small_object_cache;
348 uword free_mask = mheap_small_object_cache_mask (c, 0);
/* Free slot available: store biased bin byte and offset there. */
354 i = min_log2 (free_mask);
355 c->bins.as_u8[i] = b;
356 c->offsets[i] = offset;
360 /* Nothing free with right size: cyclic replacement. */
364 i = c->replacement_index++;
366 c->bins.as_u8[i] = b;
367 old_offset = c->offsets[i];
368 c->offsets[i] = offset;
370 /* Return old offset so it can be freed. */
/* Walk one free-list bin looking for an element that can hold
   *n_user_data_bytes_arg at the requested alignment/align_offset.
   On success: carves the free block into (optional) low fragment,
   allocated object, (optional) high fragment, maps any needed pages,
   updates *n_user_data_bytes_arg to the actual size, and returns the
   object's offset.  Returns MHEAP_GROUNDED when the bin has no fit.
   NOTE(review): lossy extraction — the function header, loop keyword,
   braces, 'continue'/'break' statements and the final return are
   missing from this view; visible code preserved verbatim. */
376 mheap_get_search_free_bin (void *v,
378 uword * n_user_data_bytes_arg,
379 uword align, uword align_offset)
381 mheap_t *h = mheap_header (v);
384 /* Free object is at offset f0 ... f1;
385 Allocatted object is at offset o0 ... o1. */
386 word o0, o1, f0, f1, search_n_user_data_bytes;
387 word lo_free_usize, hi_free_usize;
389 ASSERT (h->first_free_elt_uoffset_by_bin[bin] != MHEAP_GROUNDED);
390 e = mheap_elt_at_uoffset (v, h->first_free_elt_uoffset_by_bin[bin]);
392 search_n_user_data_bytes = *n_user_data_bytes_arg;
394 /* Silence compiler warning. */
395 o0 = o1 = f0 = f1 = 0;
397 h->stats.free_list.n_search_attempts += 1;
399 /* Find an object that is large enough with correct alignment at given alignment offset. */
402 uword this_object_n_user_data_bytes = mheap_elt_data_bytes (e);
/* Small bins hold exact sizes, so a fit is guaranteed there. */
405 if (bin < MHEAP_N_SMALL_OBJECT_BINS)
406 ASSERT (this_object_n_user_data_bytes >= search_n_user_data_bytes);
408 h->stats.free_list.n_objects_searched += 1;
410 if (this_object_n_user_data_bytes < search_n_user_data_bytes)
413 /* Bounds of free object: from f0 to f1. */
414 f0 = ((void *) e->user_data - v);
415 f1 = f0 + this_object_n_user_data_bytes;
417 /* Place candidate object at end of free block and align as requested. */
418 o0 = ((f1 - search_n_user_data_bytes) & ~(align - 1)) - align_offset;
422 /* Make sure that first free fragment is either empty or
423 large enough to be valid. */
426 lo_free_usize = o0 != f0 ? o0 - f0 - MHEAP_ELT_OVERHEAD_BYTES : 0;
427 if (o0 <= f0 || lo_free_usize >= (word) MHEAP_MIN_USER_DATA_BYTES)
432 o1 = o0 + search_n_user_data_bytes;
435 if (o0 >= f0 && o1 <= f1)
439 /* Reached end of free list without finding large enough object. */
440 if (e->free_elt.next_uoffset == MHEAP_GROUNDED)
441 return MHEAP_GROUNDED;
443 /* Otherwise keep searching for large enough object. */
444 e = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
448 /* Free fragment at end. */
449 hi_free_usize = f1 != o1 ? f1 - o1 - MHEAP_ELT_OVERHEAD_BYTES : 0;
451 /* If fragment at end is too small to be a new object,
452 give user's object a bit more space than requested. */
453 if (hi_free_usize < (word) MHEAP_MIN_USER_DATA_BYTES)
455 search_n_user_data_bytes += f1 - o1;
460 /* Need to make sure that relevant memory areas are mapped. */
461 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
463 mheap_elt_t *f0_elt = mheap_elt_at_uoffset (v, f0);
464 mheap_elt_t *f1_elt = mheap_elt_at_uoffset (v, f1);
465 mheap_elt_t *o0_elt = mheap_elt_at_uoffset (v, o0);
466 mheap_elt_t *o1_elt = mheap_elt_at_uoffset (v, o1);
468 uword f0_page_start, f0_page_end;
469 uword o0_page_start, o0_page_end;
471 /* Free elt is mapped. Addresses after that may not be mapped. */
472 f0_page_start = mheap_page_round (pointer_to_uword (f0_elt->user_data));
473 f0_page_end = mheap_page_truncate (pointer_to_uword (f1_elt));
475 o0_page_start = mheap_page_truncate (pointer_to_uword (o0_elt));
476 o0_page_end = mheap_page_round (pointer_to_uword (o1_elt->user_data));
/* Clamp object's page range to the pages the free block spans. */
478 if (o0_page_start < f0_page_start)
479 o0_page_start = f0_page_start;
480 if (o0_page_end > f0_page_end)
481 o0_page_end = f0_page_end;
483 if (o0_page_end > o0_page_start)
484 clib_mem_vm_map (uword_to_pointer (o0_page_start, void *),
485 o0_page_end - o0_page_start);
488 /* Remove free object from free list. */
489 remove_free_elt (v, e, bin);
491 /* Free fragment at begining. */
492 if (lo_free_usize > 0)
494 ASSERT (lo_free_usize >= (word) MHEAP_MIN_USER_DATA_BYTES);
495 mheap_elt_set_size (v, f0, lo_free_usize, /* is_free */ 1);
496 new_free_elt (v, f0, lo_free_usize);
499 mheap_elt_set_size (v, o0, search_n_user_data_bytes, /* is_free */ 0);
501 if (hi_free_usize > 0)
503 uword uo = o1 + MHEAP_ELT_OVERHEAD_BYTES;
504 mheap_elt_set_size (v, uo, hi_free_usize, /* is_free */ 1);
505 new_free_elt (v, uo, hi_free_usize);
508 /* Return actual size of block. */
509 *n_user_data_bytes_arg = search_n_user_data_bytes;
511 h->stats.free_list.n_objects_found += 1;
/* Search free lists for object with given size and alignment.
   Tries the small object cache first (when enabled and the request is
   word-aligned with no offset), then scans the non-empty-bin bitmap
   from the request's bin upward, delegating each candidate bin to
   mheap_get_search_free_bin.  Returns object offset or MHEAP_GROUNDED.
   NOTE(review): lossy extraction — function header, braces, the cache
   'return r' and the foreach_set_bit body delimiters are missing from
   this view; visible code preserved verbatim. */
516 /* Search free lists for object with given size and alignment. */
518 mheap_get_search_free_list (void *v,
519 uword * n_user_bytes_arg,
520 uword align, uword align_offset)
522 mheap_t *h = mheap_header (v);
523 uword bin, n_user_bytes, i, bi;
525 n_user_bytes = *n_user_bytes_arg;
526 bin = user_data_size_to_bin_index (n_user_bytes);
528 if (MHEAP_HAVE_SMALL_OBJECT_CACHE
529 && (h->flags & MHEAP_FLAG_SMALL_OBJECT_CACHE)
531 && align == STRUCT_SIZE_OF (mheap_elt_t, user_data[0])
532 && align_offset == 0)
534 uword r = mheap_get_small_object (h, bin);
535 h->stats.n_small_object_cache_attempts += 1;
536 if (r != MHEAP_GROUNDED)
538 h->stats.n_small_object_cache_hits += 1;
543 for (i = bin / BITS (uword); i < ARRAY_LEN (h->non_empty_free_elt_heads);
546 uword non_empty_bin_mask = h->non_empty_free_elt_heads[i];
548 /* No need to search smaller bins. */
549 if (i == bin / BITS (uword))
550 non_empty_bin_mask &= ~pow2_mask (bin % BITS (uword));
552 /* Search each occupied free bin which is large enough. */
554 foreach_set_bit (bi, non_empty_bin_mask,
557 mheap_get_search_free_bin (v, bi + i * BITS (uword),
561 if (r != MHEAP_GROUNDED) return r;
566 return MHEAP_GROUNDED;
/* Allocate by growing the heap vector: place the object at the current
   end of the heap (after an optional alignment filler free element),
   map the newly used pages, and write the end-of-heap sentinel.  Sets
   *offset_return to the object offset, or MHEAP_GROUNDED when the heap
   would exceed h->max_size.
   NOTE(review): lossy extraction — the align parameter line, several
   declarations, braces, vector-length updates and the final return are
   missing from this view; visible code preserved verbatim. */
569 static never_inline void *
570 mheap_get_extend_vector (void *v,
571 uword n_user_data_bytes,
573 uword align_offset, uword * offset_return)
575 /* Bounds of free and allocated objects (as above). */
576 uword f0, f1, o0, o1;
578 mheap_t *h = mheap_header (v);
581 if (_vec_len (v) == 0)
583 _vec_len (v) = MHEAP_ELT_OVERHEAD_BYTES;
585 /* Create first element of heap. */
586 e = mheap_elt_at_uoffset (v, _vec_len (v));
587 e->prev_n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Align candidate object start; any gap becomes a filler free elt. */
592 o0 = round_pow2 (f0, align) - align_offset;
595 free_size = o0 - f0 - MHEAP_ELT_OVERHEAD_BYTES;
596 if (o0 == f0 || free_size >= (word) sizeof (mheap_elt_t))
602 o1 = o0 + n_user_data_bytes;
603 f1 = o1 + MHEAP_ELT_OVERHEAD_BYTES;
606 h = mheap_header (v);
608 /* Make sure we have space for object plus overhead. */
609 if (f1 > h->max_size)
611 *offset_return = MHEAP_GROUNDED;
617 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
619 mheap_elt_t *f0_elt = mheap_elt_at_uoffset (v, f0);
620 mheap_elt_t *f1_elt = mheap_elt_at_uoffset (v, f1);
622 uword f0_page = mheap_page_round (pointer_to_uword (f0_elt->user_data));
623 uword f1_page = mheap_page_round (pointer_to_uword (f1_elt->user_data));
625 if (f1_page > f0_page)
626 mheap_vm (v, MHEAP_VM_MAP, f0_page, f1_page - f0_page);
630 new_free_elt (v, f0, free_size);
632 mheap_elt_set_size (v, o0, n_user_data_bytes, /* is_free */ 0);
634 /* Mark last element. */
635 e = mheap_elt_at_uoffset (v, f1);
636 e->n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Public allocation entry point: normalize alignment and size, search
   the free lists, fall back to extending the heap vector, record trace
   and timing stats.  Writes the object offset (or MHEAP_GROUNDED) to
   *offset_return and returns the (possibly newly created) heap vector.
   NOTE(review): lossy extraction — function header, local declarations,
   braces, validation calls and returns are missing from this view;
   visible code preserved verbatim.  In particular the bare
   'v = mheap_alloc (0, 64 << 20);' below is guarded by 'if (!v)' in
   upstream — the guard line appears to have been dropped by the
   extraction; confirm before treating this as real control flow. */
644 mheap_get_aligned (void *v,
645 uword n_user_data_bytes,
646 uword align, uword align_offset, uword * offset_return)
652 cpu_times[0] = clib_cpu_time_now ();
654 align = clib_max (align, STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
655 align = max_pow2 (align);
657 /* Correct align offset to be smaller than alignment. */
658 align_offset &= (align - 1);
660 /* Align offset must be multiple of minimum object size. */
661 if (align_offset % STRUCT_SIZE_OF (mheap_elt_t, user_data[0]) != 0)
663 *offset_return = MHEAP_GROUNDED;
668 * Round requested size.
670 * Step 1: round up to the minimum object size.
671 * Step 2: round up to a multiple of the user data size (e.g. 4)
672 * Step 3: if non-trivial alignment requested, round up
673 * so that the object precisely fills a chunk
674 * as big as the alignment request.
676 * Step 3 prevents the code from going into "bin search hyperspace":
677 * looking at a huge number of fractional remainder chunks, none of which
678 * will satisfy the alignment constraint. This fixes an allocator
679 * performance issue when one requests a large number of 16 byte objects
680 * aligned to 64 bytes, to name one variation on the theme.
682 n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
684 round_pow2 (n_user_data_bytes,
685 STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
686 if (align > MHEAP_ELT_OVERHEAD_BYTES)
687 n_user_data_bytes = clib_max (n_user_data_bytes,
688 align - MHEAP_ELT_OVERHEAD_BYTES);
690 v = mheap_alloc (0, 64 << 20);
692 mheap_maybe_lock (v);
694 h = mheap_header (v);
696 if (h->flags & MHEAP_FLAG_VALIDATE)
699 /* First search free lists for object. */
701 mheap_get_search_free_list (v, &n_user_data_bytes, align, align_offset);
703 h = mheap_header (v);
705 /* If that fails allocate object at end of heap by extending vector. */
706 if (offset == MHEAP_GROUNDED && _vec_len (v) < h->max_size)
709 mheap_get_extend_vector (v, n_user_data_bytes, align, align_offset,
711 h = mheap_header (v);
712 h->stats.n_vector_expands += offset != MHEAP_GROUNDED;
715 *offset_return = offset;
716 if (offset != MHEAP_GROUNDED)
720 if (h->flags & MHEAP_FLAG_TRACE)
722 /* Recursion block for case when we are traceing main clib heap. */
723 h->flags &= ~MHEAP_FLAG_TRACE;
725 mheap_get_trace (v, offset, n_user_data_bytes);
727 h->flags |= MHEAP_FLAG_TRACE;
731 if (h->flags & MHEAP_FLAG_VALIDATE)
734 mheap_maybe_unlock (v);
736 cpu_times[1] = clib_cpu_time_now ();
737 h->stats.n_clocks_get += cpu_times[1] - cpu_times[0];
738 h->stats.n_gets += 1;
744 free_last_elt (void *v, mheap_elt_t * e)
746 mheap_t *h = mheap_header (v);
748 /* Possibly delete preceeding free element also. */
751 e = mheap_prev_elt (e);
752 remove_free_elt2 (v, e);
755 if (e->prev_n_user_data == MHEAP_N_USER_DATA_INVALID)
757 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
758 mheap_vm_elt (v, MHEAP_VM_UNMAP, mheap_elt_uoffset (v, e));
763 uword uo = mheap_elt_uoffset (v, e);
764 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
765 mheap_vm_elt (v, MHEAP_VM_UNMAP, uo);
766 e->n_user_data = MHEAP_N_USER_DATA_INVALID;
/* Free the object at uoffset: optionally stash it in the small object
   cache, validate forward/backward element consistency, then either
   shrink the heap (last element) or coalesce with free neighbors and
   push onto the free lists.  Records trace and timing stats.
   NOTE(review): lossy extraction — function header, declarations,
   braces, os_panic/validation bodies, coalescing branch conditions and
   returns are missing from this view; visible code preserved verbatim. */
772 mheap_put (void *v, uword uoffset)
775 uword n_user_data_bytes, bin;
777 uword trace_uoffset, trace_n_user_data_bytes;
780 cpu_times[0] = clib_cpu_time_now ();
782 h = mheap_header (v);
784 mheap_maybe_lock (v);
786 if (h->flags & MHEAP_FLAG_VALIDATE)
789 ASSERT (h->n_elts > 0);
791 h->stats.n_puts += 1;
793 e = mheap_elt_at_uoffset (v, uoffset);
794 n = mheap_next_elt (e);
795 n_user_data_bytes = mheap_elt_data_bytes (e);
797 trace_uoffset = uoffset;
798 trace_n_user_data_bytes = n_user_data_bytes;
800 bin = user_data_size_to_bin_index (n_user_data_bytes);
801 if (MHEAP_HAVE_SMALL_OBJECT_CACHE
802 && bin < 255 && (h->flags & MHEAP_FLAG_SMALL_OBJECT_CACHE))
/* Cache may return a different (evicted) offset to actually free. */
804 uoffset = mheap_put_small_object (h, bin, uoffset);
808 e = mheap_elt_at_uoffset (v, uoffset);
809 n = mheap_next_elt (e);
810 n_user_data_bytes = mheap_elt_data_bytes (e);
813 /* Assert that forward and back pointers are equal. */
814 if (e->n_user_data != n->prev_n_user_data)
817 /* Forward and backwards is_free must agree. */
818 if (e->is_free != n->prev_is_free)
821 /* Object was already freed. */
825 /* Special case: delete last element in heap. */
826 if (n->n_user_data == MHEAP_N_USER_DATA_INVALID)
827 free_last_elt (v, e);
831 uword f0, f1, n_combine;
834 f1 = f0 + n_user_data_bytes;
/* Coalesce with a free predecessor. */
839 mheap_elt_t *p = mheap_prev_elt (e);
840 f0 = mheap_elt_uoffset (v, p);
841 remove_free_elt2 (v, p);
/* Coalesce with a free successor. */
847 mheap_elt_t *m = mheap_next_elt (n);
849 remove_free_elt2 (v, n);
854 mheap_elt_set_size (v, f0, f1 - f0, /* is_free */ 1);
856 e->is_free = n->prev_is_free = 1;
857 set_free_elt (v, f0, f1 - f0);
859 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
860 mheap_vm_elt (v, MHEAP_VM_UNMAP, f0);
864 h = mheap_header (v);
866 if (h->flags & MHEAP_FLAG_TRACE)
868 /* Recursion block for case when we are traceing main clib heap. */
869 h->flags &= ~MHEAP_FLAG_TRACE;
871 mheap_put_trace (v, trace_uoffset, trace_n_user_data_bytes);
873 h->flags |= MHEAP_FLAG_TRACE;
876 if (h->flags & MHEAP_FLAG_VALIDATE)
879 mheap_maybe_unlock (v);
881 cpu_times[1] = clib_cpu_time_now ();
882 h->stats.n_clocks_put += cpu_times[1] - cpu_times[0];
/* Create a new mheap inside 'memory' (or VM-allocated memory when
   memory == 0).  Page-aligns the header and vector, initializes flags,
   max size, stats, and empty free lists; unmaps unused pages until
   they are needed.  Returns the heap vector.
   NOTE(review): lossy extraction — function header, local declarations,
   alignment adjustments, max_size setup and the return are missing from
   this view; visible code preserved verbatim. */
886 mheap_alloc_with_flags (void *memory, uword memory_size, uword flags)
892 if (!mheap_page_size)
893 mheap_page_size = clib_mem_get_page_size ();
897 /* No memory given, try to VM allocate some. */
898 memory = clib_mem_vm_alloc (memory_size);
902 /* No memory region implies we have virtual memory. */
903 flags &= ~MHEAP_FLAG_DISABLE_VM;
906 /* Make sure that given memory is page aligned. */
910 am = pointer_to_uword (memory);
911 av = mheap_page_round (am);
912 v = uword_to_pointer (av, void *);
913 h = mheap_header (v);
914 ah = pointer_to_uword (h);
/* Header fell before the region start: push it forward a page. */
916 ah += mheap_page_size;
918 h = uword_to_pointer (ah, void *);
919 v = mheap_vector (h);
921 if (PREDICT_FALSE (memory + memory_size < v))
924 * This will happen when the requested memory_size is too
925 * small to cope with the heap header and/or memory alignment.
927 clib_mem_vm_free (memory, memory_size);
931 size = memory + memory_size - v;
934 /* VM map header so we can use memory. */
935 if (!(flags & MHEAP_FLAG_DISABLE_VM))
936 clib_mem_vm_map (h, sizeof (h[0]));
938 /* Zero vector header: both heap header and vector length. */
939 clib_memset (h, 0, sizeof (h[0]));
942 h->vm_alloc_offset_from_header = (void *) h - memory;
943 h->vm_alloc_size = memory_size;
948 /* Set flags based on those given less builtin-flags. */
949 h->flags |= (flags & ~MHEAP_FLAG_TRACE);
951 /* Unmap remainder of heap until we will be ready to use it. */
952 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
953 mheap_vm (v, MHEAP_VM_UNMAP | MHEAP_VM_ROUND_UP,
954 (clib_address_t) v, h->max_size);
956 /* Initialize free list heads to empty. */
957 clib_memset (h->first_free_elt_uoffset_by_bin, 0xFF,
958 sizeof (h->first_free_elt_uoffset_by_bin));
964 mheap_alloc (void *memory, uword size)
969 flags |= MHEAP_FLAG_DISABLE_VM;
971 #ifdef CLIB_HAVE_VEC128
972 flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
975 return mheap_alloc_with_flags (memory, size, flags);
979 mheap_alloc_with_lock (void *memory, uword size, int locked)
985 flags |= MHEAP_FLAG_DISABLE_VM;
987 #ifdef CLIB_HAVE_VEC128
988 flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
991 rv = mheap_alloc_with_flags (memory, size, flags);
995 mheap_t *h = mheap_header (rv);
996 h->flags |= MHEAP_FLAG_THREAD_SAFE;
1002 _mheap_free (void *v)
1004 mheap_t *h = mheap_header (v);
1007 clib_mem_vm_free ((void *) h - h->vm_alloc_offset_from_header,
1013 /* Call user's function with each object in heap. */
1015 mheap_foreach (void *v,
1016 uword (*func) (void *arg, void *v, void *elt_data,
1017 uword elt_size), void *arg)
1020 u8 *stack_heap, *clib_mem_mheap_save;
1021 u8 tmp_heap_memory[16 * 1024];
1023 mheap_maybe_lock (v);
1025 if (vec_len (v) == 0)
1028 clib_mem_mheap_save = 0;
1031 /* Allocate a new temporary heap on the stack.
1032 This is so that our hash table & user's callback function can
1033 themselves allocate memory somewhere without getting in the way
1034 of the heap we are looking at. */
1035 if (v == clib_mem_get_heap ())
1037 stack_heap = mheap_alloc (tmp_heap_memory, sizeof (tmp_heap_memory));
1038 clib_mem_mheap_save = v;
1039 clib_mem_set_heap (stack_heap);
1043 e->n_user_data != MHEAP_N_USER_DATA_INVALID; e = mheap_next_elt (e))
1045 void *p = mheap_elt_data (v, e);
1048 if ((*func) (arg, v, p, mheap_elt_data_bytes (e)))
1052 /* Restore main CLIB heap. */
1053 if (clib_mem_mheap_save)
1054 clib_mem_set_heap (clib_mem_mheap_save);
1057 mheap_maybe_unlock (v);
1060 /* Bytes in mheap header overhead not including data bytes. */
1062 mheap_bytes_overhead (void *v)
1064 mheap_t *h = mheap_header (v);
1065 return v ? sizeof (h[0]) + h->n_elts * sizeof (mheap_elt_t) : 0;
1068 /* Total number of bytes including both data and overhead. */
1070 mheap_bytes (void *v)
1072 return mheap_bytes_overhead (v) + vec_bytes (v);
1076 mheap_usage_no_lock (void *v, clib_mem_usage_t * usage)
1078 mheap_t *h = mheap_header (v);
1079 uword used = 0, free = 0, free_vm_unmapped = 0;
1081 if (vec_len (v) > 0)
1086 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1087 e = mheap_next_elt (e))
1089 uword size = mheap_elt_data_bytes (e);
1093 if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
1095 mheap_vm_elt (v, MHEAP_VM_NOMAP, mheap_elt_uoffset (v, e));
1102 usage->object_count = mheap_elts (v);
1103 usage->bytes_total = mheap_bytes (v);
1104 usage->bytes_overhead = mheap_bytes_overhead (v);
1105 usage->bytes_max = mheap_max_size (v);
1106 usage->bytes_used = used;
1107 usage->bytes_free = free;
1108 usage->bytes_free_reclaimed = free_vm_unmapped;
1112 mheap_usage (void *v, clib_mem_usage_t * usage)
1114 mheap_maybe_lock (v);
1115 mheap_usage_no_lock (v, usage);
1116 mheap_maybe_unlock (v);
1120 format_mheap_byte_count (u8 * s, va_list * va)
1122 uword n_bytes = va_arg (*va, uword);
1124 return format (s, "%wd", n_bytes);
1126 return format (s, "%wdk", n_bytes / 1024);
1129 /* Returns first corrupt heap element. */
1130 static mheap_elt_t *
1131 mheap_first_corrupt (void *v)
1135 if (vec_len (v) == 0)
1141 if (e->n_user_data == MHEAP_N_USER_DATA_INVALID)
1144 n = mheap_next_elt (e);
1146 if (e->n_user_data != n->prev_n_user_data)
1149 if (e->is_free != n->prev_is_free)
/* Format allocator statistics: small-object-cache hit rate, free-list
   search effectiveness, vector expansions, and per-call clock averages.
   NOTE(review): lossy extraction — function header, braces and the
   final return are missing from this view; visible code preserved
   verbatim. */
1159 format_mheap_stats (u8 * s, va_list * va)
1161 mheap_t *h = va_arg (*va, mheap_t *);
1162 mheap_stats_t *st = &h->stats;
1163 u32 indent = format_get_indent (s);
1167 "alloc. from small object cache: %Ld hits %Ld attempts (%.2f%%) replacements %d",
1168 st->n_small_object_cache_hits, st->n_small_object_cache_attempts,
1169 (st->n_small_object_cache_attempts !=
1170 0 ? 100. * (f64) st->n_small_object_cache_hits /
1171 (f64) st->n_small_object_cache_attempts : 0.),
1172 h->small_object_cache.replacement_index);
1176 "\n%Ualloc. from free-list: %Ld attempts, %Ld hits (%.2f%%), %Ld considered (per-attempt %.2f)",
1177 format_white_space, indent, st->free_list.n_search_attempts,
1178 st->free_list.n_objects_found,
1179 (st->free_list.n_search_attempts !=
1180 0 ? 100. * (f64) st->free_list.n_objects_found /
1181 (f64) st->free_list.n_search_attempts : 0.),
1182 st->free_list.n_objects_searched,
1183 (st->free_list.n_search_attempts !=
1184 0 ? (f64) st->free_list.n_objects_searched /
1185 (f64) st->free_list.n_search_attempts : 0.));
1187 s = format (s, "\n%Ualloc. from vector-expand: %Ld",
1188 format_white_space, indent, st->n_vector_expands);
1190 s = format (s, "\n%Uallocs: %Ld %.2f clocks/call",
1191 format_white_space, indent,
1192 st->n_gets, (f64) st->n_clocks_get / (f64) st->n_gets);
1194 s = format (s, "\n%Ufrees: %Ld %.2f clocks/call",
1195 format_white_space, indent,
1196 st->n_puts, (f64) st->n_clocks_put / (f64) st->n_puts);
/* Format a whole heap: usage summary; when verbose, a size histogram,
   allocator stats, sorted allocation traces (if tracing enabled), the
   first corrupt element (if any), and a raw element-offset dump.
   NOTE(review): lossy extraction — function header, many declarations,
   braces, loop headers and conditions are missing from this view;
   visible code preserved verbatim. */
1202 format_mheap (u8 * s, va_list * va)
1204 void *v = va_arg (*va, u8 *);
1205 int verbose = va_arg (*va, int);
1210 clib_mem_usage_t usage;
1211 mheap_elt_t *first_corrupt;
1213 mheap_maybe_lock (v);
1215 h = mheap_header (v);
1217 mheap_usage_no_lock (v, &usage);
1219 indent = format_get_indent (s);
1223 "%d objects, %U of %U used, %U free, %U reclaimed, %U overhead",
1224 usage.object_count, format_mheap_byte_count, usage.bytes_used,
1225 format_mheap_byte_count, usage.bytes_total,
1226 format_mheap_byte_count, usage.bytes_free,
1227 format_mheap_byte_count, usage.bytes_free_reclaimed,
1228 format_mheap_byte_count, usage.bytes_overhead);
1230 if (usage.bytes_max != ~0)
1231 s = format (s, ", %U capacity", format_mheap_byte_count, usage.bytes_max);
1233 /* Show histogram of sizes. */
1236 uword hist[MHEAP_N_BINS];
1240 clib_memset (hist, 0, sizeof (hist));
1244 e->n_user_data != MHEAP_N_USER_DATA_INVALID;
1245 e = mheap_next_elt (e))
1247 uword n_user_data_bytes = mheap_elt_data_bytes (e);
1248 uword bin = user_data_size_to_bin_index (n_user_data_bytes);
1256 s = format (s, "\n%U%=12s%=12s%=16s",
1257 format_white_space, indent + 2,
1258 "Size", "Count", "Fraction");
1260 for (i = 0; i < ARRAY_LEN (hist); i++)
1264 s = format (s, "\n%U%12d%12wd%16.4f",
1265 format_white_space, indent + 2,
1266 MHEAP_MIN_USER_DATA_BYTES +
1267 i * MHEAP_USER_DATA_WORD_BYTES, hist[i],
1268 (f64) hist[i] / (f64) n_hist);
1273 s = format (s, "\n%U%U",
1274 format_white_space, indent + 2, format_mheap_stats, h);
1276 if ((h->flags & MHEAP_FLAG_TRACE) && vec_len (h->trace_main.traces) > 0)
1278 /* Make a copy of traces since we'll be sorting them. */
1279 mheap_trace_t *t, *traces_copy;
1280 u32 indent, total_objects_traced;
1282 traces_copy = vec_dup (h->trace_main.traces);
1283 qsort (traces_copy, vec_len (traces_copy), sizeof (traces_copy[0]),
1286 total_objects_traced = 0;
1287 s = format (s, "\n");
1288 vec_foreach (t, traces_copy)
1290 /* Skip over free elements. */
1291 if (t->n_allocations == 0)
1294 total_objects_traced += t->n_allocations;
1296 /* When not verbose only report allocations of more than 1k. */
1297 if (!verbose && t->n_bytes < 1024)
1300 if (t == traces_copy)
1301 s = format (s, "%=9s%=9s %=10s Traceback\n", "Bytes", "Count",
1303 s = format (s, "%9d%9d %p", t->n_bytes, t->n_allocations,
1305 indent = format_get_indent (s);
1306 for (i = 0; i < ARRAY_LEN (t->callers) && t->callers[i]; i++)
1309 s = format (s, "%U", format_white_space, indent);
/* Symbolized caller when ELF symbol support is compiled in. */
1312 format (s, " %U\n", format_clib_elf_symbol_with_address,
1315 s = format (s, " %p\n", t->callers[i]);
1320 s = format (s, "%d total traced objects\n", total_objects_traced);
1322 vec_free (traces_copy);
1325 first_corrupt = mheap_first_corrupt (v);
1328 size = mheap_elt_data_bytes (first_corrupt);
1329 s = format (s, "\n first corrupt object: %p, size %wd\n %U",
1330 first_corrupt, size, format_hex_bytes, first_corrupt, size);
1333 /* FIXME. This output could be wrong in the unlikely case that format
1334 uses the same mheap as we are currently inspecting. */
1340 s = format (s, "\n");
1342 e = mheap_elt_at_uoffset (v, 0);
1347 s = format (s, "%8d: ", i);
1349 o = mheap_elt_uoffset (v, e);
/* Parenthesize offsets of free elements in the dump. */
1352 s = format (s, "(%8d) ", o);
1354 s = format (s, " %8d ", o);
1356 if ((i % 8) == 7 || (i + 1) >= h->n_elts)
1357 s = format (s, "\n");
1361 mheap_maybe_unlock (v);
1369 fformat (stderr, "%U", format_mheap, v, 1);
1373 mheap_validate_breakpoint ()
/* Full consistency check of a heap: cross-checks the non-empty-bin
   bitmap against free-list heads, walks every free list verifying
   linkage/bin membership, checks the small object cache entries, then
   walks all elements verifying sizes, free bits, coalescing invariants
   and the global element/size accounting.  CHECK panics (after hitting
   the mheap_validate_breakpoint hook) on the first violation.
   NOTE(review): lossy extraction — function header, several loop
   headers, braces, accumulation statements and the trailing heap-size
   comparison are missing from this view; visible code preserved
   verbatim. */
1379 mheap_validate (void *v)
1381 mheap_t *h = mheap_header (v);
1384 uword elt_count, elt_size;
1385 uword free_count_from_free_lists, free_size_from_free_lists;
1386 uword small_elt_free_count, small_elt_free_size;
1388 #define CHECK(x) if (! (x)) { mheap_validate_breakpoint (); os_panic (); }
1390 if (vec_len (v) == 0)
1393 mheap_maybe_lock (v);
1395 /* Validate number of elements and size. */
1396 free_size_from_free_lists = free_count_from_free_lists = 0;
1397 for (i = 0; i < ARRAY_LEN (h->first_free_elt_uoffset_by_bin); i++)
/* Bin-head presence must agree with the non-empty-bin bitmap. */
1402 CHECK ((h->first_free_elt_uoffset_by_bin[i] != MHEAP_GROUNDED)
1404 ((h->non_empty_free_elt_heads[i /
1405 BITS (uword)] & ((uword) 1 <<
1411 if (h->first_free_elt_uoffset_by_bin[i] == MHEAP_GROUNDED)
1414 e = mheap_elt_at_uoffset (v, h->first_free_elt_uoffset_by_bin[i]);
1420 n = mheap_next_elt (e);
1422 /* Object must be marked free. */
1425 /* Next object's previous free bit must also be set. */
1426 CHECK (n->prev_is_free);
/* List head has no predecessor. */
1429 CHECK (e->free_elt.prev_uoffset == MHEAP_GROUNDED);
1432 s = mheap_elt_data_bytes (e);
1433 CHECK (user_data_size_to_bin_index (s) == i);
1435 free_count_from_free_lists += 1;
1436 free_size_from_free_lists += s;
1438 if (e->free_elt.next_uoffset == MHEAP_GROUNDED)
1441 n = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
1443 /* Check free element linkages. */
1444 CHECK (n->free_elt.prev_uoffset == mheap_elt_uoffset (v, e));
1450 /* Go through small object cache. */
1451 small_elt_free_count = small_elt_free_size = 0;
1452 for (i = 0; i < ARRAY_LEN (h->small_object_cache.bins.as_u8); i++)
1454 if (h->small_object_cache.bins.as_u8[i] != 0)
/* Bin bytes are stored biased by +1; 0 means slot empty. */
1457 uword b = h->small_object_cache.bins.as_u8[i] - 1;
1458 uword o = h->small_object_cache.offsets[i];
1461 e = mheap_elt_at_uoffset (v, o);
1463 /* Object must be allocated. */
1464 CHECK (!e->is_free);
1466 s = mheap_elt_data_bytes (e);
1467 CHECK (user_data_size_to_bin_index (s) == b);
1469 small_elt_free_count += 1;
1470 small_elt_free_size += s;
1476 uword elt_free_size, elt_free_count;
1478 elt_count = elt_size = elt_free_size = elt_free_count = 0;
1479 for (e = v; e->n_user_data != MHEAP_N_USER_DATA_INVALID; e = n)
1481 if (e->prev_n_user_data != MHEAP_N_USER_DATA_INVALID)
1482 CHECK (e->prev_n_user_data * sizeof (e->user_data[0]) >=
1483 MHEAP_MIN_USER_DATA_BYTES);
1485 CHECK (e->n_user_data * sizeof (e->user_data[0]) >=
1486 MHEAP_MIN_USER_DATA_BYTES);
1488 n = mheap_next_elt (e);
1490 CHECK (e->is_free == n->prev_is_free);
1493 s = mheap_elt_data_bytes (e);
1502 /* Consecutive free objects should have been combined. */
1503 CHECK (!(e->prev_is_free && n->prev_is_free));
1506 CHECK (free_count_from_free_lists == elt_free_count);
1507 CHECK (free_size_from_free_lists == elt_free_size);
1508 CHECK (elt_count == h->n_elts + elt_free_count + small_elt_free_count);
1509 CHECK (elt_size + (elt_count + 1) * MHEAP_ELT_OVERHEAD_BYTES ==
/* Second pass over trailing sentinel chain: sizes must mirror. */
1516 for (e = v; e->n_user_data == MHEAP_N_USER_DATA_INVALID; e = n)
1518 n = mheap_next_elt (e);
1519 CHECK (e->n_user_data == n->prev_n_user_data);
1525 mheap_maybe_unlock (v);
1527 h->validate_serial += 1;
/* Record the allocation of 'size' bytes at heap offset 'offset' against
   the current call stack (declared "static void" in the file header).
   Allocations that share an identical backtrace share one mheap_trace_t;
   trace_index_by_offset maps the offset back so mheap_put_trace can undo
   the accounting on free.
   NOTE(review): interior source lines are elided in this extract. */
1531 mheap_get_trace (void *v, uword offset, uword size)
1534 mheap_trace_main_t *tm;
1536 uword i, n_callers, trace_index, *p;
1537 mheap_trace_t trace;
1539 /* Spurious Coverity warnings be gone. */
1540 clib_memset (&trace, 0, sizeof (trace));
1542 n_callers = clib_backtrace (trace.callers, ARRAY_LEN (trace.callers),
1543 /* Skip mheap_get_aligned's frame */ 1);
/* Zero unused caller slots so identical stacks hash and compare equal. */
1547 for (i = n_callers; i < ARRAY_LEN (trace.callers); i++)
1548 trace.callers[i] = 0;
1550 h = mheap_header (v);
1551 tm = &h->trace_main;
/* Lazily create the callers -> trace-index hash on first use, keyed by
   the full callers array. */
1553 if (!tm->trace_by_callers)
1554 tm->trace_by_callers =
1555 hash_create_shmem (0, sizeof (trace.callers), sizeof (uword));
/* Look for an existing trace with this exact backtrace. */
1557 p = hash_get_mem (tm->trace_by_callers, &trace.callers);
1561 t = tm->traces + trace_index;
/* No match: reuse a recycled slot from the free list if one exists. */
1565 i = vec_len (tm->trace_free_list);
1568 trace_index = tm->trace_free_list[i - 1];
1569 _vec_len (tm->trace_free_list) = i - 1;
/* Otherwise grow the traces vector; remember its old extent so hash
   keys that point into the old storage can be relocated. */
1573 mheap_trace_t *old_start = tm->traces;
1574 mheap_trace_t *old_end = vec_end (tm->traces);
1576 vec_add2 (tm->traces, t, 1);
/* vec_add2 may have reallocated: rewrite every hash key that still
   points into the old vector storage. */
1578 if (tm->traces != old_start)
1583 hash_foreach_pair (p, tm->trace_by_callers,
1585 q = uword_to_pointer (p->key, mheap_trace_t *);
1586 ASSERT (q >= old_start && q < old_end);
1587 p->key = pointer_to_uword (tm->traces + (q - old_start));
1591 trace_index = t - tm->traces;
1594 t = tm->traces + trace_index;
/* Fresh trace entry: reset its counter and index it by backtrace. */
1596 t->n_allocations = 0;
1598 hash_set_mem (tm->trace_by_callers, t->callers, trace_index);
1601 t->n_allocations += 1;
1603 t->offset = offset; /* keep a sample to autopsy */
/* Remember which trace owns this offset, for mheap_put_trace. */
1604 hash_set (tm->trace_index_by_offset, offset, t - tm->traces);
/* Undo mheap_get_trace accounting when the object at heap offset
   'offset' ('size' bytes) is freed.  When a trace's allocation count
   drops to zero its slot is recycled onto the trace free list. */
1608 mheap_put_trace (void *v, uword offset, uword size)
1611 mheap_trace_main_t *tm;
1613 uword trace_index, *p;
1615 h = mheap_header (v);
1616 tm = &h->trace_main;
/* Find which trace recorded this offset; drop the mapping. */
1617 p = hash_get (tm->trace_index_by_offset, offset);
1622 hash_unset (tm->trace_index_by_offset, offset);
1623 ASSERT (trace_index < vec_len (tm->traces));
1625 t = tm->traces + trace_index;
1626 ASSERT (t->n_allocations > 0);
1627 ASSERT (t->n_bytes >= size);
1628 t->n_allocations -= 1;
/* Last live allocation for this backtrace: retire the trace entry and
   recycle its slot for reuse by mheap_get_trace. */
1630 if (t->n_allocations == 0)
1632 hash_unset_mem (tm->trace_by_callers, t->callers);
1633 vec_add1 (tm->trace_free_list, trace_index);
1634 clib_memset (t, 0, sizeof (t[0]));
/* qsort comparator for mheap_trace_t: larger n_bytes sorts first, with
   ties broken by larger n_allocations first (descending order).
   NOTE(review): the final return of 'cmp' is elided in this extract. */
1639 mheap_trace_sort (const void *_t1, const void *_t2)
1641 const mheap_trace_t *t1 = _t1;
1642 const mheap_trace_t *t2 = _t2;
1645 cmp = (word) t2->n_bytes - (word) t1->n_bytes;
/* Byte totals equal: fall back to allocation counts. */
1647 cmp = (word) t2->n_allocations - (word) t1->n_allocations;
/* Release all allocation-trace state: the trace vector, its free list,
   and both lookup hashes (by backtrace and by heap offset). */
1652 mheap_trace_main_free (mheap_trace_main_t * tm)
1654 vec_free (tm->traces);
1655 vec_free (tm->trace_free_list);
1656 hash_free (tm->trace_by_callers);
1657 hash_free (tm->trace_index_by_offset);
/* Enable or disable allocation tracing on heap v via MHEAP_FLAG_TRACE.
   Disabling also frees all accumulated trace state. */
1661 mheap_trace (void *v, int enable)
1665 h = mheap_header (v);
/* enable: start recording allocations in mheap_get_trace. */
1669 h->flags |= MHEAP_FLAG_TRACE;
/* disable: drop existing traces and clear the flag. */
1673 mheap_trace_main_free (&h->trace_main);
1674 h->flags &= ~MHEAP_FLAG_TRACE;
1679 * fd.io coding-style-patch-verification: ON
1682 * eval: (c-set-style "gnu")