2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * buffer.c: allocate/free network buffers.
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
44 * Allocate/free network buffers with DPDK.
47 #include <rte_config.h>
49 #include <rte_common.h>
51 #include <rte_memory.h>
52 #include <rte_memzone.h>
53 #include <rte_tailq.h>
55 #include <rte_per_lcore.h>
56 #include <rte_launch.h>
57 #include <rte_atomic.h>
58 #include <rte_cycles.h>
59 #include <rte_prefetch.h>
60 #include <rte_lcore.h>
61 #include <rte_per_lcore.h>
62 #include <rte_branch_prediction.h>
63 #include <rte_interrupts.h>
65 #include <rte_random.h>
66 #include <rte_debug.h>
67 #include <rte_ether.h>
68 #include <rte_ethdev.h>
70 #include <rte_mempool.h>
72 #include <rte_version.h>
74 #include <vlib/vlib.h>
76 #pragma weak rte_mem_virt2phy
77 #pragma weak rte_eal_has_hugepages
78 #pragma weak rte_socket_id
79 #pragma weak rte_pktmbuf_pool_create
/* Slow path for computing the total length of a chained buffer: walks
 * every segment after the first, sums their current_length, caches the
 * sum in total_length_not_including_first_buffer and sets
 * VLIB_BUFFER_TOTAL_LENGTH_VALID so the fast path can reuse it.
 * NOTE(review): this listing elides lines (gaps in the embedded
 * numbering) -- the return type, braces, the declaration of `l` and the
 * final return statement are not visible here; confirm against the
 * unabridged source. */
82 vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm,
83 vlib_buffer_t * b_first)
85 vlib_buffer_t *b = b_first;
86 uword l_first = b_first->current_length;
/* Accumulate lengths of the 2nd..Nth segments. */
88 while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
90 b = vlib_get_buffer (vm, b->next_buffer);
91 l += b->current_length;
/* Cache the computed tail length on the head buffer. */
93 b_first->total_length_not_including_first_buffer = l;
94 b_first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
/* format() callback: pretty-print one vlib_buffer_t -- current data
 * offset, length and free-list index, plus the cached chain length and
 * trace index when the corresponding flags are set, then one extra
 * line per chained segment.
 * NOTE(review): lines are elided in this listing (numbering gaps);
 * return type, braces and final return are not visible. */
99 format_vlib_buffer (u8 * s, va_list * args)
101 vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
102 uword indent = format_get_indent (s);
104 s = format (s, "current data %d, length %d, free-list %d",
105 b->current_data, b->current_length, b->free_list_index);
/* Cached total-length-not-including-first-buffer, if valid. */
107 if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)
108 s = format (s, ", totlen-nifb %d",
109 b->total_length_not_including_first_buffer);
111 if (b->flags & VLIB_BUFFER_IS_TRACED)
112 s = format (s, ", trace 0x%x", b->trace_index);
/* Emit one line per chained segment, indented to match the head. */
114 while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
116 vlib_main_t *vm = vlib_get_main ();
117 u32 next_buffer = b->next_buffer;
118 b = vlib_get_buffer (vm, next_buffer);
120 s = format (s, "\n%Unext-buffer 0x%x, segment length %d",
121 format_white_space, indent, next_buffer, b->current_length);
/* format() callback: buffer metadata (via format_vlib_buffer) followed
 * by the first 64 bytes of payload rendered as hex. */
129 format_vlib_buffer_and_data (u8 * s, va_list * args)
131 vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
133 s = format (s, "%U, %U",
134 format_vlib_buffer, b,
135 format_hex_bytes, vlib_buffer_get_current (b), 64);
/* format() callback: append the raw payload of every segment of a
 * (possibly chained) buffer to the output vector.
 * NOTE(review): the enclosing loop construct is elided in this listing;
 * the visible `if (!(... NEXT_PRESENT))` presumably terminates the
 * per-segment loop -- confirm against the unabridged source. */
141 format_vlib_buffer_contents (u8 * s, va_list * va)
143 vlib_main_t *vm = va_arg (*va, vlib_main_t *);
144 vlib_buffer_t *b = va_arg (*va, vlib_buffer_t *);
148 vec_add (s, vlib_buffer_get_current (b), b->current_length);
149 if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
151 b = vlib_get_buffer (vm, b->next_buffer);
157 vlib_main_t **vlib_mains;
159 /* Aligned copy routine. */
/* Copy n_bytes from _src to _dst in vlib_copy_unit_t-sized words.
 * When 4 (or 8) copy units make up exactly one cache line, the copy is
 * unrolled by that factor with software prefetch one line ahead;
 * otherwise it falls back to one unit per iteration.  All three
 * arguments must be naturally aligned to sizeof (vlib_copy_unit_t).
 * NOTE(review): the actual copy statements inside the unrolled loops
 * are elided in this listing (numbering gaps) -- only the prefetches
 * and byte-count bookkeeping are visible; confirm against full source. */
161 vlib_aligned_memcpy (void *_dst, void *_src, int n_bytes)
163 vlib_copy_unit_t *dst = _dst;
164 vlib_copy_unit_t *src = _src;
166 /* Arguments must be naturally aligned. */
167 ASSERT (pointer_to_uword (dst) % sizeof (dst[0]) == 0);
168 ASSERT (pointer_to_uword (src) % sizeof (src[0]) == 0);
169 ASSERT (n_bytes % sizeof (dst[0]) == 0);
/* Case: 4 copy units per cache line. */
171 if (4 * sizeof (dst[0]) == CLIB_CACHE_LINE_BYTES)
173 CLIB_PREFETCH (dst + 0, 4 * sizeof (dst[0]), WRITE);
174 CLIB_PREFETCH (src + 0, 4 * sizeof (src[0]), READ);
176 while (n_bytes >= 4 * sizeof (dst[0]))
180 n_bytes -= 4 * sizeof (dst[0]);
181 CLIB_PREFETCH (dst, 4 * sizeof (dst[0]), WRITE);
182 CLIB_PREFETCH (src, 4 * sizeof (src[0]), READ);
/* Case: 8 copy units per cache line. */
189 else if (8 * sizeof (dst[0]) == CLIB_CACHE_LINE_BYTES)
191 CLIB_PREFETCH (dst + 0, 8 * sizeof (dst[0]), WRITE);
192 CLIB_PREFETCH (src + 0, 8 * sizeof (src[0]), READ);
194 while (n_bytes >= 8 * sizeof (dst[0]))
198 n_bytes -= 8 * sizeof (dst[0]);
199 CLIB_PREFETCH (dst, 8 * sizeof (dst[0]), WRITE);
200 CLIB_PREFETCH (src, 8 * sizeof (src[0]), READ);
212 /* Cache line size unknown: fall back to slow version. */ ;
/* Slow tail / fallback: one copy unit per iteration. */
217 n_bytes -= 1 * sizeof (dst[0]);
/* Number of u32 buffer indices that fit in one vlib_copy_unit_t. */
221 #define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32))
223 /* Make sure we have at least given number of unaligned buffers. */
/* Move buffer indices from the tail of the free list's aligned vector
 * to its unaligned vector, BUFFERS_PER_COPY at a time, until at least
 * n_unaligned_buffers are available there.  Caller must ensure the
 * aligned vector can supply them (asserted below). */
225 fill_unaligned (vlib_main_t * vm,
226 vlib_buffer_free_list_t * free_list,
227 uword n_unaligned_buffers)
229 word la = vec_len (free_list->aligned_buffers);
230 word lu = vec_len (free_list->unaligned_buffers);
232 /* Aligned come in aligned copy-sized chunks. */
233 ASSERT (la % BUFFERS_PER_COPY == 0);
235 ASSERT (la >= n_unaligned_buffers);
237 while (lu < n_unaligned_buffers)
239 /* Copy 4 buffers from end of aligned vector to unaligned vector. */
240 vec_add (free_list->unaligned_buffers,
241 free_list->aligned_buffers + la - BUFFERS_PER_COPY,
243 la -= BUFFERS_PER_COPY;
244 lu += BUFFERS_PER_COPY;
/* Shrink the aligned vector to account for the moved chunks. */
246 _vec_len (free_list->aligned_buffers) = la;
249 /* After free aligned buffers may not contain even sized chunks. */
/* Re-establish the invariant that the aligned vector holds a multiple
 * of BUFFERS_PER_COPY indices: fold the unaligned vector back into the
 * aligned one, then move any ragged tail back out to unaligned. */
251 trim_aligned (vlib_buffer_free_list_t * f)
255 /* Add unaligned to aligned before trim. */
256 l = vec_len (f->unaligned_buffers);
259 vec_add_aligned (f->aligned_buffers, f->unaligned_buffers, l,
260 /* align */ sizeof (vlib_copy_unit_t));
262 _vec_len (f->unaligned_buffers) = 0;
265 /* Remove unaligned buffers from end of aligned vector and save for next trim. */
266 l = vec_len (f->aligned_buffers);
267 n_trim = l % BUFFERS_PER_COPY;
270 /* Trim aligned -> unaligned. */
271 vec_add (f->unaligned_buffers, f->aligned_buffers + l - n_trim, n_trim);
273 /* Remove from aligned. */
274 _vec_len (f->aligned_buffers) = l - n_trim;
/* Move every buffer index from free list `src` into `dst`: the aligned
 * vector is appended with vlib_aligned_memcpy, the unaligned vector
 * with vec_add; both src vectors are freed afterwards.
 * NOTE(review): lines are elided in this listing (numbering gaps);
 * confirm intermediate statements against the unabridged source. */
279 merge_free_lists (vlib_buffer_free_list_t * dst,
280 vlib_buffer_free_list_t * src)
288 l = vec_len (src->aligned_buffers);
291 vec_add2_aligned (dst->aligned_buffers, d, l,
292 /* align */ sizeof (vlib_copy_unit_t));
293 vlib_aligned_memcpy (d, src->aligned_buffers, l * sizeof (d[0]));
294 vec_free (src->aligned_buffers);
297 l = vec_len (src->unaligned_buffers);
300 vec_add (dst->unaligned_buffers, src->unaligned_buffers, l);
301 vec_free (src->unaligned_buffers);
/* Look up the index of the free list that serves the given buffer
 * size (after rounding); returns ~0 when no such list exists. */
306 vlib_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size)
308 vlib_buffer_main_t *bm = vm->buffer_main;
310 size = vlib_buffer_round_size (size);
311 uword *p = hash_get (bm->free_list_by_size, size);
312 return p ? p[0] : ~0;
315 /* Add buffer free list. */
/* Core free-list constructor.  Lazily creates the default free list on
 * first use; public requests for the default buffer size are satisfied
 * by returning the default list's index.  Otherwise allocates a new
 * cache-line-aligned pool entry, initializes it, and (for public
 * lists) registers it in the size -> index hash.
 * NOTE(review): lines are elided in this listing (return type, the
 * recursive call's remaining arguments, braces, final return);
 * confirm against the unabridged source. */
317 vlib_buffer_create_free_list_helper (vlib_main_t * vm,
319 u32 is_public, u32 is_default, u8 * name)
321 vlib_buffer_main_t *bm = vm->buffer_main;
322 vlib_buffer_free_list_t *f;
/* The very first list created must be the default list. */
324 if (!is_default && pool_elts (bm->buffer_free_list_pool) == 0)
326 u32 default_free_free_list_index;
329 default_free_free_list_index =
330 vlib_buffer_create_free_list_helper
332 /* default buffer size */ VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
337 ASSERT (default_free_free_list_index ==
338 VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
/* Public default-sized requests share the default list. */
340 if (n_data_bytes == VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES && is_public)
341 return default_free_free_list_index;
344 pool_get_aligned (bm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES);
346 memset (f, 0, sizeof (f[0]));
347 f->index = f - bm->buffer_free_list_pool;
348 f->n_data_bytes = vlib_buffer_round_size (n_data_bytes);
349 f->min_n_buffers_each_physmem_alloc = 16;
/* Take ownership of heap-allocated names; otherwise copy the string. */
350 f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name);
352 /* Setup free buffer template. */
353 f->buffer_init_template.free_list_index = f->index;
357 uword *p = hash_get (bm->free_list_by_size, f->n_data_bytes);
359 hash_set (bm->free_list_by_size, f->n_data_bytes, f->index);
/* Public wrapper: format the printf-style name and create a free list
 * of n_data_bytes buffers via the helper above.
 * NOTE(review): va_start/va_end, remaining parameters and the helper's
 * trailing arguments are elided in this listing. */
366 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
373 name = va_format (0, fmt, &va);
376 return vlib_buffer_create_free_list_helper (vm, n_data_bytes,
/* Return the index of an existing free list matching n_data_bytes, or
 * create a new one with the formatted name.
 * NOTE(review): the early-return on a successful lookup and the
 * helper's trailing arguments are elided in this listing. */
383 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
386 u32 i = vlib_buffer_get_free_list_with_size (vm, n_data_bytes);
394 name = va_format (0, fmt, &va);
397 i = vlib_buffer_create_free_list_helper (vm, n_data_bytes,
/* Hand every mbuf still held on the free list (both unaligned and
 * aligned vectors) back to its DPDK mempool, then free the index
 * vectors.  Each mbuf must be exclusively owned (refcount == 1). */
407 del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f)
413 for (i = 0; i < vec_len (f->unaligned_buffers); i++)
415 b = vlib_get_buffer (vm, f->unaligned_buffers[i]);
416 mb = rte_mbuf_from_vlib_buffer (b);
417 ASSERT (rte_mbuf_refcnt_read (mb) == 1);
418 rte_pktmbuf_free (mb);
420 for (i = 0; i < vec_len (f->aligned_buffers); i++)
422 b = vlib_get_buffer (vm, f->aligned_buffers[i]);
423 mb = rte_mbuf_from_vlib_buffer (b);
424 ASSERT (rte_mbuf_refcnt_read (mb) == 1);
425 rte_pktmbuf_free (mb);
428 vec_free (f->unaligned_buffers);
429 vec_free (f->aligned_buffers);
432 /* Delete a buffer free list. */
/* If another free list serves the same buffer size, merge the
 * remaining buffers into it; otherwise return them to DPDK via
 * del_free_list.  The pool entry is poisoned before being put back.
 * NOTE(review): lines are elided in this listing (the merge call's
 * second argument, the else branch, name/vector cleanup). */
434 vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index)
436 vlib_buffer_main_t *bm = vm->buffer_main;
437 vlib_buffer_free_list_t *f;
440 f = vlib_buffer_get_free_list (vm, free_list_index);
442 merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes);
443 if (merge_index != ~0 && merge_index != free_list_index)
445 merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool,
449 del_free_list (vm, f);
/* Poison the entry to catch use-after-delete. */
452 memset (f, 0xab, sizeof (f[0]));
454 pool_put (bm->buffer_free_list_pool, f);
457 /* Make sure free list has at least given number of free buffers. */
/* Bulk-allocate mbufs from the per-socket DPDK mempool until the free
 * list holds at least min_free_buffers; each mbuf is converted to a
 * vlib buffer, initialized for the list, and its index appended to
 * the aligned vector.  An optional per-list init callback runs last.
 * NOTE(review): lines are elided in this listing (declarations,
 * braces, failure-path returns, final return value); confirm. */
459 fill_free_list (vlib_main_t * vm,
460 vlib_buffer_free_list_t * fl, uword min_free_buffers)
465 u32 n_remaining = 0, n_alloc = 0;
/* rte_socket_id is a weak symbol -- may be absent when not linked
 * with DPDK (see the #pragma weak declarations above). */
466 unsigned socket_id = rte_socket_id ? rte_socket_id () : 0;
467 struct rte_mempool *rmp = vm->buffer_main->pktmbuf_pools[socket_id];
471 if (PREDICT_FALSE (rmp == 0))
476 /* Already have enough free buffers on free list? */
477 n = min_free_buffers - vec_len (fl->aligned_buffers);
479 return min_free_buffers;
481 /* Always allocate round number of buffers. */
482 n = round_pow2 (n, BUFFERS_PER_COPY);
484 /* Always allocate new buffers in reasonably large sized chunks. */
485 n = clib_max (n, fl->min_n_buffers_each_physmem_alloc);
487 vec_validate (vm->mbuf_alloc_list, n - 1);
489 if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0)
492 _vec_len (vm->mbuf_alloc_list) = n;
494 for (i = 0; i < n; i++)
496 mb = vm->mbuf_alloc_list[i];
/* Freshly dequeued mbufs carry refcount 0 here; take the single
 * reference and reset the data offset to standard headroom. */
498 ASSERT (rte_mbuf_refcnt_read (mb) == 0);
499 rte_mbuf_refcnt_set (mb, 1);
501 mb->data_off = RTE_PKTMBUF_HEADROOM;
504 b = vlib_buffer_from_rte_mbuf (mb);
505 bi = vlib_get_buffer_index (vm, b);
507 vec_add1_aligned (fl->aligned_buffers, bi, sizeof (vlib_copy_unit_t));
511 vlib_buffer_init_for_free_list (b, fl);
513 if (fl->buffer_init_function)
514 fl->buffer_init_function (vm, fl, &bi, 1);
/* Alignment of pointer x within a vlib_copy_unit_t, measured in u32
 * slots; 0 when x is copy-unit aligned. */
523 copy_alignment (u32 * x)
525 return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY;
/* Allocate n_alloc_buffers buffer indices from a free list into
 * alloc_buffers.  Strategy: copy a short unaligned prefix and suffix
 * from the unaligned vector, and the copy-unit-aligned middle from the
 * aligned vector via the fast aligned copy path.  Returns the number
 * actually allocated (may be less than requested if the mempool ran
 * dry).
 * NOTE(review): many lines are elided in this listing (numbering
 * gaps): local declarations, the prefix/suffix copy loops, the fast
 * path copy body and several braces; confirm against full source. */
529 alloc_from_free_list (vlib_main_t * vm,
530 vlib_buffer_free_list_t * free_list,
531 u32 * alloc_buffers, u32 n_alloc_buffers)
535 uword n_unaligned_start, n_unaligned_end, n_filled;
537 n_left = n_alloc_buffers;
/* u32 slots needed before dst reaches copy-unit alignment. */
539 n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst))
540 & (BUFFERS_PER_COPY - 1));
/* Top up the free list; clamp the request to what is available. */
542 n_filled = fill_free_list (vm, free_list, n_alloc_buffers);
546 n_left = n_filled < n_left ? n_filled : n_left;
547 n_alloc_buffers = n_left;
549 if (n_unaligned_start >= n_left)
551 n_unaligned_start = n_left;
555 n_unaligned_end = copy_alignment (dst + n_alloc_buffers);
/* Ensure the unaligned vector can cover both ragged ends. */
557 fill_unaligned (vm, free_list, n_unaligned_start + n_unaligned_end);
559 u_len = vec_len (free_list->unaligned_buffers);
560 u_src = free_list->unaligned_buffers + u_len - 1;
562 if (n_unaligned_start)
564 uword n_copy = n_unaligned_start;
576 /* Now dst should be aligned. */
578 ASSERT (pointer_to_uword (dst) % sizeof (vlib_copy_unit_t) == 0);
583 vlib_copy_unit_t *d, *s;
586 if (vec_len (free_list->aligned_buffers) <
587 ((n_left / BUFFERS_PER_COPY) * BUFFERS_PER_COPY))
590 n_copy = n_left / BUFFERS_PER_COPY;
591 n_left = n_left % BUFFERS_PER_COPY;
593 /* Remove buffers from aligned free list. */
594 _vec_len (free_list->aligned_buffers) -= n_copy * BUFFERS_PER_COPY;
596 s = (vlib_copy_unit_t *) vec_end (free_list->aligned_buffers);
597 d = (vlib_copy_unit_t *) dst;
599 /* Fast path loop. */
622 /* Unaligned copy. */
623 ASSERT (n_unaligned_end == n_left);
631 if (!free_list->unaligned_buffers)
634 _vec_len (free_list->unaligned_buffers) = u_len;
636 return n_alloc_buffers;
639 /* Allocate a given number of buffers into given array.
640 Returns number actually allocated which will be either zero or
/* Public allocation entry point using the default free list.
 * NOTE(review): return type, braces and the call's first/last
 * arguments are elided in this listing. */
643 vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
645 vlib_buffer_main_t *bm = vm->buffer_main;
647 return alloc_from_free_list
649 pool_elt_at_index (bm->buffer_free_list_pool,
650 VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX),
/* Allocate n_buffers from an explicitly chosen free list (by index)
 * into the caller's array; returns the number actually allocated. */
655 vlib_buffer_alloc_from_free_list (vlib_main_t * vm,
657 u32 n_buffers, u32 free_list_index)
659 vlib_buffer_main_t *bm = vm->buffer_main;
660 vlib_buffer_free_list_t *f;
661 f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index);
662 return alloc_from_free_list (vm, f, buffers, n_buffers);
/* Append one buffer index to a free list's aligned vector, optionally
 * re-initializing the buffer from the list's template first
 * (do_init is false on the recycle path). */
666 add_buffer_to_free_list (vlib_main_t * vm,
667 vlib_buffer_free_list_t * f,
668 u32 buffer_index, u8 do_init)
671 b = vlib_get_buffer (vm, buffer_index);
672 if (PREDICT_TRUE (do_init))
673 vlib_buffer_init_for_free_list (b, f);
674 vec_add1_aligned (f->aligned_buffers, buffer_index,
675 sizeof (vlib_copy_unit_t));
/* Resolve the free list a buffer belongs to; stores the list index in
 * *index and returns a pointer to the pool entry. */
678 always_inline vlib_buffer_free_list_t *
679 buffer_get_free_list (vlib_main_t * vm, vlib_buffer_t * b, u32 * index)
681 vlib_buffer_main_t *bm = vm->buffer_main;
684 *index = i = b->free_list_index;
685 return pool_elt_at_index (bm->buffer_free_list_pool, i);
/* Install a buffer-free interception callback (consulted by
 * vlib_buffer_free_inline below); returns the previous callback.
 * NOTE(review): return type and return statement are elided in this
 * listing. */
689 vlib_set_buffer_free_callback (vlib_main_t * vm, void *fp)
691 vlib_buffer_main_t *bm = vm->buffer_main;
692 void *rv = bm->buffer_free_callback;
694 bm->buffer_free_callback = fp;
/* Common implementation behind vlib_buffer_free / _no_next.  Lets an
 * installed callback filter the buffer array first; then, for each
 * buffer: if its free list has an announce callback (multicast
 * recycle), the buffer goes back to the free list and the list is
 * queued for a single announcement; otherwise the underlying mbuf is
 * returned to DPDK (unless flagged VLIB_BUFFER_RECYCLE).
 * NOTE(review): lines are elided in this listing (declarations,
 * braces, the recycle-flag clearing, loop labels); confirm against
 * the unabridged source. */
698 static_always_inline void
699 vlib_buffer_free_inline (vlib_main_t * vm,
700 u32 * buffers, u32 n_buffers, u32 follow_buffer_next)
702 vlib_buffer_main_t *bm = vm->buffer_main;
703 vlib_buffer_free_list_t *fl;
706 u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
707 u32 follow_buffer_next);
/* Optional interception: callback may shrink the array. */
709 cb = bm->buffer_free_callback;
711 if (PREDICT_FALSE (cb != 0))
712 n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next);
717 for (i = 0; i < n_buffers; i++)
722 b = vlib_get_buffer (vm, buffers[i]);
724 fl = buffer_get_free_list (vm, b, &fi);
726 /* The only current use of this callback: multicast recycle */
727 if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0))
731 add_buffer_to_free_list
732 (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0);
/* Queue each free list for announcement at most once. */
734 for (j = 0; j < vec_len (bm->announce_list); j++)
736 if (fl == bm->announce_list[j])
737 goto already_announced;
739 vec_add1 (bm->announce_list, fl);
/* Normal path: give the mbuf back to its DPDK mempool. */
745 if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0))
747 mb = rte_mbuf_from_vlib_buffer (b);
748 ASSERT (rte_mbuf_refcnt_read (mb) == 1);
749 rte_pktmbuf_free (mb);
/* Fire one announcement per queued free list, then reset the queue. */
753 if (vec_len (bm->announce_list))
755 vlib_buffer_free_list_t *fl;
756 for (i = 0; i < vec_len (bm->announce_list); i++)
758 fl = bm->announce_list[i];
759 fl->buffers_added_to_freelist_function (vm, fl);
761 _vec_len (bm->announce_list) = 0;
/* Free buffers, following next_buffer chains (trailing argument
 * elided in this listing). */
766 vlib_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
768 vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */
/* Free buffers WITHOUT following next_buffer chains (trailing
 * argument elided in this listing). */
773 vlib_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
775 vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */
779 /* Copy template packet data into buffers as they are allocated. */
/* Free-list buffer-init callback: the template pointer is recovered
 * from the list's opaque, and its packet_data is copied into each
 * newly allocated buffer.  Each buffer's current_length must already
 * equal the template length (asserted). */
780 __attribute__ ((unused))
782 vlib_packet_template_buffer_init (vlib_main_t * vm,
783 vlib_buffer_free_list_t * fl,
784 u32 * buffers, u32 n_buffers)
786 vlib_packet_template_t *t =
787 uword_to_pointer (fl->buffer_init_function_opaque,
788 vlib_packet_template_t *);
791 for (i = 0; i < n_buffers; i++)
793 vlib_buffer_t *b = vlib_get_buffer (vm, buffers[i]);
794 ASSERT (b->current_length == vec_len (t->packet_data));
795 clib_memcpy (vlib_buffer_get_current (b), t->packet_data,
/* Initialize a packet template: zero the struct and copy in the
 * template bytes, under the worker-thread barrier so no worker
 * observes a half-built template.
 * NOTE(review): several parameter and field-assignment lines are
 * elided in this listing; confirm against full source. */
801 vlib_packet_template_init (vlib_main_t * vm,
802 vlib_packet_template_t * t,
804 uword n_packet_data_bytes,
805 uword min_n_buffers_each_physmem_alloc,
809 __attribute__ ((unused)) u8 *name;
812 name = va_format (0, fmt, &va);
/* Barrier: workers must not run while the template is rebuilt. */
815 vlib_worker_thread_barrier_sync (vm);
816 memset (t, 0, sizeof (t[0]));
818 vec_add (t->packet_data, packet_data, n_packet_data_bytes);
820 vlib_worker_thread_barrier_release (vm);
/* Allocate one buffer, fill it from the template, and keep the DPDK
 * mbuf length fields consistent with the vlib view.  Stores the new
 * buffer index via bi_result.
 * NOTE(review): failure-path return and final return value are elided
 * in this listing. */
824 vlib_packet_template_get_packet (vlib_main_t * vm,
825 vlib_packet_template_t * t, u32 * bi_result)
830 if (vlib_buffer_alloc (vm, &bi, 1) != 1)
835 b = vlib_get_buffer (vm, bi);
836 clib_memcpy (vlib_buffer_get_current (b),
837 t->packet_data, vec_len (t->packet_data));
838 b->current_length = vec_len (t->packet_data);
840 /* Fix up mbuf header length fields */
842 mb = rte_mbuf_from_vlib_buffer (b);
843 mb->data_len = b->current_length;
844 mb->pkt_len = b->current_length;
849 /* Append given data to end of buffer, possibly allocating new buffers. */
/* Walk to the tail of the chain, fill the remaining room of the last
 * segment, and keep allocating fresh segments from the given free
 * list until all n_data_bytes are copied.  A zero buffer_index
 * (apparently ~0 in the elided condition) triggers allocation of the
 * head buffer as well -- confirm against full source.
 * NOTE(review): many lines elided here: return type, the buffer_index
 * sentinel test, loop structure, the `d` cursor updates, and the
 * normal return of the head index. */
851 vlib_buffer_add_data (vlib_main_t * vm,
853 u32 buffer_index, void *data, u32 n_data_bytes)
855 u32 n_buffer_bytes, n_left, n_left_this_buffer, bi;
861 && 1 != vlib_buffer_alloc_from_free_list (vm, &bi, 1, free_list_index))
865 n_left = n_data_bytes;
866 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, free_list_index);
/* Appending invalidates any cached total chain length. */
868 b = vlib_get_buffer (vm, bi);
869 b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
871 /* Get to the end of the chain before we try to append data... */
872 while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
873 b = vlib_get_buffer (vm, b->next_buffer);
879 ASSERT (n_buffer_bytes >= b->current_length);
/* Room left in the current tail segment. */
881 n_buffer_bytes - (b->current_data + b->current_length);
882 n = clib_min (n_left_this_buffer, n_left);
883 clib_memcpy (vlib_buffer_get_current (b) + b->current_length, d, n);
884 b->current_length += n;
/* Need another segment: allocate and link it. */
891 vlib_buffer_alloc_from_free_list (vm, &b->next_buffer, 1,
895 b->flags |= VLIB_BUFFER_NEXT_PRESENT;
897 b = vlib_get_buffer (vm, b->next_buffer);
/* Allocation failure path. */
903 clib_error ("out of buffers");
/* Append data_len bytes to a buffer chain, allocating and linking new
 * segments as needed; *last is updated to the new tail, and the
 * chain's total length bookkeeping is maintained via
 * vlib_buffer_chain_increase_length.
 * NOTE(review): loop structure, allocation-failure return and the
 * data/data_len advancement are elided in this listing. */
908 vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm,
910 vlib_buffer_t * first,
911 vlib_buffer_t ** last,
912 void *data, u16 data_len)
914 vlib_buffer_t *l = *last;
916 vlib_buffer_free_list_buffer_size (vm, free_list_index);
918 ASSERT (n_buffer_bytes >= l->current_length + l->current_data);
/* Room left in the current tail segment. */
921 u16 max = n_buffer_bytes - l->current_length - l->current_data;
/* Tail is full: allocate a new segment and link it into the chain. */
925 vlib_buffer_alloc_from_free_list (vm, &l->next_buffer, 1,
928 *last = l = vlib_buffer_chain_buffer (vm, first, l, l->next_buffer);
929 max = n_buffer_bytes - l->current_length - l->current_data;
932 u16 len = (data_len > max) ? max : data_len;
933 clib_memcpy (vlib_buffer_get_current (l) + l->current_length,
935 vlib_buffer_chain_increase_length (first, l, len);
943 * Fills in the required rte_mbuf fields for chained buffers given a VLIB chain.
/* Walks the vlib chain and mirrors it onto the underlying rte_mbufs:
 * head pkt_len accumulates all segment lengths, each mbuf's data_len
 * matches its segment, and (in elided lines) mbufs are presumably
 * linked via mb->next / nb_segs -- confirm against full source. */
946 vlib_buffer_chain_validate (vlib_main_t * vm, vlib_buffer_t * b_first)
948 vlib_buffer_t *b = b_first, *prev = b_first;
949 struct rte_mbuf *mb_prev, *mb, *mb_first;
951 mb_first = rte_mbuf_from_vlib_buffer (b_first);
953 mb_first->pkt_len = mb_first->data_len = b_first->current_length;
954 while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
956 b = vlib_get_buffer (vm, b->next_buffer);
957 mb = rte_mbuf_from_vlib_buffer (b);
958 mb_prev = rte_mbuf_from_vlib_buffer (prev);
960 mb_first->pkt_len += b->current_length;
962 mb->data_len = b->current_length;
/* Create (once) the DPDK pktmbuf mempool for a CPU socket and fold its
 * address range into the physmem "virtual" region so mbuf addresses
 * map to 32-bit vlib buffer indices.  On allocation failure, falls
 * back to sharing another socket's pool.
 * NOTE(review): numerous lines are elided in this listing --
 * declarations, braces, the success/early returns and some warning
 * format arguments; confirm against the unabridged source. */
968 vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs,
971 vlib_buffer_main_t *bm = vm->buffer_main;
972 vlib_physmem_main_t *vpm = &vm->physmem_main;
973 struct rte_mempool *rmp;
/* rte_pktmbuf_pool_create is a weak symbol: NULL when the image was
 * not linked against DPDK. */
976 if (!rte_pktmbuf_pool_create)
977 return clib_error_return (0, "not linked with DPDK");
979 vec_validate_aligned (bm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES);
981 /* pool already exists, nothing to do */
982 if (bm->pktmbuf_pools[socket_id])
985 u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0);
987 rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */
988 num_mbufs, /* number of mbufs */
989 512, /* cache size */
990 VLIB_BUFFER_HDR_SIZE, /* priv size */
991 VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */
992 socket_id); /* cpu socket */
/* Compute the [start, end) address span of the new pool's memory. */
998 uword this_pool_start;
999 uword this_pool_size;
1000 uword save_vpm_start, save_vpm_end, save_vpm_size;
1001 struct rte_mempool_memhdr *memhdr;
1003 this_pool_start = ~0ULL;
1004 this_pool_end = 0LL;
1006 STAILQ_FOREACH (memhdr, &rmp->mem_list, next)
1008 if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end)
1009 this_pool_end = (uword) (memhdr->addr + memhdr->len);
1010 if (((uword) memhdr->addr) < this_pool_start)
1011 this_pool_start = (uword) (memhdr->addr);
1013 ASSERT (this_pool_start < ~0ULL && this_pool_end > 0);
1014 this_pool_size = this_pool_end - this_pool_start;
1018 clib_warning ("%s: pool start %llx pool end %llx pool size %lld",
1019 pool_name, this_pool_start, this_pool_end,
1022 ("before: virtual.start %llx virtual.end %llx virtual.size %lld",
1023 vpm->virtual.start, vpm->virtual.end, vpm->virtual.size);
/* Save the current physmem window so we can roll back on overflow. */
1026 save_vpm_start = vpm->virtual.start;
1027 save_vpm_end = vpm->virtual.end;
1028 save_vpm_size = vpm->virtual.size;
1030 if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0)
1031 vpm->virtual.start = this_pool_start;
1032 if (this_pool_end > vpm->virtual.end)
1033 vpm->virtual.end = this_pool_end;
1035 vpm->virtual.size = vpm->virtual.end - vpm->virtual.start;
1040 ("after: virtual.start %llx virtual.end %llx virtual.size %lld",
1041 vpm->virtual.start, vpm->virtual.end, vpm->virtual.size);
1044 /* check if fits into buffer index range */
1045 if ((u64) vpm->virtual.size >
1046 ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES)))
1048 clib_warning ("physmem: virtual size out of range!");
/* Too large for 32-bit buffer indices: restore the saved window. */
1049 vpm->virtual.start = save_vpm_start;
1050 vpm->virtual.end = save_vpm_end;
1051 vpm->virtual.size = save_vpm_size;
1057 bm->pktmbuf_pools[socket_id] = rmp;
1058 vec_free (pool_name);
1063 vec_free (pool_name);
1065 /* no usable pool for this socket, try to use pool from another one */
1066 for (i = 0; i < vec_len (bm->pktmbuf_pools); i++)
1068 if (bm->pktmbuf_pools[i])
1071 ("WARNING: Failed to allocate mempool for CPU socket %u. "
1072 "Threads running on socket %u will use socket %u mempool.",
1073 socket_id, socket_id, i);
1074 bm->pktmbuf_pools[socket_id] = bm->pktmbuf_pools[i];
1079 return clib_error_return (0, "failed to allocate mempool on socket %u",
/* serialize-main data callback for the transmit direction: packs the
 * serializer's output into a chain of vlib buffers.  When the stream
 * ends or the per-chain byte budget is exceeded, the finished chain is
 * handed to the configured next node; otherwise a new tail buffer is
 * allocated and linked and the stream is pointed at its payload.
 * NOTE(review): lines are elided in this listing (declarations,
 * braces, error-return paths, n_buffer_bytes assignment); confirm
 * against the unabridged source. */
1085 vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s)
1088 vlib_serialize_buffer_main_t *sm;
1089 uword n, n_bytes_to_write;
1090 vlib_buffer_t *last;
1092 n_bytes_to_write = s->current_buffer_index;
1094 uword_to_pointer (s->data_function_opaque,
1095 vlib_serialize_buffer_main_t *);
/* Flush the chain when the stream ends or the chain budget is hit. */
1098 ASSERT (sm->tx.max_n_data_bytes_per_chain > 0);
1099 if (serialize_stream_is_end_of_stream (s)
1100 || sm->tx.n_total_data_bytes + n_bytes_to_write >
1101 sm->tx.max_n_data_bytes_per_chain)
1103 vlib_process_t *p = vlib_get_current_process (vm);
1105 last = vlib_get_buffer (vm, sm->last_buffer);
1106 last->current_length = n_bytes_to_write;
/* Enqueue the whole chain to the next node and reset chain state. */
1108 vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index,
1111 sm->first_buffer = sm->last_buffer = ~0;
1112 sm->tx.n_total_data_bytes = 0;
/* First call for this stream: allocate the head buffer. */
1115 else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0)
1117 ASSERT (sm->first_buffer == ~0);
1118 ASSERT (sm->last_buffer == ~0);
1120 vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1,
1121 sm->tx.free_list_index);
1125 ("vlib_buffer_alloc_from_free_list fails"));
1126 sm->last_buffer = sm->first_buffer;
1128 vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index);
/* Mid-stream: finish the current tail and link a fresh one. */
1131 if (n_bytes_to_write > 0)
1133 vlib_buffer_t *prev = vlib_get_buffer (vm, sm->last_buffer);
1135 vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1,
1136 sm->tx.free_list_index);
1140 ("vlib_buffer_alloc_from_free_list fails"));
1141 sm->tx.n_total_data_bytes += n_bytes_to_write;
1142 prev->current_length = n_bytes_to_write;
1143 prev->next_buffer = sm->last_buffer;
1144 prev->flags |= VLIB_BUFFER_NEXT_PRESENT;
/* Point the serializer at the (possibly new) tail buffer. */
1147 if (sm->last_buffer != ~0)
1149 last = vlib_get_buffer (vm, sm->last_buffer);
1150 s->buffer = vlib_buffer_get_current (last);
1151 s->current_buffer_index = 0;
1152 ASSERT (last->current_data == s->current_buffer_index);
/* serialize-main data callback for the receive direction: feeds the
 * unserializer from a chain of received vlib buffers.  Advances to the
 * next segment (freeing finished chains), blocking on a one-time
 * process event until the rx buffer fifo is non-empty.
 * NOTE(review): lines elided in this listing (declarations, braces,
 * the advance-to-next-segment path); confirm against full source. */
1157 vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s)
1160 vlib_serialize_buffer_main_t *sm;
1161 vlib_buffer_t *last;
1164 uword_to_pointer (s->data_function_opaque,
1165 vlib_serialize_buffer_main_t *);
1168 if (serialize_stream_is_end_of_stream (s))
/* Advance past the consumed segment; free the chain when done. */
1171 if (sm->last_buffer != ~0)
1173 last = vlib_get_buffer (vm, sm->last_buffer);
1175 if (last->flags & VLIB_BUFFER_NEXT_PRESENT)
1176 sm->last_buffer = last->next_buffer;
1179 vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1);
1180 sm->first_buffer = sm->last_buffer = ~0;
/* No current chain: wait for a buffer to arrive on the rx fifo. */
1184 if (sm->last_buffer == ~0)
1186 while (clib_fifo_elts (sm->rx.buffer_fifo) == 0)
1188 sm->rx.ready_one_time_event =
1189 vlib_process_create_one_time_event (vm, vlib_current_process (vm),
1191 vlib_process_wait_for_one_time_event (vm, /* no event data */ 0,
1192 sm->rx.ready_one_time_event);
1195 clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer);
1196 sm->last_buffer = sm->first_buffer;
1199 ASSERT (sm->last_buffer != ~0);
/* Point the unserializer at the current segment's payload. */
1201 last = vlib_get_buffer (vm, sm->last_buffer);
1202 s->current_buffer_index = 0;
1203 s->buffer = vlib_buffer_get_current (last);
1204 s->n_buffer_bytes = last->current_length;
/* Shared open routine for (un)serialize-over-buffers: resets the
 * serialize main (preserving the overflow buffer for reuse), clears
 * per-direction state and installs the rx/tx data callback. */
1208 serialize_open_vlib_helper (serialize_main_t * m,
1210 vlib_serialize_buffer_main_t * sm, uword is_read)
1212 /* Initialize serialize main but save overflow buffer for re-use between calls. */
1214 u8 *save = m->stream.overflow_buffer;
1215 memset (m, 0, sizeof (m[0]));
1216 m->stream.overflow_buffer = save;
1218 _vec_len (save) = 0;
1221 sm->first_buffer = sm->last_buffer = ~0;
1223 clib_fifo_reset (sm->rx.buffer_fifo);
1225 sm->tx.n_total_data_bytes = 0;
/* Direction selects which data callback drives the stream. */
1227 m->header.data_function = is_read ? vlib_serialize_rx : vlib_serialize_tx;
1228 m->stream.data_function_opaque = pointer_to_uword (sm);
/* Open a serialize main for writing into vlib buffers (tx side). */
1232 serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm,
1233 vlib_serialize_buffer_main_t * sm)
1235 serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0);
/* Open a serialize main for reading out of vlib buffers (rx side). */
1239 unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm,
1240 vlib_serialize_buffer_main_t * sm)
1242 serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1);
/* Finish a tx serialize session: fix up the tail buffer's length,
 * append any bytes left in the overflow buffer to the chain, and
 * return the head buffer index of the completed chain.
 * NOTE(review): return type and some intermediate lines are elided in
 * this listing; the vlib_buffer_add_data argument spanning lines
 * 1260-1263 is missing its data pointer here. */
1246 serialize_close_vlib_buffer (serialize_main_t * m)
1248 vlib_serialize_buffer_main_t *sm
1249 = uword_to_pointer (m->stream.data_function_opaque,
1250 vlib_serialize_buffer_main_t *);
1251 vlib_buffer_t *last;
1252 serialize_stream_t *s = &m->stream;
1254 last = vlib_get_buffer (sm->vlib_main, sm->last_buffer);
1255 last->current_length = s->current_buffer_index;
/* Flush overflow bytes into the chain, then reset the overflow vec. */
1257 if (vec_len (s->overflow_buffer) > 0)
1260 = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index,
1261 sm->last_buffer == ~0 ? 0 : sm->last_buffer,
1263 vec_len (s->overflow_buffer));
1264 _vec_len (s->overflow_buffer) = 0;
1267 return sm->first_buffer;
/* Finish an rx unserialize session: free any remaining buffer chain,
 * drain the rx fifo and clear the overflow buffer. */
1271 unserialize_close_vlib_buffer (serialize_main_t * m)
1273 vlib_serialize_buffer_main_t *sm
1274 = uword_to_pointer (m->stream.data_function_opaque,
1275 vlib_serialize_buffer_main_t *);
1276 if (sm->first_buffer != ~0)
1277 vlib_buffer_free_one (sm->vlib_main, sm->first_buffer);
1278 clib_fifo_reset (sm->rx.buffer_fifo);
1279 if (m->stream.overflow_buffer)
1280 _vec_len (m->stream.overflow_buffer) = 0;
/* format() callback for one free list row in `show buffers`; a NULL
 * free-list argument (elided test) prints the column header instead.
 * NOTE(review): the `if (f == 0)` guard before the header format is
 * elided in this listing -- confirm. */
1284 format_vlib_buffer_free_list (u8 * s, va_list * va)
1286 vlib_buffer_free_list_t *f = va_arg (*va, vlib_buffer_free_list_t *);
1287 u32 threadnum = va_arg (*va, u32);
1288 uword bytes_alloc, bytes_free, n_free, size;
/* Header row. */
1291 return format (s, "%=7s%=30s%=12s%=12s%=12s%=12s%=12s%=12s",
1292 "Thread", "Name", "Index", "Size", "Alloc", "Free",
/* Per-list row: size includes the vlib_buffer_t header. */
1295 size = sizeof (vlib_buffer_t) + f->n_data_bytes;
1296 n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers);
1297 bytes_alloc = size * f->n_alloc;
1298 bytes_free = size * n_free;
1300 s = format (s, "%7d%30s%12d%12d%=12U%=12U%=12d%=12d",
1302 f->name, f->index, f->n_data_bytes,
1303 format_memory_size, bytes_alloc,
1304 format_memory_size, bytes_free, f->n_alloc, n_free);
/* CLI handler for `show buffers`: prints the header row, then one row
 * per free list per thread (iterating vlib_mains when workers exist).
 * NOTE(review): the do/while loop opening, vm_index declaration and
 * increment, and the final return are elided in this listing. */
1309 static clib_error_t *
1310 show_buffers (vlib_main_t * vm,
1311 unformat_input_t * input, vlib_cli_command_t * cmd)
1313 vlib_buffer_main_t *bm;
1314 vlib_buffer_free_list_t *f;
1315 vlib_main_t *curr_vm;
/* NULL free list => header row. */
1318 vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0, 0);
1322 curr_vm = vec_len (vlib_mains) ? vlib_mains[vm_index] : vm;
1323 bm = curr_vm->buffer_main;
1326 pool_foreach (f, bm->buffer_free_list_pool, ({
1327 vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f, vm_index);
1333 while (vm_index < vec_len (vlib_mains));
/* CLI registration for `show buffers` (closing brace and semicolon
 * elided in this listing). */
1339 VLIB_CLI_COMMAND (show_buffers_command, static) = {
1340 .path = "show buffers",
1341 .short_help = "Show packet buffer allocation",
1342 .function = show_buffers,
/* Buffer-state validation globals: a spinlock vector, a hash of
 * tracked buffer states, and a dedicated mheap they live on (set up in
 * buffer_state_validation_init below). */
1348 u32 *vlib_buffer_state_validation_lock;
1349 uword *vlib_buffer_state_validation_hash;
1350 void *vlib_buffer_state_heap;
/* Init function: allocate a private 10 MB mheap for buffer-state
 * validation, create the state hash and the lock vector on it, then
 * restore the previous heap.
 * NOTE(review): braces, the oldheap declaration and the return
 * statement are elided in this listing. */
1352 static clib_error_t *
1353 buffer_state_validation_init (vlib_main_t * vm)
1357 vlib_buffer_state_heap = mheap_alloc (0, 10 << 20);
/* Switch to the private heap for the validation structures. */
1359 oldheap = clib_mem_set_heap (vlib_buffer_state_heap);
1361 vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword));
1362 vec_validate_aligned (vlib_buffer_state_validation_lock, 0,
1363 CLIB_CACHE_LINE_BYTES);
1364 clib_mem_set_heap (oldheap);
1368 VLIB_INIT_FUNCTION (buffer_state_validation_init);
1373 * fd.io coding-style-patch-verification: ON
1376 * eval: (c-set-style "gnu")