+ vlib_buffer_pool_t *bp;
+
+ /* *INDENT-OFF* */
+ vec_foreach (bp, bm->buffer_pools)
+ {
+ clib_spinlock_lock (&bp->lock);
+ vec_validate_aligned (bp->threads, vec_len (vlib_mains) - 1,
+ CLIB_CACHE_LINE_BYTES);
+ clib_spinlock_unlock (&bp->lock);
+ }
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (vlib_buffer_worker_init);
+
+/*
+ * Create the shared physmem map that backs the default buffer pool of one
+ * NUMA node.
+ *
+ * vm                 vlib main; vm->buffer_main supplies ext_hdr_size and
+ *                    buffers_per_numa.
+ * numa_node          NUMA node handed to vlib_physmem_shared_map_create ()
+ *                    and used in the map name ("buffers-numa-<n>").
+ * physmem_map_index  forwarded to vlib_physmem_shared_map_create ().
+ * log2_page_size     page size to map with; must not be
+ *                    CLIB_MEM_PAGE_SZ_UNKNOWN (asserted).
+ * unpriv             when buffers_per_numa is 0, selects
+ *                    VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA_UNPRIV instead of
+ *                    VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA.
+ *
+ * Returns an error if a single buffer does not fit in one page, otherwise
+ * whatever vlib_physmem_shared_map_create () returns.
+ */
+static clib_error_t *
+vlib_buffer_main_init_numa_alloc (struct vlib_main_t *vm, u32 numa_node,
+                                  u32 * physmem_map_index,
+                                  clib_mem_page_sz_t log2_page_size,
+                                  u8 unpriv)
+{
+  vlib_buffer_main_t *bm = vm->buffer_main;
+  u32 buffers_per_numa = bm->buffers_per_numa;
+  clib_error_t *error;
+  u32 buffer_size;
+  uword n_pages, pagesize;
+  u8 *name = 0;
+
+  ASSERT (log2_page_size != CLIB_MEM_PAGE_SZ_UNKNOWN);
+
+  pagesize = clib_mem_page_bytes (log2_page_size);
+  buffer_size = vlib_buffer_alloc_size (bm->ext_hdr_size,
+                                        vlib_buffer_get_default_data_size
+                                        (vm));
+
+  /*
+   * "%llu" consumes a 64-bit vararg, but buffer_size is a u32 and pagesize
+   * a uword; widen both explicitly so the message prints the real values.
+   */
+  if (buffer_size > pagesize)
+    return clib_error_return (0, "buffer size (%llu) is greater than page "
+                              "size (%llu)", (u64) buffer_size,
+                              (u64) pagesize);
+
+  /* No explicit buffer count configured: fall back to a built-in default. */
+  if (buffers_per_numa == 0)
+    buffers_per_numa = unpriv ? VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA_UNPRIV :
+      VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA;
+
+  name = format (0, "buffers-numa-%d%c", numa_node, 0);
+
+  /* Round up to whole pages; (pagesize / buffer_size) buffers fit per page. */
+  n_pages = (buffers_per_numa - 1) / (pagesize / buffer_size) + 1;
+  error = vlib_physmem_shared_map_create (vm, (char *) name,
+                                          n_pages * pagesize,
+                                          min_log2 (pagesize), numa_node,
+                                          physmem_map_index);
+  vec_free (name);
+  return error;
+}
+
+/*
+ * Allocate backing memory for one NUMA node and create its default buffer
+ * pool ("default-numa-<n>").
+ *
+ * When buffer_main carries no concrete page size, hugepages are tried first
+ * and, on failure, the allocation is retried with the default page size in
+ * "unpriv" mode after logging a warning.  With a concrete page size there is
+ * a single attempt.
+ *
+ * index  out: receives the value returned by vlib_buffer_pool_create ().
+ *
+ * Returns 0 on success, the allocation error, or an error when the pool
+ * index comes back as (u8) ~0.
+ */
+static clib_error_t *
+vlib_buffer_main_init_numa_node (struct vlib_main_t *vm, u32 numa_node,
+                                 u8 * index)
+{
+  vlib_buffer_main_t *bm = vm->buffer_main;
+  clib_error_t *error;
+  u32 map_index;
+  u8 *pool_name = 0;
+
+  if (bm->log2_page_size != CLIB_MEM_PAGE_SZ_UNKNOWN)
+    {
+      /* A concrete page size is set: one attempt, no fallback. */
+      error = vlib_buffer_main_init_numa_alloc (vm, numa_node,
+                                                &map_index,
+                                                bm->log2_page_size,
+                                                0 /* unpriv */ );
+    }
+  else
+    {
+      error = vlib_buffer_main_init_numa_alloc (vm, numa_node,
+                                                &map_index,
+                                                CLIB_MEM_PAGE_SZ_DEFAULT_HUGE,
+                                                0 /* unpriv */ );
+      if (error)
+        {
+          /* Hugepage allocation failed, retry without hugepages */
+          vlib_log_warn (bm->log_default,
+                         "numa[%u] falling back to non-hugepage backed "
+                         "buffer pool (%U)", numa_node, format_clib_error,
+                         error);
+          clib_error_free (error);
+
+          error = vlib_buffer_main_init_numa_alloc (vm, numa_node,
+                                                    &map_index,
+                                                    CLIB_MEM_PAGE_SZ_DEFAULT,
+                                                    1 /* unpriv */ );
+        }
+    }
+
+  if (error)
+    return error;
+
+  pool_name = format (pool_name, "default-numa-%d%c", numa_node, 0);
+  *index = vlib_buffer_pool_create (vm, (char *) pool_name,
+                                    vlib_buffer_get_default_data_size (vm),
+                                    map_index);
+  vec_free (pool_name);
+
+  /* (u8) ~0 is the "no pool" sentinel coming back from pool creation. */
+  if (*index == (u8) ~ 0)
+    error = clib_error_return (0, "maximum number of buffer pools reached");
+
+  return error;
+}
+
+/*
+ * Allocate and zero vm->buffer_main if it does not exist yet, and seed
+ * default_data_size with VLIB_BUFFER_DEFAULT_DATA_SIZE.  Calling it again
+ * once buffer_main is set does nothing.
+ */
+void
+vlib_buffer_main_alloc (vlib_main_t * vm)
+{
+  vlib_buffer_main_t *bm;
+
+  if (vm->buffer_main == 0)
+    {
+      bm = clib_mem_alloc (sizeof (*bm));
+      clib_memset (bm, 0, sizeof (*bm));
+      bm->default_data_size = VLIB_BUFFER_DEFAULT_DATA_SIZE;
+      vm->buffer_main = bm;
+    }
+}
+
+/*
+ * Sum n_cached over every entry of bp->threads while holding bp->lock.
+ * Returns the total.
+ */
+static u32
+buffer_get_cached (vlib_buffer_pool_t * bp)
+{
+  u32 total = 0;
+  uword i;
+
+  clib_spinlock_lock (&bp->lock);
+
+  for (i = 0; i < vec_len (bp->threads); i++)
+    total += bp->threads[i].n_cached;
+
+  clib_spinlock_unlock (&bp->lock);
+
+  return total;
+}
+
+/*
+ * Look up a buffer pool by its position in bm->buffer_pools.
+ *
+ * Returns a pointer to the pool, or 0 when index is not a valid position in
+ * the vector (including when the vector is empty or not yet allocated).
+ */
+static vlib_buffer_pool_t *
+buffer_get_by_index (vlib_buffer_main_t * bm, u32 index)
+{
+  /*
+   * Valid indices are 0 .. vec_len - 1, so index == vec_len must be rejected
+   * too.  vec_len () of a null vector is 0, which makes every index fail
+   * this check, so no separate null test is needed.
+   */
+  if (index >= vec_len (bm->buffer_pools))
+    return 0;
+
+  return vec_elt_at_index (bm->buffer_pools, index);
+}
+
+/*
+ * Gauge callback: store n_buffers - n_avail - (sum of per-thread cached
+ * buffers) for pool `index` into e->value.  Leaves e untouched when the
+ * pool lookup fails.
+ */
+static void
+buffer_gauges_update_used_fn (stat_segment_directory_entry_t * e, u32 index)
+{
+  vlib_buffer_pool_t *pool =
+    buffer_get_by_index (vlib_get_main ()->buffer_main, index);
+
+  if (pool)
+    e->value = pool->n_buffers - pool->n_avail - buffer_get_cached (pool);
+}
+
+/*
+ * Gauge callback: copy n_avail of pool `index` into e->value.  Leaves e
+ * untouched when the pool lookup fails.
+ */
+static void
+buffer_gauges_update_available_fn (stat_segment_directory_entry_t * e,
+                                   u32 index)
+{
+  vlib_buffer_pool_t *pool =
+    buffer_get_by_index (vlib_get_main ()->buffer_main, index);
+
+  if (pool)
+    e->value = pool->n_avail;
+}
+
+/*
+ * Gauge callback: store the summed per-thread cached buffer count of pool
+ * `index` into e->value.  Leaves e untouched when the pool lookup fails.
+ */
+static void
+buffer_gauges_update_cached_fn (stat_segment_directory_entry_t * e, u32 index)
+{
+  vlib_buffer_pool_t *pool =
+    buffer_get_by_index (vlib_get_main ()->buffer_main, index);
+
+  if (pool)
+    e->value = buffer_get_cached (pool);
+}
+
+/*
+ * Initialise the buffer subsystem:
+ *   1. make sure buffer_main exists and set up its log class, external
+ *      header size and known-hash lock;
+ *   2. read the NUMA node lists from sysfs ("online" AND-ed with
+ *      "has_memory" when both are readable), falling back to node 0 only;
+ *   3. try to create a default buffer pool on every selected node;
+ *   4. point nodes whose pool creation failed at the first pool that
+ *      succeeded;
+ *   5. register cached/used/available gauges for every non-empty pool.
+ *
+ * Returns 0 on success, or an error when no pool could be created at all.
+ */
+clib_error_t *
+vlib_buffer_main_init (struct vlib_main_t * vm)
+{
+  vlib_buffer_main_t *bm;
+  vlib_buffer_pool_t *pool;
+  clib_error_t *err;
+  clib_bitmap_t *numa_bmp = 0, *has_mem_bmp = 0;
+  u32 numa_node;
+  u8 *gauge_name = 0;
+  u8 first_valid_buffer_pool_index = ~0;
+
+  vlib_buffer_main_alloc (vm);
+  bm = vm->buffer_main;
+
+  bm->log_default = vlib_log_register_class ("buffer", 0);
+  bm->ext_hdr_size = __vlib_buffer_external_hdr_size;
+
+  clib_spinlock_init (&bm->buffer_known_hash_lockp);
+
+  /* sysfs read failures are tolerated; the bitmap simply stays null. */
+  err = clib_sysfs_read ("/sys/devices/system/node/online", "%U",
+                         unformat_bitmap_list, &numa_bmp);
+  if (err)
+    clib_error_free (err);
+
+  err = clib_sysfs_read ("/sys/devices/system/node/has_memory", "%U",
+                         unformat_bitmap_list, &has_mem_bmp);
+  if (err)
+    clib_error_free (err);
+
+  if (numa_bmp && has_mem_bmp)
+    numa_bmp = clib_bitmap_and (numa_bmp, has_mem_bmp);
+
+  /* no info from sysfs, assuming that only numa 0 exists */
+  if (numa_bmp == 0)
+    numa_bmp = clib_bitmap_set (numa_bmp, 0, 1);
+
+  if (clib_bitmap_last_set (numa_bmp) >= VLIB_BUFFER_MAX_NUMA_NODES)
+    clib_panic ("system have more than %u NUMA nodes",
+                VLIB_BUFFER_MAX_NUMA_NODES);
+
+  /* *INDENT-OFF* */
+  clib_bitmap_foreach (numa_node, numa_bmp)
+    {
+      u8 *index = &bm->default_buffer_pool_index_for_numa[numa_node];
+
+      /* Mark as "no pool" until creation succeeds. */
+      *index = ~0;
+
+      err = vlib_buffer_main_init_numa_node (vm, numa_node, index);
+      if (err)
+        {
+          clib_error_report (err);
+          clib_error_free (err);
+          continue;
+        }
+
+      /* Remember the first pool that came up; it is the fallback below. */
+      if (first_valid_buffer_pool_index == (u8) ~0)
+        first_valid_buffer_pool_index = *index;
+    }
+  /* *INDENT-ON* */
+
+  if (first_valid_buffer_pool_index == (u8) ~ 0)
+    {
+      err = clib_error_return (0, "failed to allocate buffer pool(s)");
+      goto done;
+    }
+
+  /* *INDENT-OFF* */
+  clib_bitmap_foreach (numa_node, numa_bmp)
+    {
+      u8 *index = &bm->default_buffer_pool_index_for_numa[numa_node];
+
+      if (*index == (u8) ~0)
+        *index = first_valid_buffer_pool_index;
+    }
+  /* *INDENT-ON* */
+
+  vec_foreach (pool, bm->buffer_pools)
+  {
+    u32 pool_index = pool - bm->buffer_pools;
+
+    if (pool->n_buffers == 0)
+      continue;
+
+    /* The same vector is reused for each gauge name. */
+    vec_reset_length (gauge_name);
+    gauge_name = format (gauge_name, "/buffer-pools/%s/cached%c",
+                         pool->name, 0);
+    stat_segment_register_gauge (gauge_name, buffer_gauges_update_cached_fn,
+                                 pool_index);
+
+    vec_reset_length (gauge_name);
+    gauge_name = format (gauge_name, "/buffer-pools/%s/used%c",
+                         pool->name, 0);
+    stat_segment_register_gauge (gauge_name, buffer_gauges_update_used_fn,
+                                 pool_index);
+
+    vec_reset_length (gauge_name);
+    gauge_name = format (gauge_name, "/buffer-pools/%s/available%c",
+                         pool->name, 0);
+    stat_segment_register_gauge (gauge_name,
+                                 buffer_gauges_update_available_fn,
+                                 pool_index);
+  }
+
+done:
+  vec_free (numa_bmp);
+  vec_free (has_mem_bmp);
+  vec_free (gauge_name);
+  return err;
+}
+
+/*
+ * Early-config handler for the "buffers" section.
+ *
+ * Ensures buffer_main exists, resets log2_page_size to
+ * CLIB_MEM_PAGE_SZ_UNKNOWN, then accepts any number of:
+ *   buffers-per-numa <n>
+ *   page-size <size>
+ *   default data-size <n>
+ *
+ * Returns 0 once the input is exhausted, or a parse error on the first
+ * token that matches none of the above.
+ */
+static clib_error_t *
+vlib_buffers_configure (vlib_main_t * vm, unformat_input_t * input)
+{
+  vlib_buffer_main_t *bm;
+
+  vlib_buffer_main_alloc (vm);
+  bm = vm->buffer_main;
+
+  bm->log2_page_size = CLIB_MEM_PAGE_SZ_UNKNOWN;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      /* Short-circuit keeps the same match order as an if/else-if chain. */
+      if (unformat (input, "buffers-per-numa %u", &bm->buffers_per_numa)
+          || unformat (input, "page-size %U", unformat_log2_page_size,
+                       &bm->log2_page_size)
+          || unformat (input, "default data-size %u",
+                       &bm->default_data_size))
+        continue;
+
+      return unformat_parse_error (input);
+    }
+
+  unformat_free (input);
+  return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (vlib_buffers_configure, "buffers");
+
+#if VLIB_BUFFER_ALLOC_FAULT_INJECTOR > 0
+u32
+vlib_buffer_alloc_may_fail (vlib_main_t * vm, u32 n_buffers)
+{
+ f64 r;
+
+ r = random_f64 (&vm->buffer_alloc_success_seed);
+
+ /* Fail this request? */
+ if (r > vm->buffer_alloc_success_rate)
+ n_buffers--;
+ /* 5% chance of returning nothing at all */
+ if (r > vm->buffer_alloc_success_rate && r > 0.95)
+ n_buffers = 0;