#include <vppinfra/cache.h>
#include <vppinfra/serialize.h>
#include <vppinfra/vector.h>
+#include <vppinfra/lock.h>
#include <vlib/error.h> /* for vlib_error_t */
#include <vlib/config.h> /* for __PRE_DATA_SIZE */
-#define VLIB_BUFFER_DATA_SIZE (2048)
#define VLIB_BUFFER_PRE_DATA_SIZE __PRE_DATA_SIZE
+#define VLIB_BUFFER_DEFAULT_DATA_SIZE (2048)
+
+/* Minimum buffer chain segment size. Does not apply to last buffer in chain.
+ Dataplane code can safely assume that specified amount of data is not split
+ into 2 chained buffers */
+#define VLIB_BUFFER_MIN_CHAIN_SEG_SIZE (128)
+
+/* Amount of head buffer data copied to each replica head buffer */
+#define VLIB_BUFFER_CLONE_HEAD_SIZE (256)
+
/** \file
vlib buffer structure definition and a few select
access methods. This structure and the buffer allocation
of typing to make it so.
*/
-/* VLIB buffer representation. */
-typedef struct
+/**
+ * Buffer Flags
+ */
+#define foreach_vlib_buffer_flag \
+ _( 0, IS_TRACED, 0) \
+ _( 1, NEXT_PRESENT, "next-present") \
+ _( 2, TOTAL_LENGTH_VALID, 0) \
+ _( 3, EXT_HDR_VALID, "ext-hdr-valid")
+
+/* NOTE: only generic buffer flags should be defined here; for anything else
+ please consider using user flags, i.e. src/vnet/buffer.h */
+
+enum
{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- /* Offset within data[] that we are currently processing.
- If negative current header points into predata area. */
- i16 current_data; /**< signed offset in data[], pre_data[]
- that we are currently processing.
- If negative current header points into predata area.
- */
- u16 current_length; /**< Nbytes between current data and
- the end of this buffer.
- */
- u32 flags; /**< buffer flags:
- <br> VLIB_BUFFER_IS_TRACED: trace this buffer.
- <br> VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer.
- <br> VLIB_BUFFER_TOTAL_LENGTH_VALID: as it says
- <br> VLIB_BUFFER_REPL_FAIL: packet replication failure
- <br> VLIB_BUFFER_RECYCLE: as it says
- <br> VLIB_BUFFER_FLOW_REPORT: buffer is a flow report,
- <br> VLIB_BUFFER_EXT_HDR_VALID: buffer contains valid external buffer manager header,
- set to avoid adding it to a flow report
- <br> VLIB_BUFFER_FLAG_USER(n): user-defined bit N
- */
-#define VLIB_BUFFER_IS_TRACED (1 << 0)
-#define VLIB_BUFFER_LOG2_NEXT_PRESENT (1)
-#define VLIB_BUFFER_NEXT_PRESENT (1 << VLIB_BUFFER_LOG2_NEXT_PRESENT)
-#define VLIB_BUFFER_IS_RECYCLED (1 << 2)
-#define VLIB_BUFFER_TOTAL_LENGTH_VALID (1 << 3)
-#define VLIB_BUFFER_REPL_FAIL (1 << 4)
-#define VLIB_BUFFER_RECYCLE (1 << 5)
-#define VLIB_BUFFER_FLOW_REPORT (1 << 6)
-#define VLIB_BUFFER_EXT_HDR_VALID (1 << 7)
+#define _(bit, name, v) VLIB_BUFFER_##name = (1 << (bit)),
+ foreach_vlib_buffer_flag
+#undef _
+};
+
+enum
+{
+#define _(bit, name, v) VLIB_BUFFER_LOG2_##name = (bit),
+ foreach_vlib_buffer_flag
+#undef _
+};
/* User defined buffer flags. */
#define LOG2_VLIB_BUFFER_FLAG_USER(n) (32 - (n))
#define VLIB_BUFFER_FLAG_USER(n) (1 << LOG2_VLIB_BUFFER_FLAG_USER(n))
+#define VLIB_BUFFER_FLAGS_ALL (0x0f)
+
+/** VLIB buffer representation. */
+typedef union
+{
+ struct
+ {
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+ /** signed offset in data[], pre_data[] that we are currently
+ * processing. If negative current header points into predata area. */
+ i16 current_data;
+
+ /** Nbytes between current data and the end of this buffer. */
+ u16 current_length;
+
+ /** buffer flags:
+ <br> VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index,
+ <br> VLIB_BUFFER_IS_TRACED: trace this buffer.
+ <br> VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer.
+ <br> VLIB_BUFFER_TOTAL_LENGTH_VALID: as it says
+ <br> VLIB_BUFFER_EXT_HDR_VALID: buffer contains valid external buffer manager header,
+ set to avoid adding it to a flow report
+ <br> VLIB_BUFFER_FLAG_USER(n): user-defined bit N
+ */
+ u32 flags;
+
+ /** Generic flow identifier */
+ u32 flow_id;
+
+ /** Reference count for this buffer. */
+ volatile u8 ref_count;
+
+ /** index of the buffer pool this buffer belongs to. */
+ u8 buffer_pool_index;
+
+ /** Error code for buffers to be enqueued to error handler. */
+ vlib_error_t error;
+
+ /** Next buffer for this linked-list of buffers. Only valid if
+ * VLIB_BUFFER_NEXT_PRESENT flag is set. */
+ u32 next_buffer;
+
+ /** The following fields can be in a union because once a packet enters
+ * the punt path, it is no longer on a feature arc */
+ union
+ {
+ /** Used by feature subgraph arcs to visit enabled feature nodes */
+ u32 current_config_index;
+ /* the reason the packet was punted */
+ u32 punt_reason;
+ };
+
+ /** Opaque data used by sub-graphs for their own purposes. */
+ u32 opaque[10];
+
+ /** part of buffer metadata which is initialized on alloc ends here. */
+ STRUCT_MARK (template_end);
+
+ /** start of 2nd cache line */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+
+ /** Specifies trace buffer handle if VLIB_BUFFER_IS_TRACED flag is
+ * set. */
+ u32 trace_handle;
+
+ /** Only valid for first buffer in chain. Current length plus total length
+ * given here give total number of bytes in buffer chain. */
+ u32 total_length_not_including_first_buffer;
+
+ /** More opaque data, see ../vnet/vnet/buffer.h */
+ u32 opaque2[14];
+
+ /** start of third cache line */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
- u32 free_list_index; /**< Buffer free list that this buffer was
- allocated from and will be freed to.
- */
-
- u32 total_length_not_including_first_buffer;
- /**< Only valid for first buffer in chain. Current length plus
- total length given here give total number of bytes in buffer chain.
- */
-
- u32 next_buffer; /**< Next buffer for this linked-list of buffers.
- Only valid if VLIB_BUFFER_NEXT_PRESENT flag is set.
- */
-
- vlib_error_t error; /**< Error code for buffers to be enqueued
- to error handler.
- */
- u32 current_config_index; /**< Used by feature subgraph arcs to
- visit enabled feature nodes
- */
-
- u8 feature_arc_index; /**< Used to identify feature arcs by intermediate
- feature node
- */
-
- u8 n_add_refs; /**< Number of additional references to this buffer. */
-
- u8 dont_waste_me[2]; /**< Available space in the (precious)
- first 32 octets of buffer metadata
- Before allocating any of it, discussion required!
- */
-
- u32 opaque[8]; /**< Opaque data used by sub-graphs for their own purposes.
- See .../vnet/vnet/buffer.h
- */
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
-
- u32 trace_index; /**< Specifies index into trace buffer
- if VLIB_PACKET_IS_TRACED flag is set.
- */
- u32 recycle_count; /**< Used by L2 path recycle code */
- u32 opaque2[14]; /**< More opaque data, currently unused */
-
- /***** end of second cache line */
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
- u8 pre_data[VLIB_BUFFER_PRE_DATA_SIZE]; /**< Space for inserting data
- before buffer start.
- Packet rewrite string will be
- rewritten backwards and may extend
- back before buffer->data[0].
- Must come directly before packet data.
- */
-
- u8 data[0]; /**< Packet data. Hardware DMA here */
-} vlib_buffer_t; /* Must be a multiple of 64B. */
+ /** Space for inserting data before buffer start. Packet rewrite string
+ * will be rewritten backwards and may extend back before
+ * buffer->data[0]. Must come directly before packet data. */
+ u8 pre_data[VLIB_BUFFER_PRE_DATA_SIZE];
+
+ /** Packet data */
+ u8 data[];
+ };
+#ifdef CLIB_HAVE_VEC128
+ u8x16 as_u8x16[4];
+#endif
+#ifdef CLIB_HAVE_VEC256
+ u8x32 as_u8x32[2];
+#endif
+#ifdef CLIB_HAVE_VEC512
+ u8x64 as_u8x64[1];
+#endif
+} vlib_buffer_t;
#define VLIB_BUFFER_HDR_SIZE (sizeof(vlib_buffer_t) - VLIB_BUFFER_PRE_DATA_SIZE)
*/
#define vlib_prefetch_buffer_header(b,type) CLIB_PREFETCH (b, 64, type)
-
-always_inline vlib_buffer_t *
-vlib_buffer_next_contiguous (vlib_buffer_t * b, u32 buffer_bytes)
-{
- return (void *) (b + 1) + buffer_bytes;
-}
+#define vlib_prefetch_buffer_data(b,type) \
+ CLIB_PREFETCH (vlib_buffer_get_current(b), CLIB_CACHE_LINE_BYTES, type)
always_inline void
vlib_buffer_struct_is_sane (vlib_buffer_t * b)
ASSERT (b->pre_data + VLIB_BUFFER_PRE_DATA_SIZE == b->data);
}
+/** \brief Get the address of the buffer's data area as an integer
+ * @param b pointer to the buffer
+ * @return address of b->data converted with pointer_to_uword
+ */
+always_inline uword
+vlib_buffer_get_va (vlib_buffer_t * b)
+{
+  uword data_va = pointer_to_uword (b->data);
+
+  return data_va;
+}
+
/** \brief Get pointer to current data to process
@param b - (vlib_buffer_t *) pointer to the buffer
return b->data + b->current_data;
}
+/** \brief Get the address of the current data position as an integer
+ * @param b pointer to the buffer
+ * @return vlib_buffer_get_va (b) offset by the signed b->current_data
+ */
+always_inline uword
+vlib_buffer_get_current_va (vlib_buffer_t * b)
+{
+  uword base_va = vlib_buffer_get_va (b);
+
+  return base_va + b->current_data;
+}
+
/** \brief Advance current data pointer by the supplied (signed!) amount
@param b - (vlib_buffer_t *) pointer to the buffer
ASSERT (b->current_length >= l);
b->current_data += l;
b->current_length -= l;
+
+ ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0 ||
+ b->current_length >= VLIB_BUFFER_MIN_CHAIN_SEG_SIZE);
+}
+
+/** \brief Check whether the buffer holds at least 'l' bytes of current data
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @param l - (word) number of bytes required
+ @return - 1 if current_length is at least 'l', 0 otherwise
+*/
+always_inline u8
+vlib_buffer_has_space (vlib_buffer_t * b, word l)
+{
+  if (b->current_length >= l)
+    return 1;
+  return 0;
}
/** \brief Reset current header & length to state they were in when
return (void *) b->opaque2;
}
-/* Forward declaration. */
-struct vlib_main_t;
+/** \brief Get pointer just past the last byte of the buffer's current data
+ * @param b pointer to the buffer
+ * @return b->data + b->current_data + b->current_length
+ */
+always_inline u8 *
+vlib_buffer_get_tail (vlib_buffer_t * b)
+{
+  u8 *head = b->data + b->current_data;
+
+  return head + b->current_length;
+}
-typedef struct vlib_buffer_free_list_t
+/** \brief Append uninitialized data to buffer
+ *
+ * Grows current_length by 'size'; no check is made that the buffer has
+ * room for the extra bytes.
+ * @param b pointer to the buffer
+ * @param size number of uninitialized bytes
+ * @return pointer to beginning of uninitialized data (the previous tail)
+ */
+always_inline void *
+vlib_buffer_put_uninit (vlib_buffer_t * b, u16 size)
+{
+  void *tail = vlib_buffer_get_tail (b);
+
+  /* XXX make sure there's enough space */
+  b->current_length = b->current_length + size;
+  return tail;
+}
+
+/** \brief Prepend uninitialized data to buffer
+ *
+ * Moves current_data back by 'size' bytes and grows current_length by the
+ * same amount. The ASSERT requires 'size' to be no larger than
+ * current_data + VLIB_BUFFER_PRE_DATA_SIZE.
+ * @param b pointer to the buffer
+ * @param size number of uninitialized bytes
+ * @return pointer to beginning of uninitialized data
+ */
+always_inline void *
+vlib_buffer_push_uninit (vlib_buffer_t * b, u8 size)
{
- /* Template buffer used to initialize first 16 bytes of buffers
- allocated on this free list. */
- vlib_buffer_t buffer_init_template;
+ ASSERT (b->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= size);
+ b->current_data -= size;
+ b->current_length += size;
- /* Our index into vlib_main_t's buffer_free_list_pool. */
- u32 index;
+ return vlib_buffer_get_current (b);
+}
- /* Number of data bytes for buffers in this free list. */
- u32 n_data_bytes;
+/** \brief Make head room, typically for packet headers
+ *
+ * Moves the current data offset forward by 'size' bytes; current_length
+ * is left untouched.
+ * @param b pointer to the buffer
+ * @param size number of head room bytes
+ * @return pointer to start of buffer (current data) after the move
+ */
+always_inline void *
+vlib_buffer_make_headroom (vlib_buffer_t * b, u8 size)
+{
+  b->current_data = b->current_data + size;
+  return vlib_buffer_get_current (b);
+}
- /* Number of buffers to allocate when we need to allocate new buffers
- from physmem heap. */
- u32 min_n_buffers_each_physmem_alloc;
+/** \brief Construct a trace handle from thread and pool index
+ * @param thread Thread id, asserted to be below 0xff
+ * @param pool_index Pool index, asserted to be below 0x00FFFFFF
+ * @return trace handle
+ */
+always_inline u32
+vlib_buffer_make_trace_handle (u32 thread, u32 pool_index)
+{
+  ASSERT (thread < 0xff);
+  ASSERT (pool_index < 0x00FFFFFF);
+
+  /* thread id in the top 8 bits, pool index in the low 24 bits */
+  return (thread << 24) | (pool_index & 0x00FFFFFF);
+}
- /* Total number of buffers allocated from this free list. */
- u32 n_alloc;
+/** \brief Extract the thread id from a buffer's trace handle
+ * @param b pointer to the buffer whose trace_handle is read
+ * @return the thread id (trace_handle shifted right by 24 bits)
+ */
+always_inline u32
+vlib_buffer_get_trace_thread (vlib_buffer_t * b)
+{
+ u32 trace_handle = b->trace_handle;
- /* Vector of free buffers. Each element is a byte offset into I/O heap. */
- u32 *buffers;
+ return trace_handle >> 24;
+}
- /* Memory chunks allocated for this free list
- recorded here so they can be freed when free list
- is deleted. */
- void **buffer_memory_allocated;
+/** \brief Extract the trace (pool) index from a buffer's trace handle
+ * @param b pointer to the buffer whose trace_handle is read
+ * @return the trace index (low 24 bits of b->trace_handle)
+ */
+always_inline u32
+vlib_buffer_get_trace_index (vlib_buffer_t * b)
+{
+  return b->trace_handle & 0x00FFFFFF;
+}
- /* Free list name. */
- u8 *name;
+/** \brief Retrieve bytes from buffer head
+ * @param b pointer to the buffer
+ * @param size number of bytes to pull
+ * @return pointer to the pulled bytes (the current data before the pull),
+ *         or 0 if the buffer holds fewer than 'size' bytes
+ */
+always_inline void *
+vlib_buffer_pull (vlib_buffer_t * b, u8 size)
+{
+  /* Only the current_length bytes of current data can be pulled. Counting
+   * VLIB_BUFFER_PRE_DATA_SIZE here would let an oversized pull through to
+   * vlib_buffer_advance, which requires current_length >= size. */
+  if (b->current_length < size)
+    return 0;
- /* Callback functions to initialize newly allocated buffers.
- If null buffers are zeroed. */
- void (*buffer_init_function) (struct vlib_main_t * vm,
- struct vlib_buffer_free_list_t * fl,
- u32 * buffers, u32 n_buffers);
+  void *data = vlib_buffer_get_current (b);
+  vlib_buffer_advance (b, size);
+  return data;
+}
- /* Callback function to announce that buffers have been
- added to the freelist */
- void (*buffers_added_to_freelist_function)
- (struct vlib_main_t * vm, struct vlib_buffer_free_list_t * fl);
+/* Forward declaration. */
+struct vlib_main_t;
- uword buffer_init_function_opaque;
-} __attribute__ ((aligned (16))) vlib_buffer_free_list_t;
+#define VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ 512
typedef struct
{
- u32 (*vlib_buffer_alloc_cb) (struct vlib_main_t * vm, u32 * buffers,
- u32 n_buffers);
- u32 (*vlib_buffer_alloc_from_free_list_cb) (struct vlib_main_t * vm,
- u32 * buffers, u32 n_buffers,
- u32 free_list_index);
- void (*vlib_buffer_free_cb) (struct vlib_main_t * vm, u32 * buffers,
- u32 n_buffers);
- void (*vlib_buffer_free_no_next_cb) (struct vlib_main_t * vm, u32 * buffers,
- u32 n_buffers);
- void (*vlib_packet_template_init_cb) (struct vlib_main_t * vm, void *t,
- void *packet_data,
- uword n_packet_data_bytes,
- uword
- min_n_buffers_each_physmem_alloc,
- u8 * name);
- void (*vlib_buffer_delete_free_list_cb) (struct vlib_main_t * vm,
- u32 free_list_index);
-} vlib_buffer_callbacks_t;
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 cached_buffers[VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ];
+ u32 n_cached;
+} vlib_buffer_pool_thread_t;
typedef struct
{
- /* Buffer free callback, for subversive activities */
- u32 (*buffer_free_callback) (struct vlib_main_t * vm,
- u32 * buffers,
- u32 n_buffers, u32 follow_buffer_next);
- /* Pool of buffer free lists.
- Multiple free lists exist for packet generator which uses
- separate free lists for each packet stream --- so as to avoid
- initializing static data for each packet generated. */
- vlib_buffer_free_list_t *buffer_free_list_pool;
-#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX (0)
-#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES VLIB_BUFFER_DATA_SIZE
-
- /* Hash table mapping buffer size (rounded to next unit of
- sizeof (vlib_buffer_t)) to free list index. */
- uword *free_list_by_size;
-
- /* Hash table mapping buffer index into number
- 0 => allocated but free, 1 => allocated and not-free.
- If buffer index is not in hash table then this buffer
- has never been allocated. */
- uword *buffer_known_hash;
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ uword start;
+ uword size;
+ uword log2_page_size;
+ u8 index;
+ u32 numa_node;
+ u32 physmem_map_index;
+ u32 data_size;
+ u32 n_buffers;
+ u32 n_avail;
+ u32 *buffers;
+ u8 *name;
+ clib_spinlock_t lock;
- /* List of free-lists needing Blue Light Special announcements */
- vlib_buffer_free_list_t **announce_list;
+ /* per-thread data */
+ vlib_buffer_pool_thread_t *threads;
- /* Callbacks */
- vlib_buffer_callbacks_t cb;
- int extern_buffer_mgmt;
-} vlib_buffer_main_t;
+ /* buffer metadata template */
+ vlib_buffer_t buffer_template;
+} vlib_buffer_pool_t;
-void vlib_buffer_cb_init (struct vlib_main_t *vm);
-int vlib_buffer_cb_register (struct vlib_main_t *vm,
- vlib_buffer_callbacks_t * cb);
+#define VLIB_BUFFER_MAX_NUMA_NODES 32
typedef struct
{
- struct vlib_main_t *vlib_main;
-
- u32 first_buffer, last_buffer;
-
- union
- {
- struct
- {
- /* Total accumulated bytes in chain starting with first_buffer. */
- u32 n_total_data_bytes;
-
- /* Max number of bytes to accumulate in chain starting with first_buffer.
- As this limit is reached buffers are enqueued to next node. */
- u32 max_n_data_bytes_per_chain;
-
- /* Next node to enqueue buffers to relative to current process node. */
- u32 next_index;
-
- /* Free list to use to allocate new buffers. */
- u32 free_list_index;
- } tx;
-
- struct
- {
- /* CLIB fifo of buffer indices waiting to be unserialized. */
- u32 *buffer_fifo;
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ /* Virtual memory address and size of buffer memory, used for calculating
+ buffer index */
+ uword buffer_mem_start;
+ uword buffer_mem_size;
+ vlib_buffer_pool_t *buffer_pools;
- /* Event type used to signal that RX buffers have been added to fifo. */
- uword ready_one_time_event;
- } rx;
- };
-} vlib_serialize_buffer_main_t;
+ /* Hash table mapping buffer index into number
+ 0 => allocated but free, 1 => allocated and not-free.
+ If buffer index is not in hash table then this buffer
+ has never been allocated. */
+ uword *buffer_known_hash;
+ clib_spinlock_t buffer_known_hash_lockp;
+ u8 default_buffer_pool_index_for_numa[VLIB_BUFFER_MAX_NUMA_NODES];
-void serialize_open_vlib_buffer (serialize_main_t * m, struct vlib_main_t *vm,
- vlib_serialize_buffer_main_t * sm);
-void unserialize_open_vlib_buffer (serialize_main_t * m,
- struct vlib_main_t *vm,
- vlib_serialize_buffer_main_t * sm);
+ /* config */
+ u32 buffers_per_numa;
+ u16 ext_hdr_size;
+ u32 default_data_size;
-u32 serialize_close_vlib_buffer (serialize_main_t * m);
-void unserialize_close_vlib_buffer (serialize_main_t * m);
-void *vlib_set_buffer_free_callback (struct vlib_main_t *vm, void *fp);
+ /* logging */
+ vlib_log_class_t log_default;
+} vlib_buffer_main_t;
-always_inline u32
-serialize_vlib_buffer_n_bytes (serialize_main_t * m)
-{
- serialize_stream_t *s = &m->stream;
- vlib_serialize_buffer_main_t *sm
- = uword_to_pointer (m->stream.data_function_opaque,
- vlib_serialize_buffer_main_t *);
- return sm->tx.n_total_data_bytes + s->current_buffer_index +
- vec_len (s->overflow_buffer);
-}
+clib_error_t *vlib_buffer_main_init (struct vlib_main_t *vm);
/*
*/
#define VLIB_BUFFER_TRACE_TRAJECTORY 0
#if VLIB_BUFFER_TRACE_TRAJECTORY > 0
-#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b) (b)->pre_data[0]=0
+extern void (*vlib_buffer_trace_trajectory_cb) (vlib_buffer_t * b, u32 index);
+extern void (*vlib_buffer_trace_trajectory_init_cb) (vlib_buffer_t * b);
+extern void vlib_buffer_trace_trajectory_init (vlib_buffer_t * b);
+#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b) \
+ vlib_buffer_trace_trajectory_init (b);
#else
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
#endif /* VLIB_BUFFER_TRACE_TRAJECTORY */
+extern u16 __vlib_buffer_external_hdr_size;
+/* Define a __clib_constructor function that records the external buffer
+ * header size: it reports through clib_error when a size is already set,
+ * and stores 'x' rounded with CLIB_CACHE_LINE_ROUND in
+ * __vlib_buffer_external_hdr_size. */
+#define VLIB_BUFFER_SET_EXT_HDR_SIZE(x)                          \
+static void __clib_constructor                                   \
+vnet_buffer_set_ext_hdr_size (void)                              \
+{                                                                \
+  if (__vlib_buffer_external_hdr_size)                           \
+    clib_error ("buffer external header space already set");     \
+  __vlib_buffer_external_hdr_size = CLIB_CACHE_LINE_ROUND (x);   \
+}
+
#endif /* included_vlib_buffer_h */
/*