#include <vlib/vlib.h>
-/* VLIB buffer flags for ip4/ip6 packets. Set by input interfaces for ip4/ip6
- tcp/udp packets with hardware computed checksums. */
-#define LOG2_IP_BUFFER_L4_CHECKSUM_COMPUTED LOG2_VLIB_BUFFER_FLAG_USER(1)
-#define LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT LOG2_VLIB_BUFFER_FLAG_USER(2)
-#define IP_BUFFER_L4_CHECKSUM_COMPUTED (1 << LOG2_IP_BUFFER_L4_CHECKSUM_COMPUTED)
-#define IP_BUFFER_L4_CHECKSUM_CORRECT (1 << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT)
-
-/* VLAN header flags.
- * These bits are zeroed in vlib_buffer_init_for_free_list()
- * meaning wherever the buffer comes from they have a reasonable
- * value (eg, if ip4/ip6 generates the packet.)
+/**
+ * Flags that are set in the high order bits of ((vlib_buffer*)b)->flags
+ *
+ * This is an X-macro list: each entry has the form _(bit, name, string, v).
+ * The two enums that expand this list define VNET_BUFFER_F_##name as
+ * (1 << LOG2_VLIB_BUFFER_FLAG_USER(bit)) and VNET_BUFFER_F_LOG2_##name as
+ * LOG2_VLIB_BUFFER_FLAG_USER(bit).
+ *
+ * NOTE(review): the string argument looks like a display name, and the
+ * fourth argument (v) is ignored by both enum expansions; confirm their
+ * meaning against the other users of this list.
+ */
+#define foreach_vnet_buffer_flag \
+ _( 1, L4_CHECKSUM_COMPUTED, "l4-cksum-computed", 1) \
+ _( 2, L4_CHECKSUM_CORRECT, "l4-cksum-correct", 1) \
+ _( 3, VLAN_2_DEEP, "vlan-2-deep", 1) \
+ _( 4, VLAN_1_DEEP, "vlan-1-deep", 1) \
+ _( 5, SPAN_CLONE, "span-clone", 1) \
+ _( 6, LOOP_COUNTER_VALID, "loop-counter-valid", 0) \
+ _( 7, LOCALLY_ORIGINATED, "local", 1) \
+ _( 8, IS_IP4, "ip4", 1) \
+ _( 9, IS_IP6, "ip6", 1) \
+ _(10, OFFLOAD_IP_CKSUM, "offload-ip-cksum", 1) \
+ _(11, OFFLOAD_TCP_CKSUM, "offload-tcp-cksum", 1) \
+ _(12, OFFLOAD_UDP_CKSUM, "offload-udp-cksum", 1) \
+ _(13, IS_NATED, "natted", 1) \
+ _(14, L2_HDR_OFFSET_VALID, "l2_hdr_offset_valid", 0) \
+ _(15, L3_HDR_OFFSET_VALID, "l3_hdr_offset_valid", 0) \
+ _(16, L4_HDR_OFFSET_VALID, "l4_hdr_offset_valid", 0) \
+ _(17, FLOW_REPORT, "flow-report", 1) \
+ _(18, IS_DVR, "dvr", 1) \
+ _(19, QOS_DATA_VALID, "qos-data-valid", 0) \
+ _(20, GSO, "gso", 0) \
+ _(21, AVAIL1, "avail1", 1) \
+ _(22, AVAIL2, "avail2", 1) \
+ _(23, AVAIL3, "avail3", 1) \
+ _(24, AVAIL4, "avail4", 1) \
+ _(25, AVAIL5, "avail5", 1) \
+ _(26, AVAIL6, "avail6", 1) \
+ _(27, AVAIL7, "avail7", 1)
+
+/*
+ * Please allocate the FIRST available bit, redefine
+ * AVAIL 1 ... AVAILn-1, and remove AVAILn. Please maintain the
+ * VNET_BUFFER_FLAGS_ALL_AVAIL definition.
*/
-#define LOG2_ETH_BUFFER_VLAN_2_DEEP LOG2_VLIB_BUFFER_FLAG_USER(3)
-#define LOG2_ETH_BUFFER_VLAN_1_DEEP LOG2_VLIB_BUFFER_FLAG_USER(4)
-#define ETH_BUFFER_VLAN_2_DEEP (1 << LOG2_ETH_BUFFER_VLAN_2_DEEP)
-#define ETH_BUFFER_VLAN_1_DEEP (1 << LOG2_ETH_BUFFER_VLAN_1_DEEP)
-#define ETH_BUFFER_VLAN_BITS (ETH_BUFFER_VLAN_1_DEEP | \
- ETH_BUFFER_VLAN_2_DEEP)
-#define LOG2_BUFFER_HANDOFF_NEXT_VALID LOG2_VLIB_BUFFER_FLAG_USER(6)
-#define BUFFER_HANDOFF_NEXT_VALID (1 << LOG2_BUFFER_HANDOFF_NEXT_VALID)
+#define VNET_BUFFER_FLAGS_ALL_AVAIL \
+ (VNET_BUFFER_F_AVAIL1 | VNET_BUFFER_F_AVAIL2 | VNET_BUFFER_F_AVAIL3 | \
+ VNET_BUFFER_F_AVAIL4 | VNET_BUFFER_F_AVAIL5 | VNET_BUFFER_F_AVAIL6 | \
+ VNET_BUFFER_F_AVAIL7)
+
+#define VNET_BUFFER_FLAGS_VLAN_BITS \
+ (VNET_BUFFER_F_VLAN_1_DEEP | VNET_BUFFER_F_VLAN_2_DEEP)
+
+enum
+{
+#define _(bit, name, s, v) VNET_BUFFER_F_##name = (1 << LOG2_VLIB_BUFFER_FLAG_USER(bit)),
+ foreach_vnet_buffer_flag
+#undef _
+};
-#define LOG2_VNET_BUFFER_LOCALLY_ORIGINATED LOG2_VLIB_BUFFER_FLAG_USER(7)
-#define VNET_BUFFER_LOCALLY_ORIGINATED (1 << LOG2_VNET_BUFFER_LOCALLY_ORIGINATED)
+enum
+{
+#define _(bit, name, s, v) VNET_BUFFER_F_LOG2_##name = LOG2_VLIB_BUFFER_FLAG_USER(bit),
+ foreach_vnet_buffer_flag
+#undef _
+};
-#define LOG2_VNET_BUFFER_SPAN_CLONE LOG2_VLIB_BUFFER_FLAG_USER(8)
-#define VNET_BUFFER_SPAN_CLONE (1 << LOG2_VNET_BUFFER_SPAN_CLONE)
+/* Make sure that the vnet and vlib bits are disjoint.
+ * Only the not-yet-allocated vnet bits (VNET_BUFFER_FLAGS_ALL_AVAIL) are
+ * tested against VLIB_BUFFER_FLAGS_ALL here.
+ * NOTE(review): presumably sufficient because the AVAIL bits are the vnet
+ * bits closest to the vlib ones - confirm against the definition of
+ * LOG2_VLIB_BUFFER_FLAG_USER. */
+STATIC_ASSERT (((VNET_BUFFER_FLAGS_ALL_AVAIL & VLIB_BUFFER_FLAGS_ALL) == 0),
+ "VLIB / VNET buffer flags overlap");
#define foreach_buffer_opaque_union_subtype \
-_(ethernet) \
_(ip) \
-_(swt) \
_(l2) \
_(l2t) \
-_(gre) \
_(l2_classify) \
-_(handoff) \
_(policer) \
_(ipsec) \
_(map) \
_(map_t) \
_(ip_frag) \
+_(mpls) \
_(tcp)
/*
typedef struct
{
u32 sw_if_index[VLIB_N_RX_TX];
+ i16 l2_hdr_offset;
+ i16 l3_hdr_offset;
+ i16 l4_hdr_offset;
+ u8 feature_arc_index;
+ u8 dont_waste_me;
union
{
- /* Ethernet. */
- struct
- {
- /* Saved value of current header by ethernet-input. */
- i32 start_of_ethernet_header;
- } ethernet;
-
/* IP4/6 buffer opaque. */
struct
{
protocol and ports. */
u32 flow_hash;
- /* next protocol */
- u32 save_protocol;
+ union
+ {
+ /* next protocol */
+ u32 save_protocol;
+
+ /* Hint for transport protocols */
+ u32 fib_index;
+ };
/* Rewrite length */
- u32 save_rewrite_length;
+ u8 save_rewrite_length;
/* MFIB RPF ID */
u32 rpf_id;
u32 data;
} icmp;
- /* IP header offset from vlib_buffer.data - saved by ip*_local nodes */
- i32 start_of_ip_header;
+ /* reassembly */
+ union
+ {
+ /* group input/output to simplify the code, this way
+ * we can handoff while keeping input variables intact */
+ struct
+ {
+ /* input variables */
+ struct
+ {
+ u32 next_index; /* index of next node - used by custom apps */
+ u32 error_next_index; /* index of next node if error - used by custom apps */
+ };
+ /* handoff variables */
+ struct
+ {
+ u16 owner_thread_index;
+ };
+ };
+ /* output variables */
+ struct
+ {
+ union
+ {
+ /* shallow virtual reassembly output variables */
+ struct
+ {
+ u16 l4_src_port; /* tcp/udp/icmp src port */
+ u16 l4_dst_port; /* tcp/udp/icmp dst port */
+ u32 tcp_ack_number;
+ u8 save_rewrite_length;
+ u8 ip_proto; /* protocol in ip header */
+ u8 icmp_type_or_tcp_flags;
+ u8 is_non_first_fragment;
+ u32 tcp_seq_number;
+ };
+ /* full reassembly output variables */
+ struct
+ {
+ u16 estimated_mtu; /* estimated MTU calculated during reassembly */
+ };
+ };
+ };
+ /* internal variables used during reassembly */
+ struct
+ {
+ u16 fragment_first;
+ u16 fragment_last;
+ u16 range_first;
+ u16 range_last;
+ u32 next_range_bi;
+ u16 ip6_frag_hdr_offset;
+ };
+ } reass;
};
-
} ip;
/*
*/
struct
{
+ /* do not overlay w/ ip.adj_index[0,1] nor flow hash */
+ u32 pad[VLIB_N_RX_TX + 1];
u8 ttl;
u8 exp;
u8 first;
+ u8 pyld_proto:3; /* dpo_proto_t */
+ u8 rsvd:5;
+ /* Rewrite length */
+ u8 save_rewrite_length;
+ /* Save the MPLS header length, including the entire label stack */
+ u8 mpls_hdr_length;
+ /*
+ * BIER - the number of bytes in the header.
+ * the len field in the header is not authoritative. It's the
+ * value in the table that counts.
+ */
+ struct
+ {
+ u8 n_bytes;
+ } bier;
} mpls;
- /* ip4-in-ip6 softwire termination, only valid there */
- struct
- {
- u8 swt_disable;
- u32 mapping_index;
- } swt;
-
/* l2 bridging path, only valid there */
struct opaque_l2
{
u32 feature_bitmap;
u16 bd_index; /* bridge-domain index */
+ u16 l2fib_sn; /* l2fib bd/int seq_num */
u8 l2_len; /* ethernet header length */
u8 shg; /* split-horizon group */
- u16 l2fib_sn; /* l2fib bd/int seq_num */
+ u8 bd_age; /* aging enabled */
} l2;
/* l2tpv3 softwire encap, only valid there */
u32 session_index;
} l2t;
- struct
- {
- u32 src, dst;
- } gre;
-
/* L2 classify */
struct
{
u64 hash;
} l2_classify;
- /* IO - worker thread handoff */
- struct
- {
- u32 next_index;
- } handoff;
-
/* vnet policer */
struct
{
/* interface output features */
struct
{
- u32 flags;
u32 sad_index;
+ u32 protect_index;
} ipsec;
/* MAP */
/* IP Fragmentation */
struct
{
- u16 header_offset;
+ u32 pad[2]; /* do not overlay w/ ip.adj_index[0,1] */
u16 mtu;
u8 next_index;
u8 flags; //See ip_frag.h
u16 overlay_afi;
} lisp;
- /* Driver rx feature */
- struct
- {
- u32 saved_next_index; /**< saved by drivers for short-cut */
- u16 buffer_advance;
- } device_input_feat;
-
/* TCP */
struct
{
u32 connection_index;
- u32 seq_number;
+ union
+ {
+ u32 seq_number;
+ u32 next_node_opaque;
+ };
u32 seq_end;
u32 ack_number;
u16 hdr_offset; /**< offset relative to ip hdr */
u8 flags;
} tcp;
+ /* SNAT */
+ struct
+ {
+ u32 flags;
+ } snat;
+
u32 unused[6];
};
} vnet_buffer_opaque_t;
+#define VNET_REWRITE_TOTAL_BYTES (VLIB_BUFFER_PRE_DATA_SIZE)
+
+STATIC_ASSERT (STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.save_rewrite_length)
+ == STRUCT_SIZE_OF (vnet_buffer_opaque_t,
+ ip.reass.save_rewrite_length)
+ && STRUCT_SIZE_OF (vnet_buffer_opaque_t,
+ ip.reass.save_rewrite_length) ==
+ STRUCT_SIZE_OF (vnet_buffer_opaque_t, mpls.save_rewrite_length)
+ && STRUCT_SIZE_OF (vnet_buffer_opaque_t,
+ mpls.save_rewrite_length) == 1
+ && VNET_REWRITE_TOTAL_BYTES < UINT8_MAX,
+ "save_rewrite_length member must be able to hold the max value of rewrite length");
+
+STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.save_rewrite_length)
+ == STRUCT_OFFSET_OF (vnet_buffer_opaque_t,
+ ip.reass.save_rewrite_length)
+ && STRUCT_OFFSET_OF (vnet_buffer_opaque_t,
+ mpls.save_rewrite_length) ==
+ STRUCT_OFFSET_OF (vnet_buffer_opaque_t,
+ ip.reass.save_rewrite_length),
+ "save_rewrite_length must be aligned so that reass doesn't overwrite it");
+
/*
- * The opaque field of the vlib_buffer_t is intepreted as a
+ * The opaque field of the vlib_buffer_t is interpreted as a
* vnet_buffer_opaque_t. Hence it should be big enough to accommodate one.
*/
STATIC_ASSERT (sizeof (vnet_buffer_opaque_t) <=
/* Full cache line (64 bytes) of additional space */
typedef struct
{
+ /**
+ * QoS marking data that needs to persist from the recording nodes
+ * (nominally in the ingress path) to the marking node (in the
+ * egress path)
+ */
+ struct
+ {
+ u8 bits;
+ u8 source;
+ } qos;
+
+ u8 loop_counter;
+ u8 __unused[1];
+
+ /* Group Based Policy */
+ struct
+ {
+ u8 __unused;
+ u8 flags;
+ u16 sclass;
+ } gbp;
+
+ /**
+ * The L4 payload size, set on input by GSO-enabled interfaces when a
+ * GSO packet is received (a chain of buffers whose first buffer has the
+ * GSO bit set). It needs to persist all the way to interface-output:
+ * if the egress interface is not GSO-enabled we need to perform the
+ * segmentation, and use this value to cut the payload appropriately.
+ */
+ u16 gso_size;
+ /* size of the L4 protocol header */
+ u16 gso_l4_hdr_sz;
+
+ /* The union below has a u64 alignment, so this space is unused */
+ u32 __unused2[1];
+
union
{
+ struct
+ {
+#if VLIB_BUFFER_TRACE_TRAJECTORY > 0
+ /* buffer trajectory tracing */
+ u16 *trajectory_trace;
+#endif
+ };
+ struct
+ {
+ u64 pad[1];
+ u64 pg_replay_timestamp;
+ };
+ u32 unused[8];
};
} vnet_buffer_opaque2_t;
+#define vnet_buffer2(b) ((vnet_buffer_opaque2_t *) (b)->opaque2)
+
+/*
+ * The opaque2 field of the vlib_buffer_t is interpreted as a
+ * vnet_buffer_opaque2_t. Hence it should be big enough to accommodate one.
+ */
+STATIC_ASSERT (sizeof (vnet_buffer_opaque2_t) <=
+ STRUCT_SIZE_OF (vlib_buffer_t, opaque2),
+ "VNET buffer opaque2 meta-data too large for vlib_buffer");
+
+#define gso_mtu_sz(b) (vnet_buffer2(b)->gso_size + \
+ vnet_buffer2(b)->gso_l4_hdr_sz + \
+ vnet_buffer(b)->l4_hdr_offset - \
+ vnet_buffer (b)->l3_hdr_offset)
+
+format_function_t format_vnet_buffer;
#endif /* included_vnet_buffer_h */