#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
+#include <vnet/vnet.h>
#include <vnet/ethernet/ethernet.h>
-#include <vnet/gso/gso.h>
+#include <vnet/gso/gro_func.h>
+#include <vnet/gso/hdr_offset_parser.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/tcp/tcp_packet.h>
_(NO_FREE_SLOTS, "no free tx slots") \
_(TRUNC_PACKET, "packet > buffer size -- truncated in tx ring") \
_(PENDING_MSGS, "pending msgs in tx ring") \
-_(NO_TX_QUEUES, "no tx queues") \
-_(OUT_OF_ORDER, "out-of-order buffers in used ring")
+_(INDIRECT_DESC_ALLOC_FAILED, "indirect descriptor allocation failed -- packet drop") \
+_(OUT_OF_ORDER, "out-of-order buffers in used ring") \
+_(GSO_PACKET_DROP, "gso disabled on itf -- gso packet drop") \
+_(CSUM_OFFLOAD_PACKET_DROP, "checksum offload disabled on itf -- csum offload packet drop")
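+
+/*
+ * The x-macro list above is expanded twice by the stock VPP error
+ * machinery: once into the virtio_tx_func_error_t enum below (as
+ * VIRTIO_TX_ERROR_<NAME>, e.g. VIRTIO_TX_ERROR_GSO_PACKET_DROP) and once
+ * into the per-node error string table reported by "show errors".
+ */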
typedef enum
{
return s;
}
+typedef struct
+{
+ u32 buffer_index;		/* buffer index as seen on the tx frame */
+ u32 sw_if_index;		/* tx sw interface */
+ vlib_buffer_t buffer;		/* copy of the buffer metadata */
+ generic_header_offset_t gho;	/* parsed l2/l3/l4 header offsets */
+} virtio_tx_trace_t;
+
static u8 *
-format_virtio_tx_trace (u8 * s, va_list * args)
+format_virtio_tx_trace (u8 * s, va_list * va)
{
- s = format (s, "Unimplemented...");
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ virtio_tx_trace_t *t = va_arg (*va, virtio_tx_trace_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "%U ", format_generic_header_offset, &t->gho);
+ s = format (s, "%Ubuffer 0x%x: %U",
+ format_white_space, indent,
+ t->buffer_index, format_vnet_buffer, &t->buffer);
+
+ s = format (s, "\n%U%U", format_white_space, indent,
+ format_ethernet_header_with_length, t->buffer.pre_data,
+ sizeof (t->buffer.pre_data));
return s;
}
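+
+/* Count n drops against the given node's error counter and free the
+ * whole buffer vector in one pass. */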
+static_always_inline void
+virtio_interface_drop_inline (vlib_main_t * vm, uword node_index,
+ u32 * buffers, u16 n,
+ virtio_tx_func_error_t error)
+{
+ vlib_error_count (vm, node_index, error, n);
+ vlib_buffer_free (vm, buffers, n);
+}
+
static_always_inline void
virtio_memset_ring_u32 (u32 * ring, u32 start, u32 ring_size, u32 n_buffers)
{
n_left--;
last++;
n_buffers++;
+ struct vring_desc *d = &vring->desc[e->id];
+ u16 next;
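+ /* a chained packet posts a single used-ring entry but occupies one
+ * descriptor per buffer: walk the VRING_DESC_F_NEXT chain so every
+ * buffer of the chain is counted and reclaimed */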
+ while (d->flags & VRING_DESC_F_NEXT)
+ {
+ n_buffers++;
+ next = d->next;
+ d = &vring->desc[next];
+ }
if (n_left == 0)
break;
e = &vring->used->ring[last & mask];
}
static_always_inline void
-set_checksum_offsets (vlib_main_t * vm, virtio_if_t * vif, vlib_buffer_t * b,
- struct virtio_net_hdr_v1 *hdr)
+set_checksum_offsets (vlib_buffer_t * b, struct virtio_net_hdr_v1 *hdr,
+ int is_l2)
{
if (b->flags & VNET_BUFFER_F_IS_IP4)
{
ip4_header_t *ip4;
- gso_header_offset_t gho = vnet_gso_header_offset_parser (b, 0);
+ generic_header_offset_t gho = { 0 };
+ vnet_generic_header_offset_parser (b, &gho, is_l2, 1 /* ip4 */ ,
+ 0 /* ip6 */ );
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
hdr->csum_start = gho.l4_hdr_offset; // e.g. 0x22 = 14 (l2) + 20 (ip4)
if (b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)
- hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ {
+ hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ }
else if (b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)
- hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
+ {
+ hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
+ }
/*
* virtio devices do not support IP4 checksum offload. So driver takes care
}
else if (b->flags & VNET_BUFFER_F_IS_IP6)
{
- gso_header_offset_t gho = vnet_gso_header_offset_parser (b, 1);
+ generic_header_offset_t gho = { 0 };
+ vnet_generic_header_offset_parser (b, &gho, is_l2, 0 /* ip4 */ ,
+ 1 /* ip6 */ );
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
hdr->csum_start = gho.l4_hdr_offset; // e.g. 0x36 = 14 (l2) + 40 (ip6)
if (b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)
- hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ {
+ hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ }
else if (b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)
- hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
+ {
+ hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
+ }
+ }
+}
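+
+/*
+ * Worked example (untagged Ethernet + IPv4 + TCP on a TAP): csum_start
+ * becomes 34 (0x22) and csum_offset 16, i.e. the backend checksums bytes
+ * [34, end) and stores the result at byte 50. Per the virtio spec the
+ * checksum field is expected to already hold the TCP/UDP pseudo-header
+ * checksum.
+ */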
+
+static_always_inline void
+set_gso_offsets (vlib_buffer_t * b, struct virtio_net_hdr_v1 *hdr, int is_l2)
+{
+ if (b->flags & VNET_BUFFER_F_IS_IP4)
+ {
+ ip4_header_t *ip4;
+ generic_header_offset_t gho = { 0 };
+ vnet_generic_header_offset_parser (b, &gho, is_l2, 1 /* ip4 */ ,
+ 0 /* ip6 */ );
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+ hdr->gso_size = vnet_buffer2 (b)->gso_size;
+ hdr->hdr_len = gho.hdr_sz;
+ hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ hdr->csum_start = gho.l4_hdr_offset; // e.g. 0x22 = 14 (l2) + 20 (ip4)
+ hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ ip4 =
+ (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
+ /*
+ * virtio devices do not support IP4 checksum offload, so the driver
+ * takes care of it at tx time.
+ */
+ if (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM)
+ ip4->checksum = ip4_header_checksum (ip4);
+ }
+ else if (b->flags & VNET_BUFFER_F_IS_IP6)
+ {
+ generic_header_offset_t gho = { 0 };
+ vnet_generic_header_offset_parser (b, &gho, is_l2, 0 /* ip4 */ ,
+ 1 /* ip6 */ );
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+ hdr->gso_size = vnet_buffer2 (b)->gso_size;
+ hdr->hdr_len = gho.hdr_sz;
+ hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ hdr->csum_start = gho.l4_hdr_offset; // e.g. 0x36 = 14 (l2) + 40 (ip6)
+ hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
}
}
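+
+/*
+ * For GSO the backend segments the payload into gso_size (MSS) sized
+ * chunks and replicates the first hdr_len bytes of headers in front of
+ * each segment, so hdr_len must cover the full l2+l3+l4 header stack.
+ */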
static_always_inline u16
add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif,
- virtio_vring_t * vring, u32 bi, u16 avail, u16 next,
- u16 mask, int do_gso, int csum_offload)
+ virtio_if_type_t type, virtio_vring_t * vring,
+ u32 bi, u16 free_desc_count,
+ u16 avail, u16 next, u16 mask, int do_gso,
+ int csum_offload, uword node_index)
{
u16 n_added = 0;
int hdr_sz = vif->virtio_net_hdr_sz;
d = &vring->desc[next];
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
struct virtio_net_hdr_v1 *hdr = vlib_buffer_get_current (b) - hdr_sz;
+ int is_l2 = (type & (VIRTIO_IF_TYPE_TAP | VIRTIO_IF_TYPE_PCI));
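+ /* TAP and PCI interfaces carry Ethernet frames; TUN is an L3 interface,
+ * so its header parsing starts directly at the IP header */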
clib_memset (hdr, 0, hdr_sz);
- if (do_gso && (b->flags & VNET_BUFFER_F_GSO))
+ if (b->flags & VNET_BUFFER_F_GSO)
{
- if (b->flags & VNET_BUFFER_F_IS_IP4)
+ if (do_gso)
+ set_gso_offsets (b, hdr, is_l2);
+ else
{
- ip4_header_t *ip4;
- gso_header_offset_t gho = vnet_gso_header_offset_parser (b, 0);
- hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->hdr_len = gho.l4_hdr_offset + gho.l4_hdr_sz;
- hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset; // 0x22;
- hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
- ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b) +
- gho.l3_hdr_offset);
- /*
- * virtio devices do not support IP4 checksum offload. So driver takes care
- * of it while doing tx.
- */
- if (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM)
- ip4->checksum = ip4_header_checksum (ip4);
- }
- else if (b->flags & VNET_BUFFER_F_IS_IP6)
- {
- gso_header_offset_t gho = vnet_gso_header_offset_parser (b, 1);
- hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
- hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->hdr_len = gho.l4_hdr_offset + gho.l4_hdr_sz;
- hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset; // 0x36;
- hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ virtio_interface_drop_inline (vm, node_index, &bi, 1,
+ VIRTIO_TX_ERROR_GSO_PACKET_DROP);
+ return n_added;
}
}
- else if (csum_offload
- && (b->flags & (VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
- VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)))
+ else if (b->flags & (VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
+ VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))
{
- set_checksum_offsets (vm, vif, b, hdr);
+ if (csum_offload)
+ set_checksum_offsets (b, hdr, is_l2);
+ else
+ {
+ virtio_interface_drop_inline (vm, node_index, &bi, 1,
+ VIRTIO_TX_ERROR_CSUM_OFFLOAD_PACKET_DROP);
+ return n_added;
+ }
}
if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0))
{
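+ /* single-buffer packet: the virtio-net header has been written into the
+ * buffer headroom, so header and payload share one descriptor */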
d->addr =
- ((vif->type == VIRTIO_IF_TYPE_PCI) ? vlib_buffer_get_current_pa (vm,
- b) :
+ ((type == VIRTIO_IF_TYPE_PCI) ? vlib_buffer_get_current_pa (vm,
+ b) :
pointer_to_uword (vlib_buffer_get_current (b))) - hdr_sz;
d->len = b->current_length + hdr_sz;
d->flags = 0;
}
- else
+ else if (vif->features & VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC))
{
/*
* We use a single vlib_buffer_t to hold the indirect descriptor(s)
* for the whole chain.
*/
u32 indirect_buffer = 0;
if (PREDICT_FALSE (vlib_buffer_alloc (vm, &indirect_buffer, 1) == 0))
- return n_added;
+ {
+ virtio_interface_drop_inline (vm, node_index, &bi, 1,
+ VIRTIO_TX_ERROR_INDIRECT_DESC_ALLOC_FAILED);
+ return n_added;
+ }
vlib_buffer_t *indirect_desc = vlib_get_buffer (vm, indirect_buffer);
indirect_desc->current_data = 0;
struct vring_desc *id =
(struct vring_desc *) vlib_buffer_get_current (indirect_desc);
u32 count = 1;
- if (vif->type == VIRTIO_IF_TYPE_PCI)
+ if (type == VIRTIO_IF_TYPE_PCI)
{
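+ /* PCI devices DMA from physical addresses; the TAP/TUN backend
+ * (vhost-net) resolves this process's virtual addresses instead */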
d->addr = vlib_physmem_get_pa (vm, id);
id->addr = vlib_buffer_get_current_pa (vm, b) - hdr_sz;
id->len = b->current_length;
}
}
- else /* VIRTIO_IF_TYPE_TAP */
+ else /* VIRTIO_IF_TYPE_[TAP | TUN] */
{
d->addr = pointer_to_uword (id);
/* first buffer in chain */
d->len = count * sizeof (struct vring_desc);
d->flags = VRING_DESC_F_INDIRECT;
}
+ else if (type == VIRTIO_IF_TYPE_PCI)
+ {
+ u16 count = next;
+ vlib_buffer_t *b_temp = b;
+ u16 n_buffers_in_chain = 1;
+
+ /*
+ * Check the chain length against the number of free descriptors.
+ * If the chain needs more descriptors than are currently free,
+ * return its length so the caller can retry once used descriptors
+ * have been reclaimed.
+ */
+ while (b_temp->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ n_buffers_in_chain++;
+ b_temp = vlib_get_buffer (vm, b_temp->next_buffer);
+ }
+
+ if (n_buffers_in_chain > free_desc_count)
+ return n_buffers_in_chain;
+
+ d->addr = vlib_buffer_get_current_pa (vm, b) - hdr_sz;
+ d->len = b->current_length + hdr_sz;
+
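+ /* one descriptor per buffer in the chain, linked via VRING_DESC_F_NEXT;
+ * only the head descriptor index is published to the avail ring below */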
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ d->flags = VRING_DESC_F_NEXT;
+ vring->buffers[count] = bi;
+ b->flags &=
+ ~(VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID);
+ bi = b->next_buffer;
+ b->next_buffer = 0;
+ n_added++;
+ count = (count + 1) & mask;
+ d->next = count;
+ d = &vring->desc[count];
+ b = vlib_get_buffer (vm, bi);
+ d->addr = vlib_buffer_get_current_pa (vm, b);
+ d->len = b->current_length;
+ }
+ d->flags = 0;
+ vring->buffers[count] = bi;
+ vring->avail->ring[avail & mask] = next;
+ n_added++;
+ return n_added;
+ }
+ else
+ {
+ ASSERT (0);
+ }
vring->buffers[next] = bi;
vring->avail->ring[avail & mask] = next;
n_added++;
}
static_always_inline uword
-virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, virtio_if_t * vif,
- int do_gso, int csum_offload)
+virtio_interface_tx_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, virtio_if_t * vif,
+ virtio_if_type_t type, int do_gso,
+ int csum_offload, int do_gro)
{
u16 n_left = frame->n_vectors;
virtio_vring_t *vring;
u16 mask = sz - 1;
u16 retry_count = 2;
u32 *buffers = vlib_frame_vector_args (frame);
+ u32 to[GRO_TO_VECTOR_SIZE (n_left)];
clib_spinlock_lock_if_init (&vring->lockp);
(vring->last_kick_avail_idx != vring->avail->idx))
virtio_kick (vm, vring, vif);
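+
+ /* packet coalescing (GRO): merge back-to-back TCP segments of the same
+ * flow into larger GSO packets before queueing, so a merged train costs
+ * one descriptor chain instead of one slot per segment */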
+ if (do_gro)
+ {
+ n_left = vnet_gro_inline (vm, vring->flow_table, buffers, n_left, to);
+ buffers = to;
+ }
+
retry:
/* free consumed buffers */
virtio_free_used_device_desc (vm, vring, node->node_index);
while (n_left && free_desc_count)
{
u16 n_added = 0;
+ virtio_tx_trace_t *t;
+
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, buffers[0]);
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t = vlib_add_trace (vm, node, b0, sizeof (t[0]));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ t->buffer_index = buffers[0];
+ if (type == VIRTIO_IF_TYPE_TUN)
+ {
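+ /* TUN frames carry no Ethernet header: classify the packet from the
+ * IP version nibble */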
+ int is_ip4 = 0, is_ip6 = 0;
+
+ switch (((u8 *) vlib_buffer_get_current (b0))[0] & 0xf0)
+ {
+ case 0x40:
+ is_ip4 = 1;
+ break;
+ case 0x60:
+ is_ip6 = 1;
+ break;
+ default:
+ break;
+ }
+ vnet_generic_header_offset_parser (b0, &t->gho, 0, is_ip4,
+ is_ip6);
+ }
+ else
+ vnet_generic_header_offset_parser (b0, &t->gho, 1,
+ b0->flags &
+ VNET_BUFFER_F_IS_IP4,
+ b0->flags &
+ VNET_BUFFER_F_IS_IP6);
+
+ clib_memcpy_fast (&t->buffer, b0,
+ sizeof (*b0) - sizeof (b0->pre_data));
+ clib_memcpy_fast (t->buffer.pre_data, vlib_buffer_get_current (b0),
+ sizeof (t->buffer.pre_data));
+ }
n_added =
- add_buffer_to_slot (vm, vif, vring, buffers[0], avail, next, mask,
- do_gso, csum_offload);
- if (!n_added)
+ add_buffer_to_slot (vm, vif, type, vring, buffers[0], free_desc_count,
+ avail, next, mask, do_gso, csum_offload,
+ node->node_index);
+
+ if (PREDICT_FALSE (n_added == 0))
+ {
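+ /* n_added == 0 means add_buffer_to_slot dropped the packet (gso/csum
+ * disabled on the interface or indirect descriptor alloc failed), so
+ * just skip to the next one */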
+ buffers++;
+ n_left--;
+ continue;
+ }
+ else if (PREDICT_FALSE (n_added > free_desc_count))
break;
- avail += n_added;
+
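+ /* one avail-ring entry per packet; a chained packet still consumes
+ * n_added descriptors */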
+ avail++;
next = (next + n_added) & mask;
used += n_added;
buffers++;
n_left--;
- free_desc_count--;
+ free_desc_count -= n_added;
}
if (n_left != frame->n_vectors)
if (retry_count--)
goto retry;
- vlib_error_count (vm, node->node_index, VIRTIO_TX_ERROR_NO_FREE_SLOTS,
- n_left);
- vlib_buffer_free (vm, buffers, n_left);
+ virtio_interface_drop_inline (vm, node->node_index,
+ buffers, n_left,
+ VIRTIO_TX_ERROR_NO_FREE_SLOTS);
}
clib_spinlock_unlock_if_init (&vring->lockp);
return frame->n_vectors - n_left;
}
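+
+/*
+ * Pick a compile-time specialized variant of the tx path: do_gso and
+ * csum_offload are passed as constant ints so the compiler can prune
+ * dead branches from the inlined fast path. GRO (packet coalescing) is
+ * only meaningful when GSO is enabled.
+ */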
+static_always_inline uword
+virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, virtio_if_t * vif,
+ virtio_if_type_t type)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
+
+ if (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO)
+ return virtio_interface_tx_gso_inline (vm, node, frame, vif, type,
+ 1 /* do_gso */ ,
+ 1 /* checksum offload */ ,
+ vif->packet_coalesce);
+ else if (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD)
+ return virtio_interface_tx_gso_inline (vm, node, frame, vif, type,
+ 0 /* no do_gso */ ,
+ 1 /* checksum offload */ ,
+ 0 /* do_gro */ );
+ else
+ return virtio_interface_tx_gso_inline (vm, node, frame, vif, type,
+ 0 /* no do_gso */ ,
+ 0 /* no checksum offload */ ,
+ 0 /* do_gro */ );
+}
+
VNET_DEVICE_CLASS_TX_FN (virtio_device_class) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- vnet_main_t *vnm = vnet_get_main ();
virtio_main_t *nm = &virtio_main;
vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
virtio_if_t *vif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
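+ /* dispatch on the interface type with a compile-time constant so the
+ * per-type branches inside the tx path fold away */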
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
- if (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO)
- return virtio_interface_tx_inline (vm, node, frame, vif, 1 /* do_gso */ ,
- 1);
- else if (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD)
+ if (vif->type == VIRTIO_IF_TYPE_TAP)
return virtio_interface_tx_inline (vm, node, frame, vif,
- 0 /* no do_gso */ , 1);
- else
+ VIRTIO_IF_TYPE_TAP);
+ else if (vif->type == VIRTIO_IF_TYPE_PCI)
return virtio_interface_tx_inline (vm, node, frame, vif,
- 0 /* no do_gso */ , 0);
+ VIRTIO_IF_TYPE_PCI);
+ else if (vif->type == VIRTIO_IF_TYPE_TUN)
+ return virtio_interface_tx_inline (vm, node, frame, vif,
+ VIRTIO_IF_TYPE_TUN);
+ else
+ ASSERT (0);
+
+ return 0;
}
static void
}
if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
- vring->avail->flags |= VIRTIO_RING_FLAG_MASK_INT;
+ {
+ /* only enable packet coalescing in poll mode */
+ gro_flow_table_set_is_enable (vring->flow_table, 1);
+ vring->avail->flags |= VIRTIO_RING_FLAG_MASK_INT;
+ }
else
- vring->avail->flags &= ~VIRTIO_RING_FLAG_MASK_INT;
+ {
+ gro_flow_table_set_is_enable (vring->flow_table, 0);
+ vring->avail->flags &= ~VIRTIO_RING_FLAG_MASK_INT;
+ }
return 0;
}
virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
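+ /* virtio has no PHY to report link: mirror the admin state into the hw
+ * link state */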
if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
- vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
+ {
+ vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
+ vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ }
else
- vif->flags &= ~VIRTIO_IF_FLAG_ADMIN_UP;
-
+ {
+ vif->flags &= ~VIRTIO_IF_FLAG_ADMIN_UP;
+ vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
+ }
return 0;
}
.subif_add_del_function = virtio_subif_add_del_function,
.rx_mode_change_function = virtio_interface_rx_mode_change,
};
+
/* *INDENT-ON* */
/*