X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Favf%2Foutput.c;h=cd5c97bbb832ff3784218ae330463b6c63b073df;hb=6e9b0030d72c58d14cebcb0290fedcc25556749d;hp=68420a7a1d49a254044c35762396bbc8bb2ea8a3;hpb=7f1f7e7865f07f98b5ab6d4ab483cd9a51e4ce4e;p=vpp.git diff --git a/src/plugins/avf/output.c b/src/plugins/avf/output.c index 68420a7a1d4..cd5c97bbb83 100644 --- a/src/plugins/avf/output.c +++ b/src/plugins/avf/output.c @@ -18,162 +18,464 @@ #include #include #include +#include + #include +#include +#include +#include +#include + #include #include -#define AVF_TXQ_DESC_CMD(x) (1 << (x + 4)) -#define AVF_TXQ_DESC_CMD_EOP AVF_TXQ_DESC_CMD(0) -#define AVF_TXQ_DESC_CMD_RS AVF_TXQ_DESC_CMD(1) -#define AVF_TXQ_DESC_CMD_RSV AVF_TXQ_DESC_CMD(2) - static_always_inline u8 avf_tx_desc_get_dtyp (avf_tx_desc_t * d) { return d->qword[1] & 0x0f; } -uword -CLIB_MULTIARCH_FN (avf_interface_tx) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +struct avf_ip4_psh +{ + u32 src; + u32 dst; + u8 zero; + u8 proto; + u16 l4len; +}; + +struct avf_ip6_psh +{ + ip6_address_t src; + ip6_address_t dst; + u32 l4len; + u32 proto; +}; + +static_always_inline u64 +avf_tx_prepare_cksum (vlib_buffer_t * b, u8 is_tso) +{ + u64 flags = 0; + if (!is_tso && !(b->flags & VNET_BUFFER_F_OFFLOAD)) + return 0; + + u32 oflags = vnet_buffer2 (b)->oflags; + u32 is_tcp = is_tso || oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM; + u32 is_udp = !is_tso && oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM; + u32 is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4; + u32 is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6; + ASSERT (!is_tcp || !is_udp); + ASSERT (is_ip4 || is_ip6); + i16 l2_hdr_offset = b->current_data; + i16 l3_hdr_offset = vnet_buffer (b)->l3_hdr_offset; + i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset; + u16 l2_len = l3_hdr_offset - l2_hdr_offset; + u16 l3_len = l4_hdr_offset - l3_hdr_offset; + ip4_header_t *ip4 = (void *) (b->data + l3_hdr_offset); + ip6_header_t *ip6 = (void *) (b->data + l3_hdr_offset); + tcp_header_t *tcp = (void *) (b->data + l4_hdr_offset); + udp_header_t *udp = (void *) (b->data + l4_hdr_offset); + u16 l4_len = + is_tcp ? tcp_header_bytes (tcp) : is_udp ? sizeof (udp_header_t) : 0; + u16 sum = 0; + + flags |= AVF_TXD_OFFSET_MACLEN (l2_len) | + AVF_TXD_OFFSET_IPLEN (l3_len) | AVF_TXD_OFFSET_L4LEN (l4_len); + flags |= is_ip4 ? AVF_TXD_CMD_IIPT_IPV4 : AVF_TXD_CMD_IIPT_IPV6; + flags |= is_tcp ? AVF_TXD_CMD_L4T_TCP : is_udp ? AVF_TXD_CMD_L4T_UDP : 0; + + if (is_ip4) + ip4->checksum = 0; + + if (is_tso) + { + if (is_ip4) + ip4->length = 0; + else + ip6->payload_length = 0; + } + + if (is_tcp || is_udp) + { + if (is_ip4) + { + struct avf_ip4_psh psh = { 0 }; + psh.src = ip4->src_address.as_u32; + psh.dst = ip4->dst_address.as_u32; + psh.proto = ip4->protocol; + psh.l4len = + is_tso ? 0 : + clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - + (l4_hdr_offset - l3_hdr_offset)); + sum = ~ip_csum (&psh, sizeof (psh)); + } + else + { + struct avf_ip6_psh psh = { 0 }; + psh.src = ip6->src_address; + psh.dst = ip6->dst_address; + psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol); + psh.l4len = is_tso ? 0 : ip6->payload_length; + sum = ~ip_csum (&psh, sizeof (psh)); + } + } + /* ip_csum does a byte swap for some reason... 
*/ + sum = clib_net_to_host_u16 (sum); + if (is_tcp) + tcp->checksum = sum; + else if (is_udp) + udp->checksum = sum; + return flags; +} + +static_always_inline u32 +avf_tx_fill_ctx_desc (vlib_main_t *vm, avf_txq_t *txq, avf_tx_desc_t *d, + vlib_buffer_t *b) +{ + vlib_buffer_t *ctx_ph; + u32 *bi = txq->ph_bufs; + +next: + ctx_ph = vlib_get_buffer (vm, bi[0]); + if (PREDICT_FALSE (ctx_ph->ref_count == 255)) + { + bi++; + goto next; + } + + /* Acquire a reference on the placeholder buffer */ + ctx_ph->ref_count++; + + u16 l234hdr_sz = vnet_buffer (b)->l4_hdr_offset - b->current_data + + vnet_buffer2 (b)->gso_l4_hdr_sz; + u16 tlen = vlib_buffer_length_in_chain (vm, b) - l234hdr_sz; + d[0].qword[0] = 0; + d[0].qword[1] = AVF_TXD_DTYP_CTX | AVF_TXD_CTX_CMD_TSO + | AVF_TXD_CTX_SEG_MSS (vnet_buffer2 (b)->gso_size) | + AVF_TXD_CTX_SEG_TLEN (tlen); + return bi[0]; +} + +static_always_inline void +avf_tx_copy_desc (avf_tx_desc_t *d, avf_tx_desc_t *s, u32 n_descs) +{ +#if defined CLIB_HAVE_VEC512 + while (n_descs >= 8) + { + u64x8u *dv = (u64x8u *) d; + u64x8u *sv = (u64x8u *) s; + + dv[0] = sv[0]; + dv[1] = sv[1]; + + /* next */ + d += 8; + s += 8; + n_descs -= 8; + } +#elif defined CLIB_HAVE_VEC256 + while (n_descs >= 4) + { + u64x4u *dv = (u64x4u *) d; + u64x4u *sv = (u64x4u *) s; + + dv[0] = sv[0]; + dv[1] = sv[1]; + + /* next */ + d += 4; + s += 4; + n_descs -= 4; + } +#elif defined CLIB_HAVE_VEC128 + while (n_descs >= 2) + { + u64x2u *dv = (u64x2u *) d; + u64x2u *sv = (u64x2u *) s; + + dv[0] = sv[0]; + dv[1] = sv[1]; + + /* next */ + d += 2; + s += 2; + n_descs -= 2; + } +#endif + while (n_descs) + { + d[0].qword[0] = s[0].qword[0]; + d[0].qword[1] = s[0].qword[1]; + d++; + s++; + n_descs--; + } +} + +static_always_inline void +avf_tx_fill_data_desc (vlib_main_t *vm, avf_tx_desc_t *d, vlib_buffer_t *b, + u64 cmd, int use_va_dma) +{ + if (use_va_dma) + d->qword[0] = vlib_buffer_get_current_va (b); + else + d->qword[0] = vlib_buffer_get_current_pa (vm, b); + d->qword[1] = (((u64) b->current_length) << 34 | cmd | AVF_TXD_CMD_RSV); +} +static_always_inline u16 +avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq, + u32 *buffers, u32 n_packets, u16 *n_enq_descs, int use_va_dma) +{ + const u64 cmd_eop = AVF_TXD_CMD_EOP; + u16 n_free_desc, n_desc_left, n_packets_left = n_packets; + vlib_buffer_t *b[4]; + avf_tx_desc_t *d = txq->tmp_descs; + u32 *tb = txq->tmp_bufs; + + n_free_desc = n_desc_left = txq->size - txq->n_enqueued - 8; + + if (n_desc_left == 0) + return 0; + + while (n_packets_left && n_desc_left) + { + u32 flags, or_flags; + + if (n_packets_left < 8 || n_desc_left < 4) + goto one_by_one; + + vlib_prefetch_buffer_with_index (vm, buffers[4], LOAD); + vlib_prefetch_buffer_with_index (vm, buffers[5], LOAD); + vlib_prefetch_buffer_with_index (vm, buffers[6], LOAD); + vlib_prefetch_buffer_with_index (vm, buffers[7], LOAD); + + b[0] = vlib_get_buffer (vm, buffers[0]); + b[1] = vlib_get_buffer (vm, buffers[1]); + b[2] = vlib_get_buffer (vm, buffers[2]); + b[3] = vlib_get_buffer (vm, buffers[3]); + + or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags; + + if (PREDICT_FALSE (or_flags & + (VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD | + VNET_BUFFER_F_GSO))) + goto one_by_one; + + vlib_buffer_copy_indices (tb, buffers, 4); + + avf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma); + avf_tx_fill_data_desc (vm, d + 1, b[1], cmd_eop, use_va_dma); + avf_tx_fill_data_desc (vm, d + 2, b[2], cmd_eop, use_va_dma); + avf_tx_fill_data_desc (vm, d + 3, b[3], 
cmd_eop, use_va_dma); + + buffers += 4; + n_packets_left -= 4; + n_desc_left -= 4; + d += 4; + tb += 4; + continue; + + one_by_one: + tb[0] = buffers[0]; + b[0] = vlib_get_buffer (vm, buffers[0]); + flags = b[0]->flags; + + /* No chained buffers or TSO case */ + if (PREDICT_TRUE ( + (flags & (VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_GSO)) == 0)) + { + u64 cmd = cmd_eop; + + if (PREDICT_FALSE (flags & VNET_BUFFER_F_OFFLOAD)) + cmd |= avf_tx_prepare_cksum (b[0], 0 /* is_tso */); + + avf_tx_fill_data_desc (vm, d, b[0], cmd, use_va_dma); + } + else + { + u16 n_desc_needed = 1; + u64 cmd = 0; + + if (flags & VLIB_BUFFER_NEXT_PRESENT) + { + vlib_buffer_t *next = vlib_get_buffer (vm, b[0]->next_buffer); + n_desc_needed = 2; + while (next->flags & VLIB_BUFFER_NEXT_PRESENT) + { + next = vlib_get_buffer (vm, next->next_buffer); + n_desc_needed++; + } + } + + if (flags & VNET_BUFFER_F_GSO) + { + n_desc_needed++; + } + else if (PREDICT_FALSE (n_desc_needed > 8)) + { + vlib_buffer_free_one (vm, buffers[0]); + vlib_error_count (vm, node->node_index, + AVF_TX_ERROR_SEGMENT_SIZE_EXCEEDED, 1); + n_packets_left -= 1; + buffers += 1; + continue; + } + + if (PREDICT_FALSE (n_desc_left < n_desc_needed)) + break; + + if (flags & VNET_BUFFER_F_GSO) + { + /* Enqueue a context descriptor */ + tb[1] = tb[0]; + tb[0] = avf_tx_fill_ctx_desc (vm, txq, d, b[0]); + n_desc_left -= 1; + d += 1; + tb += 1; + cmd = avf_tx_prepare_cksum (b[0], 1 /* is_tso */); + } + else if (flags & VNET_BUFFER_F_OFFLOAD) + { + cmd = avf_tx_prepare_cksum (b[0], 0 /* is_tso */); + } + + /* Deal with chain buffer if present */ + while (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT) + { + avf_tx_fill_data_desc (vm, d, b[0], cmd, use_va_dma); + + n_desc_left -= 1; + d += 1; + tb += 1; + + tb[0] = b[0]->next_buffer; + b[0] = vlib_get_buffer (vm, b[0]->next_buffer); + } + + avf_tx_fill_data_desc (vm, d, b[0], cmd_eop | cmd, use_va_dma); + } + + buffers += 1; + n_packets_left -= 1; + n_desc_left -= 1; + d += 1; + tb += 1; + } + + *n_enq_descs = n_free_desc - n_desc_left; + return n_packets - n_packets_left; +} + +VNET_DEVICE_CLASS_TX_FN (avf_device_class) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { - avf_main_t *am = &avf_main; vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; - avf_device_t *ad = pool_elt_at_index (am->devices, rd->dev_instance); + avf_device_t *ad = avf_get_device (rd->dev_instance); u32 thread_index = vm->thread_index; u8 qid = thread_index; avf_txq_t *txq = vec_elt_at_index (ad->txqs, qid % ad->num_queue_pairs); - avf_tx_desc_t *d0, *d1, *d2, *d3; - u32 *buffers = vlib_frame_args (frame); - u32 bi0, bi1, bi2, bi3; - u16 n_left = frame->n_vectors; - vlib_buffer_t *b0, *b1, *b2, *b3; + u16 next = txq->next; u16 mask = txq->size - 1; - u64 bits = (AVF_TXQ_DESC_CMD_EOP | AVF_TXQ_DESC_CMD_RS | - AVF_TXQ_DESC_CMD_RSV); + u32 *buffers = vlib_frame_vector_args (frame); + u16 n_enq, n_left, n_desc, *slot; + u16 n_retry = 2; clib_spinlock_lock_if_init (&txq->lock); - /* release cosumed bufs */ + n_left = frame->n_vectors; + +retry: + /* release consumed bufs */ if (txq->n_enqueued) { - u16 first, slot, n_free = 0; - first = slot = (txq->next - txq->n_enqueued) & mask; - d0 = txq->descs + slot; - while (n_free < txq->n_enqueued && avf_tx_desc_get_dtyp (d0) == 0x0F) + i32 complete_slot = -1; + while (1) { - n_free++; - slot = (slot + 1) & mask; - d0 = txq->descs + slot; + u16 *slot = clib_ring_get_first (txq->rs_slots); + + if (slot == 0) + break; + + if (avf_tx_desc_get_dtyp (txq->descs + slot[0]) 
!= 0x0F) + break; + + complete_slot = slot[0]; + + clib_ring_deq (txq->rs_slots); } - if (n_free) + if (complete_slot >= 0) { + u16 first, mask, n_free; + mask = txq->size - 1; + first = (txq->next - txq->n_enqueued) & mask; + n_free = (complete_slot + 1 - first) & mask; + txq->n_enqueued -= n_free; - vlib_buffer_free_from_ring (vm, txq->bufs, first, txq->size, - n_free); + vlib_buffer_free_from_ring_no_next (vm, txq->bufs, first, txq->size, + n_free); } } - while (n_left >= 8) - { - u16 slot0, slot1, slot2, slot3; + n_desc = 0; + if (ad->flags & AVF_DEVICE_F_VA_DMA) + n_enq = avf_tx_prepare (vm, node, txq, buffers, n_left, &n_desc, 1); + else + n_enq = avf_tx_prepare (vm, node, txq, buffers, n_left, &n_desc, 0); - vlib_prefetch_buffer_with_index (vm, buffers[4], LOAD); - vlib_prefetch_buffer_with_index (vm, buffers[5], LOAD); - vlib_prefetch_buffer_with_index (vm, buffers[6], LOAD); - vlib_prefetch_buffer_with_index (vm, buffers[7], LOAD); - - slot0 = txq->next; - slot1 = (txq->next + 1) & mask; - slot2 = (txq->next + 2) & mask; - slot3 = (txq->next + 3) & mask; - - d0 = txq->descs + slot0; - d1 = txq->descs + slot1; - d2 = txq->descs + slot2; - d3 = txq->descs + slot3; - - bi0 = buffers[0]; - bi1 = buffers[1]; - bi2 = buffers[2]; - bi3 = buffers[3]; - - txq->bufs[slot0] = bi0; - txq->bufs[slot1] = bi1; - txq->bufs[slot2] = bi2; - txq->bufs[slot3] = bi3; - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - b2 = vlib_get_buffer (vm, bi2); - b3 = vlib_get_buffer (vm, bi3); - -#if 0 - d->qword[0] = vlib_get_buffer_data_physical_address (vm, bi0) + - b0->current_data; -#else - d0->qword[0] = pointer_to_uword (b0->data) + b0->current_data; - d1->qword[0] = pointer_to_uword (b1->data) + b1->current_data; - d2->qword[0] = pointer_to_uword (b2->data) + b2->current_data; - d3->qword[0] = pointer_to_uword (b3->data) + b3->current_data; + if (n_desc) + { + if (PREDICT_TRUE (next + n_desc <= txq->size)) + { + /* no wrap */ + avf_tx_copy_desc (txq->descs + next, txq->tmp_descs, n_desc); + vlib_buffer_copy_indices (txq->bufs + next, txq->tmp_bufs, n_desc); + } + else + { + /* wrap */ + u32 n_not_wrap = txq->size - next; + avf_tx_copy_desc (txq->descs + next, txq->tmp_descs, n_not_wrap); + avf_tx_copy_desc (txq->descs, txq->tmp_descs + n_not_wrap, + n_desc - n_not_wrap); + vlib_buffer_copy_indices (txq->bufs + next, txq->tmp_bufs, + n_not_wrap); + vlib_buffer_copy_indices (txq->bufs, txq->tmp_bufs + n_not_wrap, + n_desc - n_not_wrap); + } -#endif - d0->qword[1] = ((u64) b0->current_length) << 34 | bits; - d1->qword[1] = ((u64) b1->current_length) << 34 | bits; - d2->qword[1] = ((u64) b2->current_length) << 34 | bits; - d3->qword[1] = ((u64) b3->current_length) << 34 | bits; + next += n_desc; + if ((slot = clib_ring_enq (txq->rs_slots))) + { + u16 rs_slot = slot[0] = (next - 1) & mask; + txq->descs[rs_slot].qword[1] |= AVF_TXD_CMD_RS; + } - txq->next = (txq->next + 4) & mask; - txq->n_enqueued += 4; - buffers += 4; - n_left -= 4; + txq->next = next & mask; + avf_tail_write (txq->qtx_tail, txq->next); + txq->n_enqueued += n_desc; + n_left -= n_enq; } - while (n_left) + if (n_left) { - d0 = txq->descs + txq->next; - bi0 = buffers[0]; - txq->bufs[txq->next] = bi0; - b0 = vlib_get_buffer (vm, bi0); - -#if 0 - d->qword[0] = vlib_get_buffer_data_physical_address (vm, bi0) + - b0->current_data; -#else - d0->qword[0] = pointer_to_uword (b0->data) + b0->current_data; -#endif - d0->qword[1] = (((u64) b0->current_length) << 34) | bits; + buffers += n_enq; + + if (n_retry--) + goto retry; - 
txq->next = (txq->next + 1) & mask; - txq->n_enqueued++; - buffers++; - n_left--; + vlib_buffer_free (vm, buffers, n_left); + vlib_error_count (vm, node->node_index, + AVF_TX_ERROR_NO_FREE_SLOTS, n_left); } - CLIB_MEMORY_BARRIER (); - *(txq->qtx_tail) = txq->next; clib_spinlock_unlock_if_init (&txq->lock); return frame->n_vectors - n_left; } -#ifndef CLIB_MARCH_VARIANT -#if __x86_64__ -vlib_node_function_t __clib_weak avf_interface_tx_avx512; -vlib_node_function_t __clib_weak avf_interface_tx_avx2; -static void __clib_constructor -avf_interface_tx_multiarch_select (void) -{ - if (avf_interface_tx_avx512 && clib_cpu_supports_avx512f ()) - avf_device_class.tx_function = avf_interface_tx_avx512; - else if (avf_interface_tx_avx2 && clib_cpu_supports_avx2 ()) - avf_device_class.tx_function = avf_interface_tx_avx2; -} -#endif -#endif - /* * fd.io coding-style-patch-verification: ON *
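
Below are two small standalone sketches (plain C, no VPP dependencies) illustrating techniques used by the new TX path in this patch; every struct, helper, and variable name not quoted from the patch is an illustrative assumption, not part of the driver.

First, the IPv4/TCP pseudo-header seeding that avf_tx_prepare_cksum performs in the non-TSO checksum-offload case: the driver builds a pseudo-header (avf_ip4_psh), computes a one's-complement sum over it, and stores the complemented sum in tcp->checksum so the NIC can finish the checksum over the L4 header and payload.

/* Sketch: seed a TCP checksum with the IPv4 pseudo-header sum, as done by
 * avf_tx_prepare_cksum for hardware L4 checksum offload. Names other than
 * the pseudo-header fields are illustrative. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

struct ip4_psh {           /* same layout as avf_ip4_psh in the patch */
  uint32_t src;
  uint32_t dst;
  uint8_t  zero;
  uint8_t  proto;
  uint16_t l4len;          /* TCP header + payload length, network order */
};

/* RFC 1071 style 16-bit one's-complement sum */
static uint16_t
csum16 (const void *data, size_t len)
{
  const uint8_t *p = data;
  uint32_t sum = 0;
  while (len > 1)
    {
      sum += (uint32_t) ((p[0] << 8) | p[1]);
      p += 2;
      len -= 2;
    }
  if (len)
    sum += (uint32_t) (p[0] << 8);
  while (sum >> 16)
    sum = (sum & 0xffff) + (sum >> 16);
  return (uint16_t) sum;
}

int
main (void)
{
  struct ip4_psh psh = { 0 };
  uint16_t ip_total_len = 40;  /* 20 B IPv4 header + 20 B TCP header, no payload */
  uint16_t l3_hdr_len = 20;

  psh.src = inet_addr ("192.0.2.1");
  psh.dst = inet_addr ("198.51.100.2");
  psh.proto = 6;               /* IPPROTO_TCP */
  psh.l4len = htons (ip_total_len - l3_hdr_len);

  /* The driver stores the complement of this sum in tcp->checksum; the NIC
   * then folds in the L4 header and payload and inverts the result. */
  uint16_t seed = (uint16_t) ~csum16 (&psh, sizeof (psh));
  printf ("pseudo-header checksum seed: 0x%04x\n", seed);
  return 0;
}

Second, the "no wrap / wrap" copy used when the descriptors staged in tmp_descs are moved into the TX ring: if the batch fits before the end of the ring it is copied in one step, otherwise the tail of the ring is filled first and the remainder continues from slot 0.

/* Sketch of the two-step wrap-around copy into a power-of-two ring.
 * RING_SIZE, desc_t and ring_copy are illustrative, not the driver's types. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RING_SIZE 8u           /* must be a power of two */

typedef struct { uint64_t qword[2]; } desc_t;

static void
ring_copy (desc_t *ring, uint16_t next, const desc_t *staged, uint16_t n)
{
  if (next + n <= RING_SIZE)
    {
      /* contiguous: single copy */
      memcpy (ring + next, staged, n * sizeof (desc_t));
    }
  else
    {
      /* wraps: fill the tail of the ring, then continue at slot 0 */
      uint16_t n_not_wrap = RING_SIZE - next;
      memcpy (ring + next, staged, n_not_wrap * sizeof (desc_t));
      memcpy (ring, staged + n_not_wrap, (n - n_not_wrap) * sizeof (desc_t));
    }
}

int
main (void)
{
  desc_t ring[RING_SIZE] = { 0 }, staged[4];
  for (int i = 0; i < 4; i++)
    staged[i].qword[0] = 100 + i;

  ring_copy (ring, 6, staged, 4);   /* 2 descriptors at the end, 2 at the start */
  printf ("ring[6]=%" PRIu64 " ring[7]=%" PRIu64 " ring[0]=%" PRIu64
          " ring[1]=%" PRIu64 "\n",
          ring[6].qword[0], ring[7].qword[0], ring[0].qword[0], ring[1].qword[0]);
  return 0;
}

In the patch the caller then advances the ring position as txq->next = next & mask and writes the new tail with avf_tail_write (txq->qtx_tail, txq->next).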