X-Git-Url: https://gerrit.fd.io/r/gitweb?p=deb_dpdk.git;a=blobdiff_plain;f=drivers%2Fnet%2Fsfc%2Fsfc_ef10_tx.c;h=bcd3153ff3e15335789b75cf775107c5d9c6096e;hp=d0daa3b3f6e6bac7c9fb89b53cb5c2fb34c1fb73;hb=8d01b9cd70a67cdafd5b965a70420c3bd7fb3f82;hpb=b63264c8342e6a1b6971c79550d2af2024b6a4de

diff --git a/drivers/net/sfc/sfc_ef10_tx.c b/drivers/net/sfc/sfc_ef10_tx.c
index d0daa3b3..bcd3153f 100644
--- a/drivers/net/sfc/sfc_ef10_tx.c
+++ b/drivers/net/sfc/sfc_ef10_tx.c
@@ -11,6 +11,8 @@
 
 #include <rte_mbuf.h>
 #include <rte_io.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
 
 #include "efx.h"
 #include "efx_types.h"
@@ -21,6 +23,7 @@
 #include "sfc_tweak.h"
 #include "sfc_kvargs.h"
 #include "sfc_ef10.h"
+#include "sfc_tso.h"
 
 #define sfc_ef10_tx_err(dpq, ...) \
 	SFC_DP_LOG(SFC_KVARG_DATAPATH_EF10, ERR, dpq, __VA_ARGS__)
@@ -62,6 +65,9 @@ struct sfc_ef10_txq {
 	efx_qword_t *txq_hw_ring;
 	volatile void *doorbell;
 	efx_qword_t *evq_hw_ring;
+	uint8_t *tsoh;
+	rte_iova_t tsoh_iova;
+	uint16_t tso_tcp_header_offset_limit;
 
 	/* Datapath transmit queue anchor */
 	struct sfc_dp_txq dp;
@@ -184,6 +190,30 @@ sfc_ef10_tx_qdesc_dma_create(rte_iova_t addr, uint16_t size, bool eop,
 			     ESF_DZ_TX_KER_BUF_ADDR, addr);
 }
 
+static void
+sfc_ef10_tx_qdesc_tso2_create(struct sfc_ef10_txq * const txq,
+			      unsigned int added, uint16_t ipv4_id,
+			      uint16_t outer_ipv4_id, uint32_t tcp_seq,
+			      uint16_t tcp_mss)
+{
+	EFX_POPULATE_QWORD_5(txq->txq_hw_ring[added & txq->ptr_mask],
+			     ESF_DZ_TX_DESC_IS_OPT, 1,
+			     ESF_DZ_TX_OPTION_TYPE,
+			     ESE_DZ_TX_OPTION_DESC_TSO,
+			     ESF_DZ_TX_TSO_OPTION_TYPE,
+			     ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A,
+			     ESF_DZ_TX_TSO_IP_ID, ipv4_id,
+			     ESF_DZ_TX_TSO_TCP_SEQNO, tcp_seq);
+	EFX_POPULATE_QWORD_5(txq->txq_hw_ring[(added + 1) & txq->ptr_mask],
+			     ESF_DZ_TX_DESC_IS_OPT, 1,
+			     ESF_DZ_TX_OPTION_TYPE,
+			     ESE_DZ_TX_OPTION_DESC_TSO,
+			     ESF_DZ_TX_TSO_OPTION_TYPE,
+			     ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B,
+			     ESF_DZ_TX_TSO_TCP_MSS, tcp_mss,
+			     ESF_DZ_TX_TSO_OUTER_IPID, outer_ipv4_id);
+}
+
 static inline void
 sfc_ef10_tx_qpush(struct sfc_ef10_txq *txq, unsigned int added,
 		  unsigned int pushed)
 {
@@ -263,6 +293,252 @@ sfc_ef10_tx_pkt_descs_max(const struct rte_mbuf *m)
 				    extra_descs_per_pkt);
 }
 
+static bool
+sfc_ef10_try_reap(struct sfc_ef10_txq * const txq, unsigned int added,
+		  unsigned int needed_desc, unsigned int *dma_desc_space,
+		  bool *reap_done)
+{
+	if (*reap_done)
+		return false;
+
+	if (added != txq->added) {
+		sfc_ef10_tx_qpush(txq, added, txq->added);
+		txq->added = added;
+	}
+
+	sfc_ef10_tx_reap(txq);
+	*reap_done = true;
+
+	/*
+	 * Recalculate DMA descriptor space since Tx reap may change
+	 * the number of completed descriptors
+	 */
+	*dma_desc_space = txq->max_fill_level -
+		(added - txq->completed);
+
+	return (needed_desc <= *dma_desc_space);
+}
+
+static int
+sfc_ef10_xmit_tso_pkt(struct sfc_ef10_txq * const txq, struct rte_mbuf *m_seg,
+		      unsigned int *added, unsigned int *dma_desc_space,
+		      bool *reap_done)
+{
+	size_t iph_off = m_seg->l2_len;
+	size_t tcph_off = m_seg->l2_len + m_seg->l3_len;
+	size_t header_len = m_seg->l2_len + m_seg->l3_len + m_seg->l4_len;
+	/* Offset of the payload in the last segment that contains the header */
+	size_t in_off = 0;
+	const struct tcp_hdr *th;
+	uint16_t packet_id;
+	uint32_t sent_seq;
+	uint8_t *hdr_addr;
+	rte_iova_t hdr_iova;
+	struct rte_mbuf *first_m_seg = m_seg;
+	unsigned int pkt_start = *added;
+	unsigned int needed_desc;
+	struct rte_mbuf *m_seg_to_free_up_to = first_m_seg;
+	bool eop;
+
+	/* Both checks may be done, so use bit OR to have only one branching */
+	if (unlikely((header_len > SFC_TSOH_STD_LEN) |
+		     (tcph_off > 
txq->tso_tcp_header_offset_limit))) + return EMSGSIZE; + + /* + * Preliminary estimation of required DMA descriptors, including extra + * descriptor for TSO header that is needed when the header is + * separated from payload in one segment. It does not include + * extra descriptors that may appear when a big segment is split across + * several descriptors. + */ + needed_desc = m_seg->nb_segs + + (unsigned int)SFC_TSO_OPT_DESCS_NUM + + (unsigned int)SFC_TSO_HDR_DESCS_NUM; + + if (needed_desc > *dma_desc_space && + !sfc_ef10_try_reap(txq, pkt_start, needed_desc, + dma_desc_space, reap_done)) { + /* + * If a future Tx reap may increase available DMA descriptor + * space, do not try to send the packet. + */ + if (txq->completed != pkt_start) + return ENOSPC; + /* + * Do not allow to send packet if the maximum DMA + * descriptor space is not sufficient to hold TSO + * descriptors, header descriptor and at least 1 + * segment descriptor. + */ + if (*dma_desc_space < SFC_TSO_OPT_DESCS_NUM + + SFC_TSO_HDR_DESCS_NUM + 1) + return EMSGSIZE; + } + + /* Check if the header is not fragmented */ + if (rte_pktmbuf_data_len(m_seg) >= header_len) { + hdr_addr = rte_pktmbuf_mtod(m_seg, uint8_t *); + hdr_iova = rte_mbuf_data_iova(m_seg); + if (rte_pktmbuf_data_len(m_seg) == header_len) { + /* + * Associate header mbuf with header descriptor + * which is located after TSO descriptors. + */ + txq->sw_ring[(pkt_start + SFC_TSO_OPT_DESCS_NUM) & + txq->ptr_mask].mbuf = m_seg; + m_seg = m_seg->next; + in_off = 0; + + /* + * If there is no payload offset (payload starts at the + * beginning of a segment) then an extra descriptor for + * separated header is not needed. + */ + needed_desc--; + } else { + in_off = header_len; + } + } else { + unsigned int copied_segs; + unsigned int hdr_addr_off = (*added & txq->ptr_mask) * + SFC_TSOH_STD_LEN; + + hdr_addr = txq->tsoh + hdr_addr_off; + hdr_iova = txq->tsoh_iova + hdr_addr_off; + copied_segs = sfc_tso_prepare_header(hdr_addr, header_len, + &m_seg, &in_off); + + m_seg_to_free_up_to = m_seg; + /* + * Reduce the number of needed descriptors by the number of + * segments that entirely consist of header data. 
+ */ + needed_desc -= copied_segs; + + /* Extra descriptor for separated header is not needed */ + if (in_off == 0) + needed_desc--; + } + + switch (first_m_seg->ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) { + case PKT_TX_IPV4: { + const struct ipv4_hdr *iphe4; + + iphe4 = (const struct ipv4_hdr *)(hdr_addr + iph_off); + rte_memcpy(&packet_id, &iphe4->packet_id, sizeof(uint16_t)); + packet_id = rte_be_to_cpu_16(packet_id); + break; + } + case PKT_TX_IPV6: + packet_id = 0; + break; + default: + return EINVAL; + } + + th = (const struct tcp_hdr *)(hdr_addr + tcph_off); + rte_memcpy(&sent_seq, &th->sent_seq, sizeof(uint32_t)); + sent_seq = rte_be_to_cpu_32(sent_seq); + + sfc_ef10_tx_qdesc_tso2_create(txq, *added, packet_id, 0, sent_seq, + first_m_seg->tso_segsz); + (*added) += SFC_TSO_OPT_DESCS_NUM; + + sfc_ef10_tx_qdesc_dma_create(hdr_iova, header_len, false, + &txq->txq_hw_ring[(*added) & txq->ptr_mask]); + (*added)++; + + do { + rte_iova_t next_frag = rte_mbuf_data_iova(m_seg); + unsigned int seg_len = rte_pktmbuf_data_len(m_seg); + unsigned int id; + + next_frag += in_off; + seg_len -= in_off; + in_off = 0; + + do { + rte_iova_t frag_addr = next_frag; + size_t frag_len; + + frag_len = RTE_MIN(seg_len, + SFC_EF10_TX_DMA_DESC_LEN_MAX); + + next_frag += frag_len; + seg_len -= frag_len; + + eop = (seg_len == 0 && m_seg->next == NULL); + + id = (*added) & txq->ptr_mask; + (*added)++; + + /* + * Initially we assume that one DMA descriptor is needed + * for every segment. When the segment is split across + * several DMA descriptors, increase the estimation. + */ + needed_desc += (seg_len != 0); + + /* + * When no more descriptors can be added, but not all + * segments are processed. + */ + if (*added - pkt_start == *dma_desc_space && + !eop && + !sfc_ef10_try_reap(txq, pkt_start, needed_desc, + dma_desc_space, reap_done)) { + struct rte_mbuf *m; + struct rte_mbuf *m_next; + + if (txq->completed != pkt_start) { + unsigned int i; + + /* + * Reset mbuf associations with added + * descriptors. 
+ */ + for (i = pkt_start; i != *added; i++) { + id = i & txq->ptr_mask; + txq->sw_ring[id].mbuf = NULL; + } + return ENOSPC; + } + + /* Free the segments that cannot be sent */ + for (m = m_seg->next; m != NULL; m = m_next) { + m_next = m->next; + rte_pktmbuf_free_seg(m); + } + eop = true; + /* Ignore the rest of the segment */ + seg_len = 0; + } + + sfc_ef10_tx_qdesc_dma_create(frag_addr, frag_len, + eop, &txq->txq_hw_ring[id]); + + } while (seg_len != 0); + + txq->sw_ring[id].mbuf = m_seg; + + m_seg = m_seg->next; + } while (!eop); + + /* + * Free segments which content was entirely copied to the TSO header + * memory space of Tx queue + */ + for (m_seg = first_m_seg; m_seg != m_seg_to_free_up_to;) { + struct rte_mbuf *seg_to_free = m_seg; + + m_seg = m_seg->next; + rte_pktmbuf_free_seg(seg_to_free); + } + + return 0; +} + static uint16_t sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { @@ -296,6 +572,30 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) if (likely(pktp + 1 != pktp_end)) rte_mbuf_prefetch_part1(pktp[1]); + if (m_seg->ol_flags & PKT_TX_TCP_SEG) { + int rc; + + rc = sfc_ef10_xmit_tso_pkt(txq, m_seg, &added, + &dma_desc_space, &reap_done); + if (rc != 0) { + added = pkt_start; + + /* Packet can be sent in following xmit calls */ + if (likely(rc == ENOSPC)) + break; + + /* + * Packet cannot be sent, tell RTE that + * it is sent, but actually drop it and + * continue with another packet + */ + rte_pktmbuf_free(*pktp); + continue; + } + + goto dma_desc_space_update; + } + if (sfc_ef10_tx_pkt_descs_max(m_seg) > dma_desc_space) { if (reap_done) break; @@ -349,6 +649,7 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) } while ((m_seg = m_seg->next) != 0); +dma_desc_space_update: dma_desc_space -= (added - pkt_start); } @@ -524,6 +825,18 @@ sfc_ef10_tx_qcreate(uint16_t port_id, uint16_t queue_id, if (txq->sw_ring == NULL) goto fail_sw_ring_alloc; + if (info->offloads & DEV_TX_OFFLOAD_TCP_TSO) { + txq->tsoh = rte_calloc_socket("sfc-ef10-txq-tsoh", + info->txq_entries, + SFC_TSOH_STD_LEN, + RTE_CACHE_LINE_SIZE, + socket_id); + if (txq->tsoh == NULL) + goto fail_tsoh_alloc; + + txq->tsoh_iova = rte_malloc_virt2iova(txq->tsoh); + } + txq->flags = SFC_EF10_TXQ_NOT_RUNNING; txq->ptr_mask = info->txq_entries - 1; txq->max_fill_level = info->max_fill_level; @@ -533,10 +846,14 @@ sfc_ef10_tx_qcreate(uint16_t port_id, uint16_t queue_id, ER_DZ_TX_DESC_UPD_REG_OFST + (info->hw_index << info->vi_window_shift); txq->evq_hw_ring = info->evq_hw_ring; + txq->tso_tcp_header_offset_limit = info->tso_tcp_header_offset_limit; *dp_txqp = &txq->dp; return 0; +fail_tsoh_alloc: + rte_free(txq->sw_ring); + fail_sw_ring_alloc: rte_free(txq); @@ -551,6 +868,7 @@ sfc_ef10_tx_qdestroy(struct sfc_dp_txq *dp_txq) { struct sfc_ef10_txq *txq = sfc_ef10_txq_by_dp_txq(dp_txq); + rte_free(txq->tsoh); rte_free(txq->sw_ring); rte_free(txq); } @@ -618,12 +936,49 @@ sfc_ef10_tx_qreap(struct sfc_dp_txq *dp_txq) txq->flags &= ~SFC_EF10_TXQ_STARTED; } +static unsigned int +sfc_ef10_tx_qdesc_npending(struct sfc_ef10_txq *txq) +{ + const unsigned int curr_done = txq->completed - 1; + unsigned int anew_done = curr_done; + efx_qword_t tx_ev; + const unsigned int evq_old_read_ptr = txq->evq_read_ptr; + + if (unlikely(txq->flags & + (SFC_EF10_TXQ_NOT_RUNNING | SFC_EF10_TXQ_EXCEPTION))) + return 0; + + while (sfc_ef10_tx_get_event(txq, &tx_ev)) + anew_done = EFX_QWORD_FIELD(tx_ev, ESF_DZ_TX_DESCR_INDX); + + /* + * The function does 
not process events, so return event queue read + * pointer to the original position to allow the events that were + * read to be processed later + */ + txq->evq_read_ptr = evq_old_read_ptr; + + return (anew_done - curr_done) & txq->ptr_mask; +} + static sfc_dp_tx_qdesc_status_t sfc_ef10_tx_qdesc_status; static int -sfc_ef10_tx_qdesc_status(__rte_unused struct sfc_dp_txq *dp_txq, - __rte_unused uint16_t offset) +sfc_ef10_tx_qdesc_status(struct sfc_dp_txq *dp_txq, + uint16_t offset) { - return -ENOTSUP; + struct sfc_ef10_txq *txq = sfc_ef10_txq_by_dp_txq(dp_txq); + unsigned int npending = sfc_ef10_tx_qdesc_npending(txq); + + if (unlikely(offset > txq->ptr_mask)) + return -EINVAL; + + if (unlikely(offset >= txq->max_fill_level)) + return RTE_ETH_TX_DESC_UNAVAIL; + + if (unlikely(offset < npending)) + return RTE_ETH_TX_DESC_FULL; + + return RTE_ETH_TX_DESC_DONE; } struct sfc_dp_tx sfc_ef10_tx = { @@ -632,7 +987,8 @@ struct sfc_dp_tx sfc_ef10_tx = { .type = SFC_DP_TX, .hw_fw_caps = SFC_DP_HW_FW_CAP_EF10, }, - .features = SFC_DP_TX_FEAT_MULTI_SEG | + .features = SFC_DP_TX_FEAT_TSO | + SFC_DP_TX_FEAT_MULTI_SEG | SFC_DP_TX_FEAT_MULTI_POOL | SFC_DP_TX_FEAT_REFCNT | SFC_DP_TX_FEAT_MULTI_PROCESS,
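
Note on the descriptor layout used by the TSO path above: each TSO packet is emitted as two FATSO2 option descriptors (the FATSO2A and FATSO2B qwords built by sfc_ef10_tx_qdesc_tso2_create()), one DMA descriptor for the (possibly copied) TSO header, and one DMA descriptor per payload chunk, where a chunk is capped at SFC_EF10_TX_DMA_DESC_LEN_MAX bytes. The standalone sketch below mirrors that accounting outside the driver; the constants are assumed stand-ins for SFC_TSO_OPT_DESCS_NUM, SFC_TSO_HDR_DESCS_NUM and SFC_EF10_TX_DMA_DESC_LEN_MAX, and tso_pkt_descs() is a hypothetical helper, not part of the patch.

/*
 * Standalone illustration (not from the patch): count the EF10 Tx descriptors
 * one TSO packet ends up using, mirroring the segment loop in
 * sfc_ef10_xmit_tso_pkt().
 */
#include <stdio.h>

#define TSO_OPT_DESCS_NUM	2u	/* FATSO2A + FATSO2B option descriptors */
#define TSO_HDR_DESCS_NUM	1u	/* one DMA descriptor for the TSO header */
#define DMA_DESC_LEN_MAX	16383u	/* assumed per-descriptor byte limit */

static unsigned int
tso_pkt_descs(unsigned int nb_segs, const unsigned int *seg_lens,
	      unsigned int header_len)
{
	unsigned int needed = TSO_OPT_DESCS_NUM + TSO_HDR_DESCS_NUM;
	unsigned int skip = header_len;	/* header bytes are sent separately */
	unsigned int i;

	for (i = 0; i < nb_segs; i++) {
		unsigned int len = seg_lens[i];
		unsigned int payload = (len > skip) ? len - skip : 0;

		skip -= (len > skip) ? skip : len;
		/* Each payload chunk gets its own DMA descriptor. */
		needed += (payload + DMA_DESC_LEN_MAX - 1) / DMA_DESC_LEN_MAX;
	}

	return needed;
}

int main(void)
{
	/* 54-byte header in its own segment plus two 16000-byte segments. */
	const unsigned int seg_lens[] = { 54, 16000, 16000 };

	/* 2 option + 1 header + 2 payload descriptors -> prints 5 */
	printf("%u\n", tso_pkt_descs(3, seg_lens, 54));
	return 0;
}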
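
Similarly, the new sfc_ef10_tx_qdesc_status() classifies a descriptor offset against three queue figures: the ring pointer mask, the maximum fill level, and the pending-descriptor count that sfc_ef10_tx_qdesc_npending() derives from unprocessed Tx events. A minimal sketch of that classification, with local stand-ins for the RTE_ETH_TX_DESC_* codes so it builds without DPDK, is:

/*
 * Standalone illustration (not from the patch): the offset classification done
 * by sfc_ef10_tx_qdesc_status(). Local enum values replace RTE_ETH_TX_DESC_*
 * and -EINVAL so the sketch is self-contained.
 */
#include <stdio.h>

enum tx_desc_status {
	TX_DESC_DONE,		/* stands in for RTE_ETH_TX_DESC_DONE */
	TX_DESC_FULL,		/* stands in for RTE_ETH_TX_DESC_FULL */
	TX_DESC_UNAVAIL,	/* stands in for RTE_ETH_TX_DESC_UNAVAIL */
	TX_DESC_EINVAL		/* stands in for -EINVAL */
};

static enum tx_desc_status
tx_qdesc_status(unsigned int offset, unsigned int ptr_mask,
		unsigned int max_fill_level, unsigned int npending)
{
	if (offset > ptr_mask)		/* beyond the ring */
		return TX_DESC_EINVAL;
	if (offset >= max_fill_level)	/* beyond the queue's max fill level */
		return TX_DESC_UNAVAIL;
	if (offset < npending)		/* within the pending completion count */
		return TX_DESC_FULL;
	return TX_DESC_DONE;
}

int main(void)
{
	/* 1024-entry ring (ptr_mask 1023), fill level 1019, 5 pending. */
	printf("%d %d %d %d\n",
	       tx_qdesc_status(3, 1023, 1019, 5),	/* FULL */
	       tx_qdesc_status(100, 1023, 1019, 5),	/* DONE */
	       tx_qdesc_status(1020, 1023, 1019, 5),	/* UNAVAIL */
	       tx_qdesc_status(2000, 1023, 1019, 5));	/* EINVAL */
	return 0;
}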