implement sw segmentation for tcp 30/5830/2
authorMohammad Abdul Awal <mohammad.abdul.awal@intel.com>
Fri, 3 Mar 2017 17:38:14 +0000 (17:38 +0000)
committerMohammad Abdul Awal <mohammad.abdul.awal@intel.com>
Thu, 23 Mar 2017 11:46:16 +0000 (11:46 +0000)
Change-Id: Ibe3ac4b401ea9c7680ab5d3e8c73557d95402ff2
Signed-off-by: Mohammad Abdul Awal <mohammad.abdul.awal@intel.com>
examples/l4fwd/tcp.h
lib/libtle_l4p/tcp_rxtx.c
lib/libtle_l4p/tcp_tx_seg.h [new file with mode: 0644]

index f6ca3a5..e4aadb5 100644 (file)
@@ -279,9 +279,9 @@ netfe_fwd_tcp(uint32_t lcore, struct netfe_stream *fes)
                                __func__, lcore, proto_name[fes->proto],
                                fed->s, n, k);
 
-                       fed->stat.txp += k;
-                       fed->stat.drops += n - k;
-                       fes->stat.fwp += k;
+               fed->stat.txp += k;
+               fed->stat.drops += n - k;
+               fes->stat.fwp += k;
 
        } else {
                NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n",
index 6085814..a6b3989 100644 (file)
@@ -27,6 +27,7 @@
 #include "tcp_ctl.h"
 #include "tcp_rxq.h"
 #include "tcp_txq.h"
+#include "tcp_tx_seg.h"
 
 #define        TCP_MAX_PKT_SEG 0x20
 
@@ -2142,13 +2143,42 @@ tle_tcp_stream_recv(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num)
        return n;
 }
 
+/*
+ * Build the L2/L3/L4 headers for each of the @num packets in @segs and
+ * hand the whole set to the stream TX ring with one bulk enqueue.
+ * If a header cannot be built, or the enqueue returns non-zero, every
+ * packet in @segs is freed.
+ * Returns the result of the last call made: zero is treated as success,
+ * any other value comes from the step that failed.
+ */
+static inline int32_t
+tx_segments(struct tle_tcp_stream *s, uint64_t ol_flags,
+       struct rte_mbuf *segs[], uint32_t num)
+{
+       uint32_t idx;
+       int32_t ret;
+
+       /* fill headers; the first failure drops the complete set. */
+       for (idx = 0; idx != num; idx++) {
+               ret = tcp_fill_mbuf(segs[idx], s, &s->tx.dst, ol_flags,
+                       s->s.port, 0, TCP_FLAG_ACK, 0, 0);
+               if (ret != 0) {
+                       free_segments(segs, num);
+                       return ret;
+               }
+       }
+
+       /* queue packets for further transmission. */
+       ret = rte_ring_mp_enqueue_bulk(s->tx.q, (void **)segs, num);
+       if (ret != 0)
+               free_segments(segs, num);
+
+       return ret;
+}
+
 uint16_t
 tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num)
 {
-       uint32_t i, j, mss, n, state, type;
+       uint32_t i, j, k, mss, n, state, type;
+       int32_t rc;
        uint64_t ol_flags;
        struct tle_tcp_stream *s;
        struct tle_dev *dev;
+       struct rte_mbuf *segs[TCP_MAX_PKT_SEG];
 
        s = TCP_STREAM(ts);
 
@@ -2161,53 +2191,87 @@ tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num)
        state = s->tcb.state;
        if (state != TCP_ST_ESTABLISHED && state != TCP_ST_CLOSE_WAIT) {
                rte_errno = ENOTCONN;
-               n = 0;
-       } else {
-               mss = s->tcb.snd.mss;
-               dev = s->tx.dst.dev;
-               type = s->s.type;
-               ol_flags = dev->tx.ol_flags[type];
+               rwl_release(&s->tx.use);
+               return 0;
+       }
 
-               /* prepare and check for TX */
-               for (i = 0; i != num; i++) {
+       mss = s->tcb.snd.mss;
+       dev = s->tx.dst.dev;
+       type = s->s.type;
+       ol_flags = dev->tx.ol_flags[type];
 
-                       /* !!! need to be modified !!! */
+       k = 0;
+       rc = 0;
+       while (k != num) {
+               /* prepare and check for TX */
+               for (i = k; i != num; i++) {
                        if (pkt[i]->pkt_len > mss ||
-                                       pkt[i]->nb_segs > TCP_MAX_PKT_SEG) {
-                               rte_errno = EBADMSG;
+                                       pkt[i]->nb_segs > TCP_MAX_PKT_SEG)
                                break;
-                       } else if (tcp_fill_mbuf(pkt[i], s, &s->tx.dst,
-                                       ol_flags, s->s.port, 0, TCP_FLAG_ACK,
-                                       0, 0) != 0)
+                       rc = tcp_fill_mbuf(pkt[i], s, &s->tx.dst, ol_flags,
+                               s->s.port, 0, TCP_FLAG_ACK, 0, 0);
+                       if (rc != 0)
                                break;
                }
 
-               /* queue packets for further transmision. */
-               n = rte_ring_mp_enqueue_burst(s->tx.q, (void **)pkt, i);
+               if (i != k) {
+                       /* queue packets for further transmission. */
+                       n = rte_ring_mp_enqueue_burst(s->tx.q, (void **)pkt + k,
+                               (i - k));
+                       k += n;
+
+                       /*
+                        * for unsent, but already modified packets:
+                        * remove pkt l2/l3 headers, restore ol_flags
+                        */
+                       if (i != k) {
+                               ol_flags = ~dev->tx.ol_flags[type];
+                               for (j = k; j != i; j++) {
+                                       rte_pktmbuf_adj(pkt[j], pkt[j]->l2_len +
+                                               pkt[j]->l3_len +
+                                               pkt[j]->l4_len);
+                                       pkt[j]->ol_flags &= ol_flags;
+                               }
+                               break;
+                       }
+               }
 
-               /* notify BE about more data to send */
-               if (n != 0)
-                       txs_enqueue(s->s.ctx, s);
+               if (rc != 0) {
+                       rte_errno = -rc;
+                       break;
 
-               /*
-                * for unsent, but already modified packets:
-                * remove pkt l2/l3 headers, restore ol_flags
-                */
-               if (n != i) {
-                       ol_flags = ~dev->tx.ol_flags[type];
-                       for (j = n; j != i; j++) {
-                               rte_pktmbuf_adj(pkt[j], pkt[j]->l2_len +
-                                       pkt[j]->l3_len + pkt[j]->l4_len);
-                               pkt[j]->ol_flags &= ol_flags;
+               /* segment large packet and enqueue for sending */
+               } else if (i != num) {
+                       /* segment the packet. */
+                       rc = tcp_segmentation(pkt[i], segs, RTE_DIM(segs),
+                               &s->tx.dst, mss);
+                       if (rc < 0) {
+                               rte_errno = -rc;
+                               break;
                        }
-               /* if possible, rearm stream write event. */
-               } else if (rte_ring_free_count(s->tx.q) != 0 &&
-                               s->tx.ev != NULL)
-                       tle_event_raise(s->tx.ev);
+
+                       rc = tx_segments(s, dev->tx.ol_flags[type], segs, rc);
+                       if (rc == 0) {
+                               /* free the large mbuf */
+                               rte_pktmbuf_free(pkt[i]);
+                               /* set the mbuf as consumed */
+                               k++;
+                       } else
+                               /* no space left in tx queue */
+                               break;
+               }
        }
 
+       /* notify BE about more data to send */
+       if (k != 0)
+               txs_enqueue(s->s.ctx, s);
+       /* if possible, re-arm stream write event. */
+       if (rte_ring_free_count(s->tx.q) != 0 && s->tx.ev != NULL)
+               tle_event_raise(s->tx.ev);
+
        rwl_release(&s->tx.use);
-       return n;
+
+       return k;
 }
 
 /* send data and FIN (if needed) */
diff --git a/lib/libtle_l4p/tcp_tx_seg.h b/lib/libtle_l4p/tcp_tx_seg.h
new file mode 100644 (file)
index 0000000..3a80fdd
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016  Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_TX_SEG_H_
+#define _TCP_TX_SEG_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Release the first @num packets referenced by the array @mb
+ * by calling rte_pktmbuf_free() on each entry in order.
+ */
+static inline void
+free_segments(struct rte_mbuf *mb[], uint32_t num)
+{
+       struct rte_mbuf **cur, **end;
+
+       end = mb + num;
+       for (cur = mb; cur != end; cur++)
+               rte_pktmbuf_free(*cur);
+}
+
+/*
+ * Split the data of packet @mbin into packets that carry at most @mss
+ * bytes each.
+ * Every output packet starts with a newly allocated mbuf (no data is
+ * written into it here) followed by a chain of mbufs attached with
+ * rte_pktmbuf_attach() to the segments of @mbin.
+ * @mbin:  packet to split; it is not freed by this function.
+ * @mbout: array that receives the output packets.
+ * @num:   number of entries available in @mbout.
+ * @dst:   destination whose head_mp pool supplies all new mbufs.
+ * @mss:   maximum number of data bytes per output packet.
+ * Returns the number of packets stored in @mbout, -ENOSPC when @mbout
+ * cannot hold all of them, or -ENOMEM when an mbuf allocation fails.
+ * On error every output packet built so far is freed.
+ */
+static inline int32_t
+tcp_segmentation(struct rte_mbuf *mbin, struct rte_mbuf *mbout[], uint16_t num,
+       const struct tle_dest *dst, uint16_t mss)
+{
+       struct rte_mbuf *in_seg;
+       uint32_t nbseg, in_seg_data_pos;
+       uint32_t more_in_segs;
+
+       in_seg = mbin;
+       in_seg_data_pos = 0;
+       nbseg = 0;
+
+       /*
+        * Check that mbout is big enough to hold all output packets.
+        * Compare in 32 bits: pkt_len must not be truncated to 16 bits
+        * and mss * num must not be evaluated as a signed int.
+        */
+       if ((uint32_t)mss * num < mbin->pkt_len)
+               return -ENOSPC;
+
+       more_in_segs = 1;
+       while (more_in_segs) {
+               struct rte_mbuf *out_pkt, *out_seg_prev;
+               uint32_t more_out_segs;
+
+               /*
+                * Never write past the end of mbout: the size check
+                * above does not cover num == 0 or chains made of
+                * zero-length segments.
+                */
+               if (nbseg == num) {
+                       free_segments(mbout, nbseg);
+                       return -ENOSPC;
+               }
+
+               /* Allocate direct buffer */
+               out_pkt = rte_pktmbuf_alloc(dst->head_mp);
+               if (out_pkt == NULL) {
+                       free_segments(mbout, nbseg);
+                       return -ENOMEM;
+               }
+
+               out_seg_prev = out_pkt;
+               more_out_segs = 1;
+               while (more_out_segs && more_in_segs) {
+                       struct rte_mbuf *out_seg;
+                       uint32_t len;
+
+                       /* Allocate indirect buffer */
+                       out_seg = rte_pktmbuf_alloc(dst->head_mp);
+                       if (out_seg == NULL) {
+                               /* out_pkt is not in mbout yet */
+                               rte_pktmbuf_free(out_pkt);
+                               free_segments(mbout, nbseg);
+                               return -ENOMEM;
+                       }
+                       out_seg_prev->next = out_seg;
+                       out_seg_prev = out_seg;
+
+                       /* Prepare indirect buffer */
+                       rte_pktmbuf_attach(out_seg, in_seg);
+
+                       /*
+                        * Take only what still fits into the current
+                        * output packet, so it never exceeds mss even
+                        * when input segments are shorter than mss.
+                        */
+                       len = mss - out_pkt->pkt_len;
+                       if (len > (in_seg->data_len - in_seg_data_pos))
+                               len = in_seg->data_len - in_seg_data_pos;
+
+                       out_seg->data_off = in_seg->data_off + in_seg_data_pos;
+                       out_seg->data_len = (uint16_t)len;
+                       out_pkt->pkt_len += len;
+                       out_pkt->nb_segs += 1;
+                       in_seg_data_pos += len;
+
+                       /* Current output packet done ? */
+                       if (out_pkt->pkt_len >= mss)
+                               more_out_segs = 0;
+
+                       /* Current input segment done ? */
+                       if (in_seg_data_pos == in_seg->data_len) {
+                               in_seg = in_seg->next;
+                               in_seg_data_pos = 0;
+
+                               if (in_seg == NULL)
+                                       more_in_segs = 0;
+                       }
+               }
+
+               /* Write the packet to the output list */
+               mbout[nbseg] = out_pkt;
+               nbseg++;
+       }
+
+       return nbseg;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_TX_SEG_H_ */