New upstream version 17.08
[deb_dpdk.git] / lib / librte_gro / gro_tcp4.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2017 Intel Corporation. All rights reserved.
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above copyright
13  *       notice, this list of conditions and the following disclaimer in
14  *       the documentation and/or other materials provided with the
15  *       distribution.
16  *     * Neither the name of Intel Corporation nor the names of its
17  *       contributors may be used to endorse or promote products derived
18  *       from this software without specific prior written permission.
19  *
20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_cycles.h>
36 #include <rte_ethdev.h>
37 #include <rte_ip.h>
38 #include <rte_tcp.h>
39
40 #include "gro_tcp4.h"
41
42 void *
43 gro_tcp4_tbl_create(uint16_t socket_id,
44                 uint16_t max_flow_num,
45                 uint16_t max_item_per_flow)
46 {
47         struct gro_tcp4_tbl *tbl;
48         size_t size;
49         uint32_t entries_num, i;
50
51         entries_num = max_flow_num * max_item_per_flow;
52         entries_num = RTE_MIN(entries_num, GRO_TCP4_TBL_MAX_ITEM_NUM);
53
54         if (entries_num == 0)
55                 return NULL;
56
57         tbl = rte_zmalloc_socket(__func__,
58                         sizeof(struct gro_tcp4_tbl),
59                         RTE_CACHE_LINE_SIZE,
60                         socket_id);
61         if (tbl == NULL)
62                 return NULL;
63
64         size = sizeof(struct gro_tcp4_item) * entries_num;
65         tbl->items = rte_zmalloc_socket(__func__,
66                         size,
67                         RTE_CACHE_LINE_SIZE,
68                         socket_id);
69         if (tbl->items == NULL) {
70                 rte_free(tbl);
71                 return NULL;
72         }
73         tbl->max_item_num = entries_num;
74
75         size = sizeof(struct gro_tcp4_key) * entries_num;
76         tbl->keys = rte_zmalloc_socket(__func__,
77                         size,
78                         RTE_CACHE_LINE_SIZE,
79                         socket_id);
80         if (tbl->keys == NULL) {
81                 rte_free(tbl->items);
82                 rte_free(tbl);
83                 return NULL;
84         }
85         /* INVALID_ARRAY_INDEX indicates empty key */
86         for (i = 0; i < entries_num; i++)
87                 tbl->keys[i].start_index = INVALID_ARRAY_INDEX;
88         tbl->max_key_num = entries_num;
89
90         return tbl;
91 }
92
93 void
94 gro_tcp4_tbl_destroy(void *tbl)
95 {
96         struct gro_tcp4_tbl *tcp_tbl = tbl;
97
98         if (tcp_tbl) {
99                 rte_free(tcp_tbl->items);
100                 rte_free(tcp_tbl->keys);
101         }
102         rte_free(tcp_tbl);
103 }
104
105 /*
106  * merge two TCP/IPv4 packets without updating checksums.
107  * If cmp is larger than 0, append the new packet to the
108  * original packet. Otherwise, pre-pend the new packet to
109  * the original packet.
110  */
111 static inline int
112 merge_two_tcp4_packets(struct gro_tcp4_item *item_src,
113                 struct rte_mbuf *pkt,
114                 uint16_t ip_id,
115                 uint32_t sent_seq,
116                 int cmp)
117 {
118         struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
119         uint16_t tcp_datalen;
120
121         if (cmp > 0) {
122                 pkt_head = item_src->firstseg;
123                 pkt_tail = pkt;
124         } else {
125                 pkt_head = pkt;
126                 pkt_tail = item_src->firstseg;
127         }
128
129         /* check if the packet length will be beyond the max value */
130         tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len -
131                 pkt_tail->l3_len - pkt_tail->l4_len;
132         if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen >
133                         TCP4_MAX_L3_LENGTH)
134                 return 0;
135
136         /* remove packet header for the tail packet */
137         rte_pktmbuf_adj(pkt_tail,
138                         pkt_tail->l2_len +
139                         pkt_tail->l3_len +
140                         pkt_tail->l4_len);
141
142         /* chain two packets together */
143         if (cmp > 0) {
144                 item_src->lastseg->next = pkt;
145                 item_src->lastseg = rte_pktmbuf_lastseg(pkt);
146                 /* update IP ID to the larger value */
147                 item_src->ip_id = ip_id;
148         } else {
149                 lastseg = rte_pktmbuf_lastseg(pkt);
150                 lastseg->next = item_src->firstseg;
151                 item_src->firstseg = pkt;
152                 /* update sent_seq to the smaller value */
153                 item_src->sent_seq = sent_seq;
154         }
155         item_src->nb_merged++;
156
157         /* update mbuf metadata for the merged packet */
158         pkt_head->nb_segs += pkt_tail->nb_segs;
159         pkt_head->pkt_len += pkt_tail->pkt_len;
160
161         return 1;
162 }
163
164 static inline int
165 check_seq_option(struct gro_tcp4_item *item,
166                 struct tcp_hdr *tcp_hdr,
167                 uint16_t tcp_hl,
168                 uint16_t tcp_dl,
169                 uint16_t ip_id,
170                 uint32_t sent_seq)
171 {
172         struct rte_mbuf *pkt0 = item->firstseg;
173         struct ipv4_hdr *ipv4_hdr0;
174         struct tcp_hdr *tcp_hdr0;
175         uint16_t tcp_hl0, tcp_dl0;
176         uint16_t len;
177
178         ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) +
179                         pkt0->l2_len);
180         tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len);
181         tcp_hl0 = pkt0->l4_len;
182
183         /* check if TCP option fields equal. If not, return 0. */
184         len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr);
185         if ((tcp_hl != tcp_hl0) ||
186                         ((len > 0) && (memcmp(tcp_hdr + 1,
187                                         tcp_hdr0 + 1,
188                                         len) != 0)))
189                 return 0;
190
191         /* check if the two packets are neighbors */
192         tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0;
193         if ((sent_seq == (item->sent_seq + tcp_dl0)) &&
194                         (ip_id == (item->ip_id + 1)))
195                 /* append the new packet */
196                 return 1;
197         else if (((sent_seq + tcp_dl) == item->sent_seq) &&
198                         ((ip_id + item->nb_merged) == item->ip_id))
199                 /* pre-pend the new packet */
200                 return -1;
201         else
202                 return 0;
203 }
204
205 static inline uint32_t
206 find_an_empty_item(struct gro_tcp4_tbl *tbl)
207 {
208         uint32_t i;
209         uint32_t max_item_num = tbl->max_item_num;
210
211         for (i = 0; i < max_item_num; i++)
212                 if (tbl->items[i].firstseg == NULL)
213                         return i;
214         return INVALID_ARRAY_INDEX;
215 }
216
217 static inline uint32_t
218 find_an_empty_key(struct gro_tcp4_tbl *tbl)
219 {
220         uint32_t i;
221         uint32_t max_key_num = tbl->max_key_num;
222
223         for (i = 0; i < max_key_num; i++)
224                 if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX)
225                         return i;
226         return INVALID_ARRAY_INDEX;
227 }
228
229 static inline uint32_t
230 insert_new_item(struct gro_tcp4_tbl *tbl,
231                 struct rte_mbuf *pkt,
232                 uint16_t ip_id,
233                 uint32_t sent_seq,
234                 uint32_t prev_idx,
235                 uint64_t start_time)
236 {
237         uint32_t item_idx;
238
239         item_idx = find_an_empty_item(tbl);
240         if (item_idx == INVALID_ARRAY_INDEX)
241                 return INVALID_ARRAY_INDEX;
242
243         tbl->items[item_idx].firstseg = pkt;
244         tbl->items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt);
245         tbl->items[item_idx].start_time = start_time;
246         tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
247         tbl->items[item_idx].sent_seq = sent_seq;
248         tbl->items[item_idx].ip_id = ip_id;
249         tbl->items[item_idx].nb_merged = 1;
250         tbl->item_num++;
251
252         /* if the previous packet exists, chain the new one with it */
253         if (prev_idx != INVALID_ARRAY_INDEX) {
254                 tbl->items[item_idx].next_pkt_idx =
255                         tbl->items[prev_idx].next_pkt_idx;
256                 tbl->items[prev_idx].next_pkt_idx = item_idx;
257         }
258
259         return item_idx;
260 }
261
262 static inline uint32_t
263 delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
264                 uint32_t prev_item_idx)
265 {
266         uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
267
268         /* set NULL to firstseg to indicate it's an empty item */
269         tbl->items[item_idx].firstseg = NULL;
270         tbl->item_num--;
271         if (prev_item_idx != INVALID_ARRAY_INDEX)
272                 tbl->items[prev_item_idx].next_pkt_idx = next_idx;
273
274         return next_idx;
275 }
276
277 static inline uint32_t
278 insert_new_key(struct gro_tcp4_tbl *tbl,
279                 struct tcp4_key *key_src,
280                 uint32_t item_idx)
281 {
282         struct tcp4_key *key_dst;
283         uint32_t key_idx;
284
285         key_idx = find_an_empty_key(tbl);
286         if (key_idx == INVALID_ARRAY_INDEX)
287                 return INVALID_ARRAY_INDEX;
288
289         key_dst = &(tbl->keys[key_idx].key);
290
291         ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr));
292         ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr));
293         key_dst->ip_src_addr = key_src->ip_src_addr;
294         key_dst->ip_dst_addr = key_src->ip_dst_addr;
295         key_dst->recv_ack = key_src->recv_ack;
296         key_dst->src_port = key_src->src_port;
297         key_dst->dst_port = key_src->dst_port;
298
299         /* non-INVALID_ARRAY_INDEX value indicates this key is valid */
300         tbl->keys[key_idx].start_index = item_idx;
301         tbl->key_num++;
302
303         return key_idx;
304 }
305
306 static inline int
307 is_same_key(struct tcp4_key k1, struct tcp4_key k2)
308 {
309         if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
310                 return 0;
311
312         if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
313                 return 0;
314
315         return ((k1.ip_src_addr == k2.ip_src_addr) &&
316                         (k1.ip_dst_addr == k2.ip_dst_addr) &&
317                         (k1.recv_ack == k2.recv_ack) &&
318                         (k1.src_port == k2.src_port) &&
319                         (k1.dst_port == k2.dst_port));
320 }
321
322 /*
323  * update packet length for the flushed packet.
324  */
325 static inline void
326 update_header(struct gro_tcp4_item *item)
327 {
328         struct ipv4_hdr *ipv4_hdr;
329         struct rte_mbuf *pkt = item->firstseg;
330
331         ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
332                         pkt->l2_len);
333         ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len -
334                         pkt->l2_len);
335 }
336
337 int32_t
338 gro_tcp4_reassemble(struct rte_mbuf *pkt,
339                 struct gro_tcp4_tbl *tbl,
340                 uint64_t start_time)
341 {
342         struct ether_hdr *eth_hdr;
343         struct ipv4_hdr *ipv4_hdr;
344         struct tcp_hdr *tcp_hdr;
345         uint32_t sent_seq;
346         uint16_t tcp_dl, ip_id;
347
348         struct tcp4_key key;
349         uint32_t cur_idx, prev_idx, item_idx;
350         uint32_t i, max_key_num;
351         int cmp;
352
353         eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
354         ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
355         tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
356
357         /*
358          * if FIN, SYN, RST, PSH, URG, ECE or
359          * CWR is set, return immediately.
360          */
361         if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
362                 return -1;
363         /* if payload length is 0, return immediately */
364         tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len -
365                 pkt->l4_len;
366         if (tcp_dl == 0)
367                 return -1;
368
369         ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
370         sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
371
372         ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr));
373         ether_addr_copy(&(eth_hdr->d_addr), &(key.eth_daddr));
374         key.ip_src_addr = ipv4_hdr->src_addr;
375         key.ip_dst_addr = ipv4_hdr->dst_addr;
376         key.src_port = tcp_hdr->src_port;
377         key.dst_port = tcp_hdr->dst_port;
378         key.recv_ack = tcp_hdr->recv_ack;
379
380         /* search for a key */
381         max_key_num = tbl->max_key_num;
382         for (i = 0; i < max_key_num; i++) {
383                 if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) &&
384                                 is_same_key(tbl->keys[i].key, key))
385                         break;
386         }
387
388         /* can't find a key, so insert a new key and a new item. */
389         if (i == tbl->max_key_num) {
390                 item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq,
391                                 INVALID_ARRAY_INDEX, start_time);
392                 if (item_idx == INVALID_ARRAY_INDEX)
393                         return -1;
394                 if (insert_new_key(tbl, &key, item_idx) ==
395                                 INVALID_ARRAY_INDEX) {
396                         /*
397                          * fail to insert a new key, so
398                          * delete the inserted item
399                          */
400                         delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
401                         return -1;
402                 }
403                 return 0;
404         }
405
406         /* traverse all packets in the item group to find one to merge */
407         cur_idx = tbl->keys[i].start_index;
408         prev_idx = cur_idx;
409         do {
410                 cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
411                                 pkt->l4_len, tcp_dl, ip_id, sent_seq);
412                 if (cmp) {
413                         if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
414                                                 pkt, ip_id,
415                                                 sent_seq, cmp))
416                                 return 1;
417                         /*
418                          * fail to merge two packets since the packet
419                          * length will be greater than the max value.
420                          * So insert the packet into the item group.
421                          */
422                         if (insert_new_item(tbl, pkt, ip_id, sent_seq,
423                                                 prev_idx, start_time) ==
424                                         INVALID_ARRAY_INDEX)
425                                 return -1;
426                         return 0;
427                 }
428                 prev_idx = cur_idx;
429                 cur_idx = tbl->items[cur_idx].next_pkt_idx;
430         } while (cur_idx != INVALID_ARRAY_INDEX);
431
432         /*
433          * can't find a packet in the item group to merge,
434          * so insert the packet into the item group.
435          */
436         if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx,
437                                 start_time) == INVALID_ARRAY_INDEX)
438                 return -1;
439
440         return 0;
441 }
442
443 uint16_t
444 gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
445                 uint64_t flush_timestamp,
446                 struct rte_mbuf **out,
447                 uint16_t nb_out)
448 {
449         uint16_t k = 0;
450         uint32_t i, j;
451         uint32_t max_key_num = tbl->max_key_num;
452
453         for (i = 0; i < max_key_num; i++) {
454                 /* all keys have been checked, return immediately */
455                 if (tbl->key_num == 0)
456                         return k;
457
458                 j = tbl->keys[i].start_index;
459                 while (j != INVALID_ARRAY_INDEX) {
460                         if (tbl->items[j].start_time <= flush_timestamp) {
461                                 out[k++] = tbl->items[j].firstseg;
462                                 if (tbl->items[j].nb_merged > 1)
463                                         update_header(&(tbl->items[j]));
464                                 /*
465                                  * delete the item and get
466                                  * the next packet index
467                                  */
468                                 j = delete_item(tbl, j,
469                                                 INVALID_ARRAY_INDEX);
470
471                                 /*
472                                  * delete the key as all of
473                                  * packets are flushed
474                                  */
475                                 if (j == INVALID_ARRAY_INDEX) {
476                                         tbl->keys[i].start_index =
477                                                 INVALID_ARRAY_INDEX;
478                                         tbl->key_num--;
479                                 } else
480                                         /* update start_index of the key */
481                                         tbl->keys[i].start_index = j;
482
483                                 if (k == nb_out)
484                                         return k;
485                         } else
486                                 /*
487                                  * left packets of this key won't be
488                                  * timeout, so go to check other keys.
489                                  */
490                                 break;
491                 }
492         }
493         return k;
494 }
495
496 uint32_t
497 gro_tcp4_tbl_pkt_count(void *tbl)
498 {
499         struct gro_tcp4_tbl *gro_tbl = tbl;
500
501         if (gro_tbl)
502                 return gro_tbl->item_num;
503
504         return 0;
505 }