Initial commit of vpp code.
[vpp.git] / vnet / vnet / ip / tcp.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/tcp.c: tcp protocol
17  *
18  * Copyright (c) 2011 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/ip/ip.h>
41 #include <vnet/ip/tcp.h>
42 #include <math.h>
43
44 static u8 my_zero_mask_table[256] = {
45   [0xf0] = (1 << 1),
46   [0x0f] = (1 << 0),
47   [0xff] = (1 << 0) | (1 << 1),
48 };
49
50 static_always_inline u32 my_zero_mask (u32 x)
51 {
52   return ((my_zero_mask_table[(x >> 0) & 0xff] << 0)
53           | (my_zero_mask_table[(x >> 8) & 0xff] << 2));
54 }
55
56 static u8 my_first_set_table[256] = {
57   [0x00] = 4,
58   [0xf0] = 1,
59   [0x0f] = 0,
60   [0xff] = 0,
61 };
62
63 static_always_inline u32 my_first_set (u32 zero_mask)
64 {
65   u8 r0 = my_first_set_table[(zero_mask >> 0) & 0xff];
66   u8 r1 = 2 + my_first_set_table[(zero_mask >> 8) & 0xff];
67   return r0 != 4 ? r0 : r1;
68 }
69
70 static_always_inline void
71 ip4_tcp_udp_address_x4_set_from_headers (ip4_tcp_udp_address_x4_t * a,
72                                          ip4_header_t * ip,
73                                          tcp_header_t * tcp,
74                                          u32 i)
75 {
76   a->src.as_ip4_address[i] = ip->src_address;
77   a->dst.as_ip4_address[i] = ip->dst_address;
78   a->ports.as_ports[i].as_u32 = tcp->ports.src_and_dst;
79 }
80
81 static_always_inline void
82 ip4_tcp_udp_address_x4_copy_and_invalidate (ip4_tcp_udp_address_x4_t * dst,
83                                             ip4_tcp_udp_address_x4_t * src,
84                                             u32 dst_i, u32 src_i)
85 {
86 #define _(d,s) d = s; s = 0;
87   _ (dst->src.as_ip4_address[dst_i].as_u32, src->src.as_ip4_address[src_i].as_u32);
88   _ (dst->dst.as_ip4_address[dst_i].as_u32, src->dst.as_ip4_address[src_i].as_u32);
89   _ (dst->ports.as_ports[dst_i].as_u32, src->ports.as_ports[src_i].as_u32);
90 #undef _
91 }
92
93 static_always_inline void
94 ip4_tcp_udp_address_x4_invalidate (ip4_tcp_udp_address_x4_t * a, u32 i)
95 {
96   a->src.as_ip4_address[i].as_u32 = 0;
97   a->dst.as_ip4_address[i].as_u32 = 0;
98   a->ports.as_ports[i].as_u32 = 0;
99 }
100
101 static_always_inline uword
102 ip4_tcp_udp_address_x4_is_valid (ip4_tcp_udp_address_x4_t * a, u32 i)
103 {
104   return !(a->src.as_ip4_address[i].as_u32 == 0
105            && a->dst.as_ip4_address[i].as_u32 == 0
106            && a->ports.as_ports[i].as_u32 == 0);
107 }
108
109 #ifdef TCP_HAVE_VEC128
110 static_always_inline uword
111 ip4_tcp_udp_address_x4_match_helper (ip4_tcp_udp_address_x4_t * ax4,
112                                      u32x4 src, u32x4 dst, u32x4 ports)
113 {
114   u32x4 r;
115   u32 m;
116
117   r = u32x4_is_equal (src, ax4->src.as_u32x4);
118   r &= u32x4_is_equal (dst, ax4->dst.as_u32x4);
119   r &= u32x4_is_equal (ports, ax4->ports.as_u32x4);
120
121   /* At this point r will be either all zeros (if nothing matched)
122      or have 32 1s in the position that did match. */
123   m = u8x16_compare_byte_mask ((u8x16) r);
124
125   return m;
126 }
127
128 static_always_inline uword
129 ip4_tcp_udp_address_x4_match (ip4_tcp_udp_address_x4_t * ax4,
130                               ip4_header_t * ip,
131                               tcp_header_t * tcp)
132 {
133   u32x4 src = u32x4_splat (ip->src_address.as_u32);
134   u32x4 dst = u32x4_splat (ip->dst_address.as_u32);
135   u32x4 ports = u32x4_splat (tcp->ports.src_and_dst);
136   return my_first_set (ip4_tcp_udp_address_x4_match_helper (ax4, src, dst, ports));
137 }
138
139 static_always_inline uword
140 ip4_tcp_udp_address_x4_first_empty (ip4_tcp_udp_address_x4_t * ax4)
141 {
142   u32x4 zero = {0};
143   return my_first_set (ip4_tcp_udp_address_x4_match_helper (ax4, zero, zero, zero));
144 }
145
146 static_always_inline uword
147 ip4_tcp_udp_address_x4_empty_mask (ip4_tcp_udp_address_x4_t * ax4)
148 {
149   u32x4 zero = {0};
150   return my_zero_mask (ip4_tcp_udp_address_x4_match_helper (ax4, zero, zero, zero));
151 }
152 #else /* TCP_HAVE_VEC128 */
153 static_always_inline uword
154 ip4_tcp_udp_address_x4_match_helper (ip4_tcp_udp_address_x4_t * ax4,
155                                      u32 src, u32 dst, u32 ports)
156 {
157   u32 r0, r1, r2, r3;
158
159 #define _(i)                                            \
160   r##i = (src == ax4->src.as_ip4_address[i].as_u32      \
161           && dst == ax4->dst.as_ip4_address[i].as_u32   \
162           && ports == ax4->ports.as_ports[i].as_u32)
163
164   _ (0);
165   _ (1);
166   _ (2);
167   _ (3);
168
169 #undef _
170
171   return (((r0 ? 0xf : 0x0) << 0)
172           | ((r1 ? 0xf : 0x0) << 4)
173           | ((r2 ? 0xf : 0x0) << 8)
174           | ((r3 ? 0xf : 0x0) << 12));
175 }
176
177 static_always_inline uword
178 ip4_tcp_udp_address_x4_match (ip4_tcp_udp_address_x4_t * ax4,
179                               ip4_header_t * ip,
180                               tcp_header_t * tcp)
181 {
182   return my_first_set (ip4_tcp_udp_address_x4_match_helper (ax4,
183                                                             ip->src_address.as_u32,
184                                                             ip->dst_address.as_u32,
185                                                             tcp->ports.src_and_dst));
186 }
187
188 static_always_inline uword
189 ip4_tcp_udp_address_x4_first_empty (ip4_tcp_udp_address_x4_t * ax4)
190 {
191   return my_first_set (ip4_tcp_udp_address_x4_match_helper (ax4, 0, 0, 0));
192 }
193
194 static_always_inline uword
195 ip4_tcp_udp_address_x4_empty_mask (ip4_tcp_udp_address_x4_t * ax4)
196 {
197   return my_zero_mask (ip4_tcp_udp_address_x4_match_helper (ax4, 0, 0, 0));
198 }
199 #endif
200
201 static u8 * format_ip4_tcp_udp_address_x4 (u8 * s, va_list * va)
202 {
203   ip4_tcp_udp_address_x4_t * a = va_arg (*va, ip4_tcp_udp_address_x4_t *);  
204   u32 ai = va_arg (*va, u32);
205   ASSERT (ai < 4);
206
207   s = format (s, "%U:%d -> %U:%d",
208               format_ip4_address, &a->src.as_ip4_address[ai],
209               clib_net_to_host_u16 (a->ports.as_ports[ai].src),
210               format_ip4_address, &a->dst.as_ip4_address[ai],
211               clib_net_to_host_u16 (a->ports.as_ports[ai].dst));
212
213   return s;
214 }
215
216 static_always_inline void
217 ip6_tcp_udp_address_x4_set_from_headers (ip6_tcp_udp_address_x4_t * a,
218                                          ip6_header_t * ip,
219                                          tcp_header_t * tcp,
220                                          u32 i)
221 {
222   a->src.as_u32[0][i] = ip->src_address.as_u32[0];
223   a->src.as_u32[1][i] = ip->src_address.as_u32[1];
224   a->src.as_u32[2][i] = ip->src_address.as_u32[2];
225   a->src.as_u32[3][i] = ip->src_address.as_u32[3];
226   a->dst.as_u32[0][i] = ip->dst_address.as_u32[0];
227   a->dst.as_u32[1][i] = ip->dst_address.as_u32[1];
228   a->dst.as_u32[2][i] = ip->dst_address.as_u32[2];
229   a->dst.as_u32[3][i] = ip->dst_address.as_u32[3];
230   a->ports.as_ports[i].as_u32 = tcp->ports.src_and_dst;
231 }
232
233 static_always_inline void
234 ip6_tcp_udp_address_x4_copy_and_invalidate (ip6_tcp_udp_address_x4_t * dst,
235                                             ip6_tcp_udp_address_x4_t * src,
236                                             u32 dst_i, u32 src_i)
237 {
238 #define _(d,s) d = s; s = 0;
239   _ (dst->src.as_u32[0][dst_i], src->src.as_u32[0][src_i]);
240   _ (dst->src.as_u32[1][dst_i], src->src.as_u32[1][src_i]);
241   _ (dst->src.as_u32[2][dst_i], src->src.as_u32[2][src_i]);
242   _ (dst->src.as_u32[3][dst_i], src->src.as_u32[3][src_i]);
243   _ (dst->dst.as_u32[0][dst_i], src->dst.as_u32[0][src_i]);
244   _ (dst->dst.as_u32[1][dst_i], src->dst.as_u32[1][src_i]);
245   _ (dst->dst.as_u32[2][dst_i], src->dst.as_u32[2][src_i]);
246   _ (dst->dst.as_u32[3][dst_i], src->dst.as_u32[3][src_i]);
247   _ (dst->ports.as_ports[dst_i].as_u32, src->ports.as_ports[src_i].as_u32);
248 #undef _
249 }
250
251 static_always_inline void
252 ip6_tcp_udp_address_x4_invalidate (ip6_tcp_udp_address_x4_t * a, u32 i)
253 {
254   a->src.as_u32[0][i] = 0;
255   a->src.as_u32[1][i] = 0;
256   a->src.as_u32[2][i] = 0;
257   a->src.as_u32[3][i] = 0;
258   a->dst.as_u32[0][i] = 0;
259   a->dst.as_u32[1][i] = 0;
260   a->dst.as_u32[2][i] = 0;
261   a->dst.as_u32[3][i] = 0;
262   a->ports.as_ports[i].as_u32 = 0;
263 }
264
265 static_always_inline uword
266 ip6_tcp_udp_address_x4_is_valid (ip6_tcp_udp_address_x4_t * a, u32 i)
267 {
268   return !(a->src.as_u32[0][i] == 0
269            && a->src.as_u32[1][i] == 0
270            && a->src.as_u32[2][i] == 0
271            && a->src.as_u32[3][i] == 0
272            && a->dst.as_u32[0][i] == 0
273            && a->dst.as_u32[1][i] == 0
274            && a->dst.as_u32[2][i] == 0
275            && a->dst.as_u32[3][i] == 0
276            && a->ports.as_ports[i].as_u32 == 0);
277 }
278
279 #ifdef TCP_HAVE_VEC128
280 static_always_inline uword
281 ip6_tcp_udp_address_x4_match_helper (ip6_tcp_udp_address_x4_t * ax4,
282                                      u32x4 src0, u32x4 src1, u32x4 src2, u32x4 src3,
283                                      u32x4 dst0, u32x4 dst1, u32x4 dst2, u32x4 dst3,
284                                      u32x4 ports)
285 {
286   u32x4 r;
287   u32 m;
288
289   r = u32x4_is_equal (src0, ax4->src.as_u32x4[0]);
290   r &= u32x4_is_equal (src1, ax4->src.as_u32x4[1]);
291   r &= u32x4_is_equal (src2, ax4->src.as_u32x4[2]);
292   r &= u32x4_is_equal (src3, ax4->src.as_u32x4[3]);
293   r &= u32x4_is_equal (dst0, ax4->dst.as_u32x4[0]);
294   r &= u32x4_is_equal (dst1, ax4->dst.as_u32x4[1]);
295   r &= u32x4_is_equal (dst2, ax4->dst.as_u32x4[2]);
296   r &= u32x4_is_equal (dst3, ax4->dst.as_u32x4[3]);
297   r &= u32x4_is_equal (ports, ax4->ports.as_u32x4);
298
299   /* At this point r will be either all zeros (if nothing matched)
300      or have 32 1s in the position that did match. */
301   m = u8x16_compare_byte_mask ((u8x16) r);
302
303   return m;
304 }
305
306 static_always_inline uword
307 ip6_tcp_udp_address_x4_match (ip6_tcp_udp_address_x4_t * ax4,
308                               ip6_header_t * ip,
309                               tcp_header_t * tcp)
310 {
311   u32x4 src0 = u32x4_splat (ip->src_address.as_u32[0]);
312   u32x4 src1 = u32x4_splat (ip->src_address.as_u32[1]);
313   u32x4 src2 = u32x4_splat (ip->src_address.as_u32[2]);
314   u32x4 src3 = u32x4_splat (ip->src_address.as_u32[3]);
315   u32x4 dst0 = u32x4_splat (ip->dst_address.as_u32[0]);
316   u32x4 dst1 = u32x4_splat (ip->dst_address.as_u32[1]);
317   u32x4 dst2 = u32x4_splat (ip->dst_address.as_u32[2]);
318   u32x4 dst3 = u32x4_splat (ip->dst_address.as_u32[3]);
319   u32x4 ports = u32x4_splat (tcp->ports.src_and_dst);
320   return my_first_set (ip6_tcp_udp_address_x4_match_helper (ax4,
321                                                             src0, src1, src2, src3,
322                                                             dst0, dst1, dst2, dst3,
323                                                             ports));
324 }
325
326 static_always_inline uword
327 ip6_tcp_udp_address_x4_first_empty (ip6_tcp_udp_address_x4_t * ax4)
328 {
329   u32x4 zero = {0};
330   return my_first_set (ip6_tcp_udp_address_x4_match_helper (ax4,
331                                                             zero, zero, zero, zero,
332                                                             zero, zero, zero, zero,
333                                                             zero));
334 }
335
336 static_always_inline uword
337 ip6_tcp_udp_address_x4_empty_mask (ip6_tcp_udp_address_x4_t * ax4)
338 {
339   u32x4 zero = {0};
340   return my_zero_mask (ip6_tcp_udp_address_x4_match_helper (ax4,
341                                                             zero, zero, zero, zero,
342                                                             zero, zero, zero, zero,
343                                                             zero));
344 }
345 #else /* TCP_HAVE_VEC128 */
346 static_always_inline uword
347 ip6_tcp_udp_address_x4_match_helper (ip6_tcp_udp_address_x4_t * ax4,
348                                      u32 src0, u32 src1, u32 src2, u32 src3,
349                                      u32 dst0, u32 dst1, u32 dst2, u32 dst3,
350                                      u32 ports)
351 {
352   u32 r0, r1, r2, r3;
353
354 #define _(i)                                                    \
355   r##i = (src0 == ax4->src.as_u32[i][0]         \
356           && src1 == ax4->src.as_u32[i][1]      \
357           && src2 == ax4->src.as_u32[i][2]      \
358           && src3 == ax4->src.as_u32[i][3]      \
359           && dst0 == ax4->dst.as_u32[i][0]      \
360           && dst1 == ax4->dst.as_u32[i][1]      \
361           && dst2 == ax4->dst.as_u32[i][2]      \
362           && dst3 == ax4->dst.as_u32[i][3]      \
363           && ports == ax4->ports.as_ports[i].as_u32)
364
365   _ (0);
366   _ (1);
367   _ (2);
368   _ (3);
369
370 #undef _
371
372   return (((r0 ? 0xf : 0x0) << 0)
373           | ((r1 ? 0xf : 0x0) << 4)
374           | ((r2 ? 0xf : 0x0) << 8)
375           | ((r3 ? 0xf : 0x0) << 12));
376 }
377
378 static_always_inline uword
379 ip6_tcp_udp_address_x4_match (ip6_tcp_udp_address_x4_t * ax4,
380                               ip6_header_t * ip,
381                               tcp_header_t * tcp)
382 {
383   u32 src0 = ip->src_address.as_u32[0];
384   u32 src1 = ip->src_address.as_u32[1];
385   u32 src2 = ip->src_address.as_u32[2];
386   u32 src3 = ip->src_address.as_u32[3];
387   u32 dst0 = ip->dst_address.as_u32[0];
388   u32 dst1 = ip->dst_address.as_u32[1];
389   u32 dst2 = ip->dst_address.as_u32[2];
390   u32 dst3 = ip->dst_address.as_u32[3];
391   u32 ports = tcp->ports.src_and_dst;
392   return my_first_set (ip6_tcp_udp_address_x4_match_helper (ax4,
393                                                             src0, src1, src2, src3,
394                                                             dst0, dst1, dst2, dst3,
395                                                             ports));
396 }
397
398 static_always_inline uword
399 ip6_tcp_udp_address_x4_first_empty (ip6_tcp_udp_address_x4_t * ax4)
400 {
401   return my_first_set (ip6_tcp_udp_address_x4_match_helper (ax4,
402                                                             0, 0, 0, 0,
403                                                             0, 0, 0, 0,
404                                                             0));
405 }
406
407 static_always_inline uword
408 ip6_tcp_udp_address_x4_empty_mask (ip6_tcp_udp_address_x4_t * ax4)
409 {
410   return my_zero_mask (ip6_tcp_udp_address_x4_match_helper (ax4,
411                                                             0, 0, 0, 0,
412                                                             0, 0, 0, 0,
413                                                             0));
414 }
415 #endif /* ! TCP_HAVE_VEC128 */
416
417 static u8 * format_ip6_tcp_udp_address_x4 (u8 * s, va_list * va)
418 {
419   ip6_tcp_udp_address_x4_t * a = va_arg (*va, ip6_tcp_udp_address_x4_t *);  
420   u32 i, ai = va_arg (*va, u32);
421   ip6_address_t src, dst;
422
423   ASSERT (ai < 4);
424   for (i = 0; i < 4; i++)
425     {
426       src.as_u32[i] = a->src.as_u32[i][ai];
427       dst.as_u32[i] = a->dst.as_u32[i][ai];
428     }
429
430   s = format (s, "%U:%d -> %U:%d",
431               format_ip6_address, &src,
432               clib_net_to_host_u16 (a->ports.as_ports[ai].src),
433               format_ip6_address, &dst,
434               clib_net_to_host_u16 (a->ports.as_ports[ai].dst));
435
436   return s;
437 }
438
439 static_always_inline u32
440 find_oldest_timestamp_x4 (u32 * time_stamps, u32 now)
441 {
442   u32 dt0, dt_min0, i_min0;
443   u32 dt1, dt_min1, i_min1;
444
445   i_min0 = i_min1 = 0;
446   dt_min0 = now - time_stamps[0];
447   dt_min1 = now - time_stamps[2];
448   dt0 = now - time_stamps[1];
449   dt1 = now - time_stamps[3];
450
451   i_min0 += dt0 > dt_min0;
452   i_min1 += dt1 > dt_min1;
453
454   dt_min0 = i_min0 > 0 ? dt0 : dt_min0;
455   dt_min1 = i_min1 > 0 ? dt1 : dt_min1;
456
457   return dt_min0 > dt_min1 ? i_min0 : (2 + i_min1);
458 }
459
460 static_always_inline uword
461 tcp_round_trip_time_stats_is_valid (tcp_round_trip_time_stats_t * s)
462 { return s->count > 0; }
463
464 static_always_inline void
465 tcp_round_trip_time_stats_compute (tcp_round_trip_time_stats_t * s, f64 * r)
466 {
467   f64 ave, rms;
468   ASSERT (s->count > 0);
469   ave = s->sum / s->count;
470   rms = sqrt (s->sum2 / s->count - ave*ave);
471   r[0] = ave;
472   r[1] = rms;
473 }
474
475 typedef struct {
476   tcp_option_type_t type : 8;
477   u8 length;
478   u32 my_time_stamp, his_time_stamp;
479 } __attribute__ ((packed)) tcp_time_stamp_option_t;
480
481 typedef struct {
482   tcp_header_t header;
483
484   struct {
485     struct {
486       tcp_option_type_t type : 8;
487       u8 length;
488       u16 value;
489     } mss;
490
491     struct {
492       tcp_option_type_t type : 8;
493       u8 length;
494       u8 value;
495     } __attribute__ ((packed)) window_scale;
496
497     u8 nops[3];
498
499     tcp_time_stamp_option_t time_stamp;
500   } __attribute__ ((packed)) options;
501 } __attribute__ ((packed)) tcp_syn_packet_t;
502
503 typedef struct {
504   tcp_header_t header;
505
506   struct {
507     u8 nops[2];
508
509     tcp_time_stamp_option_t time_stamp;
510   } options;
511 } __attribute__ ((packed)) tcp_ack_packet_t;
512
513 typedef struct {
514   ip4_header_t ip4;
515   tcp_syn_packet_t tcp;
516 } ip4_tcp_syn_packet_t;
517
518 typedef struct {
519   ip4_header_t ip4;
520   tcp_ack_packet_t tcp;
521 } ip4_tcp_ack_packet_t;
522
523 typedef struct {
524   ip6_header_t ip6;
525   tcp_syn_packet_t tcp;
526 } ip6_tcp_syn_packet_t;
527
528 typedef struct {
529   ip6_header_t ip6;
530   tcp_ack_packet_t tcp;
531 } ip6_tcp_ack_packet_t;
532
533 static_always_inline void
534 ip4_tcp_packet_init (ip4_header_t * ip, u32 n_bytes)
535 {
536   ip->ip_version_and_header_length = 0x45;
537
538   ip->tos = ip4_main.host_config.tos;
539   ip->ttl = ip4_main.host_config.ttl;
540
541   /* No need to set fragment ID due to DF bit. */
542   ip->flags_and_fragment_offset = clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
543
544   ip->protocol = IP_PROTOCOL_TCP;
545
546   ip->length = clib_host_to_net_u16 (n_bytes);
547
548   ip->checksum = ip4_header_checksum (ip);
549 }
550
551 static_always_inline void
552 ip6_tcp_packet_init (ip6_header_t * ip, u32 n_bytes)
553 {
554   ip->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
555
556   ip->payload_length = clib_host_to_net_u16 (n_bytes - sizeof (ip[0]));
557
558   ip->hop_limit = ip6_main.host_config.ttl;
559 }
560
561 static_always_inline u32
562 tcp_time_now (tcp_main_t * tm, tcp_timer_type_t t)
563 {
564   ASSERT (t < ARRAY_LEN (tm->log2_clocks_per_tick));
565   return clib_cpu_time_now () >> tm->log2_clocks_per_tick[t];
566 }
567
568 static void
569 tcp_time_init (vlib_main_t * vm, tcp_main_t * tm)
570 {
571   int i;
572   f64 log2 = .69314718055994530941;
573
574   for (i = 0; i < ARRAY_LEN (tm->log2_clocks_per_tick); i++)
575     {
576       static f64 t[] = {
577 #define _(f,r) r,
578         foreach_tcp_timer
579 #undef _
580       };
581       tm->log2_clocks_per_tick[i] =
582         flt_round_nearest (log (t[i] / vm->clib_time.seconds_per_clock) / log2);
583       tm->secs_per_tick[i] = vm->clib_time.seconds_per_clock * (1 << tm->log2_clocks_per_tick[i]);
584     }
585 }
586
/* Global TCP state shared by the code in this file (timer scaling,
   hash seeds and masks, connection tables, listeners). */
tcp_main_t tcp_main;
588
/* Next-node indices a packet can be dispatched to by the TCP lookup
   node.  TCP_LOOKUP_N_NEXT is the number of entries, not a real next. */
typedef enum
{
  TCP_LOOKUP_NEXT_DROP,
  TCP_LOOKUP_NEXT_PUNT,
  TCP_LOOKUP_NEXT_LISTEN_SYN,
  TCP_LOOKUP_NEXT_LISTEN_ACK,
  TCP_LOOKUP_NEXT_CONNECT_SYN_ACK,
  TCP_LOOKUP_NEXT_ESTABLISHED,
  TCP_LOOKUP_N_NEXT,
} tcp_lookup_next_t;
598
/* X-macro listing every TCP error/counter as _ (SYMBOL, "description").
   Users define _ before expanding it; with
   _(sym,str) = TCP_ERROR_##sym it generates the tcp_error_t enum, so
   the order of the entries here fixes the enum values. */
#define foreach_tcp_error                                               \
  _ (NONE, "no error")                                                  \
  _ (LOOKUP_DROPS, "lookup drops")                                      \
  _ (LISTEN_RESPONSES, "listen responses sent")                         \
  _ (CONNECTS_SENT, "connects sent")                                    \
  _ (LISTENS_ESTABLISHED, "listens connected")                          \
  _ (UNEXPECTED_SEQ_NUMBER, "unexpected sequence number drops")         \
  _ (UNEXPECTED_ACK_NUMBER, "unexpected acknowledgment number drops")   \
  _ (CONNECTS_ESTABLISHED, "connects established")                      \
  _ (NO_LISTENER_FOR_PORT, "no listener for port")                      \
  _ (WRONG_LOCAL_ADDRESS_FOR_PORT, "wrong local address for port")      \
  _ (ACKS_SENT, "acks sent for established connections")                \
  _ (NO_DATA, "acks with no data")                                      \
  _ (FINS_RECEIVED, "fins received")                                    \
  _ (SEGMENT_AFTER_FIN, "segments dropped after fin received")          \
  _ (CONNECTIONS_CLOSED, "connections closed")
615
616 typedef enum {
617 #define _(sym,str) TCP_ERROR_##sym,
618   foreach_tcp_error
619 #undef _
620   TCP_N_ERROR,
621 } tcp_error_t;
622
623 #ifdef TCP_HAVE_VEC128
624 static_always_inline u32x4 u32x4_splat_x2 (u32 x)
625 {
626   u32x4 r = u32x4_set0 (x);
627   return u32x4_interleave_lo (r, r);
628 }
629
630 static_always_inline u32x4 u32x4_set_x2 (u32 x, u32 y)
631 {
632   u32x4 r0 = u32x4_set0 (x);
633   u32x4 r1 = u32x4_set0 (y);
634   return u32x4_interleave_lo (r0, r1);
635 }
636
/* Extract 32-bit lane I from vector X.
   FIXME: implemented directly with a compiler builtin whose name
   (__builtin_ia32_...) suggests it is specific to x86 targets; the
   result comes from a signed (i32x4) view of X. */
#define u32x4_get(x,i)                                  \
  __builtin_ia32_vec_ext_v4si ((i32x4) (x), (int) (i))
640 #else /* TCP_HAVE_VEC128 */
641 #endif /* TCP_HAVE_VEC128 */
642
643 /* Dispatching on tcp/udp listeners (by dst port)
644    and tcp/udp connections (by src/dst address/port). */
645 static_always_inline uword
646 ip46_tcp_lookup (vlib_main_t * vm,
647                  vlib_node_runtime_t * node,
648                  vlib_frame_t * frame,
649                  uword is_ip6)
650 {
651   tcp_main_t * tm = &tcp_main;
652   ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
653   uword n_packets = frame->n_vectors;
654   u32 * from, * to_next;
655   u32 n_left_from, n_left_to_next, next, mini_now;
656   vlib_node_runtime_t * error_node = node;
657
658   from = vlib_frame_vector_args (frame);
659   n_left_from = n_packets;
660   next = node->cached_next_index;
661   mini_now = tcp_time_now (tm, TCP_TIMER_mini_connection);
662   
663   while (n_left_from > 0)
664     {
665       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
666
667       while (n_left_from > 0 && n_left_to_next > 0)
668         {
669           vlib_buffer_t * p0;
670           ip6_header_t * ip60;
671           ip4_header_t * ip40;
672           tcp_header_t * tcp0;
673           u32 bi0, imin0, iest0, li0;
674           tcp_connection_state_t state0;
675           u8 error0, next0;
676           u8 min_match0, est_match0, is_min_match0, is_est_match0;
677           u8 min_oldest0, est_first_empty0;
678       
679           bi0 = to_next[0] = from[0];
680
681           from += 1;
682           n_left_from -= 1;
683           to_next += 1;
684           n_left_to_next -= 1;
685       
686           p0 = vlib_get_buffer (vm, bi0);
687
688 #ifdef TCP_HAVE_VEC128
689           {
690             u32x4 a0, b0, c0;
691
692             a0 = tm->connection_hash_seeds[is_ip6][0].as_u32x4;
693             b0 = tm->connection_hash_seeds[is_ip6][1].as_u32x4;
694             c0 = tm->connection_hash_seeds[is_ip6][2].as_u32x4;
695
696             if (is_ip6)
697               {
698                 ip60 = vlib_buffer_get_current (p0);
699                 tcp0 = ip6_next_header (ip60);
700
701                 a0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[0]);
702                 b0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[1]);
703                 c0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[2]);
704
705                 hash_v3_mix_u32x (a0, b0, c0);
706
707                 a0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[3]);
708                 b0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[0]);
709                 c0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[1]);
710
711                 hash_v3_mix_u32x (a0, b0, c0);
712
713                 a0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[2]);
714                 b0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[3]);
715                 c0 ^= u32x4_splat_x2 (tcp0->ports.src_and_dst);
716               }
717             else
718               {
719                 ip40 = vlib_buffer_get_current (p0);
720                 tcp0 = ip4_next_header (ip40);
721
722                 a0 ^= u32x4_splat_x2 (ip40->src_address.as_u32);
723                 b0 ^= u32x4_splat_x2 (ip40->dst_address.as_u32);
724                 c0 ^= u32x4_splat_x2 (tcp0->ports.src_and_dst);
725               }
726
727             hash_v3_finalize_u32x (a0, b0, c0);
728
729             c0 &= tm->connection_hash_masks[is_ip6].as_u32x4;
730
731             imin0 = u32x4_get0 (c0);
732             iest0 = u32x4_get (c0, 1);
733           }
734 #else
735           {
736             u32 a00, a01, b00, b01, c00, c01;
737
738             a00 = tm->connection_hash_seeds[is_ip6][0].as_u32[0];
739             a01 = tm->connection_hash_seeds[is_ip6][0].as_u32[1];
740             b00 = tm->connection_hash_seeds[is_ip6][1].as_u32[0];
741             b01 = tm->connection_hash_seeds[is_ip6][1].as_u32[1];
742             c00 = tm->connection_hash_seeds[is_ip6][2].as_u32[0];
743             c01 = tm->connection_hash_seeds[is_ip6][2].as_u32[1];
744
745             if (is_ip6)
746               {
747                 ip60 = vlib_buffer_get_current (p0);
748                 tcp0 = ip6_next_header (ip60);
749
750                 a00 ^= ip60->src_address.as_u32[0];
751                 a01 ^= ip60->src_address.as_u32[0];
752                 b00 ^= ip60->src_address.as_u32[1];
753                 b01 ^= ip60->src_address.as_u32[1];
754                 c00 ^= ip60->src_address.as_u32[2];
755                 c01 ^= ip60->src_address.as_u32[2];
756
757                 hash_v3_mix32 (a00, b00, c00);
758                 hash_v3_mix32 (a01, b01, c01);
759
760                 a00 ^= ip60->src_address.as_u32[3];
761                 a01 ^= ip60->src_address.as_u32[3];
762                 b00 ^= ip60->dst_address.as_u32[0];
763                 b01 ^= ip60->dst_address.as_u32[0];
764                 c00 ^= ip60->dst_address.as_u32[1];
765                 c01 ^= ip60->dst_address.as_u32[1];
766
767                 hash_v3_mix32 (a00, b00, c00);
768                 hash_v3_mix32 (a01, b01, c01);
769
770                 a00 ^= ip60->dst_address.as_u32[2];
771                 a01 ^= ip60->dst_address.as_u32[2];
772                 b00 ^= ip60->dst_address.as_u32[3];
773                 b01 ^= ip60->dst_address.as_u32[3];
774                 c00 ^= tcp0->ports.src_and_dst;
775                 c01 ^= tcp0->ports.src_and_dst;
776               }
777             else
778               {
779                 ip40 = vlib_buffer_get_current (p0);
780                 tcp0 = ip4_next_header (ip40);
781
782                 a00 ^= ip40->src_address.as_u32;
783                 a01 ^= ip40->src_address.as_u32;
784                 b00 ^= ip40->dst_address.as_u32;
785                 b01 ^= ip40->dst_address.as_u32;
786                 c00 ^= tcp0->ports.src_and_dst;
787                 c01 ^= tcp0->ports.src_and_dst;
788               }
789
790             hash_v3_finalize32 (a00, b00, c00);
791             hash_v3_finalize32 (a01, b01, c01);
792
793             c00 &= tm->connection_hash_masks[is_ip6].as_u32[0];
794             c01 &= tm->connection_hash_masks[is_ip6].as_u32[1];
795
796             imin0 = c00;
797             iest0 = c01;
798           }
799 #endif
800
801           if (is_ip6)
802             {
803               ip6_tcp_udp_address_x4_and_timestamps_t * mina0;
804               ip6_tcp_udp_address_x4_t * esta0;
805
806               mina0 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin0);
807               esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest0);
808
809               min_match0 = ip6_tcp_udp_address_x4_match (&mina0->address_x4, ip60, tcp0);
810               est_match0 = ip6_tcp_udp_address_x4_match (esta0, ip60, tcp0);
811
812               min_oldest0 = find_oldest_timestamp_x4 (mina0->time_stamps, mini_now);
813               est_first_empty0 = ip6_tcp_udp_address_x4_first_empty (esta0);
814
815               if (PREDICT_FALSE (! est_match0 && est_first_empty0 >= 4 && ! min_match0))
816                 {
817                   /* Lookup in overflow hash. */
818                   ASSERT (0);
819                 }
820             }
821           else
822             {
823               ip4_tcp_udp_address_x4_and_timestamps_t * mina0;
824               ip4_tcp_udp_address_x4_t * esta0;
825
826               mina0 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin0);
827               esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest0);
828
829               min_match0 = ip4_tcp_udp_address_x4_match (&mina0->address_x4, ip40, tcp0);
830               est_match0 = ip4_tcp_udp_address_x4_match (esta0, ip40, tcp0);
831
832               min_oldest0 = find_oldest_timestamp_x4 (mina0->time_stamps, mini_now);
833               est_first_empty0 = ip4_tcp_udp_address_x4_first_empty (esta0);
834
835               if (PREDICT_FALSE (! est_match0 && est_first_empty0 >= 4 && ! min_match0))
836                 {
837                   /* Lookup in overflow hash. */
838                   ASSERT (0);
839                 }
840             }
841
842           is_min_match0 = min_match0 < 4;
843           is_est_match0 = est_match0 < 4;
844
845           imin0 = 4 * imin0 + (is_min_match0 ? min_match0 : min_oldest0);
846           iest0 = 4 * iest0 + (is_est_match0 ? est_match0 : est_first_empty0);
847
848           /* Should simultaneously not match both in mini and established connection tables. */
849           ASSERT (! (is_min_match0 && is_est_match0));
850
851           {
852             tcp_mini_connection_t * min0;
853             tcp_connection_t * est0;
854             tcp_sequence_pair_t * seq_pair0;
855             u8 flags0;
856
857             min0 = vec_elt_at_index (tm46->mini_connections, imin0);
858             est0 = vec_elt_at_index (tm46->established_connections, iest0);
859
860             if (min_match0 < 4)
861               {
862                 ASSERT (min0->state != TCP_CONNECTION_STATE_unused);
863                 ASSERT (min0->state != TCP_CONNECTION_STATE_established);
864               }
865
866             seq_pair0 = is_min_match0 ? &min0->sequence_numbers : &est0->sequence_numbers;
867
868             state0 = is_min_match0 ? min0->state : TCP_CONNECTION_STATE_unused;
869             state0 = is_est_match0 ? TCP_CONNECTION_STATE_established : state0;
870
871             vnet_buffer (p0)->ip.tcp.established_connection_index = iest0;
872             vnet_buffer (p0)->ip.tcp.mini_connection_index = imin0;
873             vnet_buffer (p0)->ip.tcp.listener_index = li0 = tm->listener_index_by_dst_port[tcp0->ports.dst];
874
875             flags0 = tcp0->flags & (TCP_FLAG_SYN | TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_FIN);
876
877             next0 = tm->disposition_by_state_and_flags[state0][flags0].next;
878             error0 = tm->disposition_by_state_and_flags[state0][flags0].error;
879
880             next0 = li0 != 0 ? next0 : TCP_LOOKUP_NEXT_PUNT;
881             error0 = li0 != 0 ? error0 : TCP_ERROR_NO_LISTENER_FOR_PORT;
882           }
883
884           p0->error = error_node->errors[error0];
885
886           if (PREDICT_FALSE (next0 != next))
887             {
888               to_next -= 1;
889               n_left_to_next += 1;
890
891               vlib_put_next_frame (vm, node, next, n_left_to_next);
892
893               next = next0;
894               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
895               to_next[0] = bi0;
896               to_next += 1;
897               n_left_to_next -= 1;
898             }
899         }
900   
901       vlib_put_next_frame (vm, node, next, n_left_to_next);
902     }
903
904   if (node->flags & VLIB_NODE_FLAG_TRACE)
905     /* FIXME */ ;
906
907   return frame->n_vectors;
908 }
909
910 static uword
911 ip4_tcp_lookup (vlib_main_t * vm,
912                 vlib_node_runtime_t * node,
913                 vlib_frame_t * frame)
914 { return ip46_tcp_lookup (vm, node, frame, /* is_ip6 */ 0); }
915
916 static uword
917 ip6_tcp_lookup (vlib_main_t * vm,
918                 vlib_node_runtime_t * node,
919                 vlib_frame_t * frame)
920 { return ip46_tcp_lookup (vm, node, frame, /* is_ip6 */ 1); }
921
922 static void
923 ip46_size_hash_tables (ip46_tcp_main_t * m)
924 {
925   m->mini_connection_hash_mask = pow2_mask (m->log2_n_mini_connection_hash_elts);
926   vec_validate_aligned (m->mini_connections,
927                         m->mini_connection_hash_mask,
928                         CLIB_CACHE_LINE_BYTES);
929
930   m->established_connection_hash_mask = pow2_mask (m->log2_n_established_connection_hash_elts);
931   vec_validate_aligned (m->established_connections,
932                         m->established_connection_hash_mask,
933                         CLIB_CACHE_LINE_BYTES);
934 }
935
936 static void
937 ip46_tcp_lookup_init (vlib_main_t * vm, tcp_main_t * tm, int is_ip6)
938 {
939   ip46_tcp_main_t * m = is_ip6 ? &tm->ip6 : &tm->ip4;
940
941   m->is_ip6 = is_ip6;
942
943   m->log2_n_mini_connection_hash_elts = 8;
944   m->log2_n_established_connection_hash_elts = 8;
945   ip46_size_hash_tables (m);
946
947   if (is_ip6)
948     {
949       vec_validate_aligned (tm->ip6_mini_connection_address_hash,
950                             m->mini_connection_hash_mask / 4,
951                             CLIB_CACHE_LINE_BYTES);
952       vec_validate_aligned (tm->ip6_established_connection_address_hash,
953                             m->established_connection_hash_mask / 4,
954                             CLIB_CACHE_LINE_BYTES);
955     }
956   else
957     {
958       vec_validate_aligned (tm->ip4_mini_connection_address_hash,
959                             m->mini_connection_hash_mask / 4,
960                             CLIB_CACHE_LINE_BYTES);
961       vec_validate_aligned (tm->ip4_established_connection_address_hash,
962                             m->established_connection_hash_mask / 4,
963                             CLIB_CACHE_LINE_BYTES);
964     }
965   tm->connection_hash_masks[is_ip6].as_u32[0] = m->mini_connection_hash_mask / 4;
966   tm->connection_hash_masks[is_ip6].as_u32[1] = m->established_connection_hash_mask / 4;
967 }
968
969 static void
970 tcp_lookup_init (vlib_main_t * vm, tcp_main_t * tm)
971 {
972   int is_ip6;
973
974   /* Initialize hash seeds. */
975   for (is_ip6 = 0; is_ip6 < 2; is_ip6++)
976     {
977       u32 * r = clib_random_buffer_get_data (&vm->random_buffer, 3 * 2 * sizeof (r[0]));
978       tm->connection_hash_seeds[is_ip6][0].as_u32[0] = r[0];
979       tm->connection_hash_seeds[is_ip6][0].as_u32[1] = r[1];
980       tm->connection_hash_seeds[is_ip6][1].as_u32[0] = r[2];
981       tm->connection_hash_seeds[is_ip6][1].as_u32[1] = r[3];
982       tm->connection_hash_seeds[is_ip6][2].as_u32[0] = r[4];
983       tm->connection_hash_seeds[is_ip6][2].as_u32[1] = r[5];
984
985       ip46_tcp_lookup_init (vm, tm, is_ip6);
986     }
987
988   {
989     tcp_listener_t * l;
990
991     pool_get_aligned (tm->listener_pool, l, CLIB_CACHE_LINE_BYTES);
992
993     /* Null listener must always have zero index. */
994     ASSERT (l - tm->listener_pool == 0);
995
996     memset (l, 0, sizeof (l[0]));
997
998     /* No adjacencies are valid. */
999     l->valid_local_adjacency_bitmap = 0;
1000
1001     vec_validate_init_empty (tm->listener_index_by_dst_port,
1002                              (1 << 16) - 1,
1003                              l - tm->listener_pool);
1004   }
1005
1006   /* Initialize disposition table. */
1007   {
1008     int i, j;
1009     for (i = 0; i < ARRAY_LEN (tm->disposition_by_state_and_flags); i++)
1010       for (j = 0; j < ARRAY_LEN (tm->disposition_by_state_and_flags[i]); j++)
1011         {
1012           tm->disposition_by_state_and_flags[i][j].next = TCP_LOOKUP_NEXT_DROP;
1013           tm->disposition_by_state_and_flags[i][j].error = TCP_ERROR_LOOKUP_DROPS;
1014         }
1015
1016 #define _(t,f,n,e)                                                      \
1017 do {                                                                    \
1018     tm->disposition_by_state_and_flags[TCP_CONNECTION_STATE_##t][f].next = (n); \
1019     tm->disposition_by_state_and_flags[TCP_CONNECTION_STATE_##t][f].error = (e); \
1020 } while (0)
1021
1022     /* SYNs for new connections -> tcp-listen. */
1023     _ (unused, TCP_FLAG_SYN,
1024        TCP_LOOKUP_NEXT_LISTEN_SYN, TCP_ERROR_NONE);
1025     _ (listen_ack_wait, TCP_FLAG_ACK,
1026        TCP_LOOKUP_NEXT_LISTEN_ACK, TCP_ERROR_NONE);
1027     _ (established, TCP_FLAG_ACK,
1028        TCP_LOOKUP_NEXT_ESTABLISHED, TCP_ERROR_NONE);
1029     _ (established, TCP_FLAG_FIN | TCP_FLAG_ACK,
1030        TCP_LOOKUP_NEXT_ESTABLISHED, TCP_ERROR_NONE);
1031
1032 #undef _
1033   }
1034
1035   /* IP4 packet templates. */
1036   {
1037     ip4_tcp_syn_packet_t ip4_syn, ip4_syn_ack;
1038     ip4_tcp_ack_packet_t ip4_ack, ip4_fin_ack, ip4_rst_ack;
1039     ip6_tcp_syn_packet_t ip6_syn, ip6_syn_ack;
1040     ip6_tcp_ack_packet_t ip6_ack, ip6_fin_ack, ip6_rst_ack;
1041
1042     memset (&ip4_syn, 0, sizeof (ip4_syn));
1043     memset (&ip4_syn_ack, 0, sizeof (ip4_syn_ack));
1044     memset (&ip4_ack, 0, sizeof (ip4_ack));
1045     memset (&ip4_fin_ack, 0, sizeof (ip4_fin_ack));
1046     memset (&ip4_rst_ack, 0, sizeof (ip4_rst_ack));
1047     memset (&ip6_syn, 0, sizeof (ip6_syn));
1048     memset (&ip6_syn_ack, 0, sizeof (ip6_syn_ack));
1049     memset (&ip6_ack, 0, sizeof (ip6_ack));
1050     memset (&ip6_fin_ack, 0, sizeof (ip6_fin_ack));
1051     memset (&ip6_rst_ack, 0, sizeof (ip6_rst_ack));
1052
1053     ip4_tcp_packet_init (&ip4_syn.ip4, sizeof (ip4_syn));
1054     ip4_tcp_packet_init (&ip4_syn_ack.ip4, sizeof (ip4_syn_ack));
1055     ip4_tcp_packet_init (&ip4_ack.ip4, sizeof (ip4_ack));
1056     ip4_tcp_packet_init (&ip4_fin_ack.ip4, sizeof (ip4_fin_ack));
1057     ip4_tcp_packet_init (&ip4_rst_ack.ip4, sizeof (ip4_rst_ack));
1058
1059     ip6_tcp_packet_init (&ip6_syn.ip6, sizeof (ip6_syn));
1060     ip6_tcp_packet_init (&ip6_syn_ack.ip6, sizeof (ip6_syn_ack));
1061     ip6_tcp_packet_init (&ip6_ack.ip6, sizeof (ip6_ack));
1062     ip6_tcp_packet_init (&ip6_fin_ack.ip6, sizeof (ip6_fin_ack));
1063     ip6_tcp_packet_init (&ip6_rst_ack.ip6, sizeof (ip6_rst_ack));
1064
1065     /* TCP header. */
1066     {
1067       u8 window_scale = 7;
1068       tcp_syn_packet_t * s = &ip4_syn.tcp;
1069       tcp_syn_packet_t * sa = &ip4_syn_ack.tcp;
1070       tcp_ack_packet_t * a = &ip4_ack.tcp;
1071       tcp_ack_packet_t * fa = &ip4_fin_ack.tcp;
1072       tcp_ack_packet_t * ra = &ip4_rst_ack.tcp;
1073
1074       s->header.tcp_header_u32s_and_reserved = (sizeof (s[0]) / sizeof (u32)) << 4;
1075       a->header.tcp_header_u32s_and_reserved = (sizeof (a[0]) / sizeof (u32)) << 4;
1076
1077       s->header.flags = TCP_FLAG_SYN;
1078       a->header.flags = TCP_FLAG_ACK;
1079
1080       s->header.window = clib_host_to_net_u16 (32 << (10 - window_scale));
1081       a->header.window = s->header.window;
1082
1083       s->options.mss.type = TCP_OPTION_MSS;
1084       s->options.mss.length = 4;
1085
1086       s->options.window_scale.type = TCP_OPTION_WINDOW_SCALE;
1087       s->options.window_scale.length = 3;
1088       s->options.window_scale.value = window_scale;
1089
1090       s->options.time_stamp.type = TCP_OPTION_TIME_STAMP;
1091       s->options.time_stamp.length = 10;
1092
1093       memset (&s->options.nops, TCP_OPTION_NOP, sizeof (s->options.nops));
1094
1095       /* SYN-ACK is same as SYN but with ACK flag set. */
1096       sa[0] = s[0];
1097       sa->header.flags |= TCP_FLAG_ACK;
1098
1099       a->options.time_stamp.type = TCP_OPTION_TIME_STAMP;
1100       a->options.time_stamp.length = 10;
1101       memset (&a->options.nops, TCP_OPTION_NOP, sizeof (a->options.nops));
1102
1103       /* {FIN,RST}-ACK are same as ACK but with {FIN,RST} flag set. */
1104       fa[0] = a[0];
1105       fa->header.flags |= TCP_FLAG_FIN;
1106       ra[0] = a[0];
1107       ra->header.flags |= TCP_FLAG_RST;
1108
1109       /* IP6 TCP headers are identical. */
1110       ip6_syn.tcp = s[0];
1111       ip6_syn_ack.tcp = sa[0];
1112       ip6_ack.tcp = a[0];
1113       ip6_fin_ack.tcp = fa[0];
1114       ip6_rst_ack.tcp = ra[0];
1115
1116       /* TCP checksums. */
1117       {
1118         ip_csum_t sum;
1119
1120         sum = clib_host_to_net_u32 (sizeof (ip4_ack.tcp) + (ip4_ack.ip4.protocol << 16));
1121         sum = ip_incremental_checksum (sum, &ip4_ack.tcp, sizeof (ip4_ack.tcp));
1122         ip4_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1123
1124         sum = clib_host_to_net_u32 (sizeof (ip4_fin_ack.tcp) + (ip4_fin_ack.ip4.protocol << 16));
1125         sum = ip_incremental_checksum (sum, &ip4_fin_ack.tcp, sizeof (ip4_fin_ack.tcp));
1126         ip4_fin_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1127
1128         sum = clib_host_to_net_u32 (sizeof (ip4_rst_ack.tcp) + (ip4_rst_ack.ip4.protocol << 16));
1129         sum = ip_incremental_checksum (sum, &ip4_rst_ack.tcp, sizeof (ip4_rst_ack.tcp));
1130         ip4_rst_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1131
1132         sum = clib_host_to_net_u32 (sizeof (ip4_syn.tcp) + (ip4_syn.ip4.protocol << 16));
1133         sum = ip_incremental_checksum (sum, &ip4_syn.tcp, sizeof (ip4_syn.tcp));
1134         ip4_syn.tcp.header.checksum = ~ ip_csum_fold (sum);
1135
1136         sum = clib_host_to_net_u32 (sizeof (ip4_syn_ack.tcp) + (ip4_syn_ack.ip4.protocol << 16));
1137         sum = ip_incremental_checksum (sum, &ip4_syn_ack.tcp, sizeof (ip4_syn_ack.tcp));
1138         ip4_syn_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1139
1140         sum = clib_host_to_net_u32 (sizeof (ip6_ack.tcp)) + ip6_ack.ip6.protocol;
1141         sum = ip_incremental_checksum (sum, &ip6_ack.tcp, sizeof (ip6_ack.tcp));
1142         ip6_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1143
1144         sum = clib_host_to_net_u32 (sizeof (ip6_fin_ack.tcp)) + ip6_fin_ack.ip6.protocol;
1145         sum = ip_incremental_checksum (sum, &ip6_fin_ack.tcp, sizeof (ip6_fin_ack.tcp));
1146         ip6_fin_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1147
1148         sum = clib_host_to_net_u32 (sizeof (ip6_rst_ack.tcp)) + ip6_rst_ack.ip6.protocol;
1149         sum = ip_incremental_checksum (sum, &ip6_rst_ack.tcp, sizeof (ip6_rst_ack.tcp));
1150         ip6_rst_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1151
1152         sum = clib_host_to_net_u32 (sizeof (ip6_syn.tcp)) + ip6_syn.ip6.protocol;
1153         sum = ip_incremental_checksum (sum, &ip6_syn.tcp, sizeof (ip6_syn.tcp));
1154         ip6_syn.tcp.header.checksum = ~ ip_csum_fold (sum);
1155
1156         sum = clib_host_to_net_u32 (sizeof (ip6_syn_ack.tcp)) + ip6_syn_ack.ip6.protocol;
1157         sum = ip_incremental_checksum (sum, &ip6_syn_ack.tcp, sizeof (ip6_syn_ack.tcp));
1158         ip6_syn_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1159       }
1160     }
1161
1162 #define _(t,x,n)                                                \
1163 do {                                                            \
1164   vlib_packet_template_init                                     \
1165     (vm,                                                        \
1166      &tm->ip4.packet_templates[t].vlib,                         \
1167      &x, sizeof (x),                                            \
1168      /* alloc chunk size */ VLIB_FRAME_SIZE,                    \
1169      (n));                                                      \
1170   tm->ip4.packet_templates[t].tcp_checksum_net_byte_order       \
1171     = x.tcp.header.checksum;                                    \
1172   tm->ip4.packet_templates[t].ip4_checksum_net_byte_order       \
1173     = x.ip4.checksum;                                           \
1174 } while (0)
1175
1176     _ (TCP_PACKET_TEMPLATE_SYN, ip4_syn, "ip4 tcp syn");
1177     _ (TCP_PACKET_TEMPLATE_SYN_ACK, ip4_syn_ack, "ip4 tcp syn-ack");
1178     _ (TCP_PACKET_TEMPLATE_ACK, ip4_ack, "ip4 tcp ack");
1179     _ (TCP_PACKET_TEMPLATE_FIN_ACK, ip4_fin_ack, "ip4 tcp fin-ack");
1180     _ (TCP_PACKET_TEMPLATE_RST_ACK, ip4_rst_ack, "ip4 tcp rst-ack");
1181
1182 #undef _
1183
1184 #define _(t,x,n)                                                \
1185 do {                                                            \
1186   vlib_packet_template_init                                     \
1187     (vm,                                                        \
1188      &tm->ip6.packet_templates[t].vlib,                         \
1189      &x, sizeof (x),                                            \
1190      /* alloc chunk size */ VLIB_FRAME_SIZE,                    \
1191      (n));                                                      \
1192   tm->ip6.packet_templates[t].tcp_checksum_net_byte_order       \
1193     = x.tcp.header.checksum;                                    \
1194   tm->ip6.packet_templates[t].ip4_checksum_net_byte_order       \
1195     = 0xdead;                                                   \
1196 } while (0)
1197
1198     _ (TCP_PACKET_TEMPLATE_SYN, ip6_syn, "ip6 tcp syn");
1199     _ (TCP_PACKET_TEMPLATE_SYN_ACK, ip6_syn_ack, "ip6 tcp syn-ack");
1200     _ (TCP_PACKET_TEMPLATE_ACK, ip6_ack, "ip6 tcp ack");
1201     _ (TCP_PACKET_TEMPLATE_FIN_ACK, ip6_fin_ack, "ip6 tcp fin-ack");
1202     _ (TCP_PACKET_TEMPLATE_RST_ACK, ip6_rst_ack, "ip6 tcp rst-ack");
1203
1204 #undef _
1205   }
1206 }
1207
1208 static char * tcp_error_strings[] = {
1209 #define _(sym,string) string,
1210   foreach_tcp_error
1211 #undef _
1212 };
1213
1214 VLIB_REGISTER_NODE (ip4_tcp_lookup_node,static) = {
1215   .function = ip4_tcp_lookup,
1216   .name = "ip4-tcp-lookup",
1217
1218   .vector_size = sizeof (u32),
1219
1220   .n_next_nodes = TCP_LOOKUP_N_NEXT,
1221   .next_nodes = {
1222     [TCP_LOOKUP_NEXT_DROP] = "error-drop",
1223     [TCP_LOOKUP_NEXT_PUNT] = "error-punt",
1224     [TCP_LOOKUP_NEXT_LISTEN_SYN] = "ip4-tcp-listen",
1225     [TCP_LOOKUP_NEXT_LISTEN_ACK] = "ip4-tcp-establish",
1226     [TCP_LOOKUP_NEXT_CONNECT_SYN_ACK] = "ip4-tcp-connect",
1227     [TCP_LOOKUP_NEXT_ESTABLISHED] = "ip4-tcp-established",
1228   },
1229
1230   .n_errors = TCP_N_ERROR,
1231   .error_strings = tcp_error_strings,
1232 };
1233
1234 VLIB_REGISTER_NODE (ip6_tcp_lookup_node,static) = {
1235   .function = ip6_tcp_lookup,
1236   .name = "ip6-tcp-lookup",
1237
1238   .vector_size = sizeof (u32),
1239
1240   .n_next_nodes = TCP_LOOKUP_N_NEXT,
1241   .next_nodes = {
1242     [TCP_LOOKUP_NEXT_DROP] = "error-drop",
1243     [TCP_LOOKUP_NEXT_PUNT] = "error-punt",
1244     [TCP_LOOKUP_NEXT_LISTEN_SYN] = "ip6-tcp-listen",
1245     [TCP_LOOKUP_NEXT_LISTEN_ACK] = "ip4-tcp-establish",
1246     [TCP_LOOKUP_NEXT_CONNECT_SYN_ACK] = "ip6-tcp-connect",
1247     [TCP_LOOKUP_NEXT_ESTABLISHED] = "ip6-tcp-established",
1248   },
1249
1250   .n_errors = TCP_N_ERROR,
1251   .error_strings = tcp_error_strings,
1252 };
1253
1254 static_always_inline void
1255 tcp_options_decode_for_syn (tcp_main_t * tm, tcp_mini_connection_t * m, tcp_header_t * tcp)
1256 {
1257   u8 * o = (void *) (tcp + 1);
1258   u32 n_bytes = (tcp->tcp_header_u32s_and_reserved >> 4) * sizeof (u32);
1259   u8 * e = o + n_bytes;
1260   tcp_mini_connection_t * tmpl = &tm->option_decode_mini_connection_template;
1261   tcp_option_type_t t;
1262   u8 i, l, * p;
1263   u8 * option_decode[16];
1264
1265   /* Initialize defaults. */
1266   option_decode[TCP_OPTION_MSS] = (u8 *) &tmpl->max_segment_size;
1267   option_decode[TCP_OPTION_WINDOW_SCALE] = (u8 *) &tmpl->window_scale;
1268   option_decode[TCP_OPTION_TIME_STAMP] = (u8 *) &tmpl->time_stamps.his_net_byte_order;
1269
1270   if (n_bytes > 0)
1271     {
1272 #define _                                                       \
1273 do {                                                            \
1274   t = o[0];                                                     \
1275   i = t >= ARRAY_LEN (option_decode) ? TCP_OPTION_END : t;      \
1276   option_decode[i] = o + 2;                                     \
1277   /* Skip nop; don't skip end; else length from packet. */      \
1278   l = t < 2 ? t : o[1];                                         \
1279   p = o + l;                                                    \
1280   o = p < e ? p : o;                                            \
1281 } while (0)
1282
1283       _; _; _;
1284       /* Fast path: NOP NOP TIMESTAMP. */
1285       if (o >= e) goto done;
1286       _; _;
1287       if (o >= e) goto done;
1288       _; _; _;
1289
1290 #undef _
1291
1292     done:;
1293     }
1294
1295   m->max_segment_size =
1296     clib_net_to_host_u16 (*(u16 *) option_decode[TCP_OPTION_MSS]);
1297   m->window_scale = *option_decode[TCP_OPTION_WINDOW_SCALE];
1298   m->time_stamps.his_net_byte_order = ((u32 *) option_decode[TCP_OPTION_TIME_STAMP])[0];
1299 }
1300
1301 static_always_inline u32
1302 tcp_options_decode_for_ack (tcp_main_t * tm, tcp_header_t * tcp,
1303                             u32 * his_time_stamp)
1304 {
1305   u8 * o = (void *) (tcp + 1);
1306   u32 n_bytes = (tcp->tcp_header_u32s_and_reserved >> 4) * sizeof (u32);
1307   u8 * e = o + n_bytes;
1308   tcp_option_type_t t;
1309   u8 i, l, * p;
1310   u8 * option_decode[16];
1311   u32 default_time_stamps[2];
1312
1313   /* Initialize defaults. */
1314   default_time_stamps[0] = default_time_stamps[1] = 0;
1315   option_decode[TCP_OPTION_TIME_STAMP] = (u8 *) &default_time_stamps;
1316
1317   if (n_bytes > 0)
1318     {
1319 #define _                                                       \
1320 do {                                                            \
1321   t = o[0];                                                     \
1322   i = t >= ARRAY_LEN (option_decode) ? TCP_OPTION_END : t;      \
1323   option_decode[i] = o + 2;                                     \
1324   /* Skip nop; don't skip end; else length from packet. */      \
1325   l = t < 2 ? t : o[1];                                         \
1326   p = o + l;                                                    \
1327   o = p < e ? p : o;                                            \
1328 } while (0)
1329
1330       _; _; _;
1331       /* Fast path: NOP NOP TIMESTAMP. */
1332       if (o >= e) goto done;
1333       _; _;
1334       if (o >= e) goto done;
1335       _; _; _;
1336 #undef _
1337
1338     done:;
1339     }
1340
1341   if (his_time_stamp)
1342     his_time_stamp[0] = ((u32 *) option_decode[TCP_OPTION_TIME_STAMP])[0];
1343
1344   return clib_net_to_host_u32 (((u32 *) option_decode[TCP_OPTION_TIME_STAMP])[1]);
1345 }
1346
1347 static void
1348 tcp_options_decode_init (tcp_main_t * tm)
1349 {
1350   tcp_mini_connection_t * m = &tm->option_decode_mini_connection_template;
1351
1352   memset (m, 0, sizeof (m[0]));
1353   m->max_segment_size = clib_host_to_net_u16 (576 - 40);
1354   m->window_scale = 0;
1355   m->time_stamps.his_net_byte_order = 0;
1356 }
1357
1358 /* Initialize target buffer as "related" to given buffer. */
1359 always_inline void
1360 vlib_buffer_copy_shared_fields (vlib_main_t * vm, vlib_buffer_t * b, u32 bi_target)
1361 {
1362   vlib_buffer_t * b_target = vlib_get_buffer (vm, bi_target);
1363   vnet_buffer (b_target)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_RX];
1364   b_target->trace_index = b->trace_index;
1365   b_target->flags |= b->flags & VLIB_BUFFER_IS_TRACED;
1366 }
1367
/* Next node indices for the tcp-listen nodes. */
typedef enum {
  /* Received SYN, disposed of once it has been answered. */
  TCP_LISTEN_NEXT_DROP = 0,
  /* Generated reply packet. */
  TCP_LISTEN_NEXT_REPLY = 1,
  /* Number of next nodes; keep last. */
  TCP_LISTEN_N_NEXT,
} tcp_listen_next_t;
1373
1374 static_always_inline uword
1375 ip46_tcp_listen (vlib_main_t * vm,
1376                  vlib_node_runtime_t * node,
1377                  vlib_frame_t * frame,
1378                  uword is_ip6)
1379 {
1380   tcp_main_t * tm = &tcp_main;
1381   ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
1382   uword n_packets = frame->n_vectors;
1383   u32 * from, * to_reply, * to_drop, * random_ack_numbers;
1384   u32 n_left_from, n_left_to_reply, n_left_to_drop, mini_now, timestamp_now;
1385   u16 * fid, * fragment_ids;
1386   vlib_node_runtime_t * error_node;
1387
1388   error_node = vlib_node_get_runtime
1389     (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
1390
1391   from = vlib_frame_vector_args (frame);
1392   n_left_from = n_packets;
1393   mini_now = tcp_time_now (tm, TCP_TIMER_mini_connection);
1394   timestamp_now = tcp_time_now (tm, TCP_TIMER_timestamp);
1395   
1396   random_ack_numbers = clib_random_buffer_get_data (&vm->random_buffer,
1397                                                     n_packets * sizeof (random_ack_numbers[0]));
1398   /* Get random fragment IDs for replies. */
1399   fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer,
1400                                                     n_packets * sizeof (fragment_ids[0]));
1401
1402   while (n_left_from > 0)
1403     {
1404       vlib_get_next_frame (vm, node, TCP_LISTEN_NEXT_REPLY,
1405                            to_reply, n_left_to_reply);
1406       vlib_get_next_frame (vm, node, TCP_LISTEN_NEXT_DROP,
1407                            to_drop, n_left_to_drop);
1408
1409       while (n_left_from > 0 && n_left_to_reply > 0 && n_left_to_drop > 0)
1410         {
1411           vlib_buffer_t * p0;
1412           ip6_header_t * ip60;
1413           ip4_header_t * ip40;
1414           tcp_header_t * tcp0;
1415           tcp_mini_connection_t * min0;
1416           tcp_syn_packet_t * tcp_reply0;
1417           ip_csum_t tcp_sum0;
1418           u32 bi0, bi_reply0, imin0, my_seq_net0, his_seq_host0, his_seq_net0;
1419           u8 i0;
1420       
1421           bi0 = to_drop[0] = from[0];
1422
1423           from += 1;
1424           n_left_from -= 1;
1425           to_drop += 1;
1426           n_left_to_drop -= 1;
1427       
1428           p0 = vlib_get_buffer (vm, bi0);
1429
1430           p0->error = error_node->errors[TCP_ERROR_LISTEN_RESPONSES];
1431
1432           imin0 = vnet_buffer (p0)->ip.tcp.mini_connection_index;
1433           i0 = imin0 % 4;
1434
1435           if (is_ip6)
1436             {
1437               ip6_tcp_udp_address_x4_and_timestamps_t * mina0;
1438
1439               ip60 = vlib_buffer_get_current (p0);
1440               tcp0 = ip6_next_header (ip60);
1441
1442               mina0 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin0 / 4);
1443
1444               ip6_tcp_udp_address_x4_set_from_headers (&mina0->address_x4,
1445                                                        ip60, tcp0, i0);
1446               mina0->time_stamps[i0] = mini_now;
1447             }
1448           else
1449             {
1450               ip4_tcp_udp_address_x4_and_timestamps_t * mina0;
1451
1452               ip40 = vlib_buffer_get_current (p0);
1453               tcp0 = ip4_next_header (ip40);
1454
1455               mina0 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin0 / 4);
1456
1457               ip4_tcp_udp_address_x4_set_from_headers (&mina0->address_x4,
1458                                                        ip40, tcp0, i0);
1459               mina0->time_stamps[i0] = mini_now;
1460             }
1461
1462           min0 = vec_elt_at_index (tm46->mini_connections, imin0);
1463
1464           min0->state = TCP_CONNECTION_STATE_listen_ack_wait;
1465           min0->time_stamps.ours_host_byte_order = timestamp_now;
1466           tcp_options_decode_for_syn (tm, min0, tcp0);
1467
1468           my_seq_net0 = *random_ack_numbers++;
1469           his_seq_host0 = 1 + clib_net_to_host_u32 (tcp0->seq_number);
1470
1471           min0->sequence_numbers.ours = 1 + clib_net_to_host_u32 (my_seq_net0);
1472           min0->sequence_numbers.his = his_seq_host0;
1473
1474           if (is_ip6)
1475             {
1476               ip6_tcp_syn_packet_t * r0;
1477               uword tmp0, i;
1478
1479               r0 = vlib_packet_template_get_packet
1480                 (vm,
1481                  &tm->ip6.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK].vlib,
1482                  &bi_reply0);
1483               tcp_reply0 = &r0->tcp;
1484
1485               tcp_sum0 = (tm->ip6.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK]
1486                           .tcp_checksum_net_byte_order);
1487
1488               for (i = 0; i < ARRAY_LEN (ip60->dst_address.as_uword); i++)
1489                 {
1490                   tmp0 = r0->ip6.src_address.as_uword[i] = ip60->dst_address.as_uword[i];
1491                   tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0);
1492
1493                   tmp0 = r0->ip6.dst_address.as_uword[i] = ip60->src_address.as_uword[i];
1494                   tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0);
1495                 }
1496             }
1497           else
1498             {
1499               ip4_tcp_syn_packet_t * r0;
1500               ip_csum_t ip_sum0;
1501               u32 src0, dst0;
1502
1503               r0 = vlib_packet_template_get_packet
1504                 (vm,
1505                  &tm->ip4.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK].vlib,
1506                  &bi_reply0);
1507               tcp_reply0 = &r0->tcp;
1508
1509               tcp_sum0 = (tm->ip4.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK]
1510                           .tcp_checksum_net_byte_order);
1511               ip_sum0 = (tm->ip4.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK]
1512                          .ip4_checksum_net_byte_order);
1513
1514               src0 = r0->ip4.src_address.as_u32 = ip40->dst_address.as_u32;
1515               dst0 = r0->ip4.dst_address.as_u32 = ip40->src_address.as_u32;
1516
1517               ip_sum0 = ip_csum_add_even (ip_sum0, src0);
1518               tcp_sum0 = ip_csum_add_even (tcp_sum0, src0);
1519
1520               ip_sum0 = ip_csum_add_even (ip_sum0, dst0);
1521               tcp_sum0 = ip_csum_add_even (tcp_sum0, dst0);
1522
1523               r0->ip4.checksum = ip_csum_fold (ip_sum0);
1524
1525               ASSERT (r0->ip4.checksum == ip4_header_checksum (&r0->ip4));
1526             }
1527
1528           tcp_reply0->header.ports.src = tcp0->ports.dst;
1529           tcp_reply0->header.ports.dst = tcp0->ports.src;
1530           tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->header.ports.src_and_dst);
1531
1532           tcp_reply0->header.seq_number = my_seq_net0;
1533           tcp_sum0 = ip_csum_add_even (tcp_sum0, my_seq_net0);
1534
1535           his_seq_net0 = clib_host_to_net_u32 (his_seq_host0);
1536           tcp_reply0->header.ack_number = his_seq_net0;
1537           tcp_sum0 = ip_csum_add_even (tcp_sum0, his_seq_net0);
1538
1539           {
1540             ip_adjacency_t * adj0 = ip_get_adjacency (&ip4_main.lookup_main, vnet_buffer (p0)->ip.adj_index[VLIB_RX]);
1541             u16 my_mss =
1542               (adj0->rewrite_header.max_l3_packet_bytes
1543                - (is_ip6 ? sizeof (ip60[0]) : sizeof (ip40[0]))
1544                - sizeof (tcp0[0]));
1545
1546             my_mss = clib_min (my_mss, min0->max_segment_size);
1547             min0->max_segment_size = my_mss;
1548
1549             tcp_reply0->options.mss.value = clib_host_to_net_u16 (my_mss);
1550             tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->options.mss.value);
1551           }
1552
1553           tcp_reply0->options.time_stamp.my_time_stamp = clib_host_to_net_u32 (timestamp_now);
1554           tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->options.time_stamp.my_time_stamp);
1555
1556           tcp_reply0->options.time_stamp.his_time_stamp = min0->time_stamps.his_net_byte_order;
1557           tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->options.time_stamp.his_time_stamp);
1558
1559           tcp_reply0->header.checksum = ip_csum_fold (tcp_sum0);
1560
1561           vlib_buffer_copy_shared_fields (vm, p0, bi_reply0);
1562
1563           to_reply[0] = bi_reply0;
1564           n_left_to_reply -= 1;
1565           to_reply += 1;
1566         }
1567
1568       vlib_put_next_frame (vm, node, TCP_LISTEN_NEXT_REPLY, n_left_to_reply);
1569       vlib_put_next_frame (vm, node, TCP_LISTEN_NEXT_DROP, n_left_to_drop);
1570     }
1571
1572   if (node->flags & VLIB_NODE_FLAG_TRACE)
1573     /* FIXME */ ;
1574
1575   return frame->n_vectors;
1576 }
1577
1578 static uword
1579 ip4_tcp_listen (vlib_main_t * vm,
1580                 vlib_node_runtime_t * node,
1581                 vlib_frame_t * frame)
1582 { return ip46_tcp_listen (vm, node, frame, /* is_ip6 */ 0); }
1583
1584 static uword
1585 ip6_tcp_listen (vlib_main_t * vm,
1586                 vlib_node_runtime_t * node,
1587                 vlib_frame_t * frame)
1588 { return ip46_tcp_listen (vm, node, frame, /* is_ip6 */ 1); }
1589
1590 VLIB_REGISTER_NODE (ip4_tcp_listen_node,static) = {
1591   .function = ip4_tcp_listen,
1592   .name = "ip4-tcp-listen",
1593
1594   .vector_size = sizeof (u32),
1595
1596   .n_next_nodes = TCP_LISTEN_N_NEXT,
1597   .next_nodes = {
1598     [TCP_LISTEN_NEXT_DROP] = "error-drop",
1599     [TCP_LISTEN_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip4-input" : "ip4-lookup",
1600   },
1601 };
1602
1603 VLIB_REGISTER_NODE (ip6_tcp_listen_node,static) = {
1604   .function = ip6_tcp_listen,
1605   .name = "ip6-tcp-listen",
1606
1607   .vector_size = sizeof (u32),
1608
1609   .n_next_nodes = TCP_LISTEN_N_NEXT,
1610   .next_nodes = {
1611     [TCP_LISTEN_NEXT_DROP] = "error-drop",
1612     [TCP_LISTEN_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip6-input" : "ip6-lookup",
1613   },
1614 };
1615
/* Next node indices for the tcp-connect nodes. */
typedef enum {
  TCP_CONNECT_NEXT_DROP = 0,
  TCP_CONNECT_NEXT_REPLY = 1,
  /* Number of next nodes; keep last. */
  TCP_CONNECT_N_NEXT,
} tcp_connect_next_t;
1621
1622 static_always_inline uword
1623 ip46_tcp_connect (vlib_main_t * vm,
1624                  vlib_node_runtime_t * node,
1625                  vlib_frame_t * frame,
1626                  uword is_ip6)
1627 {
1628   tcp_main_t * tm = &tcp_main;
1629   ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
1630   uword n_packets = frame->n_vectors;
1631   u32 * from, * to_next;
1632   u32 n_left_from, n_left_to_next, next;
1633   vlib_node_runtime_t * error_node;
1634
1635   /* FIXME */
1636   clib_warning ("%p", tm46);
1637
1638   error_node = vlib_node_get_runtime
1639     (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
1640
1641   from = vlib_frame_vector_args (frame);
1642   n_left_from = n_packets;
1643   next = node->cached_next_index;
1644   
1645   while (n_left_from > 0)
1646     {
1647       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
1648
1649       while (n_left_from > 0 && n_left_to_next > 0)
1650         {
1651           vlib_buffer_t * p0;
1652           ip6_header_t * ip60;
1653           ip4_header_t * ip40;
1654           tcp_header_t * tcp0;
1655           u32 bi0;
1656           u8 error0, next0;
1657       
1658           bi0 = to_next[0] = from[0];
1659
1660           from += 1;
1661           n_left_from -= 1;
1662           to_next += 1;
1663           n_left_to_next -= 1;
1664       
1665           p0 = vlib_get_buffer (vm, bi0);
1666
1667           if (is_ip6)
1668             {
1669               ip60 = vlib_buffer_get_current (p0);
1670               tcp0 = ip6_next_header (ip60);
1671             }
1672           else
1673             {
1674               ip40 = vlib_buffer_get_current (p0);
1675               tcp0 = ip4_next_header (ip40);
1676             }
1677
1678           ASSERT (0);
1679
1680           error0 = next0 = 0;
1681           p0->error = error_node->errors[error0];
1682
1683           if (PREDICT_FALSE (next0 != next))
1684             {
1685               to_next -= 1;
1686               n_left_to_next += 1;
1687
1688               vlib_put_next_frame (vm, node, next, n_left_to_next);
1689
1690               next = next0;
1691               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
1692               to_next[0] = bi0;
1693               to_next += 1;
1694               n_left_to_next -= 1;
1695             }
1696         }
1697   
1698       vlib_put_next_frame (vm, node, next, n_left_to_next);
1699     }
1700
1701   if (node->flags & VLIB_NODE_FLAG_TRACE)
1702     /* FIXME */ ;
1703
1704   return frame->n_vectors;
1705 }
1706
1707 static uword
1708 ip4_tcp_connect (vlib_main_t * vm,
1709                 vlib_node_runtime_t * node,
1710                 vlib_frame_t * frame)
1711 { return ip46_tcp_connect (vm, node, frame, /* is_ip6 */ 0); }
1712
1713 static uword
1714 ip6_tcp_connect (vlib_main_t * vm,
1715                 vlib_node_runtime_t * node,
1716                 vlib_frame_t * frame)
1717 { return ip46_tcp_connect (vm, node, frame, /* is_ip6 */ 1); }
1718
1719 VLIB_REGISTER_NODE (ip4_tcp_connect_node,static) = {
1720   .function = ip4_tcp_connect,
1721   .name = "ip4-tcp-connect",
1722
1723   .vector_size = sizeof (u32),
1724
1725   .n_next_nodes = TCP_CONNECT_N_NEXT,
1726   .next_nodes = {
1727     [TCP_CONNECT_NEXT_DROP] = "error-drop",
1728     [TCP_CONNECT_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip4-input" : "ip4-lookup",
1729   },
1730 };
1731
1732 VLIB_REGISTER_NODE (ip6_tcp_connect_node,static) = {
1733   .function = ip6_tcp_connect,
1734   .name = "ip6-tcp-connect",
1735
1736   .vector_size = sizeof (u32),
1737
1738   .n_next_nodes = TCP_CONNECT_N_NEXT,
1739   .next_nodes = {
1740     [TCP_CONNECT_NEXT_DROP] = "error-drop",
1741     [TCP_CONNECT_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip6-input" : "ip6-lookup",
1742   },
1743 };
1744
/* Next-node slots of the ip4/ip6 "tcp-establish" nodes; the values index
   the .next_nodes tables of the node registrations. */
typedef enum {
  TCP_ESTABLISH_NEXT_DROP = 0,          /* "error-drop" */
  TCP_ESTABLISH_NEXT_ESTABLISHED = 1,   /* "ipX-tcp-established" */
  TCP_ESTABLISH_N_NEXT = 2,             /* number of slots */
} tcp_establish_next_t;
1750
1751 static_always_inline uword
1752 ip46_tcp_establish (vlib_main_t * vm,
1753                     vlib_node_runtime_t * node,
1754                     vlib_frame_t * frame,
1755                     uword is_ip6)
1756 {
1757   tcp_main_t * tm = &tcp_main;
1758   ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
1759   uword n_packets = frame->n_vectors;
1760   u32 * from, * to_next;
1761   u32 n_left_from, n_left_to_next, next, mini_long_long_ago, timestamp_now;
1762   vlib_node_runtime_t * error_node;
1763
1764   error_node = vlib_node_get_runtime
1765     (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
1766
1767   from = vlib_frame_vector_args (frame);
1768   n_left_from = n_packets;
1769   next = node->cached_next_index;
1770   mini_long_long_ago =
1771     (tcp_time_now (tm, TCP_TIMER_mini_connection)
1772      + (1 << (BITS (mini_long_long_ago) - 1)));
1773   timestamp_now = tcp_time_now (tm, TCP_TIMER_timestamp);
1774   
1775   while (n_left_from > 0)
1776     {
1777       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
1778
1779       while (n_left_from > 0 && n_left_to_next > 0)
1780         {
1781           vlib_buffer_t * p0;
1782           ip6_header_t * ip60;
1783           ip4_header_t * ip40;
1784           tcp_header_t * tcp0;
1785           tcp_mini_connection_t * min0;
1786           tcp_connection_t * est0;
1787           tcp_listener_t * l0;
1788           u32 bi0, imin0, iest0;
1789           u8 error0, next0, i0, e0;
1790       
1791           bi0 = to_next[0] = from[0];
1792
1793           from += 1;
1794           n_left_from -= 1;
1795           to_next += 1;
1796           n_left_to_next -= 1;
1797       
1798           p0 = vlib_get_buffer (vm, bi0);
1799
1800           imin0 = vnet_buffer (p0)->ip.tcp.mini_connection_index;
1801           iest0 = vnet_buffer (p0)->ip.tcp.established_connection_index;
1802
1803           i0 = imin0 % 4;
1804           e0 = iest0 % 4;
1805
1806           min0 = vec_elt_at_index (tm46->mini_connections, imin0);
1807           if (PREDICT_FALSE (min0->state == TCP_CONNECTION_STATE_unused))
1808             goto already_established0;
1809           min0->state = TCP_CONNECTION_STATE_unused;
1810
1811           if (is_ip6)
1812             {
1813               ip60 = vlib_buffer_get_current (p0);
1814               tcp0 = ip6_next_header (ip60);
1815             }
1816           else
1817             {
1818               ip40 = vlib_buffer_get_current (p0);
1819               tcp0 = ip4_next_header (ip40);
1820             }
1821
1822           if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->seq_number)
1823                              != min0->sequence_numbers.his))
1824             goto unexpected_seq_number0;
1825           if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->ack_number)
1826                              != min0->sequence_numbers.ours))
1827             goto unexpected_ack_number0;
1828
1829           if (is_ip6)
1830             {
1831               ip6_tcp_udp_address_x4_and_timestamps_t * mina0;
1832               ip6_tcp_udp_address_x4_t * esta0;
1833
1834               mina0 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin0 / 4);
1835               esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest0 / 4);
1836
1837               ip6_tcp_udp_address_x4_copy_and_invalidate (esta0, &mina0->address_x4, e0, i0);
1838
1839               mina0->time_stamps[i0] = mini_long_long_ago;
1840             }
1841           else
1842             {
1843               ip4_tcp_udp_address_x4_and_timestamps_t * mina0;
1844               ip4_tcp_udp_address_x4_t * esta0;
1845
1846               mina0 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin0 / 4);
1847               esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest0 / 4);
1848
1849               ip4_tcp_udp_address_x4_copy_and_invalidate (esta0, &mina0->address_x4, e0, i0);
1850
1851               mina0->time_stamps[i0] = mini_long_long_ago;
1852             }
1853
1854           est0 = vec_elt_at_index (tm46->established_connections, iest0);
1855
1856           est0->sequence_numbers = min0->sequence_numbers;
1857           est0->max_segment_size = (min0->max_segment_size
1858                                     - STRUCT_SIZE_OF (tcp_ack_packet_t, options));
1859           est0->his_window_scale = min0->window_scale;
1860           est0->his_window = clib_net_to_host_u16 (tcp0->window);
1861           est0->time_stamps.ours_host_byte_order = min0->time_stamps.ours_host_byte_order;
1862
1863           /* Compute first measurement of round trip time. */
1864           {
1865             u32 t = tcp_options_decode_for_ack (tm, tcp0, &est0->time_stamps.his_net_byte_order);
1866             f64 dt = (timestamp_now - t) * tm->secs_per_tick[TCP_TIMER_timestamp];
1867             est0->round_trip_time_stats.sum = dt;
1868             est0->round_trip_time_stats.sum2 = dt*dt;
1869             est0->round_trip_time_stats.count = 1;
1870
1871             {
1872               ELOG_TYPE_DECLARE (e) = {
1873                 .format = "establish ack rtt: %.4e",
1874                 .format_args = "f8",
1875               };
1876               struct { f64 dt; } * ed;
1877               ed = ELOG_DATA (&vm->elog_main, e);
1878               ed->dt = dt;
1879             }
1880           }
1881
1882           est0->my_window_scale = 7;
1883           est0->my_window = 256;
1884
1885           l0 = pool_elt_at_index (tm->listener_pool, vnet_buffer (p0)->ip.tcp.listener_index);
1886           vec_add1 (l0->event_connections[is_ip6], tcp_connection_handle_set (iest0, is_ip6));
1887
1888           next0 = TCP_ESTABLISH_NEXT_DROP;
1889           error0 = TCP_ERROR_LISTENS_ESTABLISHED;
1890
1891         enqueue0:
1892           p0->error = error_node->errors[error0];
1893           if (PREDICT_FALSE (next0 != next))
1894             {
1895               to_next -= 1;
1896               n_left_to_next += 1;
1897
1898               vlib_put_next_frame (vm, node, next, n_left_to_next);
1899
1900               next = next0;
1901               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
1902               to_next[0] = bi0;
1903               to_next += 1;
1904               n_left_to_next -= 1;
1905             }
1906           continue;
1907
1908         already_established0:
1909           next0 = TCP_ESTABLISH_NEXT_ESTABLISHED;
1910           error0 = TCP_ERROR_NONE;
1911           goto enqueue0;
1912
1913         unexpected_seq_number0:
1914           next0 = TCP_ESTABLISH_NEXT_DROP;
1915           error0 = TCP_ERROR_UNEXPECTED_SEQ_NUMBER;
1916           goto enqueue0;
1917
1918         unexpected_ack_number0:
1919           next0 = TCP_ESTABLISH_NEXT_DROP;
1920           error0 = TCP_ERROR_UNEXPECTED_ACK_NUMBER;
1921           goto enqueue0;
1922         }
1923   
1924       vlib_put_next_frame (vm, node, next, n_left_to_next);
1925     }
1926
1927   if (node->flags & VLIB_NODE_FLAG_TRACE)
1928     /* FIXME */ ;
1929
1930   /* Inform listeners of new connections. */
1931   {
1932     tcp_listener_t * l;
1933     uword n;
1934     pool_foreach (l, tm->listener_pool, ({
1935       if ((n = vec_len (l->event_connections[is_ip6])) > 0)
1936         {
1937           if (l->event_function)
1938             l->event_function (l->event_connections[is_ip6],
1939                                TCP_EVENT_connection_established);
1940           if (tm->n_established_connections[is_ip6] == 0)
1941             vlib_node_set_state (vm, tm46->output_node_index, VLIB_NODE_STATE_POLLING);
1942           tm->n_established_connections[is_ip6] += n;
1943           _vec_len (l->event_connections[is_ip6]) = 0;
1944         }
1945     }));
1946   }
1947
1948   return frame->n_vectors;
1949 }
1950
1951 static uword
1952 ip4_tcp_establish (vlib_main_t * vm,
1953                 vlib_node_runtime_t * node,
1954                 vlib_frame_t * frame)
1955 { return ip46_tcp_establish (vm, node, frame, /* is_ip6 */ 0); }
1956
1957 static uword
1958 ip6_tcp_establish (vlib_main_t * vm,
1959                 vlib_node_runtime_t * node,
1960                 vlib_frame_t * frame)
1961 { return ip46_tcp_establish (vm, node, frame, /* is_ip6 */ 1); }
1962
1963 VLIB_REGISTER_NODE (ip4_tcp_establish_node,static) = {
1964   .function = ip4_tcp_establish,
1965   .name = "ip4-tcp-establish",
1966
1967   .vector_size = sizeof (u32),
1968
1969   .n_next_nodes = TCP_ESTABLISH_N_NEXT,
1970   .next_nodes = {
1971     [TCP_ESTABLISH_NEXT_DROP] = "error-drop",
1972     [TCP_ESTABLISH_NEXT_ESTABLISHED] = "ip4-tcp-established",
1973   },
1974 };
1975
1976 VLIB_REGISTER_NODE (ip6_tcp_establish_node,static) = {
1977   .function = ip6_tcp_establish,
1978   .name = "ip6-tcp-establish",
1979
1980   .vector_size = sizeof (u32),
1981
1982   .n_next_nodes = TCP_ESTABLISH_N_NEXT,
1983   .next_nodes = {
1984     [TCP_ESTABLISH_NEXT_DROP] = "error-drop",
1985     [TCP_ESTABLISH_NEXT_ESTABLISHED] = "ip6-tcp-established",
1986   },
1987 };
1988
1989 static_always_inline void
1990 tcp_free_connection_x1 (vlib_main_t * vm, tcp_main_t * tm,
1991                         tcp_ip_4_or_6_t is_ip6,
1992                         u32 iest0)
1993 {
1994   ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
1995   tcp_connection_t * est0;
1996   u32 iest_div0, iest_mod0;
1997   
1998   iest_div0 = iest0 / 4;
1999   iest_mod0 = iest0 % 4;
2000
2001   if (is_ip6)
2002     {
2003       ip6_tcp_udp_address_x4_t * esta0;
2004       esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest_div0);
2005       ip6_tcp_udp_address_x4_invalidate (esta0, iest_mod0);
2006     }
2007   else
2008     {
2009       ip4_tcp_udp_address_x4_t * esta0;
2010       esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest_div0);
2011       ip4_tcp_udp_address_x4_invalidate (esta0, iest_mod0);
2012     }
2013
2014   est0 = vec_elt_at_index (tm46->established_connections, iest0);
2015 }
2016
2017 static_always_inline void
2018 tcp_free_connection_x2 (vlib_main_t * vm, tcp_main_t * tm,
2019                         tcp_ip_4_or_6_t is_ip6,
2020                         u32 iest0, u32 iest1)
2021 {
2022   tcp_free_connection_x1 (vm, tm, is_ip6, iest0);
2023   tcp_free_connection_x1 (vm, tm, is_ip6, iest1);
2024 }
2025
2026 static_always_inline uword
2027 ip46_tcp_output (vlib_main_t * vm,
2028                  vlib_node_runtime_t * node,
2029                  vlib_frame_t * frame,
2030                  tcp_ip_4_or_6_t is_ip6)
2031 {
2032   tcp_main_t * tm = &tcp_main;
2033   ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
2034   u32 * cis, * to_next, n_left_to_next, n_connections_left;
2035   u32 timestamp_now_host_byte_order, timestamp_now_net_byte_order;
2036   vlib_node_runtime_t * error_node;
2037   const u32 next = 0;
2038   uword n_acks;
2039
2040   /* Inform listeners of new connections. */
2041   {
2042     tcp_listener_t * l;
2043     pool_foreach (l, tm->listener_pool, ({
2044       if (vec_len (l->eof_connections) > 0)
2045         {
2046           if (l->event_function)
2047             l->event_function (l->eof_connections[is_ip6], TCP_EVENT_fin_received);
2048           else
2049             {
2050               uword i;
2051               for (i = 0; i < vec_len (l->eof_connections[is_ip6]); i++)
2052                 {
2053                   tcp_connection_t * c = tcp_get_connection (l->eof_connections[is_ip6][i]);
2054                   c->flags |= TCP_CONNECTION_FLAG_application_requested_close;
2055                 }
2056             }
2057           _vec_len (l->eof_connections[is_ip6]) = 0;
2058         }
2059
2060       if (vec_len (l->close_connections[is_ip6]) > 0)
2061         {
2062           uword n_left;
2063           u32 * cis;
2064
2065           if (l->event_function)
2066             l->event_function (l->close_connections[is_ip6], TCP_EVENT_connection_closed);
2067
2068           cis = l->close_connections[is_ip6];
2069           n_left = vec_len (cis);
2070           ASSERT (tm->n_established_connections[is_ip6] >= n_left);
2071           tm->n_established_connections[is_ip6] -= n_left;
2072           if (tm->n_established_connections[is_ip6] == 0)
2073             vlib_node_set_state (vm, tm46->output_node_index, VLIB_NODE_STATE_DISABLED);
2074           while (n_left >= 2)
2075             {
2076               tcp_free_connection_x2 (vm, tm, is_ip6, cis[0], cis[1]);
2077               n_left -= 2;
2078               cis += 2;
2079             }
2080
2081           while (n_left > 0)
2082             {
2083               tcp_free_connection_x1 (vm, tm, is_ip6, cis[0]);
2084               n_left -= 1;
2085               cis += 1;
2086             }
2087
2088           _vec_len (l->close_connections[is_ip6]) = 0;
2089         }
2090     }));
2091   }
2092
2093   n_acks = 0;
2094   cis = tm46->connections_pending_acks;
2095   n_connections_left = vec_len (cis);
2096   if (n_connections_left == 0)
2097     return n_acks;
2098   _vec_len (tm46->connections_pending_acks) = 0;
2099   error_node = vlib_node_get_runtime
2100     (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
2101
2102   timestamp_now_host_byte_order = tcp_time_now (tm, TCP_TIMER_timestamp);
2103   timestamp_now_net_byte_order = clib_host_to_net_u32 (timestamp_now_host_byte_order);
2104
2105   while (n_connections_left > 0)
2106     {
2107       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2108
2109       while (n_connections_left > 0 && n_left_to_next > 0)
2110         {
2111           tcp_connection_t * est0;
2112           tcp_ack_packet_t * tcp0;
2113           tcp_udp_ports_t * ports0;
2114           ip_csum_t tcp_sum0;
2115           tcp_packet_template_type_t template_type0;
2116           u32 bi0, iest0, iest_div0, iest_mod0, my_seq_net0, his_seq_net0;
2117           u8 is_fin0;
2118
2119           iest0 = cis[0];
2120           cis += 1;
2121           iest_div0 = iest0 / 4;
2122           iest_mod0 = iest0 % 4;
2123           est0 = vec_elt_at_index (tm46->established_connections, iest0);
2124
2125           /* Send a FIN along with our ACK if application closed connection. */
2126           {
2127             u8 is_closed0, fin_sent0;
2128
2129             is_closed0 = (est0->flags & TCP_CONNECTION_FLAG_application_requested_close) != 0;
2130             fin_sent0 = (est0->flags & TCP_CONNECTION_FLAG_fin_sent) != 0;
2131
2132             is_fin0 = is_closed0 && ! fin_sent0;
2133             template_type0 = 
2134               (is_fin0
2135                ? TCP_PACKET_TEMPLATE_FIN_ACK
2136                : TCP_PACKET_TEMPLATE_ACK);
2137             est0->flags |= is_closed0 << LOG2_TCP_CONNECTION_FLAG_fin_sent;
2138           }
2139
2140           if (is_ip6)
2141             {
2142               ip6_tcp_ack_packet_t * r0;
2143               ip6_tcp_udp_address_x4_t * esta0;
2144               uword tmp0, i;
2145
2146               esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest_div0);
2147               r0 = vlib_packet_template_get_packet
2148                 (vm, &tm->ip6.packet_templates[template_type0].vlib, &bi0);
2149               tcp0 = &r0->tcp;
2150
2151               tcp_sum0 = (tm->ip6.packet_templates[template_type0]
2152                           .tcp_checksum_net_byte_order);
2153
2154               for (i = 0; i < ARRAY_LEN (r0->ip6.src_address.as_u32); i++)
2155                 {
2156                   tmp0 = r0->ip6.src_address.as_u32[i] = esta0->dst.as_u32[i][iest_mod0];
2157                   tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0);
2158
2159                   tmp0 = r0->ip6.dst_address.as_u32[i] = esta0->src.as_u32[i][iest_mod0];
2160                   tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0);
2161                 }
2162
2163               ports0 = &esta0->ports.as_ports[iest_mod0];
2164             }
2165           else
2166             {
2167               ip4_tcp_ack_packet_t * r0;
2168               ip4_tcp_udp_address_x4_t * esta0;
2169               ip_csum_t ip_sum0;
2170               u32 src0, dst0;
2171
2172               esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest_div0);
2173               r0 = vlib_packet_template_get_packet
2174                 (vm, &tm->ip4.packet_templates[template_type0].vlib, &bi0);
2175               tcp0 = &r0->tcp;
2176
2177               ip_sum0 = (tm->ip4.packet_templates[template_type0]
2178                           .ip4_checksum_net_byte_order);
2179               tcp_sum0 = (tm->ip4.packet_templates[template_type0]
2180                           .tcp_checksum_net_byte_order);
2181
2182               src0 = r0->ip4.src_address.as_u32 = esta0->dst.as_ip4_address[iest_mod0].as_u32;
2183               dst0 = r0->ip4.dst_address.as_u32 = esta0->src.as_ip4_address[iest_mod0].as_u32;
2184
2185               ip_sum0 = ip_csum_add_even (ip_sum0, src0);
2186               tcp_sum0 = ip_csum_add_even (tcp_sum0, src0);
2187
2188               ip_sum0 = ip_csum_add_even (ip_sum0, dst0);
2189               tcp_sum0 = ip_csum_add_even (tcp_sum0, dst0);
2190
2191               r0->ip4.checksum = ip_csum_fold (ip_sum0);
2192
2193               ASSERT (r0->ip4.checksum == ip4_header_checksum (&r0->ip4));
2194               ports0 = &esta0->ports.as_ports[iest_mod0];
2195             }
2196
2197           tcp_sum0 = ip_csum_add_even (tcp_sum0, ports0->as_u32);
2198           tcp0->header.ports.src = ports0->dst;
2199           tcp0->header.ports.dst = ports0->src;
2200
2201           my_seq_net0 = clib_host_to_net_u32 (est0->sequence_numbers.ours);
2202           his_seq_net0 = clib_host_to_net_u32 (est0->sequence_numbers.his);
2203
2204           /* FIN accounts for 1 sequence number. */
2205           est0->sequence_numbers.ours += is_fin0;
2206
2207           tcp0->header.seq_number = my_seq_net0;
2208           tcp_sum0 = ip_csum_add_even (tcp_sum0, my_seq_net0);
2209
2210           tcp0->header.ack_number = his_seq_net0;
2211           tcp_sum0 = ip_csum_add_even (tcp_sum0, his_seq_net0);
2212
2213           est0->time_stamps.ours_host_byte_order = timestamp_now_host_byte_order;
2214           tcp0->options.time_stamp.my_time_stamp = timestamp_now_net_byte_order;
2215           tcp_sum0 = ip_csum_add_even (tcp_sum0, timestamp_now_net_byte_order);
2216
2217           tcp0->options.time_stamp.his_time_stamp = est0->time_stamps.his_net_byte_order;
2218           tcp_sum0 = ip_csum_add_even (tcp_sum0, est0->time_stamps.his_net_byte_order);
2219
2220           tcp0->header.checksum = ip_csum_fold (tcp_sum0);
2221
2222           est0->flags &= ~TCP_CONNECTION_FLAG_ack_pending;
2223
2224           to_next[0] = bi0;
2225           to_next += 1;
2226           n_left_to_next -= 1;
2227           n_connections_left -= 1;
2228           n_acks += 1;
2229         }
2230
2231       vlib_put_next_frame (vm, node, next, n_left_to_next);
2232     }
2233
2234   vlib_error_count (vm, error_node->node_index, TCP_ERROR_ACKS_SENT, n_acks);
2235
2236   return n_acks;
2237 }
2238
2239 static uword
2240 ip4_tcp_output (vlib_main_t * vm,
2241                 vlib_node_runtime_t * node,
2242                 vlib_frame_t * frame)
2243 { return ip46_tcp_output (vm, node, frame, /* is_ip6 */ 0); }
2244
2245 static uword
2246 ip6_tcp_output (vlib_main_t * vm,
2247                 vlib_node_runtime_t * node,
2248                 vlib_frame_t * frame)
2249 { return ip46_tcp_output (vm, node, frame, /* is_ip6 */ 1); }
2250
2251 VLIB_REGISTER_NODE (ip4_tcp_output_node,static) = {
2252   .function = ip4_tcp_output,
2253   .name = "ip4-tcp-output",
2254   .state = VLIB_NODE_STATE_DISABLED,
2255   .type = VLIB_NODE_TYPE_INPUT,
2256
2257   .vector_size = sizeof (u32),
2258
2259   .n_next_nodes = 1,
2260   .next_nodes = {
2261     [0] = CLIB_DEBUG > 0 ? "ip4-input" : "ip4-lookup",
2262   },
2263 };
2264
2265 VLIB_REGISTER_NODE (ip6_tcp_output_node,static) = {
2266   .function = ip6_tcp_output,
2267   .name = "ip6-tcp-output",
2268   .state = VLIB_NODE_STATE_DISABLED,
2269   .type = VLIB_NODE_TYPE_INPUT,
2270
2271   .vector_size = sizeof (u32),
2272
2273   .n_next_nodes = 1,
2274   .next_nodes = {
2275     [0] = CLIB_DEBUG > 0 ? "ip6-input" : "ip6-lookup",
2276   },
2277 };
2278
2279 static_always_inline void
2280 tcp_ack (tcp_main_t * tm, tcp_connection_t * c, u32 n_bytes)
2281 {
2282   ASSERT (n_bytes == 0);
2283 }
2284
/* Statically registered next-node slots of the ip4/ip6 "tcp-established"
   nodes; the values index the .next_nodes tables of the registrations. */
typedef enum {
  TCP_ESTABLISHED_NEXT_DROP = 0,   /* "error-drop" */
  TCP_ESTABLISHED_N_NEXT = 1,      /* number of static slots */
} tcp_established_next_t;
2289
2290 static_always_inline uword
2291 ip46_tcp_established (vlib_main_t * vm,
2292                       vlib_node_runtime_t * node,
2293                       vlib_frame_t * frame,
2294                       tcp_ip_4_or_6_t is_ip6)
2295 {
2296   tcp_main_t * tm = &tcp_main;
2297   ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
2298   uword n_packets = frame->n_vectors;
2299   u32 * from, * to_next;
2300   u32 n_left_from, n_left_to_next, next, timestamp_now;
2301   vlib_node_runtime_t * error_node;
2302
2303   error_node = vlib_node_get_runtime
2304     (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
2305
2306   from = vlib_frame_vector_args (frame);
2307   n_left_from = n_packets;
2308   next = node->cached_next_index;
2309   timestamp_now = tcp_time_now (tm, TCP_TIMER_timestamp);
2310   
2311   while (n_left_from > 0)
2312     {
2313       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2314
2315       while (n_left_from > 0 && n_left_to_next > 0)
2316         {
2317           vlib_buffer_t * p0;
2318           ip6_header_t * ip60;
2319           ip4_header_t * ip40;
2320           tcp_header_t * tcp0;
2321           tcp_connection_t * est0;
2322           tcp_listener_t * l0;
2323           u32 bi0, iest0, n_data_bytes0, his_ack_host0, n_ack0;
2324           u8 error0, next0, n_advance_bytes0, is_fin0, send_ack0;
2325       
2326           bi0 = to_next[0] = from[0];
2327
2328           from += 1;
2329           n_left_from -= 1;
2330           to_next += 1;
2331           n_left_to_next -= 1;
2332       
2333           p0 = vlib_get_buffer (vm, bi0);
2334
2335           if (is_ip6)
2336             {
2337               ip60 = vlib_buffer_get_current (p0);
2338               tcp0 = ip6_next_header (ip60);
2339               ASSERT (ip60->protocol == IP_PROTOCOL_TCP);
2340               n_advance_bytes0 = tcp_header_bytes (tcp0);
2341               n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) - n_advance_bytes0;
2342               n_advance_bytes0 += sizeof (ip60[0]);
2343             }
2344           else
2345             {
2346               ip40 = vlib_buffer_get_current (p0);
2347               tcp0 = ip4_next_header (ip40);
2348               n_advance_bytes0 = (ip4_header_bytes (ip40)
2349                                   + tcp_header_bytes (tcp0));
2350               n_data_bytes0 = clib_net_to_host_u16 (ip40->length) - n_advance_bytes0;
2351             }
2352
2353           iest0 = vnet_buffer (p0)->ip.tcp.established_connection_index;
2354           est0 = vec_elt_at_index (tm46->established_connections, iest0);
2355
2356           error0 = TCP_ERROR_NO_DATA;
2357           next0 = TCP_ESTABLISHED_NEXT_DROP;
2358
2359           if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->seq_number)
2360                              != est0->sequence_numbers.his))
2361             goto unexpected_seq_number0;
2362           if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->ack_number) - est0->sequence_numbers.ours
2363                              > est0->n_tx_unacked_bytes))
2364             goto unexpected_ack_number0;
2365
2366           is_fin0 = (tcp0->flags & TCP_FLAG_FIN) != 0;
2367
2368           if (PREDICT_FALSE ((est0->flags & TCP_CONNECTION_FLAG_fin_received)
2369                              && (is_fin0 || n_data_bytes0 > 0)))
2370             goto already_received_fin0;
2371
2372           /* Update window. */
2373           est0->his_window = clib_net_to_host_u16 (tcp0->window);
2374
2375           /* Update his sequence number to account for data he's just sent. */
2376           est0->sequence_numbers.his += n_data_bytes0 + is_fin0;
2377
2378           his_ack_host0 = clib_net_to_host_u32 (tcp0->ack_number);
2379           n_ack0 = his_ack_host0 - est0->sequence_numbers.ours;
2380           tcp_ack (tm, est0, n_ack0);
2381           est0->sequence_numbers.ours = his_ack_host0;
2382
2383           {
2384             u32 t = tcp_options_decode_for_ack (tm, tcp0, &est0->time_stamps.his_net_byte_order);
2385             if (t != est0->time_stamps.ours_host_byte_order)
2386               {
2387                 f64 dt = (timestamp_now - t) * tm->secs_per_tick[TCP_TIMER_timestamp];
2388                 est0->round_trip_time_stats.sum += dt;
2389                 est0->round_trip_time_stats.sum2 += dt*dt;
2390                 est0->round_trip_time_stats.count += 1;
2391                 est0->time_stamps.ours_host_byte_order = t;
2392
2393                 {
2394                   ELOG_TYPE_DECLARE (e) = {
2395                     .format = "ack rtt: %.4e",
2396                     .format_args = "f8",
2397                   };
2398                   struct { f64 dt; } * ed;
2399                   ed = ELOG_DATA (&vm->elog_main, e);
2400                   ed->dt = dt;
2401                 }
2402               }
2403           }
2404           
2405           send_ack0 = ((est0->flags & TCP_CONNECTION_FLAG_ack_pending) == 0
2406                        && (n_data_bytes0 > 0 || is_fin0));
2407           vec_add1 (tm46->connections_pending_acks, vnet_buffer (p0)->ip.tcp.established_connection_index);
2408           _vec_len (tm46->connections_pending_acks) -= ! send_ack0;
2409           est0->flags |= send_ack0 << LOG2_TCP_CONNECTION_FLAG_ack_pending;
2410
2411           est0->flags |= is_fin0 << LOG2_TCP_CONNECTION_FLAG_fin_received;
2412
2413           l0 = pool_elt_at_index (tm->listener_pool, vnet_buffer (p0)->ip.tcp.listener_index);
2414
2415           {
2416             u32 ch0 = tcp_connection_handle_set (iest0, is_ip6);
2417
2418             vec_add1 (l0->eof_connections[is_ip6], ch0);
2419             _vec_len (l0->eof_connections[is_ip6]) -= ! is_fin0;
2420
2421             vec_add1 (l0->close_connections[is_ip6], ch0);
2422             _vec_len (l0->close_connections[is_ip6]) -= !(est0->flags & TCP_CONNECTION_FLAG_fin_sent);
2423           }
2424
2425           next0 = n_data_bytes0 > 0 ? l0->next_index : next0;
2426
2427           vlib_buffer_advance (p0, n_advance_bytes0);
2428
2429         enqueue0:
2430           p0->error = error_node->errors[error0];
2431           if (PREDICT_FALSE (next0 != next))
2432             {
2433               to_next -= 1;
2434               n_left_to_next += 1;
2435
2436               vlib_put_next_frame (vm, node, next, n_left_to_next);
2437
2438               next = next0;
2439               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2440               to_next[0] = bi0;
2441               to_next += 1;
2442               n_left_to_next -= 1;
2443             }
2444           continue;
2445
2446         unexpected_seq_number0:
2447           next0 = TCP_ESTABLISHED_NEXT_DROP;
2448           error0 = TCP_ERROR_UNEXPECTED_SEQ_NUMBER;
2449           goto enqueue0;
2450
2451         unexpected_ack_number0:
2452           next0 = TCP_ESTABLISHED_NEXT_DROP;
2453           error0 = TCP_ERROR_UNEXPECTED_ACK_NUMBER;
2454           goto enqueue0;
2455
2456         already_received_fin0:
2457           next0 = TCP_ESTABLISHED_NEXT_DROP;
2458           error0 = TCP_ERROR_SEGMENT_AFTER_FIN;
2459           goto enqueue0;
2460         }
2461   
2462       vlib_put_next_frame (vm, node, next, n_left_to_next);
2463     }
2464
2465   if (node->flags & VLIB_NODE_FLAG_TRACE)
2466     /* FIXME */ ;
2467
2468   return frame->n_vectors;
2469 }
2470
2471 static uword
2472 ip4_tcp_established (vlib_main_t * vm,
2473                 vlib_node_runtime_t * node,
2474                 vlib_frame_t * frame)
2475 { return ip46_tcp_established (vm, node, frame, /* is_ip6 */ 0); }
2476
2477 static uword
2478 ip6_tcp_established (vlib_main_t * vm,
2479                 vlib_node_runtime_t * node,
2480                 vlib_frame_t * frame)
2481 { return ip46_tcp_established (vm, node, frame, /* is_ip6 */ 1); }
2482
2483 VLIB_REGISTER_NODE (ip4_tcp_established_node,static) = {
2484   .function = ip4_tcp_established,
2485   .name = "ip4-tcp-established",
2486
2487   .vector_size = sizeof (u32),
2488
2489   .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
2490   .next_nodes = {
2491     [TCP_ESTABLISHED_NEXT_DROP] = "error-drop",
2492   },
2493 };
2494
2495 VLIB_REGISTER_NODE (ip6_tcp_established_node,static) = {
2496   .function = ip6_tcp_established,
2497   .name = "ip6-tcp-established",
2498
2499   .vector_size = sizeof (u32),
2500
2501   .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
2502   .next_nodes = {
2503     [TCP_ESTABLISHED_NEXT_DROP] = "error-drop",
2504   },
2505 };
2506
2507 uword
2508 tcp_register_listener (vlib_main_t * vm,
2509                        tcp_listener_registration_t * r)
2510 {
2511   tcp_main_t * tm = &tcp_main;
2512   tcp_listener_t * l;
2513
2514   {
2515     clib_error_t * error;
2516
2517     if ((error = vlib_call_init_function (vm, tcp_udp_lookup_init)))
2518       clib_error_report (error);
2519   }
2520
2521   pool_get_aligned (tm->listener_pool, l, CLIB_CACHE_LINE_BYTES);
2522
2523   memset (l, 0, sizeof (l[0]));
2524
2525   l->dst_port = r->port;
2526   l->next_index = vlib_node_add_next (vm, ip4_tcp_established_node.index, r->data_node_index);
2527   l->valid_local_adjacency_bitmap = 0;
2528   l->flags = r->flags & (TCP_LISTENER_IP4 | TCP_LISTENER_IP6);
2529
2530   tm->listener_index_by_dst_port[clib_host_to_net_u16 (l->dst_port)] = l - tm->listener_pool;
2531
2532   return l - tm->listener_pool;
2533 }
2534
2535 static void
2536 tcp_udp_lookup_ip4_add_del_interface_address (ip4_main_t * im,
2537                                               uword opaque,
2538                                               u32 sw_if_index,
2539                                               ip4_address_t * address,
2540                                               u32 address_length,
2541                                               u32 if_address_index,
2542                                               u32 is_delete)
2543 {
2544   tcp_main_t * tm = &tcp_main;
2545
2546   tm->ip4.default_valid_local_adjacency_bitmap
2547     = clib_bitmap_set (tm->ip4.default_valid_local_adjacency_bitmap,
2548                        if_address_index,
2549                        is_delete ? 0 : 1);
2550 }
2551
2552 static void
2553 tcp_udp_lookup_ip6_add_del_interface_address (ip6_main_t * im,
2554                                               uword opaque,
2555                                               u32 sw_if_index,
2556                                               ip6_address_t * address,
2557                                               u32 address_length,
2558                                               u32 if_address_index,
2559                                               u32 is_delete)
2560 {
2561   tcp_main_t * tm = &tcp_main;
2562
2563   tm->ip6.default_valid_local_adjacency_bitmap
2564     = clib_bitmap_set (tm->ip6.default_valid_local_adjacency_bitmap,
2565                        if_address_index,
2566                        is_delete ? 0 : 1);
2567 }
2568
2569 static clib_error_t *
2570 tcp_udp_lookup_init (vlib_main_t * vm)
2571 {
2572   tcp_main_t * tm = &tcp_main;
2573   ip4_main_t * im4 = &ip4_main;
2574   ip6_main_t * im6 = &ip6_main;
2575   clib_error_t * error;
2576
2577   if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
2578     return error;
2579   if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
2580     return error;
2581
2582   tcp_time_init (vm, tm);
2583
2584   {
2585     ip4_add_del_interface_address_callback_t cb;
2586
2587     cb.function = tcp_udp_lookup_ip4_add_del_interface_address;
2588     cb.function_opaque = 0;
2589     vec_add1 (im4->add_del_interface_address_callbacks, cb);
2590   }
2591
2592   {
2593     ip6_add_del_interface_address_callback_t cb;
2594
2595     cb.function = tcp_udp_lookup_ip6_add_del_interface_address;
2596     cb.function_opaque = 0;
2597     vec_add1 (im6->add_del_interface_address_callbacks, cb);
2598   }
2599
2600   tm->ip4.output_node_index = ip4_tcp_output_node.index;
2601   tm->ip6.output_node_index = ip6_tcp_output_node.index;
2602
2603   tcp_lookup_init (vm, tm);
2604   tcp_options_decode_init (tm);
2605
2606   tm->tx_buffer_free_list = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX;
2607   tm->tx_buffer_free_list_n_buffer_bytes = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
2608
2609   return 0;
2610 }
2611
2612 VLIB_INIT_FUNCTION (tcp_udp_lookup_init);
2613
2614 static u8 * format_tcp_time_stamp (u8 * s, va_list * va)
2615 {
2616   tcp_timer_type_t type = va_arg (*va, tcp_timer_type_t);
2617   u32 value = va_arg (*va, u32);
2618   vlib_main_t * vm = vlib_get_main();
2619   tcp_main_t * tm = &tcp_main;
2620   u64 now;
2621   f64 dt;
2622
2623   now = clib_cpu_time_now ();
2624   dt = vm->clib_time.seconds_per_clock * (now - (value << tm->log2_clocks_per_tick[type]));
2625   return format (s, "%.4e sec", dt);
2626 }
2627
2628 static u8 * format_tcp_connection_state (u8 * s, va_list * va)
2629 {
2630   tcp_connection_state_t st = va_arg (*va, tcp_connection_state_t);
2631   char * t = 0;
2632   switch (st)
2633     {
2634 #define _(f) case TCP_CONNECTION_STATE_##f: t = #f; break;
2635       foreach_tcp_connection_state
2636 #undef _
2637     default: break;
2638     }
2639   if (t)
2640     s = format (s, "%s", t);
2641   else
2642     s = format (s, "unknown 0x%x", st);
2643
2644   return s;
2645 }
2646
2647 static u8 * format_tcp_ip_4_or_6 (u8 * s, va_list * va)
2648 {
2649   tcp_ip_4_or_6_t is_ip6 = va_arg (*va, tcp_ip_4_or_6_t);
2650   return format (s, "%s", is_ip6 ? "ip6" : "ip4");
2651 }
2652
2653 static u8 * format_tcp_mini_connection (u8 * s, va_list * va)
2654 {
2655   tcp_mini_connection_t * c = va_arg (*va, tcp_mini_connection_t *);
2656
2657   s = format (s, "state %U, window scale %d, mss %d",
2658               format_tcp_connection_state, c->state,
2659               c->window_scale, c->max_segment_size);
2660
2661   return s;
2662 }
2663
2664 static u8 * format_ip4_tcp_mini_connection (u8 * s, va_list * va)
2665 {
2666   u32 imin = va_arg (*va, u32);
2667   u32 imin_div, imin_mod;
2668   tcp_main_t * tm = &tcp_main;
2669   tcp_mini_connection_t * min;
2670   ip4_tcp_udp_address_x4_and_timestamps_t * mina;
2671   
2672   imin_div = imin / 4;
2673   imin_mod = imin % 4;
2674
2675   mina = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin_div);
2676
2677   s = format (s, "%U, age %U",
2678               format_ip4_tcp_udp_address_x4, &mina->address_x4, imin_div,
2679               format_tcp_time_stamp, TCP_TIMER_mini_connection, mina->time_stamps[imin_div]);
2680
2681   min = vec_elt_at_index (tm->ip4.mini_connections, imin);
2682
2683   s = format (s, "%U", format_tcp_mini_connection, min);
2684
2685   return s;
2686 }
2687
2688 static u8 * format_ip6_tcp_mini_connection (u8 * s, va_list * va)
2689 {
2690   u32 imin = va_arg (*va, u32);
2691   u32 imin_div, imin_mod;
2692   tcp_main_t * tm = &tcp_main;
2693   tcp_mini_connection_t * min;
2694   ip6_tcp_udp_address_x4_and_timestamps_t * mina;
2695   
2696   imin_div = imin / 4;
2697   imin_mod = imin % 4;
2698
2699   mina = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin_div);
2700
2701   s = format (s, "%U, age %U",
2702               format_ip6_tcp_udp_address_x4, &mina->address_x4, imin_div,
2703               format_tcp_time_stamp, TCP_TIMER_mini_connection, mina->time_stamps[imin_div]);
2704
2705   min = vec_elt_at_index (tm->ip6.mini_connections, imin);
2706
2707   s = format (s, "%U", format_tcp_mini_connection, min);
2708
2709   return s;
2710 }
2711
2712 static u8 * format_tcp_established_connection (u8 * s, va_list * va)
2713 {
2714   tcp_connection_t * c = va_arg (*va, tcp_connection_t *);
2715
2716   if (c->flags != 0)
2717     {
2718       s = format (s, ", flags: ");
2719 #define _(f) if (c->flags & TCP_CONNECTION_FLAG_##f) s = format (s, "%s, ", #f);
2720       foreach_tcp_connection_flag;
2721 #undef _
2722     }
2723
2724   if (tcp_round_trip_time_stats_is_valid (&c->round_trip_time_stats))
2725     {
2726       f64 r[2];
2727       tcp_round_trip_time_stats_compute (&c->round_trip_time_stats, r);
2728       s = format (s, ", rtt %.4e +- %.4e",
2729                   r[0], r[1]);
2730     }
2731
2732   return s;
2733 }
2734
2735 static u8 * format_ip4_tcp_established_connection (u8 * s, va_list * va)
2736 {
2737   u32 iest = va_arg (*va, u32);
2738   u32 iest_div, iest_mod;
2739   tcp_main_t * tm = &tcp_main;
2740   tcp_connection_t * est;
2741   ip4_tcp_udp_address_x4_t * esta;
2742   
2743   iest_div = iest / 4;
2744   iest_mod = iest % 4;
2745
2746   esta = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest_div);
2747   est = vec_elt_at_index (tm->ip4.established_connections, iest);
2748
2749   s = format (s, "%U%U",
2750               format_ip4_tcp_udp_address_x4, esta, iest_mod,
2751               format_tcp_established_connection, est);
2752
2753   return s;
2754 }
2755
2756 static u8 * format_ip6_tcp_established_connection (u8 * s, va_list * va)
2757 {
2758   u32 iest = va_arg (*va, u32);
2759   u32 iest_div, iest_mod;
2760   tcp_main_t * tm = &tcp_main;
2761   tcp_connection_t * est;
2762   ip6_tcp_udp_address_x4_t * esta;
2763   
2764   iest_div = iest / 4;
2765   iest_mod = iest % 4;
2766
2767   esta = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest_div);
2768   est = vec_elt_at_index (tm->ip6.established_connections, iest);
2769
2770   s = format (s, "%U%U",
2771               format_ip6_tcp_udp_address_x4, esta, iest_mod,
2772               format_tcp_established_connection, est);
2773
2774   return s;
2775 }
2776
2777 VLIB_CLI_COMMAND (vlib_cli_show_tcp_command, static) = {
2778   .path = "show tcp",
2779   .short_help = "Transmission control protocol (TCP) show commands",
2780 };
2781
2782 static clib_error_t *
2783 show_mini_connections (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
2784 {
2785   tcp_main_t * tm = &tcp_main;
2786   ip46_tcp_main_t * tm46;
2787   tcp_ip_4_or_6_t is_ip6 = TCP_IP4;
2788   tcp_mini_connection_t * min;
2789   ip6_tcp_udp_address_x4_and_timestamps_t * mina6;
2790   ip4_tcp_udp_address_x4_and_timestamps_t * mina4;
2791   clib_error_t * error = 0;
2792   uword i, i0, i1, n_valid;
2793
2794   if (unformat (input, "4"))
2795     is_ip6 = TCP_IP4;
2796   if (unformat (input, "6"))
2797     is_ip6 = TCP_IP6;
2798
2799   n_valid = 0;
2800   tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
2801   for (i = 0; i <= tm46->mini_connection_hash_mask; i++)
2802     {
2803       i0 = i / 4;
2804       i1 = i % 4;
2805
2806       min = vec_elt_at_index (tm46->mini_connections, i);
2807       if (is_ip6)
2808         {
2809           mina6 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, i0);
2810           if (ip6_tcp_udp_address_x4_is_valid (&mina6->address_x4, i1))
2811             {
2812               vlib_cli_output (vm, "%U", format_ip4_tcp_mini_connection, i);
2813               n_valid += 1;
2814             }
2815         }
2816       else
2817         {
2818           mina4 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, i0);
2819           if (ip4_tcp_udp_address_x4_is_valid (&mina4->address_x4, i1))
2820             {
2821               vlib_cli_output (vm, "%U", format_ip6_tcp_mini_connection, i);
2822               n_valid += 1;
2823             }
2824         }
2825     }
2826
2827   if (n_valid == 0)
2828     vlib_cli_output (vm, "no %U mini tcp connections", format_tcp_ip_4_or_6, is_ip6);
2829
2830   return error;
2831 }
2832
2833 VLIB_CLI_COMMAND (vlib_cli_show_tcp_mini_connections_command) = {
2834   .path = "show tcp mini-connections",
2835   .short_help = "Show not-yet established TCP connections",
2836   .function = show_mini_connections,
2837 };
2838
2839 static clib_error_t *
2840 show_established_connections (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
2841 {
2842   tcp_main_t * tm = &tcp_main;
2843   ip46_tcp_main_t * tm46;
2844   tcp_ip_4_or_6_t is_ip6 = TCP_IP4;
2845   tcp_connection_t * est;
2846   ip6_tcp_udp_address_x4_t * esta6;
2847   ip4_tcp_udp_address_x4_t * esta4;
2848   clib_error_t * error = 0;
2849   uword i, i0, i1, n_valid;
2850
2851   if (unformat (input, "4"))
2852     is_ip6 = TCP_IP4;
2853   if (unformat (input, "6"))
2854     is_ip6 = TCP_IP6;
2855
2856   n_valid = 0;
2857   tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
2858   for (i = 0; i < vec_len (tm46->established_connections); i++)
2859     {
2860       i0 = i / 4;
2861       i1 = i % 4;
2862
2863       est = vec_elt_at_index (tm46->established_connections, i);
2864       if (is_ip6)
2865         {
2866           esta6 = vec_elt_at_index (tm->ip6_established_connection_address_hash, i0);
2867           if (ip6_tcp_udp_address_x4_is_valid (esta6, i1))
2868             {
2869               vlib_cli_output (vm, "%U", format_ip6_tcp_established_connection, i);
2870               n_valid += 1;
2871             }
2872         }
2873       else
2874         {
2875           esta4 = vec_elt_at_index (tm->ip4_established_connection_address_hash, i0);
2876           if (ip4_tcp_udp_address_x4_is_valid (esta4, i1))
2877             {
2878               vlib_cli_output (vm, "%U", format_ip4_tcp_established_connection, i);
2879               n_valid += 1;
2880             }
2881         }
2882     }
2883
2884   if (n_valid == 0)
2885     vlib_cli_output (vm, "no %U established tcp connections", format_tcp_ip_4_or_6, is_ip6);
2886
2887   return error;
2888 }
2889
2890 VLIB_CLI_COMMAND (vlib_cli_show_tcp_established_connections_command, static) = {
2891   .path = "show tcp connections",
2892   .short_help = "Show established TCP connections",
2893   .function = show_established_connections,
2894 };
2895
/*
 * tcp_write: unfinished draft, compiled out by the surrounding #if 0.
 *
 * As written it copies up to n_data_bytes from DATA into buffers chained
 * onto the tail of the established connection selected by
 * CONNECTION_HANDLE, allocating buffers from tm->tx_buffer_free_list as
 * the tail fills up and accumulating an IP checksum over the copied
 * bytes.  On buffer allocation failure it returns the number of bytes
 * consumed so far.
 *
 * NOTE(review): this code cannot be enabled as is.  Several variables
 * are used without a declaration in this function, one statement is
 * syntactically incomplete, and the loop bookkeeping looks wrong; see
 * the inline notes.
 */
2896 #if 0
2897 uword
2898 tcp_write (vlib_main_t * vm, u32 connection_handle, void * data, uword n_data_bytes)
2899 {
2900   tcp_main_t * tm = &tcp_main;
2901   tcp_ip_4_or_6_t is_ip6 = tcp_connection_is_ip6 (connection_handle);
2902   ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
2903   tcp_connection_t * c = vec_elt_at_index (tm46->established_connections, connection_handle / 2);
2904   vlib_buffer_t * b;
2905   u32 bi, bi_next, bi_start_of_packet;
2906   ip_csum_t sum;
2907
/* NOTE(review): n_bytes_left_tail, n_bytes_this_packet,
   n_bytes_left_packet and n_data_left are not declared in this
   function. */
2908   b = 0;
2909   bi = c->write_tail_buffer_index;
2910   n_bytes_left_tail = 0;
2911   if (bi != 0)
2912     {
2913       b = vlib_get_buffer (vm, bi);
2914       n_bytes_left_tail = tm->tx_buffer_free_list_n_buffer_bytes - b->current_length;
2915     }
2916
2917   n_bytes_this_packet = c->write_tail_packet.n_data_bytes;
2918   n_bytes_left_packet = c->max_segment_size - n_bytes_this_packet;
2919
2920   n_data_left = n_data_bytes;
2921   sum = c->write_tail_packet.data_ip_checksum;
2922
2923   while (n_data_left > 0)
2924     {
2925       u32 n_copy;
2926
/* Tail buffer is full (or there is none yet): chain a fresh one. */
2927       if (n_bytes_left_tail == 0)
2928         {
2929           if (! vlib_buffer_alloc_from_free_list (vm, &bi_next, 1,
2930                                                   tm->tx_buffer_free_list))
2931             return n_data_bytes - n_data_left;
2932
2933           bi_start_of_packet = bi_next;
2934           if (b)
2935             {
2936               b->flags |= VLIB_BUFFER_NEXT_PRESENT;
2937               b->next_buffer = bi_next;
2938               bi_start_of_packet = b->opaque[0];
2939             }
2940           bi = bi_next;
2941           b = vlib_get_buffer (vm, bi);
2942
2943           /* Save away start of packet buffer in opaque. */
2944           b->opaque[0] = bi_start_of_packet;
2945
2946           c->tail_buffer.buffer_index = bi;
2947           n_bytes_left_tail = tm->tx_buffer_free_list_n_buffer_bytes;
2948         }
2949
/* Copy no more than fits in the tail buffer and in the current packet. */
2950       n_copy = n_data_left;
2951       n_copy = clib_min (n_copy, n_bytes_left_tail);
2952       n_copy = clib_min (n_copy, n_bytes_left_packet);
2953
/* NOTE(review): data is not advanced after the copy, so every
   iteration copies from the start of the caller's data. */
2954       sum = ip_csum_and_memcpy (sum, b->data + b->current_length,
2955                                 data, n_copy);
2956
2957       b->current_length += n_copy;
2958       n_bytes_left_tail -= n_copy;
2959       n_bytes_left_packet -= n_copy;
/* NOTE(review): "-=-" parses as "-= -n_copy", which ADDS n_copy;
   presumably "-=" was intended. */
2960       n_data_left -=- n_copy;
2961       n_bytes_this_packet += n_copy;
2962
2963       if (n_bytes_left_packet == 0)
2964         {
2965           bi_start_of_packet = b->opaque[0];
2966
2967           if (c->tail_packet.buffer_index != 0)
2968             {
2969               vlib_buffer_t * p = vlib_get_buffer (vm, c->tail_packet.buffer_index);
2970               tcp_buffer_t * next = vlib_get_buffer_opaque (p);
/* NOTE(review): incomplete expression -- the member name after "c->"
   is missing. */
2971               next[0] = c->;
2972             }
2973           c->tail_packet.buffer_index = bi_start_of_packet;
2974         }
2975     }
2976
2977   c->tail_buffer.buffer_index = bi;
2978   c->tail_buffer.n_data_bytes = n_bytes_this_packet;
2979   c->tail_buffer.data_ip_checksum = ip_csum_fold (sum);
2980
/* NOTE(review): returns 0 here although the allocation-failure path
   above returns a byte count -- confirm the intended return value. */
2981   return 0;
2982 }
2983 #endif