2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/tcp.c: tcp protocol
18 * Copyright (c) 2011 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/ip/ip.h>
41 #include <vnet/ip/tcp.h>
44 static u8 my_zero_mask_table[256] = {
47 [0xff] = (1 << 0) | (1 << 1),
50 static_always_inline u32 my_zero_mask (u32 x)
52 return ((my_zero_mask_table[(x >> 0) & 0xff] << 0)
53 | (my_zero_mask_table[(x >> 8) & 0xff] << 2));
56 static u8 my_first_set_table[256] = {
63 static_always_inline u32 my_first_set (u32 zero_mask)
65 u8 r0 = my_first_set_table[(zero_mask >> 0) & 0xff];
66 u8 r1 = 2 + my_first_set_table[(zero_mask >> 8) & 0xff];
67 return r0 != 4 ? r0 : r1;
70 static_always_inline void
71 ip4_tcp_udp_address_x4_set_from_headers (ip4_tcp_udp_address_x4_t * a,
76 a->src.as_ip4_address[i] = ip->src_address;
77 a->dst.as_ip4_address[i] = ip->dst_address;
78 a->ports.as_ports[i].as_u32 = tcp->ports.src_and_dst;
81 static_always_inline void
82 ip4_tcp_udp_address_x4_copy_and_invalidate (ip4_tcp_udp_address_x4_t * dst,
83 ip4_tcp_udp_address_x4_t * src,
86 #define _(d,s) d = s; s = 0;
87 _ (dst->src.as_ip4_address[dst_i].as_u32, src->src.as_ip4_address[src_i].as_u32);
88 _ (dst->dst.as_ip4_address[dst_i].as_u32, src->dst.as_ip4_address[src_i].as_u32);
89 _ (dst->ports.as_ports[dst_i].as_u32, src->ports.as_ports[src_i].as_u32);
93 static_always_inline void
94 ip4_tcp_udp_address_x4_invalidate (ip4_tcp_udp_address_x4_t * a, u32 i)
96 a->src.as_ip4_address[i].as_u32 = 0;
97 a->dst.as_ip4_address[i].as_u32 = 0;
98 a->ports.as_ports[i].as_u32 = 0;
101 static_always_inline uword
102 ip4_tcp_udp_address_x4_is_valid (ip4_tcp_udp_address_x4_t * a, u32 i)
104 return !(a->src.as_ip4_address[i].as_u32 == 0
105 && a->dst.as_ip4_address[i].as_u32 == 0
106 && a->ports.as_ports[i].as_u32 == 0);
109 #ifdef TCP_HAVE_VEC128
110 static_always_inline uword
111 ip4_tcp_udp_address_x4_match_helper (ip4_tcp_udp_address_x4_t * ax4,
112 u32x4 src, u32x4 dst, u32x4 ports)
117 r = u32x4_is_equal (src, ax4->src.as_u32x4);
118 r &= u32x4_is_equal (dst, ax4->dst.as_u32x4);
119 r &= u32x4_is_equal (ports, ax4->ports.as_u32x4);
121 /* At this point r will be either all zeros (if nothing matched)
122 or have 32 1s in the position that did match. */
123 m = u8x16_compare_byte_mask ((u8x16) r);
128 static_always_inline uword
129 ip4_tcp_udp_address_x4_match (ip4_tcp_udp_address_x4_t * ax4,
133 u32x4 src = u32x4_splat (ip->src_address.as_u32);
134 u32x4 dst = u32x4_splat (ip->dst_address.as_u32);
135 u32x4 ports = u32x4_splat (tcp->ports.src_and_dst);
136 return my_first_set (ip4_tcp_udp_address_x4_match_helper (ax4, src, dst, ports));
139 static_always_inline uword
140 ip4_tcp_udp_address_x4_first_empty (ip4_tcp_udp_address_x4_t * ax4)
143 return my_first_set (ip4_tcp_udp_address_x4_match_helper (ax4, zero, zero, zero));
146 static_always_inline uword
147 ip4_tcp_udp_address_x4_empty_mask (ip4_tcp_udp_address_x4_t * ax4)
150 return my_zero_mask (ip4_tcp_udp_address_x4_match_helper (ax4, zero, zero, zero));
152 #else /* TCP_HAVE_VEC128 */
153 static_always_inline uword
154 ip4_tcp_udp_address_x4_match_helper (ip4_tcp_udp_address_x4_t * ax4,
155 u32 src, u32 dst, u32 ports)
160 r##i = (src == ax4->src.as_ip4_address[i].as_u32 \
161 && dst == ax4->dst.as_ip4_address[i].as_u32 \
162 && ports == ax4->ports.as_ports[i].as_u32)
171 return (((r0 ? 0xf : 0x0) << 0)
172 | ((r1 ? 0xf : 0x0) << 4)
173 | ((r2 ? 0xf : 0x0) << 8)
174 | ((r3 ? 0xf : 0x0) << 12));
177 static_always_inline uword
178 ip4_tcp_udp_address_x4_match (ip4_tcp_udp_address_x4_t * ax4,
182 return my_first_set (ip4_tcp_udp_address_x4_match_helper (ax4,
183 ip->src_address.as_u32,
184 ip->dst_address.as_u32,
185 tcp->ports.src_and_dst));
188 static_always_inline uword
189 ip4_tcp_udp_address_x4_first_empty (ip4_tcp_udp_address_x4_t * ax4)
191 return my_first_set (ip4_tcp_udp_address_x4_match_helper (ax4, 0, 0, 0));
194 static_always_inline uword
195 ip4_tcp_udp_address_x4_empty_mask (ip4_tcp_udp_address_x4_t * ax4)
197 return my_zero_mask (ip4_tcp_udp_address_x4_match_helper (ax4, 0, 0, 0));
201 static u8 * format_ip4_tcp_udp_address_x4 (u8 * s, va_list * va)
203 ip4_tcp_udp_address_x4_t * a = va_arg (*va, ip4_tcp_udp_address_x4_t *);
204 u32 ai = va_arg (*va, u32);
207 s = format (s, "%U:%d -> %U:%d",
208 format_ip4_address, &a->src.as_ip4_address[ai],
209 clib_net_to_host_u16 (a->ports.as_ports[ai].src),
210 format_ip4_address, &a->dst.as_ip4_address[ai],
211 clib_net_to_host_u16 (a->ports.as_ports[ai].dst));
216 static_always_inline void
217 ip6_tcp_udp_address_x4_set_from_headers (ip6_tcp_udp_address_x4_t * a,
222 a->src.as_u32[0][i] = ip->src_address.as_u32[0];
223 a->src.as_u32[1][i] = ip->src_address.as_u32[1];
224 a->src.as_u32[2][i] = ip->src_address.as_u32[2];
225 a->src.as_u32[3][i] = ip->src_address.as_u32[3];
226 a->dst.as_u32[0][i] = ip->dst_address.as_u32[0];
227 a->dst.as_u32[1][i] = ip->dst_address.as_u32[1];
228 a->dst.as_u32[2][i] = ip->dst_address.as_u32[2];
229 a->dst.as_u32[3][i] = ip->dst_address.as_u32[3];
230 a->ports.as_ports[i].as_u32 = tcp->ports.src_and_dst;
233 static_always_inline void
234 ip6_tcp_udp_address_x4_copy_and_invalidate (ip6_tcp_udp_address_x4_t * dst,
235 ip6_tcp_udp_address_x4_t * src,
236 u32 dst_i, u32 src_i)
238 #define _(d,s) d = s; s = 0;
239 _ (dst->src.as_u32[0][dst_i], src->src.as_u32[0][src_i]);
240 _ (dst->src.as_u32[1][dst_i], src->src.as_u32[1][src_i]);
241 _ (dst->src.as_u32[2][dst_i], src->src.as_u32[2][src_i]);
242 _ (dst->src.as_u32[3][dst_i], src->src.as_u32[3][src_i]);
243 _ (dst->dst.as_u32[0][dst_i], src->dst.as_u32[0][src_i]);
244 _ (dst->dst.as_u32[1][dst_i], src->dst.as_u32[1][src_i]);
245 _ (dst->dst.as_u32[2][dst_i], src->dst.as_u32[2][src_i]);
246 _ (dst->dst.as_u32[3][dst_i], src->dst.as_u32[3][src_i]);
247 _ (dst->ports.as_ports[dst_i].as_u32, src->ports.as_ports[src_i].as_u32);
251 static_always_inline void
252 ip6_tcp_udp_address_x4_invalidate (ip6_tcp_udp_address_x4_t * a, u32 i)
254 a->src.as_u32[0][i] = 0;
255 a->src.as_u32[1][i] = 0;
256 a->src.as_u32[2][i] = 0;
257 a->src.as_u32[3][i] = 0;
258 a->dst.as_u32[0][i] = 0;
259 a->dst.as_u32[1][i] = 0;
260 a->dst.as_u32[2][i] = 0;
261 a->dst.as_u32[3][i] = 0;
262 a->ports.as_ports[i].as_u32 = 0;
265 static_always_inline uword
266 ip6_tcp_udp_address_x4_is_valid (ip6_tcp_udp_address_x4_t * a, u32 i)
268 return !(a->src.as_u32[0][i] == 0
269 && a->src.as_u32[1][i] == 0
270 && a->src.as_u32[2][i] == 0
271 && a->src.as_u32[3][i] == 0
272 && a->dst.as_u32[0][i] == 0
273 && a->dst.as_u32[1][i] == 0
274 && a->dst.as_u32[2][i] == 0
275 && a->dst.as_u32[3][i] == 0
276 && a->ports.as_ports[i].as_u32 == 0);
279 #ifdef TCP_HAVE_VEC128
280 static_always_inline uword
281 ip6_tcp_udp_address_x4_match_helper (ip6_tcp_udp_address_x4_t * ax4,
282 u32x4 src0, u32x4 src1, u32x4 src2, u32x4 src3,
283 u32x4 dst0, u32x4 dst1, u32x4 dst2, u32x4 dst3,
289 r = u32x4_is_equal (src0, ax4->src.as_u32x4[0]);
290 r &= u32x4_is_equal (src1, ax4->src.as_u32x4[1]);
291 r &= u32x4_is_equal (src2, ax4->src.as_u32x4[2]);
292 r &= u32x4_is_equal (src3, ax4->src.as_u32x4[3]);
293 r &= u32x4_is_equal (dst0, ax4->dst.as_u32x4[0]);
294 r &= u32x4_is_equal (dst1, ax4->dst.as_u32x4[1]);
295 r &= u32x4_is_equal (dst2, ax4->dst.as_u32x4[2]);
296 r &= u32x4_is_equal (dst3, ax4->dst.as_u32x4[3]);
297 r &= u32x4_is_equal (ports, ax4->ports.as_u32x4);
299 /* At this point r will be either all zeros (if nothing matched)
300 or have 32 1s in the position that did match. */
301 m = u8x16_compare_byte_mask ((u8x16) r);
306 static_always_inline uword
307 ip6_tcp_udp_address_x4_match (ip6_tcp_udp_address_x4_t * ax4,
311 u32x4 src0 = u32x4_splat (ip->src_address.as_u32[0]);
312 u32x4 src1 = u32x4_splat (ip->src_address.as_u32[1]);
313 u32x4 src2 = u32x4_splat (ip->src_address.as_u32[2]);
314 u32x4 src3 = u32x4_splat (ip->src_address.as_u32[3]);
315 u32x4 dst0 = u32x4_splat (ip->dst_address.as_u32[0]);
316 u32x4 dst1 = u32x4_splat (ip->dst_address.as_u32[1]);
317 u32x4 dst2 = u32x4_splat (ip->dst_address.as_u32[2]);
318 u32x4 dst3 = u32x4_splat (ip->dst_address.as_u32[3]);
319 u32x4 ports = u32x4_splat (tcp->ports.src_and_dst);
320 return my_first_set (ip6_tcp_udp_address_x4_match_helper (ax4,
321 src0, src1, src2, src3,
322 dst0, dst1, dst2, dst3,
326 static_always_inline uword
327 ip6_tcp_udp_address_x4_first_empty (ip6_tcp_udp_address_x4_t * ax4)
330 return my_first_set (ip6_tcp_udp_address_x4_match_helper (ax4,
331 zero, zero, zero, zero,
332 zero, zero, zero, zero,
336 static_always_inline uword
337 ip6_tcp_udp_address_x4_empty_mask (ip6_tcp_udp_address_x4_t * ax4)
340 return my_zero_mask (ip6_tcp_udp_address_x4_match_helper (ax4,
341 zero, zero, zero, zero,
342 zero, zero, zero, zero,
345 #else /* TCP_HAVE_VEC128 */
346 static_always_inline uword
347 ip6_tcp_udp_address_x4_match_helper (ip6_tcp_udp_address_x4_t * ax4,
348 u32 src0, u32 src1, u32 src2, u32 src3,
349 u32 dst0, u32 dst1, u32 dst2, u32 dst3,
355 r##i = (src0 == ax4->src.as_u32[i][0] \
356 && src1 == ax4->src.as_u32[i][1] \
357 && src2 == ax4->src.as_u32[i][2] \
358 && src3 == ax4->src.as_u32[i][3] \
359 && dst0 == ax4->dst.as_u32[i][0] \
360 && dst1 == ax4->dst.as_u32[i][1] \
361 && dst2 == ax4->dst.as_u32[i][2] \
362 && dst3 == ax4->dst.as_u32[i][3] \
363 && ports == ax4->ports.as_ports[i].as_u32)
372 return (((r0 ? 0xf : 0x0) << 0)
373 | ((r1 ? 0xf : 0x0) << 4)
374 | ((r2 ? 0xf : 0x0) << 8)
375 | ((r3 ? 0xf : 0x0) << 12));
378 static_always_inline uword
379 ip6_tcp_udp_address_x4_match (ip6_tcp_udp_address_x4_t * ax4,
383 u32 src0 = ip->src_address.as_u32[0];
384 u32 src1 = ip->src_address.as_u32[1];
385 u32 src2 = ip->src_address.as_u32[2];
386 u32 src3 = ip->src_address.as_u32[3];
387 u32 dst0 = ip->dst_address.as_u32[0];
388 u32 dst1 = ip->dst_address.as_u32[1];
389 u32 dst2 = ip->dst_address.as_u32[2];
390 u32 dst3 = ip->dst_address.as_u32[3];
391 u32 ports = tcp->ports.src_and_dst;
392 return my_first_set (ip6_tcp_udp_address_x4_match_helper (ax4,
393 src0, src1, src2, src3,
394 dst0, dst1, dst2, dst3,
398 static_always_inline uword
399 ip6_tcp_udp_address_x4_first_empty (ip6_tcp_udp_address_x4_t * ax4)
401 return my_first_set (ip6_tcp_udp_address_x4_match_helper (ax4,
407 static_always_inline uword
408 ip6_tcp_udp_address_x4_empty_mask (ip6_tcp_udp_address_x4_t * ax4)
410 return my_zero_mask (ip6_tcp_udp_address_x4_match_helper (ax4,
415 #endif /* ! TCP_HAVE_VEC128 */
417 static u8 * format_ip6_tcp_udp_address_x4 (u8 * s, va_list * va)
419 ip6_tcp_udp_address_x4_t * a = va_arg (*va, ip6_tcp_udp_address_x4_t *);
420 u32 i, ai = va_arg (*va, u32);
421 ip6_address_t src, dst;
424 for (i = 0; i < 4; i++)
426 src.as_u32[i] = a->src.as_u32[i][ai];
427 dst.as_u32[i] = a->dst.as_u32[i][ai];
430 s = format (s, "%U:%d -> %U:%d",
431 format_ip6_address, &src,
432 clib_net_to_host_u16 (a->ports.as_ports[ai].src),
433 format_ip6_address, &dst,
434 clib_net_to_host_u16 (a->ports.as_ports[ai].dst));
439 static_always_inline u32
440 find_oldest_timestamp_x4 (u32 * time_stamps, u32 now)
442 u32 dt0, dt_min0, i_min0;
443 u32 dt1, dt_min1, i_min1;
446 dt_min0 = now - time_stamps[0];
447 dt_min1 = now - time_stamps[2];
448 dt0 = now - time_stamps[1];
449 dt1 = now - time_stamps[3];
451 i_min0 += dt0 > dt_min0;
452 i_min1 += dt1 > dt_min1;
454 dt_min0 = i_min0 > 0 ? dt0 : dt_min0;
455 dt_min1 = i_min1 > 0 ? dt1 : dt_min1;
457 return dt_min0 > dt_min1 ? i_min0 : (2 + i_min1);
460 static_always_inline uword
461 tcp_round_trip_time_stats_is_valid (tcp_round_trip_time_stats_t * s)
462 { return s->count > 0; }
464 static_always_inline void
465 tcp_round_trip_time_stats_compute (tcp_round_trip_time_stats_t * s, f64 * r)
468 ASSERT (s->count > 0);
469 ave = s->sum / s->count;
470 rms = sqrt (s->sum2 / s->count - ave*ave);
476 tcp_option_type_t type : 8;
478 u32 my_time_stamp, his_time_stamp;
479 } __attribute__ ((packed)) tcp_time_stamp_option_t;
486 tcp_option_type_t type : 8;
492 tcp_option_type_t type : 8;
495 } __attribute__ ((packed)) window_scale;
499 tcp_time_stamp_option_t time_stamp;
500 } __attribute__ ((packed)) options;
501 } __attribute__ ((packed)) tcp_syn_packet_t;
509 tcp_time_stamp_option_t time_stamp;
511 } __attribute__ ((packed)) tcp_ack_packet_t;
515 tcp_syn_packet_t tcp;
516 } ip4_tcp_syn_packet_t;
520 tcp_ack_packet_t tcp;
521 } ip4_tcp_ack_packet_t;
525 tcp_syn_packet_t tcp;
526 } ip6_tcp_syn_packet_t;
530 tcp_ack_packet_t tcp;
531 } ip6_tcp_ack_packet_t;
533 static_always_inline void
534 ip4_tcp_packet_init (ip4_header_t * ip, u32 n_bytes)
536 ip->ip_version_and_header_length = 0x45;
538 ip->tos = ip4_main.host_config.tos;
539 ip->ttl = ip4_main.host_config.ttl;
541 /* No need to set fragment ID due to DF bit. */
542 ip->flags_and_fragment_offset = clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
544 ip->protocol = IP_PROTOCOL_TCP;
546 ip->length = clib_host_to_net_u16 (n_bytes);
548 ip->checksum = ip4_header_checksum (ip);
551 static_always_inline void
552 ip6_tcp_packet_init (ip6_header_t * ip, u32 n_bytes)
554 ip->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
556 ip->payload_length = clib_host_to_net_u16 (n_bytes - sizeof (ip[0]));
558 ip->hop_limit = ip6_main.host_config.ttl;
561 static_always_inline u32
562 tcp_time_now (tcp_main_t * tm, tcp_timer_type_t t)
564 ASSERT (t < ARRAY_LEN (tm->log2_clocks_per_tick));
565 return clib_cpu_time_now () >> tm->log2_clocks_per_tick[t];
569 tcp_time_init (vlib_main_t * vm, tcp_main_t * tm)
572 f64 log2 = .69314718055994530941;
574 for (i = 0; i < ARRAY_LEN (tm->log2_clocks_per_tick); i++)
581 tm->log2_clocks_per_tick[i] =
582 flt_round_nearest (log (t[i] / vm->clib_time.seconds_per_clock) / log2);
583 tm->secs_per_tick[i] = vm->clib_time.seconds_per_clock * (1 << tm->log2_clocks_per_tick[i]);
590 TCP_LOOKUP_NEXT_DROP,
591 TCP_LOOKUP_NEXT_PUNT,
592 TCP_LOOKUP_NEXT_LISTEN_SYN,
593 TCP_LOOKUP_NEXT_LISTEN_ACK,
594 TCP_LOOKUP_NEXT_CONNECT_SYN_ACK,
595 TCP_LOOKUP_NEXT_ESTABLISHED,
/* List of TCP error codes, one entry per code in the form
   _ (SYMBOL, "description string").  The list expands to nothing by
   itself: the user defines _ before invoking it (for instance the
   TCP_ERROR_##sym definition that follows turns each entry into an
   enumeration constant named TCP_ERROR_<SYMBOL>). */
599 #define foreach_tcp_error \
600 _ (NONE, "no error") \
601 _ (LOOKUP_DROPS, "lookup drops") \
602 _ (LISTEN_RESPONSES, "listen responses sent") \
603 _ (CONNECTS_SENT, "connects sent") \
604 _ (LISTENS_ESTABLISHED, "listens connected") \
605 _ (UNEXPECTED_SEQ_NUMBER, "unexpected sequence number drops") \
606 _ (UNEXPECTED_ACK_NUMBER, "unexpected acknowledgment number drops") \
607 _ (CONNECTS_ESTABLISHED, "connects established") \
608 _ (NO_LISTENER_FOR_PORT, "no listener for port") \
609 _ (WRONG_LOCAL_ADDRESS_FOR_PORT, "wrong local address for port") \
610 _ (ACKS_SENT, "acks sent for established connections") \
611 _ (NO_DATA, "acks with no data") \
612 _ (FINS_RECEIVED, "fins received") \
613 _ (SEGMENT_AFTER_FIN, "segments dropped after fin received") \
614 _ (CONNECTIONS_CLOSED, "connections closed")
617 #define _(sym,str) TCP_ERROR_##sym,
623 #ifdef TCP_HAVE_VEC128
624 static_always_inline u32x4 u32x4_splat_x2 (u32 x)
626 u32x4 r = u32x4_set0 (x);
627 return u32x4_interleave_lo (r, r);
630 static_always_inline u32x4 u32x4_set_x2 (u32 x, u32 y)
632 u32x4 r0 = u32x4_set0 (x);
633 u32x4 r1 = u32x4_set0 (y);
634 return u32x4_interleave_lo (r0, r1);
/* Extract element I of vector X through the compiler builtin
   __builtin_ia32_vec_ext_v4si.  X is cast to i32x4 and I to int before
   the call; only defined when TCP_HAVE_VEC128 is set. */
638 #define u32x4_get(x,i) \
639 __builtin_ia32_vec_ext_v4si ((i32x4) (x), (int) (i))
640 #else /* TCP_HAVE_VEC128 */
641 #endif /* TCP_HAVE_VEC128 */
643 /* Dispatching on tcp/udp listeners (by dst port)
644 and tcp/udp connections (by src/dst address/port). */
645 static_always_inline uword
646 ip46_tcp_lookup (vlib_main_t * vm,
647 vlib_node_runtime_t * node,
648 vlib_frame_t * frame,
651 tcp_main_t * tm = &tcp_main;
652 ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
653 uword n_packets = frame->n_vectors;
654 u32 * from, * to_next;
655 u32 n_left_from, n_left_to_next, next, mini_now;
656 vlib_node_runtime_t * error_node = node;
658 from = vlib_frame_vector_args (frame);
659 n_left_from = n_packets;
660 next = node->cached_next_index;
661 mini_now = tcp_time_now (tm, TCP_TIMER_mini_connection);
663 while (n_left_from > 0)
665 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
667 while (n_left_from > 0 && n_left_to_next > 0)
673 u32 bi0, imin0, iest0, li0;
674 tcp_connection_state_t state0;
676 u8 min_match0, est_match0, is_min_match0, is_est_match0;
677 u8 min_oldest0, est_first_empty0;
679 bi0 = to_next[0] = from[0];
686 p0 = vlib_get_buffer (vm, bi0);
688 #ifdef TCP_HAVE_VEC128
692 a0 = tm->connection_hash_seeds[is_ip6][0].as_u32x4;
693 b0 = tm->connection_hash_seeds[is_ip6][1].as_u32x4;
694 c0 = tm->connection_hash_seeds[is_ip6][2].as_u32x4;
698 ip60 = vlib_buffer_get_current (p0);
699 tcp0 = ip6_next_header (ip60);
701 a0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[0]);
702 b0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[1]);
703 c0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[2]);
705 hash_v3_mix_u32x (a0, b0, c0);
707 a0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[3]);
708 b0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[0]);
709 c0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[1]);
711 hash_v3_mix_u32x (a0, b0, c0);
713 a0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[2]);
714 b0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[3]);
715 c0 ^= u32x4_splat_x2 (tcp0->ports.src_and_dst);
719 ip40 = vlib_buffer_get_current (p0);
720 tcp0 = ip4_next_header (ip40);
722 a0 ^= u32x4_splat_x2 (ip40->src_address.as_u32);
723 b0 ^= u32x4_splat_x2 (ip40->dst_address.as_u32);
724 c0 ^= u32x4_splat_x2 (tcp0->ports.src_and_dst);
727 hash_v3_finalize_u32x (a0, b0, c0);
729 c0 &= tm->connection_hash_masks[is_ip6].as_u32x4;
731 imin0 = u32x4_get0 (c0);
732 iest0 = u32x4_get (c0, 1);
736 u32 a00, a01, b00, b01, c00, c01;
738 a00 = tm->connection_hash_seeds[is_ip6][0].as_u32[0];
739 a01 = tm->connection_hash_seeds[is_ip6][0].as_u32[1];
740 b00 = tm->connection_hash_seeds[is_ip6][1].as_u32[0];
741 b01 = tm->connection_hash_seeds[is_ip6][1].as_u32[1];
742 c00 = tm->connection_hash_seeds[is_ip6][2].as_u32[0];
743 c01 = tm->connection_hash_seeds[is_ip6][2].as_u32[1];
747 ip60 = vlib_buffer_get_current (p0);
748 tcp0 = ip6_next_header (ip60);
750 a00 ^= ip60->src_address.as_u32[0];
751 a01 ^= ip60->src_address.as_u32[0];
752 b00 ^= ip60->src_address.as_u32[1];
753 b01 ^= ip60->src_address.as_u32[1];
754 c00 ^= ip60->src_address.as_u32[2];
755 c01 ^= ip60->src_address.as_u32[2];
757 hash_v3_mix32 (a00, b00, c00);
758 hash_v3_mix32 (a01, b01, c01);
760 a00 ^= ip60->src_address.as_u32[3];
761 a01 ^= ip60->src_address.as_u32[3];
762 b00 ^= ip60->dst_address.as_u32[0];
763 b01 ^= ip60->dst_address.as_u32[0];
764 c00 ^= ip60->dst_address.as_u32[1];
765 c01 ^= ip60->dst_address.as_u32[1];
767 hash_v3_mix32 (a00, b00, c00);
768 hash_v3_mix32 (a01, b01, c01);
770 a00 ^= ip60->dst_address.as_u32[2];
771 a01 ^= ip60->dst_address.as_u32[2];
772 b00 ^= ip60->dst_address.as_u32[3];
773 b01 ^= ip60->dst_address.as_u32[3];
774 c00 ^= tcp0->ports.src_and_dst;
775 c01 ^= tcp0->ports.src_and_dst;
779 ip40 = vlib_buffer_get_current (p0);
780 tcp0 = ip4_next_header (ip40);
782 a00 ^= ip40->src_address.as_u32;
783 a01 ^= ip40->src_address.as_u32;
784 b00 ^= ip40->dst_address.as_u32;
785 b01 ^= ip40->dst_address.as_u32;
786 c00 ^= tcp0->ports.src_and_dst;
787 c01 ^= tcp0->ports.src_and_dst;
790 hash_v3_finalize32 (a00, b00, c00);
791 hash_v3_finalize32 (a01, b01, c01);
793 c00 &= tm->connection_hash_masks[is_ip6].as_u32[0];
794 c01 &= tm->connection_hash_masks[is_ip6].as_u32[1];
803 ip6_tcp_udp_address_x4_and_timestamps_t * mina0;
804 ip6_tcp_udp_address_x4_t * esta0;
806 mina0 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin0);
807 esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest0);
809 min_match0 = ip6_tcp_udp_address_x4_match (&mina0->address_x4, ip60, tcp0);
810 est_match0 = ip6_tcp_udp_address_x4_match (esta0, ip60, tcp0);
812 min_oldest0 = find_oldest_timestamp_x4 (mina0->time_stamps, mini_now);
813 est_first_empty0 = ip6_tcp_udp_address_x4_first_empty (esta0);
815 if (PREDICT_FALSE (! est_match0 && est_first_empty0 >= 4 && ! min_match0))
817 /* Lookup in overflow hash. */
823 ip4_tcp_udp_address_x4_and_timestamps_t * mina0;
824 ip4_tcp_udp_address_x4_t * esta0;
826 mina0 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin0);
827 esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest0);
829 min_match0 = ip4_tcp_udp_address_x4_match (&mina0->address_x4, ip40, tcp0);
830 est_match0 = ip4_tcp_udp_address_x4_match (esta0, ip40, tcp0);
832 min_oldest0 = find_oldest_timestamp_x4 (mina0->time_stamps, mini_now);
833 est_first_empty0 = ip4_tcp_udp_address_x4_first_empty (esta0);
835 if (PREDICT_FALSE (! est_match0 && est_first_empty0 >= 4 && ! min_match0))
837 /* Lookup in overflow hash. */
842 is_min_match0 = min_match0 < 4;
843 is_est_match0 = est_match0 < 4;
845 imin0 = 4 * imin0 + (is_min_match0 ? min_match0 : min_oldest0);
846 iest0 = 4 * iest0 + (is_est_match0 ? est_match0 : est_first_empty0);
848 /* Should simultaneously not match both in mini and established connection tables. */
849 ASSERT (! (is_min_match0 && is_est_match0));
852 tcp_mini_connection_t * min0;
853 tcp_connection_t * est0;
854 tcp_sequence_pair_t * seq_pair0;
857 min0 = vec_elt_at_index (tm46->mini_connections, imin0);
858 est0 = vec_elt_at_index (tm46->established_connections, iest0);
862 ASSERT (min0->state != TCP_CONNECTION_STATE_unused);
863 ASSERT (min0->state != TCP_CONNECTION_STATE_established);
866 seq_pair0 = is_min_match0 ? &min0->sequence_numbers : &est0->sequence_numbers;
868 state0 = is_min_match0 ? min0->state : TCP_CONNECTION_STATE_unused;
869 state0 = is_est_match0 ? TCP_CONNECTION_STATE_established : state0;
871 vnet_buffer (p0)->ip.tcp.established_connection_index = iest0;
872 vnet_buffer (p0)->ip.tcp.mini_connection_index = imin0;
873 vnet_buffer (p0)->ip.tcp.listener_index = li0 = tm->listener_index_by_dst_port[tcp0->ports.dst];
875 flags0 = tcp0->flags & (TCP_FLAG_SYN | TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_FIN);
877 next0 = tm->disposition_by_state_and_flags[state0][flags0].next;
878 error0 = tm->disposition_by_state_and_flags[state0][flags0].error;
880 next0 = li0 != 0 ? next0 : TCP_LOOKUP_NEXT_PUNT;
881 error0 = li0 != 0 ? error0 : TCP_ERROR_NO_LISTENER_FOR_PORT;
884 p0->error = error_node->errors[error0];
886 if (PREDICT_FALSE (next0 != next))
891 vlib_put_next_frame (vm, node, next, n_left_to_next);
894 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
901 vlib_put_next_frame (vm, node, next, n_left_to_next);
904 if (node->flags & VLIB_NODE_FLAG_TRACE)
907 return frame->n_vectors;
911 ip4_tcp_lookup (vlib_main_t * vm,
912 vlib_node_runtime_t * node,
913 vlib_frame_t * frame)
914 { return ip46_tcp_lookup (vm, node, frame, /* is_ip6 */ 0); }
917 ip6_tcp_lookup (vlib_main_t * vm,
918 vlib_node_runtime_t * node,
919 vlib_frame_t * frame)
920 { return ip46_tcp_lookup (vm, node, frame, /* is_ip6 */ 1); }
923 ip46_size_hash_tables (ip46_tcp_main_t * m)
925 m->mini_connection_hash_mask = pow2_mask (m->log2_n_mini_connection_hash_elts);
926 vec_validate_aligned (m->mini_connections,
927 m->mini_connection_hash_mask,
928 CLIB_CACHE_LINE_BYTES);
930 m->established_connection_hash_mask = pow2_mask (m->log2_n_established_connection_hash_elts);
931 vec_validate_aligned (m->established_connections,
932 m->established_connection_hash_mask,
933 CLIB_CACHE_LINE_BYTES);
937 ip46_tcp_lookup_init (vlib_main_t * vm, tcp_main_t * tm, int is_ip6)
939 ip46_tcp_main_t * m = is_ip6 ? &tm->ip6 : &tm->ip4;
943 m->log2_n_mini_connection_hash_elts = 8;
944 m->log2_n_established_connection_hash_elts = 8;
945 ip46_size_hash_tables (m);
949 vec_validate_aligned (tm->ip6_mini_connection_address_hash,
950 m->mini_connection_hash_mask / 4,
951 CLIB_CACHE_LINE_BYTES);
952 vec_validate_aligned (tm->ip6_established_connection_address_hash,
953 m->established_connection_hash_mask / 4,
954 CLIB_CACHE_LINE_BYTES);
958 vec_validate_aligned (tm->ip4_mini_connection_address_hash,
959 m->mini_connection_hash_mask / 4,
960 CLIB_CACHE_LINE_BYTES);
961 vec_validate_aligned (tm->ip4_established_connection_address_hash,
962 m->established_connection_hash_mask / 4,
963 CLIB_CACHE_LINE_BYTES);
965 tm->connection_hash_masks[is_ip6].as_u32[0] = m->mini_connection_hash_mask / 4;
966 tm->connection_hash_masks[is_ip6].as_u32[1] = m->established_connection_hash_mask / 4;
970 tcp_lookup_init (vlib_main_t * vm, tcp_main_t * tm)
974 /* Initialize hash seeds. */
975 for (is_ip6 = 0; is_ip6 < 2; is_ip6++)
977 u32 * r = clib_random_buffer_get_data (&vm->random_buffer, 3 * 2 * sizeof (r[0]));
978 tm->connection_hash_seeds[is_ip6][0].as_u32[0] = r[0];
979 tm->connection_hash_seeds[is_ip6][0].as_u32[1] = r[1];
980 tm->connection_hash_seeds[is_ip6][1].as_u32[0] = r[2];
981 tm->connection_hash_seeds[is_ip6][1].as_u32[1] = r[3];
982 tm->connection_hash_seeds[is_ip6][2].as_u32[0] = r[4];
983 tm->connection_hash_seeds[is_ip6][2].as_u32[1] = r[5];
985 ip46_tcp_lookup_init (vm, tm, is_ip6);
991 pool_get_aligned (tm->listener_pool, l, CLIB_CACHE_LINE_BYTES);
993 /* Null listener must always have zero index. */
994 ASSERT (l - tm->listener_pool == 0);
996 memset (l, 0, sizeof (l[0]));
998 /* No adjacencies are valid. */
999 l->valid_local_adjacency_bitmap = 0;
1001 vec_validate_init_empty (tm->listener_index_by_dst_port,
1003 l - tm->listener_pool);
1006 /* Initialize disposition table. */
1009 for (i = 0; i < ARRAY_LEN (tm->disposition_by_state_and_flags); i++)
1010 for (j = 0; j < ARRAY_LEN (tm->disposition_by_state_and_flags[i]); j++)
1012 tm->disposition_by_state_and_flags[i][j].next = TCP_LOOKUP_NEXT_DROP;
1013 tm->disposition_by_state_and_flags[i][j].error = TCP_ERROR_LOOKUP_DROPS;
1016 #define _(t,f,n,e) \
1018 tm->disposition_by_state_and_flags[TCP_CONNECTION_STATE_##t][f].next = (n); \
1019 tm->disposition_by_state_and_flags[TCP_CONNECTION_STATE_##t][f].error = (e); \
1022 /* SYNs for new connections -> tcp-listen. */
1023 _ (unused, TCP_FLAG_SYN,
1024 TCP_LOOKUP_NEXT_LISTEN_SYN, TCP_ERROR_NONE);
1025 _ (listen_ack_wait, TCP_FLAG_ACK,
1026 TCP_LOOKUP_NEXT_LISTEN_ACK, TCP_ERROR_NONE);
1027 _ (established, TCP_FLAG_ACK,
1028 TCP_LOOKUP_NEXT_ESTABLISHED, TCP_ERROR_NONE);
1029 _ (established, TCP_FLAG_FIN | TCP_FLAG_ACK,
1030 TCP_LOOKUP_NEXT_ESTABLISHED, TCP_ERROR_NONE);
1035 /* IP4 packet templates. */
1037 ip4_tcp_syn_packet_t ip4_syn, ip4_syn_ack;
1038 ip4_tcp_ack_packet_t ip4_ack, ip4_fin_ack, ip4_rst_ack;
1039 ip6_tcp_syn_packet_t ip6_syn, ip6_syn_ack;
1040 ip6_tcp_ack_packet_t ip6_ack, ip6_fin_ack, ip6_rst_ack;
1042 memset (&ip4_syn, 0, sizeof (ip4_syn));
1043 memset (&ip4_syn_ack, 0, sizeof (ip4_syn_ack));
1044 memset (&ip4_ack, 0, sizeof (ip4_ack));
1045 memset (&ip4_fin_ack, 0, sizeof (ip4_fin_ack));
1046 memset (&ip4_rst_ack, 0, sizeof (ip4_rst_ack));
1047 memset (&ip6_syn, 0, sizeof (ip6_syn));
1048 memset (&ip6_syn_ack, 0, sizeof (ip6_syn_ack));
1049 memset (&ip6_ack, 0, sizeof (ip6_ack));
1050 memset (&ip6_fin_ack, 0, sizeof (ip6_fin_ack));
1051 memset (&ip6_rst_ack, 0, sizeof (ip6_rst_ack));
1053 ip4_tcp_packet_init (&ip4_syn.ip4, sizeof (ip4_syn));
1054 ip4_tcp_packet_init (&ip4_syn_ack.ip4, sizeof (ip4_syn_ack));
1055 ip4_tcp_packet_init (&ip4_ack.ip4, sizeof (ip4_ack));
1056 ip4_tcp_packet_init (&ip4_fin_ack.ip4, sizeof (ip4_fin_ack));
1057 ip4_tcp_packet_init (&ip4_rst_ack.ip4, sizeof (ip4_rst_ack));
1059 ip6_tcp_packet_init (&ip6_syn.ip6, sizeof (ip6_syn));
1060 ip6_tcp_packet_init (&ip6_syn_ack.ip6, sizeof (ip6_syn_ack));
1061 ip6_tcp_packet_init (&ip6_ack.ip6, sizeof (ip6_ack));
1062 ip6_tcp_packet_init (&ip6_fin_ack.ip6, sizeof (ip6_fin_ack));
1063 ip6_tcp_packet_init (&ip6_rst_ack.ip6, sizeof (ip6_rst_ack));
1067 u8 window_scale = 7;
1068 tcp_syn_packet_t * s = &ip4_syn.tcp;
1069 tcp_syn_packet_t * sa = &ip4_syn_ack.tcp;
1070 tcp_ack_packet_t * a = &ip4_ack.tcp;
1071 tcp_ack_packet_t * fa = &ip4_fin_ack.tcp;
1072 tcp_ack_packet_t * ra = &ip4_rst_ack.tcp;
1074 s->header.tcp_header_u32s_and_reserved = (sizeof (s[0]) / sizeof (u32)) << 4;
1075 a->header.tcp_header_u32s_and_reserved = (sizeof (a[0]) / sizeof (u32)) << 4;
1077 s->header.flags = TCP_FLAG_SYN;
1078 a->header.flags = TCP_FLAG_ACK;
1080 s->header.window = clib_host_to_net_u16 (32 << (10 - window_scale));
1081 a->header.window = s->header.window;
1083 s->options.mss.type = TCP_OPTION_MSS;
1084 s->options.mss.length = 4;
1086 s->options.window_scale.type = TCP_OPTION_WINDOW_SCALE;
1087 s->options.window_scale.length = 3;
1088 s->options.window_scale.value = window_scale;
1090 s->options.time_stamp.type = TCP_OPTION_TIME_STAMP;
1091 s->options.time_stamp.length = 10;
1093 memset (&s->options.nops, TCP_OPTION_NOP, sizeof (s->options.nops));
1095 /* SYN-ACK is same as SYN but with ACK flag set. */
1097 sa->header.flags |= TCP_FLAG_ACK;
1099 a->options.time_stamp.type = TCP_OPTION_TIME_STAMP;
1100 a->options.time_stamp.length = 10;
1101 memset (&a->options.nops, TCP_OPTION_NOP, sizeof (a->options.nops));
1103 /* {FIN,RST}-ACK are same as ACK but with {FIN,RST} flag set. */
1105 fa->header.flags |= TCP_FLAG_FIN;
1107 ra->header.flags |= TCP_FLAG_RST;
1109 /* IP6 TCP headers are identical. */
1111 ip6_syn_ack.tcp = sa[0];
1113 ip6_fin_ack.tcp = fa[0];
1114 ip6_rst_ack.tcp = ra[0];
1116 /* TCP checksums. */
1120 sum = clib_host_to_net_u32 (sizeof (ip4_ack.tcp) + (ip4_ack.ip4.protocol << 16));
1121 sum = ip_incremental_checksum (sum, &ip4_ack.tcp, sizeof (ip4_ack.tcp));
1122 ip4_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1124 sum = clib_host_to_net_u32 (sizeof (ip4_fin_ack.tcp) + (ip4_fin_ack.ip4.protocol << 16));
1125 sum = ip_incremental_checksum (sum, &ip4_fin_ack.tcp, sizeof (ip4_fin_ack.tcp));
1126 ip4_fin_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1128 sum = clib_host_to_net_u32 (sizeof (ip4_rst_ack.tcp) + (ip4_rst_ack.ip4.protocol << 16));
1129 sum = ip_incremental_checksum (sum, &ip4_rst_ack.tcp, sizeof (ip4_rst_ack.tcp));
1130 ip4_rst_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1132 sum = clib_host_to_net_u32 (sizeof (ip4_syn.tcp) + (ip4_syn.ip4.protocol << 16));
1133 sum = ip_incremental_checksum (sum, &ip4_syn.tcp, sizeof (ip4_syn.tcp));
1134 ip4_syn.tcp.header.checksum = ~ ip_csum_fold (sum);
1136 sum = clib_host_to_net_u32 (sizeof (ip4_syn_ack.tcp) + (ip4_syn_ack.ip4.protocol << 16));
1137 sum = ip_incremental_checksum (sum, &ip4_syn_ack.tcp, sizeof (ip4_syn_ack.tcp));
1138 ip4_syn_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1140 sum = clib_host_to_net_u32 (sizeof (ip6_ack.tcp)) + ip6_ack.ip6.protocol;
1141 sum = ip_incremental_checksum (sum, &ip6_ack.tcp, sizeof (ip6_ack.tcp));
1142 ip6_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1144 sum = clib_host_to_net_u32 (sizeof (ip6_fin_ack.tcp)) + ip6_fin_ack.ip6.protocol;
1145 sum = ip_incremental_checksum (sum, &ip6_fin_ack.tcp, sizeof (ip6_fin_ack.tcp));
1146 ip6_fin_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1148 sum = clib_host_to_net_u32 (sizeof (ip6_rst_ack.tcp)) + ip6_rst_ack.ip6.protocol;
1149 sum = ip_incremental_checksum (sum, &ip6_rst_ack.tcp, sizeof (ip6_rst_ack.tcp));
1150 ip6_rst_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1152 sum = clib_host_to_net_u32 (sizeof (ip6_syn.tcp)) + ip6_syn.ip6.protocol;
1153 sum = ip_incremental_checksum (sum, &ip6_syn.tcp, sizeof (ip6_syn.tcp));
1154 ip6_syn.tcp.header.checksum = ~ ip_csum_fold (sum);
1156 sum = clib_host_to_net_u32 (sizeof (ip6_syn_ack.tcp)) + ip6_syn_ack.ip6.protocol;
1157 sum = ip_incremental_checksum (sum, &ip6_syn_ack.tcp, sizeof (ip6_syn_ack.tcp));
1158 ip6_syn_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
1164 vlib_packet_template_init \
1166 &tm->ip4.packet_templates[t].vlib, \
1168 /* alloc chunk size */ VLIB_FRAME_SIZE, \
1170 tm->ip4.packet_templates[t].tcp_checksum_net_byte_order \
1171 = x.tcp.header.checksum; \
1172 tm->ip4.packet_templates[t].ip4_checksum_net_byte_order \
1176 _ (TCP_PACKET_TEMPLATE_SYN, ip4_syn, "ip4 tcp syn");
1177 _ (TCP_PACKET_TEMPLATE_SYN_ACK, ip4_syn_ack, "ip4 tcp syn-ack");
1178 _ (TCP_PACKET_TEMPLATE_ACK, ip4_ack, "ip4 tcp ack");
1179 _ (TCP_PACKET_TEMPLATE_FIN_ACK, ip4_fin_ack, "ip4 tcp fin-ack");
1180 _ (TCP_PACKET_TEMPLATE_RST_ACK, ip4_rst_ack, "ip4 tcp rst-ack");
1186 vlib_packet_template_init \
1188 &tm->ip6.packet_templates[t].vlib, \
1190 /* alloc chunk size */ VLIB_FRAME_SIZE, \
1192 tm->ip6.packet_templates[t].tcp_checksum_net_byte_order \
1193 = x.tcp.header.checksum; \
1194 tm->ip6.packet_templates[t].ip4_checksum_net_byte_order \
1198 _ (TCP_PACKET_TEMPLATE_SYN, ip6_syn, "ip6 tcp syn");
1199 _ (TCP_PACKET_TEMPLATE_SYN_ACK, ip6_syn_ack, "ip6 tcp syn-ack");
1200 _ (TCP_PACKET_TEMPLATE_ACK, ip6_ack, "ip6 tcp ack");
1201 _ (TCP_PACKET_TEMPLATE_FIN_ACK, ip6_fin_ack, "ip6 tcp fin-ack");
1202 _ (TCP_PACKET_TEMPLATE_RST_ACK, ip6_rst_ack, "ip6 tcp rst-ack");
/* Table of TCP error strings.  The local _() macro expands each
   (sym,string) pair to just its string followed by a comma. */
1208 static char * tcp_error_strings[] = {
1209 #define _(sym,string) string,
/* Graph node registration for IPv4 TCP lookup: runs ip4_tcp_lookup under
   the node name "ip4-tcp-lookup" with u32-sized vector elements. */
1214 VLIB_REGISTER_NODE (ip4_tcp_lookup_node,static) = {
1215 .function = ip4_tcp_lookup,
1216 .name = "ip4-tcp-lookup",
1218 .vector_size = sizeof (u32),
1220 .n_next_nodes = TCP_LOOKUP_N_NEXT,
/* Next-node names, indexed by the TCP_LOOKUP_NEXT_* values. */
1222 [TCP_LOOKUP_NEXT_DROP] = "error-drop",
1223 [TCP_LOOKUP_NEXT_PUNT] = "error-punt",
1224 [TCP_LOOKUP_NEXT_LISTEN_SYN] = "ip4-tcp-listen",
1225 [TCP_LOOKUP_NEXT_LISTEN_ACK] = "ip4-tcp-establish",
1226 [TCP_LOOKUP_NEXT_CONNECT_SYN_ACK] = "ip4-tcp-connect",
1227 [TCP_LOOKUP_NEXT_ESTABLISHED] = "ip4-tcp-established",
/* Error counters use the shared tcp_error_strings table. */
1230 .n_errors = TCP_N_ERROR,
1231 .error_strings = tcp_error_strings,
/* Graph node registration for IPv6 TCP lookup: runs ip6_tcp_lookup under
   the node name "ip6-tcp-lookup" with u32-sized vector elements. */
1234 VLIB_REGISTER_NODE (ip6_tcp_lookup_node,static) = {
1235 .function = ip6_tcp_lookup,
1236 .name = "ip6-tcp-lookup",
1238 .vector_size = sizeof (u32),
1240 .n_next_nodes = TCP_LOOKUP_N_NEXT,
/* Next-node names, indexed by the TCP_LOOKUP_NEXT_* values.  All TCP
   next nodes are the IPv6 variants: an ACK that completes a listen
   handshake must reach "ip6-tcp-establish", which handles the buffer
   with is_ip6 = 1, not the IPv4 establish node. */
1242 [TCP_LOOKUP_NEXT_DROP] = "error-drop",
1243 [TCP_LOOKUP_NEXT_PUNT] = "error-punt",
1244 [TCP_LOOKUP_NEXT_LISTEN_SYN] = "ip6-tcp-listen",
1245 [TCP_LOOKUP_NEXT_LISTEN_ACK] = "ip6-tcp-establish",
1246 [TCP_LOOKUP_NEXT_CONNECT_SYN_ACK] = "ip6-tcp-connect",
1247 [TCP_LOOKUP_NEXT_ESTABLISHED] = "ip6-tcp-established",
/* Error counters use the shared tcp_error_strings table. */
1250 .n_errors = TCP_N_ERROR,
1251 .error_strings = tcp_error_strings,
/* Decode the TCP options of a received SYN into mini connection M.

   option_decode[] holds, per option type, a pointer to that option's
   value bytes.  The MSS, window-scale and time-stamp slots start out
   pointing at the defaults in tm->option_decode_mini_connection_template
   and are redirected into the packet when the option is found.  After
   parsing, the three values are copied into M: MSS converted to host
   byte order, window scale as a single byte, and the peer's time stamp
   left in network byte order. */
1254 static_always_inline void
1255 tcp_options_decode_for_syn (tcp_main_t * tm, tcp_mini_connection_t * m, tcp_header_t * tcp)
1257 u8 * o = (void *) (tcp + 1);
1258 u32 n_bytes = (tcp->tcp_header_u32s_and_reserved >> 4) * sizeof (u32);
/* n_bytes is the length of the whole TCP header (fixed part plus options),
   so the end of the options is measured from the start of the header.
   Measuring it from the first option byte would let the parser run one
   fixed header length past the options. */
1259 u8 * e = (u8 *) tcp + n_bytes;
1260 tcp_mini_connection_t * tmpl = &tm->option_decode_mini_connection_template;
1261 tcp_option_type_t t;
1263 u8 * option_decode[16];
1265 /* Initialize defaults. */
1266 option_decode[TCP_OPTION_MSS] = (u8 *) &tmpl->max_segment_size;
1267 option_decode[TCP_OPTION_WINDOW_SCALE] = (u8 *) &tmpl->window_scale;
1268 option_decode[TCP_OPTION_TIME_STAMP] = (u8 *) &tmpl->time_stamps.his_net_byte_order;
1275 i = t >= ARRAY_LEN (option_decode) ? TCP_OPTION_END : t; \
1276 option_decode[i] = o + 2; \
1277 /* Skip nop; don't skip end; else length from packet. */ \
1278 l = t < 2 ? t : o[1]; \
1280 o = p < e ? p : o; \
1284 /* Fast path: NOP NOP TIMESTAMP. */
1285 if (o >= e) goto done;
1287 if (o >= e) goto done;
/* Copy out whichever values the slots now point at (packet or defaults). */
1295 m->max_segment_size =
1296 clib_net_to_host_u16 (*(u16 *) option_decode[TCP_OPTION_MSS]);
1297 m->window_scale = *option_decode[TCP_OPTION_WINDOW_SCALE];
1298 m->time_stamps.his_net_byte_order = ((u32 *) option_decode[TCP_OPTION_TIME_STAMP])[0];
/* Decode the time-stamp option of a received ACK.

   The time-stamp slot of option_decode[] starts out pointing at a local
   pair of zeros and is redirected into the packet when the option is
   found.  The first u32 of the option is stored unconverted in
   his_time_stamp[0]; the second u32 is returned in host byte order. */
1301 static_always_inline u32
1302 tcp_options_decode_for_ack (tcp_main_t * tm, tcp_header_t * tcp,
1303 u32 * his_time_stamp)
1305 u8 * o = (void *) (tcp + 1);
1306 u32 n_bytes = (tcp->tcp_header_u32s_and_reserved >> 4) * sizeof (u32);
/* n_bytes is the length of the whole TCP header (fixed part plus options),
   so the end of the options is measured from the start of the header.
   Measuring it from the first option byte would let the parser run one
   fixed header length past the options. */
1307 u8 * e = (u8 *) tcp + n_bytes;
1308 tcp_option_type_t t;
1310 u8 * option_decode[16];
1311 u32 default_time_stamps[2];
1313 /* Initialize defaults. */
1314 default_time_stamps[0] = default_time_stamps[1] = 0;
1315 option_decode[TCP_OPTION_TIME_STAMP] = (u8 *) &default_time_stamps;
1322 i = t >= ARRAY_LEN (option_decode) ? TCP_OPTION_END : t; \
1323 option_decode[i] = o + 2; \
1324 /* Skip nop; don't skip end; else length from packet. */ \
1325 l = t < 2 ? t : o[1]; \
1327 o = p < e ? p : o; \
1331 /* Fast path: NOP NOP TIMESTAMP. */
1332 if (o >= e) goto done;
1334 if (o >= e) goto done;
/* First word goes to the caller as-is; second word is the return value. */
1342 his_time_stamp[0] = ((u32 *) option_decode[TCP_OPTION_TIME_STAMP])[0];
1344 return clib_net_to_host_u32 (((u32 *) option_decode[TCP_OPTION_TIME_STAMP])[1]);
/* Initialize tm->option_decode_mini_connection_template, the source of
   default option values for tcp_options_decode_for_syn: zero the whole
   template, then set max segment size to 576 - 40 (stored in network
   byte order), window scale to 0 and the peer time stamp to 0. */
1348 tcp_options_decode_init (tcp_main_t * tm)
1350 tcp_mini_connection_t * m = &tm->option_decode_mini_connection_template;
1352 memset (m, 0, sizeof (m[0]));
1353 m->max_segment_size = clib_host_to_net_u16 (576 - 40);
1354 m->window_scale = 0;
1355 m->time_stamps.his_net_byte_order = 0;
1358 /* Initialize target buffer as "related" to given buffer.
   Copies the RX software interface index and the trace index from B to
   the buffer with index BI_TARGET, and sets the target's
   VLIB_BUFFER_IS_TRACED flag if B has it set. */
1360 vlib_buffer_copy_shared_fields (vlib_main_t * vm, vlib_buffer_t * b, u32 bi_target)
1362 vlib_buffer_t * b_target = vlib_get_buffer (vm, bi_target);
1363 vnet_buffer (b_target)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_RX];
1364 b_target->trace_index = b->trace_index;
1365 b_target->flags |= b->flags & VLIB_BUFFER_IS_TRACED;
/* Next-node indices of the tcp-listen nodes: DROP receives the original
   SYN buffer, REPLY receives the generated SYN-ACK. */
1369 TCP_LISTEN_NEXT_DROP,
1370 TCP_LISTEN_NEXT_REPLY,
1372 } tcp_listen_next_t;
/* Shared IPv4/IPv6 worker for the tcp-listen nodes.

   For every SYN buffer in FRAME:
     - the buffer itself is enqueued to TCP_LISTEN_NEXT_DROP and tagged
       with the TCP_ERROR_LISTEN_RESPONSES error;
     - the mini connection selected by the buffer's mini_connection_index
       is filled in (addresses, time stamps, decoded SYN options, sequence
       numbers) and moved to state listen_ack_wait;
     - a SYN-ACK is built from the SYN_ACK packet template, its checksums
       are patched incrementally from the template's precomputed values,
       and it is enqueued to TCP_LISTEN_NEXT_REPLY.

   Returns the number of vectors in FRAME. */
1374 static_always_inline uword
1375 ip46_tcp_listen (vlib_main_t * vm,
1376 vlib_node_runtime_t * node,
1377 vlib_frame_t * frame,
1380 tcp_main_t * tm = &tcp_main;
1381 ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
1382 uword n_packets = frame->n_vectors;
1383 u32 * from, * to_reply, * to_drop, * random_ack_numbers;
1384 u32 n_left_from, n_left_to_reply, n_left_to_drop, mini_now, timestamp_now;
1385 u16 * fid, * fragment_ids;
1386 vlib_node_runtime_t * error_node;
/* Errors are accounted against the lookup node of the same address family. */
1388 error_node = vlib_node_get_runtime
1389 (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
1391 from = vlib_frame_vector_args (frame);
1392 n_left_from = n_packets;
1393 mini_now = tcp_time_now (tm, TCP_TIMER_mini_connection);
1394 timestamp_now = tcp_time_now (tm, TCP_TIMER_timestamp);
/* One random u32 per packet; used below as our initial sequence number. */
1396 random_ack_numbers = clib_random_buffer_get_data (&vm->random_buffer,
1397 n_packets * sizeof (random_ack_numbers[0]));
1398 /* Get random fragment IDs for replies. */
1399 fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer,
1400 n_packets * sizeof (fragment_ids[0]));
1402 while (n_left_from > 0)
1404 vlib_get_next_frame (vm, node, TCP_LISTEN_NEXT_REPLY,
1405 to_reply, n_left_to_reply);
1406 vlib_get_next_frame (vm, node, TCP_LISTEN_NEXT_DROP,
1407 to_drop, n_left_to_drop);
/* Each input packet consumes one slot in both next frames. */
1409 while (n_left_from > 0 && n_left_to_reply > 0 && n_left_to_drop > 0)
1412 ip6_header_t * ip60;
1413 ip4_header_t * ip40;
1414 tcp_header_t * tcp0;
1415 tcp_mini_connection_t * min0;
1416 tcp_syn_packet_t * tcp_reply0;
1418 u32 bi0, bi_reply0, imin0, my_seq_net0, his_seq_host0, his_seq_net0;
1421 bi0 = to_drop[0] = from[0];
1426 n_left_to_drop -= 1;
1428 p0 = vlib_get_buffer (vm, bi0);
1430 p0->error = error_node->errors[TCP_ERROR_LISTEN_RESPONSES];
1432 imin0 = vnet_buffer (p0)->ip.tcp.mini_connection_index;
/* IPv6: locate the TCP header and record the addresses and the current
   mini-connection time in the x4 address hash entry (imin0 / 4). */
1437 ip6_tcp_udp_address_x4_and_timestamps_t * mina0;
1439 ip60 = vlib_buffer_get_current (p0);
1440 tcp0 = ip6_next_header (ip60);
1442 mina0 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin0 / 4);
1444 ip6_tcp_udp_address_x4_set_from_headers (&mina0->address_x4,
1446 mina0->time_stamps[i0] = mini_now;
/* IPv4: same bookkeeping using the IPv4 hash. */
1450 ip4_tcp_udp_address_x4_and_timestamps_t * mina0;
1452 ip40 = vlib_buffer_get_current (p0);
1453 tcp0 = ip4_next_header (ip40);
1455 mina0 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin0 / 4);
1457 ip4_tcp_udp_address_x4_set_from_headers (&mina0->address_x4,
1459 mina0->time_stamps[i0] = mini_now;
1462 min0 = vec_elt_at_index (tm46->mini_connections, imin0);
1464 min0->state = TCP_CONNECTION_STATE_listen_ack_wait;
1465 min0->time_stamps.ours_host_byte_order = timestamp_now;
1466 tcp_options_decode_for_syn (tm, min0, tcp0);
/* The SYN consumes one sequence number on each side, hence the "1 +". */
1468 my_seq_net0 = *random_ack_numbers++;
1469 his_seq_host0 = 1 + clib_net_to_host_u32 (tcp0->seq_number);
1471 min0->sequence_numbers.ours = 1 + clib_net_to_host_u32 (my_seq_net0);
1472 min0->sequence_numbers.his = his_seq_host0;
/* IPv6 reply: swap the addresses of the SYN into the template copy and
   fold them into the TCP checksum. */
1476 ip6_tcp_syn_packet_t * r0;
1479 r0 = vlib_packet_template_get_packet
1481 &tm->ip6.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK].vlib,
1483 tcp_reply0 = &r0->tcp;
1485 tcp_sum0 = (tm->ip6.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK]
1486 .tcp_checksum_net_byte_order);
1488 for (i = 0; i < ARRAY_LEN (ip60->dst_address.as_uword); i++)
1490 tmp0 = r0->ip6.src_address.as_uword[i] = ip60->dst_address.as_uword[i];
1491 tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0);
1493 tmp0 = r0->ip6.dst_address.as_uword[i] = ip60->src_address.as_uword[i];
1494 tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0);
/* IPv4 reply: as above, and the addresses also go into the IP header
   checksum. */
1499 ip4_tcp_syn_packet_t * r0;
1503 r0 = vlib_packet_template_get_packet
1505 &tm->ip4.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK].vlib,
1507 tcp_reply0 = &r0->tcp;
1509 tcp_sum0 = (tm->ip4.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK]
1510 .tcp_checksum_net_byte_order);
1511 ip_sum0 = (tm->ip4.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK]
1512 .ip4_checksum_net_byte_order);
1514 src0 = r0->ip4.src_address.as_u32 = ip40->dst_address.as_u32;
1515 dst0 = r0->ip4.dst_address.as_u32 = ip40->src_address.as_u32;
1517 ip_sum0 = ip_csum_add_even (ip_sum0, src0);
1518 tcp_sum0 = ip_csum_add_even (tcp_sum0, src0);
1520 ip_sum0 = ip_csum_add_even (ip_sum0, dst0);
1521 tcp_sum0 = ip_csum_add_even (tcp_sum0, dst0);
1523 r0->ip4.checksum = ip_csum_fold (ip_sum0);
1525 ASSERT (r0->ip4.checksum == ip4_header_checksum (&r0->ip4));
/* Ports are swapped relative to the SYN. */
1528 tcp_reply0->header.ports.src = tcp0->ports.dst;
1529 tcp_reply0->header.ports.dst = tcp0->ports.src;
1530 tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->header.ports.src_and_dst);
1532 tcp_reply0->header.seq_number = my_seq_net0;
1533 tcp_sum0 = ip_csum_add_even (tcp_sum0, my_seq_net0);
1535 his_seq_net0 = clib_host_to_net_u32 (his_seq_host0);
1536 tcp_reply0->header.ack_number = his_seq_net0;
1537 tcp_sum0 = ip_csum_add_even (tcp_sum0, his_seq_net0);
/* The adjacency index stored in the buffer must be resolved against the
   lookup main of the packet's own address family; using the IPv4 lookup
   main for an IPv6 packet would fetch an unrelated adjacency. */
1540 ip_adjacency_t * adj0 = ip_get_adjacency (is_ip6 ? &ip6_main.lookup_main : &ip4_main.lookup_main, vnet_buffer (p0)->ip.adj_index[VLIB_RX]);
1542 (adj0->rewrite_header.max_l3_packet_bytes
1543 - (is_ip6 ? sizeof (ip60[0]) : sizeof (ip40[0]))
1544 - sizeof (tcp0[0]));
/* Advertise the smaller of our MSS and the one decoded from the SYN. */
1546 my_mss = clib_min (my_mss, min0->max_segment_size);
1547 min0->max_segment_size = my_mss;
1549 tcp_reply0->options.mss.value = clib_host_to_net_u16 (my_mss);
1550 tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->options.mss.value);
1553 tcp_reply0->options.time_stamp.my_time_stamp = clib_host_to_net_u32 (timestamp_now);
1554 tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->options.time_stamp.my_time_stamp);
1556 tcp_reply0->options.time_stamp.his_time_stamp = min0->time_stamps.his_net_byte_order;
1557 tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->options.time_stamp.his_time_stamp);
1559 tcp_reply0->header.checksum = ip_csum_fold (tcp_sum0);
/* Let the reply inherit RX interface and trace state from the SYN. */
1561 vlib_buffer_copy_shared_fields (vm, p0, bi_reply0);
1563 to_reply[0] = bi_reply0;
1564 n_left_to_reply -= 1;
1568 vlib_put_next_frame (vm, node, TCP_LISTEN_NEXT_REPLY, n_left_to_reply);
1569 vlib_put_next_frame (vm, node, TCP_LISTEN_NEXT_DROP, n_left_to_drop);
1572 if (node->flags & VLIB_NODE_FLAG_TRACE)
1575 return frame->n_vectors;
/* IPv4 node function for "ip4-tcp-listen": forwards to ip46_tcp_listen
   with is_ip6 = 0. */
1579 ip4_tcp_listen (vlib_main_t * vm,
1580 vlib_node_runtime_t * node,
1581 vlib_frame_t * frame)
1582 { return ip46_tcp_listen (vm, node, frame, /* is_ip6 */ 0); }
/* IPv6 node function for "ip6-tcp-listen": forwards to ip46_tcp_listen
   with is_ip6 = 1. */
1585 ip6_tcp_listen (vlib_main_t * vm,
1586 vlib_node_runtime_t * node,
1587 vlib_frame_t * frame)
1588 { return ip46_tcp_listen (vm, node, frame, /* is_ip6 */ 1); }
/* Graph node registration for "ip4-tcp-listen". */
1590 VLIB_REGISTER_NODE (ip4_tcp_listen_node,static) = {
1591 .function = ip4_tcp_listen,
1592 .name = "ip4-tcp-listen",
1594 .vector_size = sizeof (u32),
1596 .n_next_nodes = TCP_LISTEN_N_NEXT,
/* Replies go to "ip4-input" when CLIB_DEBUG > 0 and to "ip4-lookup"
   otherwise. */
1598 [TCP_LISTEN_NEXT_DROP] = "error-drop",
1599 [TCP_LISTEN_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip4-input" : "ip4-lookup",
/* Graph node registration for "ip6-tcp-listen". */
1603 VLIB_REGISTER_NODE (ip6_tcp_listen_node,static) = {
1604 .function = ip6_tcp_listen,
1605 .name = "ip6-tcp-listen",
1607 .vector_size = sizeof (u32),
1609 .n_next_nodes = TCP_LISTEN_N_NEXT,
/* Replies go to "ip6-input" when CLIB_DEBUG > 0 and to "ip6-lookup"
   otherwise. */
1611 [TCP_LISTEN_NEXT_DROP] = "error-drop",
1612 [TCP_LISTEN_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip6-input" : "ip6-lookup",
/* Next-node indices of the tcp-connect nodes. */
1617 TCP_CONNECT_NEXT_DROP,
1618 TCP_CONNECT_NEXT_REPLY,
1620 } tcp_connect_next_t;
/* Shared IPv4/IPv6 worker for the tcp-connect nodes.

   Walks the buffers of FRAME, locates each packet's TCP header, tags the
   buffer with an error from the lookup node of the same address family
   and enqueues it to a next node, switching next frames when a packet's
   next differs from the cached one.  Returns the number of vectors in
   FRAME. */
1622 static_always_inline uword
1623 ip46_tcp_connect (vlib_main_t * vm,
1624 vlib_node_runtime_t * node,
1625 vlib_frame_t * frame,
1628 tcp_main_t * tm = &tcp_main;
1629 ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
1630 uword n_packets = frame->n_vectors;
1631 u32 * from, * to_next;
1632 u32 n_left_from, n_left_to_next, next;
1633 vlib_node_runtime_t * error_node;
/* NOTE(review): prints the tm46 pointer on every invocation; looks like
   a debugging leftover -- confirm before relying on this node. */
1636 clib_warning ("%p", tm46);
1638 error_node = vlib_node_get_runtime
1639 (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
1641 from = vlib_frame_vector_args (frame);
1642 n_left_from = n_packets;
1643 next = node->cached_next_index;
1645 while (n_left_from > 0)
1647 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
1649 while (n_left_from > 0 && n_left_to_next > 0)
1652 ip6_header_t * ip60;
1653 ip4_header_t * ip40;
1654 tcp_header_t * tcp0;
/* Speculatively enqueue to the current next frame. */
1658 bi0 = to_next[0] = from[0];
1663 n_left_to_next -= 1;
1665 p0 = vlib_get_buffer (vm, bi0);
1669 ip60 = vlib_buffer_get_current (p0);
1670 tcp0 = ip6_next_header (ip60);
1674 ip40 = vlib_buffer_get_current (p0);
1675 tcp0 = ip4_next_header (ip40);
1681 p0->error = error_node->errors[error0];
/* Wrong guess: give the slot back, flush the frame and fetch a new one. */
1683 if (PREDICT_FALSE (next0 != next))
1686 n_left_to_next += 1;
1688 vlib_put_next_frame (vm, node, next, n_left_to_next);
1691 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
1694 n_left_to_next -= 1;
1698 vlib_put_next_frame (vm, node, next, n_left_to_next);
1701 if (node->flags & VLIB_NODE_FLAG_TRACE)
1704 return frame->n_vectors;
/* IPv4 node function for "ip4-tcp-connect": forwards to ip46_tcp_connect
   with is_ip6 = 0. */
1708 ip4_tcp_connect (vlib_main_t * vm,
1709 vlib_node_runtime_t * node,
1710 vlib_frame_t * frame)
1711 { return ip46_tcp_connect (vm, node, frame, /* is_ip6 */ 0); }
/* IPv6 node function for "ip6-tcp-connect": forwards to ip46_tcp_connect
   with is_ip6 = 1. */
1714 ip6_tcp_connect (vlib_main_t * vm,
1715 vlib_node_runtime_t * node,
1716 vlib_frame_t * frame)
1717 { return ip46_tcp_connect (vm, node, frame, /* is_ip6 */ 1); }
/* Graph node registration for "ip4-tcp-connect". */
1719 VLIB_REGISTER_NODE (ip4_tcp_connect_node,static) = {
1720 .function = ip4_tcp_connect,
1721 .name = "ip4-tcp-connect",
1723 .vector_size = sizeof (u32),
1725 .n_next_nodes = TCP_CONNECT_N_NEXT,
/* Replies go to "ip4-input" when CLIB_DEBUG > 0 and to "ip4-lookup"
   otherwise. */
1727 [TCP_CONNECT_NEXT_DROP] = "error-drop",
1728 [TCP_CONNECT_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip4-input" : "ip4-lookup",
/* Graph node registration for "ip6-tcp-connect". */
1732 VLIB_REGISTER_NODE (ip6_tcp_connect_node,static) = {
1733 .function = ip6_tcp_connect,
1734 .name = "ip6-tcp-connect",
1736 .vector_size = sizeof (u32),
1738 .n_next_nodes = TCP_CONNECT_N_NEXT,
/* Replies go to "ip6-input" when CLIB_DEBUG > 0 and to "ip6-lookup"
   otherwise. */
1740 [TCP_CONNECT_NEXT_DROP] = "error-drop",
1741 [TCP_CONNECT_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip6-input" : "ip6-lookup",
/* Next-node indices of the tcp-establish nodes; TCP_ESTABLISH_N_NEXT is
   the number of next nodes. */
1746 TCP_ESTABLISH_NEXT_DROP,
1747 TCP_ESTABLISH_NEXT_ESTABLISHED,
1748 TCP_ESTABLISH_N_NEXT,
1749 } tcp_establish_next_t;
1751 static_always_inline uword
1752 ip46_tcp_establish (vlib_main_t * vm,
1753 vlib_node_runtime_t * node,
1754 vlib_frame_t * frame,
1757 tcp_main_t * tm = &tcp_main;
1758 ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
1759 uword n_packets = frame->n_vectors;
1760 u32 * from, * to_next;
1761 u32 n_left_from, n_left_to_next, next, mini_long_long_ago, timestamp_now;
1762 vlib_node_runtime_t * error_node;
1764 error_node = vlib_node_get_runtime
1765 (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
1767 from = vlib_frame_vector_args (frame);
1768 n_left_from = n_packets;
1769 next = node->cached_next_index;
1770 mini_long_long_ago =
1771 (tcp_time_now (tm, TCP_TIMER_mini_connection)
1772 + (1 << (BITS (mini_long_long_ago) - 1)));
1773 timestamp_now = tcp_time_now (tm, TCP_TIMER_timestamp);
1775 while (n_left_from > 0)
1777 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
1779 while (n_left_from > 0 && n_left_to_next > 0)
1782 ip6_header_t * ip60;
1783 ip4_header_t * ip40;
1784 tcp_header_t * tcp0;
1785 tcp_mini_connection_t * min0;
1786 tcp_connection_t * est0;
1787 tcp_listener_t * l0;
1788 u32 bi0, imin0, iest0;
1789 u8 error0, next0, i0, e0;
1791 bi0 = to_next[0] = from[0];
1796 n_left_to_next -= 1;
1798 p0 = vlib_get_buffer (vm, bi0);
1800 imin0 = vnet_buffer (p0)->ip.tcp.mini_connection_index;
1801 iest0 = vnet_buffer (p0)->ip.tcp.established_connection_index;
1806 min0 = vec_elt_at_index (tm46->mini_connections, imin0);
1807 if (PREDICT_FALSE (min0->state == TCP_CONNECTION_STATE_unused))
1808 goto already_established0;
1809 min0->state = TCP_CONNECTION_STATE_unused;
1813 ip60 = vlib_buffer_get_current (p0);
1814 tcp0 = ip6_next_header (ip60);
1818 ip40 = vlib_buffer_get_current (p0);
1819 tcp0 = ip4_next_header (ip40);
1822 if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->seq_number)
1823 != min0->sequence_numbers.his))
1824 goto unexpected_seq_number0;
1825 if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->ack_number)
1826 != min0->sequence_numbers.ours))
1827 goto unexpected_ack_number0;
1831 ip6_tcp_udp_address_x4_and_timestamps_t * mina0;
1832 ip6_tcp_udp_address_x4_t * esta0;
1834 mina0 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin0 / 4);
1835 esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest0 / 4);
1837 ip6_tcp_udp_address_x4_copy_and_invalidate (esta0, &mina0->address_x4, e0, i0);
1839 mina0->time_stamps[i0] = mini_long_long_ago;
1843 ip4_tcp_udp_address_x4_and_timestamps_t * mina0;
1844 ip4_tcp_udp_address_x4_t * esta0;
1846 mina0 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin0 / 4);
1847 esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest0 / 4);
1849 ip4_tcp_udp_address_x4_copy_and_invalidate (esta0, &mina0->address_x4, e0, i0);
1851 mina0->time_stamps[i0] = mini_long_long_ago;
1854 est0 = vec_elt_at_index (tm46->established_connections, iest0);
1856 est0->sequence_numbers = min0->sequence_numbers;
1857 est0->max_segment_size = (min0->max_segment_size
1858 - STRUCT_SIZE_OF (tcp_ack_packet_t, options));
1859 est0->his_window_scale = min0->window_scale;
1860 est0->his_window = clib_net_to_host_u16 (tcp0->window);
1861 est0->time_stamps.ours_host_byte_order = min0->time_stamps.ours_host_byte_order;
1863 /* Compute first measurement of round trip time. */
1865 u32 t = tcp_options_decode_for_ack (tm, tcp0, &est0->time_stamps.his_net_byte_order);
1866 f64 dt = (timestamp_now - t) * tm->secs_per_tick[TCP_TIMER_timestamp];
1867 est0->round_trip_time_stats.sum = dt;
1868 est0->round_trip_time_stats.sum2 = dt*dt;
1869 est0->round_trip_time_stats.count = 1;
1872 ELOG_TYPE_DECLARE (e) = {
1873 .format = "establish ack rtt: %.4e",
1874 .format_args = "f8",
1876 struct { f64 dt; } * ed;
1877 ed = ELOG_DATA (&vm->elog_main, e);
1882 est0->my_window_scale = 7;
1883 est0->my_window = 256;
1885 l0 = pool_elt_at_index (tm->listener_pool, vnet_buffer (p0)->ip.tcp.listener_index);
1886 vec_add1 (l0->event_connections[is_ip6], tcp_connection_handle_set (iest0, is_ip6));
1888 next0 = TCP_ESTABLISH_NEXT_DROP;
1889 error0 = TCP_ERROR_LISTENS_ESTABLISHED;
1892 p0->error = error_node->errors[error0];
1893 if (PREDICT_FALSE (next0 != next))
1896 n_left_to_next += 1;
1898 vlib_put_next_frame (vm, node, next, n_left_to_next);
1901 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
1904 n_left_to_next -= 1;
1908 already_established0:
1909 next0 = TCP_ESTABLISH_NEXT_ESTABLISHED;
1910 error0 = TCP_ERROR_NONE;
1913 unexpected_seq_number0:
1914 next0 = TCP_ESTABLISH_NEXT_DROP;
1915 error0 = TCP_ERROR_UNEXPECTED_SEQ_NUMBER;
1918 unexpected_ack_number0:
1919 next0 = TCP_ESTABLISH_NEXT_DROP;
1920 error0 = TCP_ERROR_UNEXPECTED_ACK_NUMBER;
1924 vlib_put_next_frame (vm, node, next, n_left_to_next);
1927 if (node->flags & VLIB_NODE_FLAG_TRACE)
1930 /* Inform listeners of new connections. */
1934 pool_foreach (l, tm->listener_pool, ({
1935 if ((n = vec_len (l->event_connections[is_ip6])) > 0)
1937 if (l->event_function)
1938 l->event_function (l->event_connections[is_ip6],
1939 TCP_EVENT_connection_established);
1940 if (tm->n_established_connections[is_ip6] == 0)
1941 vlib_node_set_state (vm, tm46->output_node_index, VLIB_NODE_STATE_POLLING);
1942 tm->n_established_connections[is_ip6] += n;
1943 _vec_len (l->event_connections[is_ip6]) = 0;
1948 return frame->n_vectors;
/* IPv4 node function for "ip4-tcp-establish": forwards to
   ip46_tcp_establish with is_ip6 = 0. */
1952 ip4_tcp_establish (vlib_main_t * vm,
1953 vlib_node_runtime_t * node,
1954 vlib_frame_t * frame)
1955 { return ip46_tcp_establish (vm, node, frame, /* is_ip6 */ 0); }
/* IPv6 node function for "ip6-tcp-establish": forwards to
   ip46_tcp_establish with is_ip6 = 1. */
1958 ip6_tcp_establish (vlib_main_t * vm,
1959 vlib_node_runtime_t * node,
1960 vlib_frame_t * frame)
1961 { return ip46_tcp_establish (vm, node, frame, /* is_ip6 */ 1); }
/* Graph node registration for "ip4-tcp-establish". */
1963 VLIB_REGISTER_NODE (ip4_tcp_establish_node,static) = {
1964 .function = ip4_tcp_establish,
1965 .name = "ip4-tcp-establish",
1967 .vector_size = sizeof (u32),
1969 .n_next_nodes = TCP_ESTABLISH_N_NEXT,
/* Next-node names, indexed by the TCP_ESTABLISH_NEXT_* values. */
1971 [TCP_ESTABLISH_NEXT_DROP] = "error-drop",
1972 [TCP_ESTABLISH_NEXT_ESTABLISHED] = "ip4-tcp-established",
/* Graph node registration for "ip6-tcp-establish". */
1976 VLIB_REGISTER_NODE (ip6_tcp_establish_node,static) = {
1977 .function = ip6_tcp_establish,
1978 .name = "ip6-tcp-establish",
1980 .vector_size = sizeof (u32),
1982 .n_next_nodes = TCP_ESTABLISH_N_NEXT,
/* Next-node names, indexed by the TCP_ESTABLISH_NEXT_* values. */
1984 [TCP_ESTABLISH_NEXT_DROP] = "error-drop",
1985 [TCP_ESTABLISH_NEXT_ESTABLISHED] = "ip6-tcp-established",
/* Release one established connection: invalidate slot (iest0 % 4) of the
   x4 address hash entry (iest0 / 4) for the given address family, then
   fetch the connection structure itself. */
1989 static_always_inline void
1990 tcp_free_connection_x1 (vlib_main_t * vm, tcp_main_t * tm,
1991 tcp_ip_4_or_6_t is_ip6,
1994 ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
1995 tcp_connection_t * est0;
1996 u32 iest_div0, iest_mod0;
/* Four connections share one address hash entry. */
1998 iest_div0 = iest0 / 4;
1999 iest_mod0 = iest0 % 4;
2003 ip6_tcp_udp_address_x4_t * esta0;
2004 esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest_div0);
2005 ip6_tcp_udp_address_x4_invalidate (esta0, iest_mod0);
2009 ip4_tcp_udp_address_x4_t * esta0;
2010 esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest_div0);
2011 ip4_tcp_udp_address_x4_invalidate (esta0, iest_mod0);
/* NOTE(review): no use of est0 is visible after this lookup -- confirm
   whether further cleanup of the connection is intended here. */
2014 est0 = vec_elt_at_index (tm46->established_connections, iest0);
/* Release two established connections by calling tcp_free_connection_x1
   on iest0 and then on iest1. */
2017 static_always_inline void
2018 tcp_free_connection_x2 (vlib_main_t * vm, tcp_main_t * tm,
2019 tcp_ip_4_or_6_t is_ip6,
2020 u32 iest0, u32 iest1)
2022 tcp_free_connection_x1 (vm, tm, is_ip6, iest0);
2023 tcp_free_connection_x1 (vm, tm, is_ip6, iest1);
/* Shared IPv4/IPv6 worker for the tcp-output nodes.

   First pass over the listener pool, for the given address family:
     - listeners with queued EOF connections get a TCP_EVENT_fin_received
       callback, and each such connection is flagged
       application_requested_close;
     - listeners with queued closed connections get a
       TCP_EVENT_connection_closed callback, the connections are freed,
       the established-connection count is reduced, and the output node is
       disabled once the count reaches zero.

   Then, for every connection in tm46->connections_pending_acks, an ACK
   (or FIN-ACK, when the application requested close and no FIN has been
   sent yet) is built from the matching packet template, its checksums
   are patched incrementally, and the connection's ack_pending flag is
   cleared.  Sent ACKs are counted as TCP_ERROR_ACKS_SENT. */
2026 static_always_inline uword
2027 ip46_tcp_output (vlib_main_t * vm,
2028 vlib_node_runtime_t * node,
2029 vlib_frame_t * frame,
2030 tcp_ip_4_or_6_t is_ip6)
2032 tcp_main_t * tm = &tcp_main;
2033 ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
2034 u32 * cis, * to_next, n_left_to_next, n_connections_left;
2035 u32 timestamp_now_host_byte_order, timestamp_now_net_byte_order;
2036 vlib_node_runtime_t * error_node;
2040 /* Inform listeners of connections that reached EOF or were closed. */
2043 pool_foreach (l, tm->listener_pool, ({
/* eof_connections is indexed by address family; the length must be taken
   from this family's vector, not from the array itself. */
2044 if (vec_len (l->eof_connections[is_ip6]) > 0)
2046 if (l->event_function)
2047 l->event_function (l->eof_connections[is_ip6], TCP_EVENT_fin_received);
2051 for (i = 0; i < vec_len (l->eof_connections[is_ip6]); i++)
2053 tcp_connection_t * c = tcp_get_connection (l->eof_connections[is_ip6][i]);
2054 c->flags |= TCP_CONNECTION_FLAG_application_requested_close;
2057 _vec_len (l->eof_connections[is_ip6]) = 0;
2060 if (vec_len (l->close_connections[is_ip6]) > 0)
2065 if (l->event_function)
2066 l->event_function (l->close_connections[is_ip6], TCP_EVENT_connection_closed);
2068 cis = l->close_connections[is_ip6];
2069 n_left = vec_len (cis);
2070 ASSERT (tm->n_established_connections[is_ip6] >= n_left);
2071 tm->n_established_connections[is_ip6] -= n_left;
/* With no established connections left the output node has nothing to do. */
2072 if (tm->n_established_connections[is_ip6] == 0)
2073 vlib_node_set_state (vm, tm46->output_node_index, VLIB_NODE_STATE_DISABLED);
2076 tcp_free_connection_x2 (vm, tm, is_ip6, cis[0], cis[1]);
2083 tcp_free_connection_x1 (vm, tm, is_ip6, cis[0]);
2088 _vec_len (l->close_connections[is_ip6]) = 0;
/* Take the list of connections that need an ACK and reset it. */
2094 cis = tm46->connections_pending_acks;
2095 n_connections_left = vec_len (cis);
2096 if (n_connections_left == 0)
2098 _vec_len (tm46->connections_pending_acks) = 0;
2099 error_node = vlib_node_get_runtime
2100 (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
2102 timestamp_now_host_byte_order = tcp_time_now (tm, TCP_TIMER_timestamp);
2103 timestamp_now_net_byte_order = clib_host_to_net_u32 (timestamp_now_host_byte_order);
2105 while (n_connections_left > 0)
2107 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2109 while (n_connections_left > 0 && n_left_to_next > 0)
2111 tcp_connection_t * est0;
2112 tcp_ack_packet_t * tcp0;
2113 tcp_udp_ports_t * ports0;
2115 tcp_packet_template_type_t template_type0;
2116 u32 bi0, iest0, iest_div0, iest_mod0, my_seq_net0, his_seq_net0;
/* Four connections share one address hash entry. */
2121 iest_div0 = iest0 / 4;
2122 iest_mod0 = iest0 % 4;
2123 est0 = vec_elt_at_index (tm46->established_connections, iest0);
2125 /* Send a FIN along with our ACK if application closed connection. */
2127 u8 is_closed0, fin_sent0;
2129 is_closed0 = (est0->flags & TCP_CONNECTION_FLAG_application_requested_close) != 0;
2130 fin_sent0 = (est0->flags & TCP_CONNECTION_FLAG_fin_sent) != 0;
2132 is_fin0 = is_closed0 && ! fin_sent0;
2135 ? TCP_PACKET_TEMPLATE_FIN_ACK
2136 : TCP_PACKET_TEMPLATE_ACK);
2137 est0->flags |= is_closed0 << LOG2_TCP_CONNECTION_FLAG_fin_sent;
/* IPv6: our source is the connection's stored destination and vice versa. */
2142 ip6_tcp_ack_packet_t * r0;
2143 ip6_tcp_udp_address_x4_t * esta0;
2146 esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest_div0);
2147 r0 = vlib_packet_template_get_packet
2148 (vm, &tm->ip6.packet_templates[template_type0].vlib, &bi0);
2151 tcp_sum0 = (tm->ip6.packet_templates[template_type0]
2152 .tcp_checksum_net_byte_order);
2154 for (i = 0; i < ARRAY_LEN (r0->ip6.src_address.as_u32); i++)
2156 tmp0 = r0->ip6.src_address.as_u32[i] = esta0->dst.as_u32[i][iest_mod0];
2157 tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0);
2159 tmp0 = r0->ip6.dst_address.as_u32[i] = esta0->src.as_u32[i][iest_mod0];
2160 tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0);
2163 ports0 = &esta0->ports.as_ports[iest_mod0];
/* IPv4: as above, and the addresses also go into the IP header checksum. */
2167 ip4_tcp_ack_packet_t * r0;
2168 ip4_tcp_udp_address_x4_t * esta0;
2172 esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest_div0);
2173 r0 = vlib_packet_template_get_packet
2174 (vm, &tm->ip4.packet_templates[template_type0].vlib, &bi0);
2177 ip_sum0 = (tm->ip4.packet_templates[template_type0]
2178 .ip4_checksum_net_byte_order);
2179 tcp_sum0 = (tm->ip4.packet_templates[template_type0]
2180 .tcp_checksum_net_byte_order);
2182 src0 = r0->ip4.src_address.as_u32 = esta0->dst.as_ip4_address[iest_mod0].as_u32;
2183 dst0 = r0->ip4.dst_address.as_u32 = esta0->src.as_ip4_address[iest_mod0].as_u32;
2185 ip_sum0 = ip_csum_add_even (ip_sum0, src0);
2186 tcp_sum0 = ip_csum_add_even (tcp_sum0, src0);
2188 ip_sum0 = ip_csum_add_even (ip_sum0, dst0);
2189 tcp_sum0 = ip_csum_add_even (tcp_sum0, dst0);
2191 r0->ip4.checksum = ip_csum_fold (ip_sum0);
2193 ASSERT (r0->ip4.checksum == ip4_header_checksum (&r0->ip4));
2194 ports0 = &esta0->ports.as_ports[iest_mod0];
/* Ports are swapped relative to the stored (received) direction. */
2197 tcp_sum0 = ip_csum_add_even (tcp_sum0, ports0->as_u32);
2198 tcp0->header.ports.src = ports0->dst;
2199 tcp0->header.ports.dst = ports0->src;
2201 my_seq_net0 = clib_host_to_net_u32 (est0->sequence_numbers.ours);
2202 his_seq_net0 = clib_host_to_net_u32 (est0->sequence_numbers.his);
2204 /* FIN accounts for 1 sequence number. */
2205 est0->sequence_numbers.ours += is_fin0;
2207 tcp0->header.seq_number = my_seq_net0;
2208 tcp_sum0 = ip_csum_add_even (tcp_sum0, my_seq_net0);
2210 tcp0->header.ack_number = his_seq_net0;
2211 tcp_sum0 = ip_csum_add_even (tcp_sum0, his_seq_net0);
2213 est0->time_stamps.ours_host_byte_order = timestamp_now_host_byte_order;
2214 tcp0->options.time_stamp.my_time_stamp = timestamp_now_net_byte_order;
2215 tcp_sum0 = ip_csum_add_even (tcp_sum0, timestamp_now_net_byte_order);
2217 tcp0->options.time_stamp.his_time_stamp = est0->time_stamps.his_net_byte_order;
2218 tcp_sum0 = ip_csum_add_even (tcp_sum0, est0->time_stamps.his_net_byte_order);
2220 tcp0->header.checksum = ip_csum_fold (tcp_sum0);
2222 est0->flags &= ~TCP_CONNECTION_FLAG_ack_pending;
2226 n_left_to_next -= 1;
2227 n_connections_left -= 1;
2231 vlib_put_next_frame (vm, node, next, n_left_to_next);
2234 vlib_error_count (vm, error_node->node_index, TCP_ERROR_ACKS_SENT, n_acks);
/* IPv4 node function for "ip4-tcp-output": forwards to ip46_tcp_output
   with is_ip6 = 0. */
2240 ip4_tcp_output (vlib_main_t * vm,
2241 vlib_node_runtime_t * node,
2242 vlib_frame_t * frame)
2243 { return ip46_tcp_output (vm, node, frame, /* is_ip6 */ 0); }
/* IPv6 node function for "ip6-tcp-output": forwards to ip46_tcp_output
   with is_ip6 = 1. */
2246 ip6_tcp_output (vlib_main_t * vm,
2247 vlib_node_runtime_t * node,
2248 vlib_frame_t * frame)
2249 { return ip46_tcp_output (vm, node, frame, /* is_ip6 */ 1); }
/* Graph node registration for "ip4-tcp-output": an input-type node that
   is registered in the disabled state. */
2251 VLIB_REGISTER_NODE (ip4_tcp_output_node,static) = {
2252 .function = ip4_tcp_output,
2253 .name = "ip4-tcp-output",
2254 .state = VLIB_NODE_STATE_DISABLED,
2255 .type = VLIB_NODE_TYPE_INPUT,
2257 .vector_size = sizeof (u32),
/* Single next node: "ip4-input" when CLIB_DEBUG > 0, else "ip4-lookup". */
2261 [0] = CLIB_DEBUG > 0 ? "ip4-input" : "ip4-lookup",
/* Graph node registration for "ip6-tcp-output": an input-type node that
   is registered in the disabled state. */
2265 VLIB_REGISTER_NODE (ip6_tcp_output_node,static) = {
2266 .function = ip6_tcp_output,
2267 .name = "ip6-tcp-output",
2268 .state = VLIB_NODE_STATE_DISABLED,
2269 .type = VLIB_NODE_TYPE_INPUT,
2271 .vector_size = sizeof (u32),
/* Single next node: "ip6-input" when CLIB_DEBUG > 0, else "ip6-lookup". */
2275 [0] = CLIB_DEBUG > 0 ? "ip6-input" : "ip6-lookup",
/* Account for N_BYTES of our data being acknowledged on connection C.
   The visible body only asserts that N_BYTES is zero.
   NOTE(review): presumably a placeholder until transmit-side
   acknowledgement handling exists -- confirm. */
2279 static_always_inline void
2280 tcp_ack (tcp_main_t * tm, tcp_connection_t * c, u32 n_bytes)
2282 ASSERT (n_bytes == 0);
/* Next-node indices of the tcp-established nodes; TCP_ESTABLISHED_N_NEXT
   is the number of next nodes. */
2286 TCP_ESTABLISHED_NEXT_DROP,
2287 TCP_ESTABLISHED_N_NEXT,
2288 } tcp_established_next_t;
/* Shared IPv4/IPv6 worker behind the ip4/ip6 tcp-established nodes.

   For every buffer in FRAME it locates the TCP header, fetches the
   established connection whose index is stored in the buffer metadata,
   checks the sequence and ack numbers, then updates the connection
   (peer window, sequence numbers, round-trip statistics, ack-pending and
   fin-received flags) and the listener's eof/close vectors.  Buffers that
   carry data are sent to the listener's next node; all others take the
   drop next.

   IS_IP6 selects the per-family state (tm->ip6 or tm->ip4).
   Returns frame->n_vectors. */
2290 static_always_inline uword
2291 ip46_tcp_established (vlib_main_t * vm,
2292 vlib_node_runtime_t * node,
2293 vlib_frame_t * frame,
2294 tcp_ip_4_or_6_t is_ip6)
2296 tcp_main_t * tm = &tcp_main;
2297 ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
2298 uword n_packets = frame->n_vectors;
2299 u32 * from, * to_next;
2300 u32 n_left_from, n_left_to_next, next, timestamp_now;
2301 vlib_node_runtime_t * error_node;
/* Errors are taken from the runtime of the matching tcp-lookup node. */
2303 error_node = vlib_node_get_runtime
2304 (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
2306 from = vlib_frame_vector_args (frame);
2307 n_left_from = n_packets;
2308 next = node->cached_next_index;
/* Current time in timestamp-timer ticks; used for the RTT samples below. */
2309 timestamp_now = tcp_time_now (tm, TCP_TIMER_timestamp);
2311 while (n_left_from > 0)
2313 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2315 while (n_left_from > 0 && n_left_to_next > 0)
2318 ip6_header_t * ip60;
2319 ip4_header_t * ip40;
2320 tcp_header_t * tcp0;
2321 tcp_connection_t * est0;
2322 tcp_listener_t * l0;
2323 u32 bi0, iest0, n_data_bytes0, his_ack_host0, n_ack0;
2324 u8 error0, next0, n_advance_bytes0, is_fin0, send_ack0;
/* Speculatively place the buffer index in the current next frame. */
2326 bi0 = to_next[0] = from[0];
2331 n_left_to_next -= 1;
2333 p0 = vlib_get_buffer (vm, bi0);
/* IPv6 header handling: data bytes are the IPv6 payload length minus the
   TCP header; the advance covers the IPv6 header plus the TCP header. */
2337 ip60 = vlib_buffer_get_current (p0);
2338 tcp0 = ip6_next_header (ip60);
2339 ASSERT (ip60->protocol == IP_PROTOCOL_TCP);
2340 n_advance_bytes0 = tcp_header_bytes (tcp0);
2341 n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) - n_advance_bytes0;
2342 n_advance_bytes0 += sizeof (ip60[0]);
/* IPv4 header handling: data bytes are the IPv4 total length minus the
   IPv4 and TCP headers. */
2346 ip40 = vlib_buffer_get_current (p0);
2347 tcp0 = ip4_next_header (ip40);
2348 n_advance_bytes0 = (ip4_header_bytes (ip40)
2349 + tcp_header_bytes (tcp0));
2350 n_data_bytes0 = clib_net_to_host_u16 (ip40->length) - n_advance_bytes0;
/* The established-connection index is read from the buffer metadata. */
2353 iest0 = vnet_buffer (p0)->ip.tcp.established_connection_index;
2354 est0 = vec_elt_at_index (tm46->established_connections, iest0);
/* Defaults: no-data error and drop, overridden below when data is present. */
2356 error0 = TCP_ERROR_NO_DATA;
2357 next0 = TCP_ESTABLISHED_NEXT_DROP;
/* The segment must start exactly at the sequence number we expect from
   the peer. */
2359 if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->seq_number)
2360 != est0->sequence_numbers.his))
2361 goto unexpected_seq_number0;
/* The ack may not cover more than our outstanding unacked bytes (the
   difference is computed in unsigned arithmetic). */
2362 if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->ack_number) - est0->sequence_numbers.ours
2363 > est0->n_tx_unacked_bytes))
2364 goto unexpected_ack_number0;
2366 is_fin0 = (tcp0->flags & TCP_FLAG_FIN) != 0;
/* After a FIN has been received, a further FIN or any data is an error. */
2368 if (PREDICT_FALSE ((est0->flags & TCP_CONNECTION_FLAG_fin_received)
2369 && (is_fin0 || n_data_bytes0 > 0)))
2370 goto already_received_fin0;
2372 /* Update window. */
2373 est0->his_window = clib_net_to_host_u16 (tcp0->window);
2375 /* Update his sequence number to account for data he's just sent. */
2376 est0->sequence_numbers.his += n_data_bytes0 + is_fin0;
/* Pass the newly acknowledged byte count to tcp_ack and adopt the peer's
   ack number as our sequence number. */
2378 his_ack_host0 = clib_net_to_host_u32 (tcp0->ack_number);
2379 n_ack0 = his_ack_host0 - est0->sequence_numbers.ours;
2380 tcp_ack (tm, est0, n_ack0);
2381 est0->sequence_numbers.ours = his_ack_host0;
/* Decode TCP options; when the returned time stamp differs from the one
   stored for us, accumulate a round-trip sample (sum, sum of squares,
   count) and remember the new value. */
2384 u32 t = tcp_options_decode_for_ack (tm, tcp0, &est0->time_stamps.his_net_byte_order);
2385 if (t != est0->time_stamps.ours_host_byte_order)
2387 f64 dt = (timestamp_now - t) * tm->secs_per_tick[TCP_TIMER_timestamp];
2388 est0->round_trip_time_stats.sum += dt;
2389 est0->round_trip_time_stats.sum2 += dt*dt;
2390 est0->round_trip_time_stats.count += 1;
2391 est0->time_stamps.ours_host_byte_order = t;
/* Event-log record for the RTT sample. */
2394 ELOG_TYPE_DECLARE (e) = {
2395 .format = "ack rtt: %.4e",
2396 .format_args = "f8",
2398 struct { f64 dt; } * ed;
2399 ed = ELOG_DATA (&vm->elog_main, e);
/* An ack is queued only when none is already pending and the segment
   carried data or a FIN.  The connection index is appended
   unconditionally and the vector length is then reduced by one when
   send_ack0 is zero, which avoids a branch. */
2405 send_ack0 = ((est0->flags & TCP_CONNECTION_FLAG_ack_pending) == 0
2406 && (n_data_bytes0 > 0 || is_fin0));
2407 vec_add1 (tm46->connections_pending_acks, vnet_buffer (p0)->ip.tcp.established_connection_index);
2408 _vec_len (tm46->connections_pending_acks) -= ! send_ack0;
2409 est0->flags |= send_ack0 << LOG2_TCP_CONNECTION_FLAG_ack_pending;
2411 est0->flags |= is_fin0 << LOG2_TCP_CONNECTION_FLAG_fin_received;
2413 l0 = pool_elt_at_index (tm->listener_pool, vnet_buffer (p0)->ip.tcp.listener_index);
/* Same append-then-trim idiom: the connection handle stays on the
   listener's eof vector only when this segment carried a FIN, and on the
   close vector only when the fin_sent flag is set. */
2416 u32 ch0 = tcp_connection_handle_set (iest0, is_ip6);
2418 vec_add1 (l0->eof_connections[is_ip6], ch0);
2419 _vec_len (l0->eof_connections[is_ip6]) -= ! is_fin0;
2421 vec_add1 (l0->close_connections[is_ip6], ch0);
2422 _vec_len (l0->close_connections[is_ip6]) -= !(est0->flags & TCP_CONNECTION_FLAG_fin_sent);
/* Segments with data go to the listener's next node; others keep next0. */
2425 next0 = n_data_bytes0 > 0 ? l0->next_index : next0;
/* Advance the buffer past the IP and TCP headers. */
2427 vlib_buffer_advance (p0, n_advance_bytes0);
2430 p0->error = error_node->errors[error0];
/* When the chosen next differs from the frame being filled, the
   speculative enqueue is undone and a frame for the other next is
   obtained. */
2431 if (PREDICT_FALSE (next0 != next))
2434 n_left_to_next += 1;
2436 vlib_put_next_frame (vm, node, next, n_left_to_next);
2439 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2442 n_left_to_next -= 1;
/* Error exits: each selects the drop next and a specific error code. */
2446 unexpected_seq_number0:
2447 next0 = TCP_ESTABLISHED_NEXT_DROP;
2448 error0 = TCP_ERROR_UNEXPECTED_SEQ_NUMBER;
2451 unexpected_ack_number0:
2452 next0 = TCP_ESTABLISHED_NEXT_DROP;
2453 error0 = TCP_ERROR_UNEXPECTED_ACK_NUMBER;
2456 already_received_fin0:
2457 next0 = TCP_ESTABLISHED_NEXT_DROP;
2458 error0 = TCP_ERROR_SEGMENT_AFTER_FIN;
2462 vlib_put_next_frame (vm, node, next, n_left_to_next);
2465 if (node->flags & VLIB_NODE_FLAG_TRACE)
2468 return frame->n_vectors;
/* IPv4 node function: forwards to ip46_tcp_established with is_ip6 = 0
   and returns its result. */
2472 ip4_tcp_established (vlib_main_t * vm,
2473 vlib_node_runtime_t * node,
2474 vlib_frame_t * frame)
2475 { return ip46_tcp_established (vm, node, frame, /* is_ip6 */ 0); }
/* IPv6 node function: forwards to ip46_tcp_established with is_ip6 = 1
   and returns its result. */
2478 ip6_tcp_established (vlib_main_t * vm,
2479 vlib_node_runtime_t * node,
2480 vlib_frame_t * frame)
2481 { return ip46_tcp_established (vm, node, frame, /* is_ip6 */ 1); }
/* Graph-node registration for "ip4-tcp-established", which runs
   ip4_tcp_established on frames of u32 elements. */
2483 VLIB_REGISTER_NODE (ip4_tcp_established_node,static) = {
2484 .function = ip4_tcp_established,
2485 .name = "ip4-tcp-established",
2487 .vector_size = sizeof (u32),
2489 .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
/* The drop next is wired to "error-drop". */
2491 [TCP_ESTABLISHED_NEXT_DROP] = "error-drop",
/* Graph-node registration for "ip6-tcp-established", which runs
   ip6_tcp_established on frames of u32 elements. */
2495 VLIB_REGISTER_NODE (ip6_tcp_established_node,static) = {
2496 .function = ip6_tcp_established,
2497 .name = "ip6-tcp-established",
2499 .vector_size = sizeof (u32),
2501 .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
/* The drop next is wired to "error-drop". */
2503 [TCP_ESTABLISHED_NEXT_DROP] = "error-drop",
/* Register a TCP listener described by R.

   Runs tcp_udp_lookup_init first (reporting any error it returns),
   allocates a cache-line aligned, zeroed listener from tm->listener_pool,
   fills it from R and records its pool index in
   tm->listener_index_by_dst_port.

   Returns the listener's index in tm->listener_pool. */
2508 tcp_register_listener (vlib_main_t * vm,
2509 tcp_listener_registration_t * r)
2511 tcp_main_t * tm = &tcp_main;
2515 clib_error_t * error;
2517 if ((error = vlib_call_init_function (vm, tcp_udp_lookup_init)))
2518 clib_error_report (error);
2521 pool_get_aligned (tm->listener_pool, l, CLIB_CACHE_LINE_BYTES);
2523 memset (l, 0, sizeof (l[0]));
2525 l->dst_port = r->port;
/* The data node becomes a next of the ip4 established node.
   NOTE(review): no matching arc for ip6_tcp_established_node is added
   here -- confirm whether the same next index is meant to be valid for
   both families. */
2526 l->next_index = vlib_node_add_next (vm, ip4_tcp_established_node.index, r->data_node_index);
2527 l->valid_local_adjacency_bitmap = 0;
/* Only the IP4/IP6 listener flags are taken from the registration. */
2528 l->flags = r->flags & (TCP_LISTENER_IP4 | TCP_LISTENER_IP6);
/* The lookup table is indexed by the port converted with
   clib_host_to_net_u16. */
2530 tm->listener_index_by_dst_port[clib_host_to_net_u16 (l->dst_port)] = l - tm->listener_pool;
2532 return l - tm->listener_pool;
/* IPv4 interface-address add/delete callback (installed by
   tcp_udp_lookup_init).  It updates
   tm->ip4.default_valid_local_adjacency_bitmap through clib_bitmap_set;
   the bit index and value arguments are not visible in this listing. */
2536 tcp_udp_lookup_ip4_add_del_interface_address (ip4_main_t * im,
2539 ip4_address_t * address,
2541 u32 if_address_index,
2544 tcp_main_t * tm = &tcp_main;
2546 tm->ip4.default_valid_local_adjacency_bitmap
2547 = clib_bitmap_set (tm->ip4.default_valid_local_adjacency_bitmap,
/* IPv6 interface-address add/delete callback (installed by
   tcp_udp_lookup_init).  It updates
   tm->ip6.default_valid_local_adjacency_bitmap through clib_bitmap_set;
   the bit index and value arguments are not visible in this listing. */
2553 tcp_udp_lookup_ip6_add_del_interface_address (ip6_main_t * im,
2556 ip6_address_t * address,
2558 u32 if_address_index,
2561 tcp_main_t * tm = &tcp_main;
2563 tm->ip6.default_valid_local_adjacency_bitmap
2564 = clib_bitmap_set (tm->ip6.default_valid_local_adjacency_bitmap,
/* Init function for the TCP lookup code.

   Calls the ip4 and ip6 lookup init functions first, initializes TCP
   time keeping, installs the per-family interface-address callbacks,
   records the output node indices, initializes the lookup and
   option-decode state and selects the default buffer free list for
   transmit buffers. */
2569 static clib_error_t *
2570 tcp_udp_lookup_init (vlib_main_t * vm)
2572 tcp_main_t * tm = &tcp_main;
2573 ip4_main_t * im4 = &ip4_main;
2574 ip6_main_t * im6 = &ip6_main;
2575 clib_error_t * error;
/* Dependencies: ip4 and ip6 lookup must be initialized before this. */
2577 if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
2579 if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
2582 tcp_time_init (vm, tm);
/* Append the IPv4 interface-address callback with a zero opaque value. */
2585 ip4_add_del_interface_address_callback_t cb;
2587 cb.function = tcp_udp_lookup_ip4_add_del_interface_address;
2588 cb.function_opaque = 0;
2589 vec_add1 (im4->add_del_interface_address_callbacks, cb);
/* Append the IPv6 interface-address callback with a zero opaque value. */
2593 ip6_add_del_interface_address_callback_t cb;
2595 cb.function = tcp_udp_lookup_ip6_add_del_interface_address;
2596 cb.function_opaque = 0;
2597 vec_add1 (im6->add_del_interface_address_callbacks, cb);
2600 tm->ip4.output_node_index = ip4_tcp_output_node.index;
2601 tm->ip6.output_node_index = ip6_tcp_output_node.index;
2603 tcp_lookup_init (vm, tm);
2604 tcp_options_decode_init (tm);
/* Transmit buffers come from the default free list. */
2606 tm->tx_buffer_free_list = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX;
2607 tm->tx_buffer_free_list_n_buffer_bytes = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
/* Register the function above as a vlib init function. */
2612 VLIB_INIT_FUNCTION (tcp_udp_lookup_init);
/* format() helper printing the age of a TCP time stamp.
   Arguments: tcp_timer_type_t type, u32 value.
   The value is shifted left by tm->log2_clocks_per_tick[type], subtracted
   from the current CPU clock count and scaled by seconds_per_clock; the
   result is printed as "%.4e sec". */
2614 static u8 * format_tcp_time_stamp (u8 * s, va_list * va)
2616 tcp_timer_type_t type = va_arg (*va, tcp_timer_type_t);
2617 u32 value = va_arg (*va, u32);
2618 vlib_main_t * vm = vlib_get_main();
2619 tcp_main_t * tm = &tcp_main;
2623 now = clib_cpu_time_now ();
/* NOTE(review): VALUE is a u32, so the shift is evaluated in 32 bits
   before the subtraction -- confirm that is intended. */
2624 dt = vm->clib_time.seconds_per_clock * (now - (value << tm->log2_clocks_per_tick[type]));
2625 return format (s, "%.4e sec", dt);
/* format() helper for a tcp_connection_state_t argument.  The state name
   is obtained by expanding foreach_tcp_connection_state into one case per
   state; a value with no name is printed as "unknown 0x%x". */
2628 static u8 * format_tcp_connection_state (u8 * s, va_list * va)
2630 tcp_connection_state_t st = va_arg (*va, tcp_connection_state_t);
/* Each expansion maps TCP_CONNECTION_STATE_<f> to the string "<f>". */
2634 #define _(f) case TCP_CONNECTION_STATE_##f: t = #f; break;
2635 foreach_tcp_connection_state
2640 s = format (s, "%s", t);
2642 s = format (s, "unknown 0x%x", st);
/* format() helper for a tcp_ip_4_or_6_t argument: appends "ip6" when the
   value is non-zero and "ip4" otherwise. */
2647 static u8 * format_tcp_ip_4_or_6 (u8 * s, va_list * va)
2649 tcp_ip_4_or_6_t is_ip6 = va_arg (*va, tcp_ip_4_or_6_t);
2650 return format (s, "%s", is_ip6 ? "ip6" : "ip4");
/* format() helper for a tcp_mini_connection_t pointer argument: prints
   the connection state, window scale and maximum segment size. */
2653 static u8 * format_tcp_mini_connection (u8 * s, va_list * va)
2655 tcp_mini_connection_t * c = va_arg (*va, tcp_mini_connection_t *);
2657 s = format (s, "state %U, window scale %d, mss %d",
2658 format_tcp_connection_state, c->state,
2659 c->window_scale, c->max_segment_size);
/* format() helper for an IPv4 mini (not yet established) connection.
   Argument: u32 index into tm->ip4.mini_connections.

   Connections are grouped four to an address-hash entry, so the index is
   split into the entry number (imin / 4) and the lane within that entry
   (imin % 4).  Prints the lane's address, the age of its time stamp and
   the mini-connection details. */
2664 static u8 * format_ip4_tcp_mini_connection (u8 * s, va_list * va)
2666 u32 imin = va_arg (*va, u32);
2667 u32 imin_div, imin_mod;
2668 tcp_main_t * tm = &tcp_main;
2669 tcp_mini_connection_t * min;
2670 ip4_tcp_udp_address_x4_and_timestamps_t * mina;
2672 imin_div = imin / 4;
2673 imin_mod = imin % 4;
2675 mina = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin_div);
/* The lane inside the x4 entry is imin_mod, matching how the
   established-connection formatters pass iest_mod; the entry number
   imin_div is not a valid lane.  The time stamp is assumed to be stored
   per lane as well -- TODO confirm against the struct definition. */
2677 s = format (s, "%U, age %U",
2678 format_ip4_tcp_udp_address_x4, &mina->address_x4, imin_mod,
2679 format_tcp_time_stamp, TCP_TIMER_mini_connection, mina->time_stamps[imin_mod]);
2681 min = vec_elt_at_index (tm->ip4.mini_connections, imin);
2683 s = format (s, "%U", format_tcp_mini_connection, min);
/* format() helper for an IPv6 mini (not yet established) connection.
   Argument: u32 index into tm->ip6.mini_connections.

   Connections are grouped four to an address-hash entry, so the index is
   split into the entry number (imin / 4) and the lane within that entry
   (imin % 4).  Prints the lane's address, the age of its time stamp and
   the mini-connection details. */
2688 static u8 * format_ip6_tcp_mini_connection (u8 * s, va_list * va)
2690 u32 imin = va_arg (*va, u32);
2691 u32 imin_div, imin_mod;
2692 tcp_main_t * tm = &tcp_main;
2693 tcp_mini_connection_t * min;
2694 ip6_tcp_udp_address_x4_and_timestamps_t * mina;
2696 imin_div = imin / 4;
2697 imin_mod = imin % 4;
2699 mina = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin_div);
/* The lane inside the x4 entry is imin_mod, matching how the
   established-connection formatters pass iest_mod; the entry number
   imin_div is not a valid lane.  The time stamp is assumed to be stored
   per lane as well -- TODO confirm against the struct definition. */
2701 s = format (s, "%U, age %U",
2702 format_ip6_tcp_udp_address_x4, &mina->address_x4, imin_mod,
2703 format_tcp_time_stamp, TCP_TIMER_mini_connection, mina->time_stamps[imin_mod]);
2705 min = vec_elt_at_index (tm->ip6.mini_connections, imin);
2707 s = format (s, "%U", format_tcp_mini_connection, min);
/* format() helper for a tcp_connection_t pointer argument.  Prints the
   names of the connection flags that are set and, when the round-trip
   statistics are valid, the computed round-trip time as
   "rtt <value> +- <value>". */
2712 static u8 * format_tcp_established_connection (u8 * s, va_list * va)
2714 tcp_connection_t * c = va_arg (*va, tcp_connection_t *);
2718 s = format (s, ", flags: ");
/* One test per flag listed in foreach_tcp_connection_flag. */
2719 #define _(f) if (c->flags & TCP_CONNECTION_FLAG_##f) s = format (s, "%s, ", #f);
2720 foreach_tcp_connection_flag;
2724 if (tcp_round_trip_time_stats_is_valid (&c->round_trip_time_stats))
2727 tcp_round_trip_time_stats_compute (&c->round_trip_time_stats, r);
2728 s = format (s, ", rtt %.4e +- %.4e",
/* format() helper for an IPv4 established connection.
   Argument: u32 index into tm->ip4.established_connections.
   The address is taken from hash entry iest / 4, lane iest % 4, and is
   followed by the generic established-connection details. */
2735 static u8 * format_ip4_tcp_established_connection (u8 * s, va_list * va)
2737 u32 iest = va_arg (*va, u32);
2738 u32 iest_div, iest_mod;
2739 tcp_main_t * tm = &tcp_main;
2740 tcp_connection_t * est;
2741 ip4_tcp_udp_address_x4_t * esta;
2743 iest_div = iest / 4;
2744 iest_mod = iest % 4;
2746 esta = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest_div);
2747 est = vec_elt_at_index (tm->ip4.established_connections, iest);
2749 s = format (s, "%U%U",
2750 format_ip4_tcp_udp_address_x4, esta, iest_mod,
2751 format_tcp_established_connection, est);
/* format() helper for an IPv6 established connection.
   Argument: u32 index into tm->ip6.established_connections.
   The address is taken from hash entry iest / 4, lane iest % 4, and is
   followed by the generic established-connection details. */
2756 static u8 * format_ip6_tcp_established_connection (u8 * s, va_list * va)
2758 u32 iest = va_arg (*va, u32);
2759 u32 iest_div, iest_mod;
2760 tcp_main_t * tm = &tcp_main;
2761 tcp_connection_t * est;
2762 ip6_tcp_udp_address_x4_t * esta;
2764 iest_div = iest / 4;
2765 iest_mod = iest % 4;
2767 esta = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest_div);
2768 est = vec_elt_at_index (tm->ip6.established_connections, iest);
2770 s = format (s, "%U%U",
2771 format_ip6_tcp_udp_address_x4, esta, iest_mod,
2772 format_tcp_established_connection, est);
/* CLI command registration carrying only help text for the TCP show
   commands; the command path is not visible in this listing. */
2777 VLIB_CLI_COMMAND (vlib_cli_show_tcp_command, static) = {
2779 .short_help = "Transmission control protocol (TCP) show commands",
/* CLI handler for "show tcp mini-connections".

   Walks every mini-connection slot (0 .. mini_connection_hash_mask) of
   the selected address family and prints each slot whose address-hash
   lane is valid.  When nothing is printed it reports that there are no
   mini connections for that family.

   The optional "4" / "6" input tokens presumably choose the family (the
   assignments are not visible in this listing); the default is TCP_IP4. */
2782 static clib_error_t *
2783 show_mini_connections (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
2785 tcp_main_t * tm = &tcp_main;
2786 ip46_tcp_main_t * tm46;
2787 tcp_ip_4_or_6_t is_ip6 = TCP_IP4;
2788 tcp_mini_connection_t * min;
2789 ip6_tcp_udp_address_x4_and_timestamps_t * mina6;
2790 ip4_tcp_udp_address_x4_and_timestamps_t * mina4;
2791 clib_error_t * error = 0;
2792 uword i, i0, i1, n_valid;
2794 if (unformat (input, "4"))
2796 if (unformat (input, "6"))
2800 tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
/* The mask is the highest valid slot index, hence the inclusive bound. */
2801 for (i = 0; i <= tm46->mini_connection_hash_mask; i++)
2806 min = vec_elt_at_index (tm46->mini_connections, i);
/* IPv6 slots: validated against the ip6 address hash and printed with
   the ip6 formatter (the formatter must match the family of the slot). */
2809 mina6 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, i0);
2810 if (ip6_tcp_udp_address_x4_is_valid (&mina6->address_x4, i1))
2812 vlib_cli_output (vm, "%U", format_ip6_tcp_mini_connection, i);
/* IPv4 slots: validated against the ip4 address hash and printed with
   the ip4 formatter. */
2818 mina4 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, i0);
2819 if (ip4_tcp_udp_address_x4_is_valid (&mina4->address_x4, i1))
2821 vlib_cli_output (vm, "%U", format_ip4_tcp_mini_connection, i);
2828 vlib_cli_output (vm, "no %U mini tcp connections", format_tcp_ip_4_or_6, is_ip6);
/* CLI command "show tcp mini-connections", handled by
   show_mini_connections. */
2833 VLIB_CLI_COMMAND (vlib_cli_show_tcp_mini_connections_command) = {
2834 .path = "show tcp mini-connections",
2835 .short_help = "Show not-yet established TCP connections",
2836 .function = show_mini_connections,
/* CLI handler for "show tcp connections".

   Walks every entry of the selected family's established_connections
   vector and prints each one whose address-hash lane is valid.  When
   nothing is printed it reports that there are no established
   connections for that family.

   The optional "4" / "6" input tokens presumably choose the family (the
   assignments are not visible in this listing); the default is TCP_IP4. */
2839 static clib_error_t *
2840 show_established_connections (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
2842 tcp_main_t * tm = &tcp_main;
2843 ip46_tcp_main_t * tm46;
2844 tcp_ip_4_or_6_t is_ip6 = TCP_IP4;
2845 tcp_connection_t * est;
2846 ip6_tcp_udp_address_x4_t * esta6;
2847 ip4_tcp_udp_address_x4_t * esta4;
2848 clib_error_t * error = 0;
2849 uword i, i0, i1, n_valid;
2851 if (unformat (input, "4"))
2853 if (unformat (input, "6"))
2857 tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
2858 for (i = 0; i < vec_len (tm46->established_connections); i++)
2863 est = vec_elt_at_index (tm46->established_connections, i);
/* IPv6 entries: validity is checked on the ip6 address hash. */
2866 esta6 = vec_elt_at_index (tm->ip6_established_connection_address_hash, i0);
2867 if (ip6_tcp_udp_address_x4_is_valid (esta6, i1))
2869 vlib_cli_output (vm, "%U", format_ip6_tcp_established_connection, i);
/* IPv4 entries: validity is checked on the ip4 address hash. */
2875 esta4 = vec_elt_at_index (tm->ip4_established_connection_address_hash, i0);
2876 if (ip4_tcp_udp_address_x4_is_valid (esta4, i1))
2878 vlib_cli_output (vm, "%U", format_ip4_tcp_established_connection, i);
2885 vlib_cli_output (vm, "no %U established tcp connections", format_tcp_ip_4_or_6, is_ip6);
/* CLI command "show tcp connections", handled by
   show_established_connections. */
2890 VLIB_CLI_COMMAND (vlib_cli_show_tcp_established_connections_command, static) = {
2891 .path = "show tcp connections",
2892 .short_help = "Show established TCP connections",
2893 .function = show_established_connections,
/* Queue N_DATA_BYTES bytes from DATA for transmission on the connection
   identified by CONNECTION_HANDLE.

   The handle selects the address family (tcp_connection_is_ip6) and the
   connection index (handle / 2).  Data is copied into the connection's
   tail buffer; new buffers are allocated from tm->tx_buffer_free_list as
   the tail fills up, and each copy is limited by the space left in the
   buffer and by what remains of the connection's maximum segment size.
   An IP checksum is accumulated over the copied bytes.

   When a buffer cannot be allocated the function returns the number of
   bytes consumed so far. */
2898 tcp_write (vlib_main_t * vm, u32 connection_handle, void * data, uword n_data_bytes)
2900 tcp_main_t * tm = &tcp_main;
2901 tcp_ip_4_or_6_t is_ip6 = tcp_connection_is_ip6 (connection_handle);
2902 ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
2903 tcp_connection_t * c = vec_elt_at_index (tm46->established_connections, connection_handle / 2);
2905 u32 bi, bi_next, bi_start_of_packet;
2909 bi = c->write_tail_buffer_index;
2910 n_bytes_left_tail = 0;
/* Space left in the current tail buffer. */
2913 b = vlib_get_buffer (vm, bi);
2914 n_bytes_left_tail = tm->tx_buffer_free_list_n_buffer_bytes - b->current_length;
/* Bytes already in the packet under construction and the room left
   before it reaches the maximum segment size. */
2917 n_bytes_this_packet = c->write_tail_packet.n_data_bytes;
2918 n_bytes_left_packet = c->max_segment_size - n_bytes_this_packet;
2920 n_data_left = n_data_bytes;
2921 sum = c->write_tail_packet.data_ip_checksum;
2923 while (n_data_left > 0)
/* Tail buffer full: allocate another one, or report how much was
   written when allocation fails. */
2927 if (n_bytes_left_tail == 0)
2929 if (! vlib_buffer_alloc_from_free_list (vm, &bi_next, 1,
2930 tm->tx_buffer_free_list))
2931 return n_data_bytes - n_data_left;
2933 bi_start_of_packet = bi_next;
/* Chain the new buffer behind the current one; the start-of-packet
   index is carried over from the current buffer's opaque[0]. */
2936 b->flags |= VLIB_BUFFER_NEXT_PRESENT;
2937 b->next_buffer = bi_next;
2938 bi_start_of_packet = b->opaque[0];
2941 b = vlib_get_buffer (vm, bi);
2943 /* Save away start of packet buffer in opaque. */
2944 b->opaque[0] = bi_start_of_packet;
2946 c->tail_buffer.buffer_index = bi;
2947 n_bytes_left_tail = tm->tx_buffer_free_list_n_buffer_bytes;
/* Copy as much as fits in both the tail buffer and the current packet. */
2950 n_copy = n_data_left;
2951 n_copy = clib_min (n_copy, n_bytes_left_tail);
2952 n_copy = clib_min (n_copy, n_bytes_left_packet);
2954 sum = ip_csum_and_memcpy (sum, b->data + b->current_length,
2957 b->current_length += n_copy;
2958 n_bytes_left_tail -= n_copy;
2959 n_bytes_left_packet -= n_copy;
/* The remaining byte count must shrink by the amount copied so that the
   enclosing loop terminates. */
2960 n_data_left -= n_copy;
2961 n_bytes_this_packet += n_copy;
/* Packet complete: record its first buffer as the connection's tail
   packet. */
2963 if (n_bytes_left_packet == 0)
2965 bi_start_of_packet = b->opaque[0];
2967 if (c->tail_packet.buffer_index != 0)
2969 vlib_buffer_t * p = vlib_get_buffer (vm, c->tail_packet.buffer_index);
2970 tcp_buffer_t * next = vlib_get_buffer_opaque (p);
2973 c->tail_packet.buffer_index = bi_start_of_packet;
/* Save the tail state (buffer index, byte count, folded checksum). */
2977 c->tail_buffer.buffer_index = bi;
2978 c->tail_buffer.n_data_bytes = n_bytes_this_packet;
2979 c->tail_buffer.data_ip_checksum = ip_csum_fold (sum);