2 * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
19 #include <vnet/vnet.h>
20 #include <vnet/ip/ip.h>
21 #include <vnet/session/session.h>
22 #include <vnet/tcp/tcp_types.h>
23 #include <vnet/tcp/tcp_timer.h>
24 #include <vnet/tcp/tcp_debug.h>
25 #include <vnet/tcp/tcp_sack.h>
26 #include <vnet/tcp/tcp_bt.h>
27 #include <vnet/tcp/tcp_cc.h>
/** Signature shared by the timer expiration handlers declared below: each
 * takes a connection pointer and returns nothing. */
29 typedef void (timer_expiration_handler) (tcp_connection_t * tc);
/* Handlers with that signature. They are only declared here; the definitions
 * live outside this header. */
31 extern timer_expiration_handler tcp_timer_retransmit_handler;
32 extern timer_expiration_handler tcp_timer_persist_handler;
33 extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
/** TCP error codes. Every tcp_error (f, n, s, d) entry of tcp_error.def is
 * turned into an enumerator named TCP_ERROR_<f>; the n, s and d arguments are
 * not used by this expansion. */
35 typedef enum _tcp_error
37 #define tcp_error(f, n, s, d) TCP_ERROR_##f,
38 #include <vnet/tcp/tcp_error.def>
/** Element type of the dispatch_table member of the tcp main structure, which
 * is documented there as a dispatch table by state and flags. */
43 typedef struct _tcp_lookup_dispatch
46 } tcp_lookup_dispatch_t;
/**
 * X-macro listing the per-worker TCP statistics counters.
 *
 * Each entry has the form _(name, type, description): the counter's field
 * name, its integer type (u64 or u32) and a human readable description.
 * Define _ before expanding the list and undefine it afterwards.
 *
 * The final entry carries no line continuation, so the macro ends with the
 * list itself and cannot splice the source line that follows it into its
 * replacement text.
 */
#define foreach_tcp_wrk_stat                                    \
  _(timer_expirations, u64, "timer expirations")                \
  _(rxt_segs, u64, "segments retransmitted")                    \
  _(tr_events, u32, "timer retransmit events")                  \
  _(to_closewait, u32, "timeout close-wait")                    \
  _(to_closewait2, u32, "timeout close-wait w/data")            \
  _(to_finwait1, u32, "timeout fin-wait-1")                     \
  _(to_finwait2, u32, "timeout fin-wait-2")                     \
  _(to_lastack, u32, "timeout last-ack")                        \
  _(to_closing, u32, "timeout closing")                         \
  _(tr_abort, u32, "timer retransmit abort")                    \
  _(rst_unread, u32, "reset on close due to unread data")       \
  _(no_buffer, u32, "out of buffers")
/** Per-worker statistics counters. The _ definition below turns a
 * (name, type, str) entry into a member declaration "type name;" and ignores
 * the description string. NOTE(review): presumably expanded over
 * foreach_tcp_wrk_stat; the expansion itself is not visible here - confirm. */
62 typedef struct tcp_wrk_stats_
64 #define _(name, type, str) type name;
/** Deferred free request. NOTE(review): presumably the element type behind
 * the worker's pending_cleanups fifo (tcp_cleanup_req_t) - confirm. */
69 typedef struct tcp_free_req_
/** presumably the time at which the request becomes due - TODO confirm */
71 clib_time_type_t free_time;
/** Per-worker TCP context. The tcp main structure keeps one entry per worker
 * in wrk_ctx and tcp_get_worker () returns the entry for a thread index.
 * Members are grouped under three cache line alignment marks
 * (cacheline0, cacheline1, cacheline2). */
75 typedef struct tcp_worker_ctx_
77 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
79 /** worker's pool of connections */
80 tcp_connection_t *connections;
82 /** vector of pending ack dequeues */
83 u32 *pending_deq_acked;
85 /** vector of pending disconnect notifications */
86 u32 *pending_disconnects;
88 /** vector of pending reset notifications */
91 /** convenience pointer to this thread's vlib main */
94 /** Time used for high precision (us) measurements in seconds */
97 /** Time measured in @ref TCP_TSTAMP_TICK used for time stamps */
100 /** Max timers to be handled per dispatch loop */
101 u32 max_timers_per_loop;
103 /** Fifo of pending timer expirations */
106 CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
108 /** cached 'on the wire' options for bursts */
111 /** tx buffer free list */
114 /** fifo of pending free (cleanup) requests */
115 tcp_cleanup_req_t *pending_cleanups;
117 /** Session layer edge indices to tcp output; two entries, presumably one
 * per IP version - TODO confirm */
118 u32 tco_next_node[2];
120 /** worker timer wheel */
121 tcp_timer_wheel_t timer_wheel;
123 CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
/** worker counters; incremented through tcp_worker_stats_inc () */
125 tcp_wrk_stats_t stats;
/**
 * Add a value to one of a worker's statistics counters.
 *
 * @param _wrk  pointer expression yielding the worker context; it is
 *              parenthesized, so forms such as &ctx or base + i are safe
 * @param _stat name of the counter member inside the context's stats
 * @param _val  amount to add; parenthesized so any expression may be passed
 *
 * The expansion is a single parenthesized expression, so it can be used as a
 * statement (the existing usage) or inside a larger expression, where it
 * yields the updated counter value.
 */
#define tcp_worker_stats_inc(_wrk, _stat, _val)                 \
  ((_wrk)->stats._stat += (_val))
/** Seed type. The tcp main structure holds one instance (iss_seed), described
 * there as the seed used to generate random iss. */
131 typedef struct tcp_iss_seed_
/** Protocol-wide TCP configuration. One instance is embedded in the tcp main
 * structure as cfg and is reachable through the tcp_cfg shorthand. */
137 typedef struct tcp_configuration_
139 /** Max rx fifo size for a session (in bytes). It is used to compute the
140 * RFC 7323 window scaling factor */
143 /** Min rx fifo for a session (in bytes) */
146 /** Default MTU to be used when establishing connections */
149 /** Initial CWND multiplier, which multiplies MSS to determine initial CWND.
150 * Set to 0 to have the initial CWND determined another way */
151 u16 initial_cwnd_multiplier;
153 /** Enable tx pacing for new connections */
156 /** Allow use of TSO whenever available */
159 /** Set if csum offloading is enabled */
162 /** Default congestion control algorithm type */
163 tcp_cc_algorithm_type_e cc_algo;
165 /** Min rwnd, as number of snd_mss segments, for update ack to be sent after
166 * a zero rwnd advertisement */
167 u32 rwnd_min_update_ack;
169 /** Timer ticks to wait for close from app */
172 /** Timer ticks to wait in time-wait. Also known as 2MSL */
175 /** Timer ticks to wait in fin-wait1 to send fin and rcv fin-ack */
178 /** Timer ticks to wait in last ack for ack */
181 /** Timer ticks to wait in fin-wait2 for fin */
184 /** Timer ticks to wait in closing for fin ack */
187 /** Timer ticks to wait for free buffer */
188 u32 alloc_err_timeout;
190 /** Time to wait (sec) before cleaning up the connection */
193 /** Number of preallocated connections */
194 u32 preallocated_connections;
196 /** Maximum allowed GSO packet size */
199 /** Vectors of src addresses (one for IPv4, one for IPv6). Optional unless
 * one needs > 63K active-opens */
200 ip4_address_t *ip4_src_addrs;
201 ip6_address_t *ip6_src_addrs;
203 /** Fault-injection. Debug only */
204 f64 buffer_fail_fraction;
205 } tcp_configuration_t;
/** Global TCP state. A single instance, tcp_main, is declared extern in this
 * header. */
207 typedef struct _tcp_main
209 /** Per-worker contexts, indexed by thread index (see tcp_get_worker ()) */
210 tcp_worker_ctx_t *wrk_ctx;
212 /** Pool of listeners. */
213 tcp_connection_t *listener_pool;
215 /** vlib buffer size */
216 u32 bytes_per_buffer;
218 /** Dispatch table by state and flags: TCP_N_STATES rows of 64 entries.
 * NOTE(review): 64 presumably covers every combination of six TCP header
 * flag bits - confirm */
219 tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64];
221 /** Seed used to generate random iss */
222 tcp_iss_seed_t iss_seed;
224 /** Congestion control algorithms registered */
225 tcp_cc_algorithm_t *cc_algos;
227 /** Hash table of cc algorithms by name */
228 uword *cc_algo_by_name;
230 /** Last cc algo registered */
231 tcp_cc_algorithm_type_e cc_last_type;
233 /** Flag that indicates if stack is on or off */
236 /** Flag that indicates if v4 punting is enabled */
239 /** Flag that indicates if v6 punting is enabled */
242 /** Rotor for v4 source addresses */
243 u32 last_v4_addr_rotor;
245 /** Rotor for v6 source addresses */
246 u32 last_v6_addr_rotor;
248 /** Protocol configuration; also reachable through the tcp_cfg macro */
249 tcp_configuration_t cfg;
251 /** message ID base for API */
/** The global TCP state instance; defined outside this header. */
255 extern tcp_main_t tcp_main;
/* Graph node registrations, one IPv4 and one IPv6 variant per node. The
 * tcp4_<id>_node / tcp6_<id>_node naming is what tcp_node_index () relies on
 * when it pastes the node id into the registration name. */
256 extern vlib_node_registration_t tcp4_input_node;
257 extern vlib_node_registration_t tcp6_input_node;
258 extern vlib_node_registration_t tcp4_output_node;
259 extern vlib_node_registration_t tcp6_output_node;
260 extern vlib_node_registration_t tcp4_established_node;
261 extern vlib_node_registration_t tcp6_established_node;
262 extern vlib_node_registration_t tcp4_syn_sent_node;
263 extern vlib_node_registration_t tcp6_syn_sent_node;
264 extern vlib_node_registration_t tcp4_rcv_process_node;
265 extern vlib_node_registration_t tcp6_rcv_process_node;
266 extern vlib_node_registration_t tcp4_listen_node;
267 extern vlib_node_registration_t tcp6_listen_node;
268 extern vlib_node_registration_t tcp4_input_nolookup_node;
269 extern vlib_node_registration_t tcp6_input_nolookup_node;
270 extern vlib_node_registration_t tcp4_drop_node;
271 extern vlib_node_registration_t tcp6_drop_node;
/** Shorthand for the protocol configuration embedded in tcp_main. */
273 #define tcp_cfg tcp_main.cfg
/** Node index of the IPv4 or IPv6 variant of a TCP graph node: expands to
 * tcp4_<node_id>_node.index when is_ip4 is non-zero and to
 * tcp6_<node_id>_node.index otherwise. node_id is pasted into the
 * registration name, so it must be a bare identifier such as input or
 * output. */
274 #define tcp_node_index(node_id, is_ip4) \
275 ((is_ip4) ? tcp4_##node_id##_node.index : tcp6_##node_id##_node.index)
277 always_inline tcp_main_t *
/**
 * Get the TCP worker context of a thread.
 *
 * @param thread_index index into tcp_main.wrk_ctx; checked with ASSERT to be
 *                     smaller than the length of that vector
 * @return pointer to the thread_index-th element of tcp_main.wrk_ctx
 */
283 always_inline tcp_worker_ctx_t *
284 tcp_get_worker (u32 thread_index)
286 ASSERT (thread_index < vec_len (tcp_main.wrk_ctx));
287 return &tcp_main.wrk_ctx[thread_index];
/* Connection allocation and teardown. Only the prototypes are visible here;
 * behaviour is implied by the names and should be confirmed against the
 * definitions.
 * NOTE(review): thread_index is u8 in the two alloc functions but u32 in
 * tcp_get_worker () and tcp_send_reset_w_pkt () - confirm the intended
 * width. */
290 tcp_connection_t *tcp_connection_alloc (u8 thread_index);
291 tcp_connection_t *tcp_connection_alloc_w_base (u8 thread_index,
292 tcp_connection_t **base);
293 void tcp_connection_free (tcp_connection_t * tc);
294 void tcp_connection_close (tcp_connection_t * tc);
295 void tcp_connection_cleanup (tcp_connection_t * tc);
296 void tcp_connection_del (tcp_connection_t * tc);
297 int tcp_half_open_connection_cleanup (tcp_connection_t * tc);
/* Senders, one per segment kind named in the function (reset, syn, syn-ack,
 * fin, ack, window update ack) - presumably they build and send that segment
 * for tc; confirm against the definitions. */
299 void tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
300 u32 thread_index, u8 is_ip4);
301 void tcp_send_reset (tcp_connection_t * tc);
302 void tcp_send_syn (tcp_connection_t * tc);
303 void tcp_send_synack (tcp_connection_t * tc);
304 void tcp_send_fin (tcp_connection_t * tc);
305 void tcp_send_ack (tcp_connection_t * tc);
306 void tcp_send_window_update_ack (tcp_connection_t * tc);
/* "program" variants - presumably request the action for later processing
 * rather than performing it immediately; TODO confirm. */
308 void tcp_program_ack (tcp_connection_t * tc);
309 void tcp_program_dupack (tcp_connection_t * tc);
310 void tcp_program_retransmit (tcp_connection_t * tc);
/* Send-side helpers and session layer entry points. */
312 void tcp_update_burst_snd_vars (tcp_connection_t * tc);
313 u32 tcp_snd_space (tcp_connection_t * tc);
314 int tcp_fastrecovery_prr_snd_space (tcp_connection_t * tc);
315 void tcp_reschedule (tcp_connection_t * tc);
316 fib_node_index_t tcp_lookup_rmt_in_fib (tcp_connection_t * tc);
317 u32 tcp_session_push_header (transport_connection_t *tconn, vlib_buffer_t **b,
319 int tcp_session_custom_tx (void *conn, transport_send_params_t * sp);
/* Per-connection initialization, pacing and cleanup helpers. */
321 void tcp_connection_timers_init (tcp_connection_t * tc);
322 void tcp_connection_timers_reset (tcp_connection_t * tc);
323 void tcp_init_snd_vars (tcp_connection_t * tc);
324 void tcp_connection_init_vars (tcp_connection_t * tc);
325 void tcp_connection_tx_pacer_update (tcp_connection_t * tc);
326 void tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window,
328 void tcp_program_cleanup (tcp_worker_ctx_t * wrk, tcp_connection_t * tc);
329 void tcp_check_gso (tcp_connection_t *tc);
/* Buffer, punt and source address configuration. The v4 and v6 source
 * address range functions share one parameter layout: vlib main, first and
 * last address of the range, and a table id. */
331 int tcp_buffer_make_reset (vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4);
332 void tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
333 int tcp_configure_v4_source_address_range (vlib_main_t * vm,
334 ip4_address_t * start,
335 ip4_address_t * end, u32 table_id);
336 int tcp_configure_v6_source_address_range (vlib_main_t * vm,
337 ip6_address_t * start,
338 ip6_address_t * end, u32 table_id);
/* Turn the TCP stack on or off; is_en is presumably non-zero to enable -
 * confirm. Returns a clib_error_t pointer. */
340 clib_error_t *vnet_tcp_enable_disable (vlib_main_t * vm, u8 is_en);
/* Format functions (format_function_t) for TCP objects: state, flags, sack
 * lists and connections. They are only declared here; the arguments each one
 * expects are not visible in this header. */
342 format_function_t format_tcp_state;
343 format_function_t format_tcp_flags;
344 format_function_t format_tcp_sacks;
345 format_function_t format_tcp_rcv_sacks;
346 format_function_t format_tcp_connection;
347 format_function_t format_tcp_connection_id;
/**
 * Assert that a connection has at least _a bytes available for tx dequeue.
 *
 * The check is applied only when the connection's state is
 * TCP_STATE_ESTABLISHED; for any other state the assertion holds trivially.
 *
 * @param _tc pointer expression yielding the connection; parenthesized so
 *            forms such as &conn are parsed as one operand
 * @param _a  minimum number of bytes expected from
 *            transport_max_tx_dequeue (); parenthesized so that expressions
 *            with operators of lower precedence than >= (?:, &, |, ...)
 *            are compared as a whole
 */
#define tcp_validate_txf_size(_tc, _a)                                  \
  ASSERT ((_tc)->state != TCP_STATE_ESTABLISHED                         \
          || transport_max_tx_dequeue (&(_tc)->connection) >= (_a))
353 #endif /* _vnet_tcp_h_ */
356 * fd.io coding-style-patch-verification: ON
359 * eval: (c-set-style "gnu")