tcp_main_t tcp_main;
+typedef struct			/* argument block handed to tcp_add_del_adj_cb */
+{
+ fib_protocol_t nh_proto;	/* protocol passed to adj_nbr_add_or_lock */
+ vnet_link_t link_type;	/* link type passed to adj_nbr_add_or_lock */
+ ip46_address_t ip;		/* address identifying the adjacency; filled from the connection's c_rmt_ip */
+ u32 sw_if_index;		/* interface index; filled from the connection's sw_if_index */
+ u8 is_add;			/* non-zero: add or lock the adjacency; zero: find and unlock it */
+} tcp_add_del_adj_args_t;
+
+/**
+ * Add or remove the neighbor adjacency described by @a args.
+ *
+ * Dispatched through vlib_rpc_call_main_thread() by
+ * tcp_add_del_adjacency().
+ *
+ * @param args  protocol, link type, address and interface identifying the
+ *              adjacency, plus the add/remove selector (is_add).
+ */
+static void
+tcp_add_del_adj_cb (tcp_add_del_adj_args_t * args)
+{
+  u32 ai;
+
+  if (args->is_add)
+    {
+      adj_nbr_add_or_lock (args->nh_proto, args->link_type, &args->ip,
+                           args->sw_if_index);
+      return;
+    }
+
+  /* Look the adjacency up with the same key the add path used, so the
+   * unlock always pairs with the lock taken on add. */
+  ai = adj_nbr_find (args->nh_proto, args->link_type, &args->ip,
+                     args->sw_if_index);
+  if (ai != ADJ_INDEX_INVALID)
+    adj_unlock (ai);
+}
+
+/**
+ * Request, via vlib_rpc_call_main_thread(), that tcp_add_del_adj_cb() add
+ * or remove the IPv6 adjacency for the connection's remote address.
+ *
+ * @param tc      connection supplying the remote address and sw_if_index
+ * @param is_add  non-zero to add the adjacency, zero to remove it
+ */
+static void
+tcp_add_del_adjacency (tcp_connection_t * tc, u8 is_add)
+{
+  tcp_add_del_adj_args_t rpc_args;
+
+  memset (&rpc_args, 0, sizeof (rpc_args));
+  rpc_args.nh_proto = FIB_PROTOCOL_IP6;
+  rpc_args.link_type = VNET_LINK_IP6;
+  rpc_args.ip = tc->c_rmt_ip;
+  rpc_args.sw_if_index = tc->sw_if_index;
+  rpc_args.is_add = is_add;
+
+  vlib_rpc_call_main_thread (tcp_add_del_adj_cb, (u8 *) & rpc_args,
+                             sizeof (rpc_args));
+}
+
static u32
tcp_connection_bind (u32 session_index, transport_endpoint_t * lcl)
{
return listener->c_c_index;
}
-u32
+static u32
tcp_session_bind (u32 session_index, transport_endpoint_t * tep)
{
return tcp_connection_bind (session_index, tep);
pool_put_index (tm->listener_pool, listener_index);
}
-u32
+static u32
tcp_session_unbind (u32 listener_index)
{
tcp_connection_unbind (listener_index);
return 0;
}
-transport_connection_t *
+static transport_connection_t *
tcp_session_get_listener (u32 listener_index)
{
tcp_main_t *tm = vnet_get_tcp_main ();
* Cleanup half-open connection
*
*/
-void
+static void
tcp_half_open_connection_del (tcp_connection_t * tc)
{
tcp_main_t *tm = vnet_get_tcp_main ();
return 0;
}
-tcp_connection_t *
+static tcp_connection_t *
tcp_half_open_connection_new (void)
{
tcp_main_t *tm = vnet_get_tcp_main ();
/* Make sure all timers are cleared */
tcp_connection_timers_reset (tc);
+ if (!tc->c_is_ip4 && ip6_address_is_link_local_unicast (&tc->c_rmt_ip6))
+ tcp_add_del_adjacency (tc, 0);
+
/* Poison the entry */
if (CLIB_DEBUG > 0)
memset (tc, 0xFA, sizeof (*tc));
tcp_connection_cleanup (tc);
break;
case TCP_STATE_ESTABLISHED:
+ tcp_connection_timers_reset (tc);
+ /* Set the cleanup timer, in case the session layer/app don't
+ * cleanly close the connection */
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
stream_session_reset_notify (&tc->connection);
- /* fall through */
+ break;
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_FIN_WAIT_1:
case TCP_STATE_FIN_WAIT_2:
case TCP_STATE_CLOSING:
tc->state = TCP_STATE_CLOSED;
TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc);
-
- /* Make sure all timers are cleared */
tcp_connection_timers_reset (tc);
-
- /* Wait for cleanup from session layer but not forever */
tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
break;
case TCP_STATE_CLOSED:
tc->state = TCP_STATE_FIN_WAIT_1;
break;
case TCP_STATE_ESTABLISHED:
- if (!stream_session_tx_fifo_max_dequeue (&tc->connection))
+ if (!session_tx_fifo_max_dequeue (&tc->connection))
tcp_send_fin (tc);
else
tc->flags |= TCP_CONN_FINPNDG;
tc->state = TCP_STATE_FIN_WAIT_1;
break;
case TCP_STATE_CLOSE_WAIT:
- tcp_connection_timers_reset (tc);
- tcp_send_fin (tc);
- tc->state = TCP_STATE_LAST_ACK;
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ if (!session_tx_fifo_max_dequeue (&tc->connection))
+ {
+ tcp_send_fin (tc);
+ tcp_connection_timers_reset (tc);
+ tc->state = TCP_STATE_LAST_ACK;
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ }
+ else
+ tc->flags |= TCP_CONN_FINPNDG;
break;
case TCP_STATE_FIN_WAIT_1:
tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
tcp_connection_del (tc);
}
-void
+static void
tcp_session_close (u32 conn_index, u32 thread_index)
{
tcp_connection_t *tc;
tcp_connection_close (tc);
}
-void
+static void
 tcp_session_cleanup (u32 conn_index, u32 thread_index)
 {
 tcp_connection_t *tc;
 tc = tcp_connection_get (conn_index, thread_index);
 tcp_connection_timers_reset (tc);
-
- /* Wait for the session tx events to clear */
 tc->state = TCP_STATE_CLOSED;
 TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc);
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_connection_cleanup (tc);	/* clean up right away; the WAITCLOSE timer is no longer armed here */
 }
/**
}
#endif /* 0 */
+/**
+ * Attach the NewReno congestion control algorithm to a connection and run
+ * the algorithm's init hook on it.
+ *
+ * @param tc  connection to initialize
+ */
+static void
+tcp_cc_init (tcp_connection_t * tc)
+{
+  tcp_cc_algorithm_t *algo = tcp_cc_algo_get (TCP_CC_NEWRENO);
+
+  tc->cc_algo = algo;
+  algo->init (tc);
+}
+
+/**
+ * Store a congestion control algorithm in the per-type table of tcp_main.
+ *
+ * The table is grown as needed so that index @a type is valid, then the
+ * function table is copied by value into that slot.
+ *
+ * @param type  slot to register the algorithm under
+ * @param vft   function table to copy
+ */
+void
+tcp_cc_algo_register (tcp_cc_algorithm_type_e type,
+                      const tcp_cc_algorithm_t * vft)
+{
+  tcp_main_t *tcp_m = vnet_get_tcp_main ();
+  tcp_cc_algorithm_t *slot;
+
+  /* Take the slot address only after validation: the vector may move */
+  vec_validate (tcp_m->cc_algos, type);
+  slot = &tcp_m->cc_algos[type];
+  *slot = *vft;
+}
+
+/**
+ * Return a pointer to the congestion control algorithm slot for @a type.
+ *
+ * No bounds check is performed on @a type.
+ *
+ * @param type  algorithm slot to look up
+ * @return pointer into tcp_main's cc_algos vector
+ */
+tcp_cc_algorithm_t *
+tcp_cc_algo_get (tcp_cc_algorithm_type_e type)
+{
+  tcp_cc_algorithm_t *algos = vnet_get_tcp_main ()->cc_algos;
+
+  return algos + type;
+}
+
+
/**
* Initialize connection send variables.
*/
if (tc->state == TCP_STATE_SYN_RCVD)
tcp_init_snd_vars (tc);
+ if (!tc->c_is_ip4 && ip6_address_is_link_local_unicast (&tc->c_rmt_ip6))
+ tcp_add_del_adjacency (tc, 1);
+
// tcp_connection_fib_attach (tc);
}
return 0;
}
-int
+static int
tcp_connection_open (transport_endpoint_t * rmt)
{
tcp_main_t *tm = vnet_get_tcp_main ();
return tc->c_c_index;
}
-int
+static int
tcp_session_open (transport_endpoint_t * tep)
{
return tcp_connection_open (tep);
#undef _
};
-u8 *
+static u8 *
format_tcp_connection_flags (u8 * s, va_list * args)
{
tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
#undef _
};
-u8 *
+static u8 *
format_tcp_timers (u8 * s, va_list * args)
{
tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
return s;
}
-u8 *
+static u8 *
format_tcp_congestion_status (u8 * s, va_list * args)
{
tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
return s;
}
-u8 *
+/**
+ * Receive window still available: rcv_wnd minus the distance between
+ * rcv_nxt and rcv_las.
+ *
+ * @param tc  tcp connection
+ * @return remaining window as a signed value
+ */
+static i32
+tcp_rcv_wnd_available (tcp_connection_t * tc)
+{
+  i32 wnd = (i32) tc->rcv_wnd;
+
+  return wnd - (tc->rcv_nxt - tc->rcv_las);
+}
+
+static u8 *
format_tcp_vars (u8 * s, va_list * args)
{
tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
tc->snd_una_max - tc->iss);
s = format (s, " rcv_nxt %u rcv_las %u\n",
tc->rcv_nxt - tc->irs, tc->rcv_las - tc->irs);
- s = format (s, " snd_wnd %u rcv_wnd %u snd_wl1 %u snd_wl2 %u\n",
- tc->snd_wnd, tc->rcv_wnd, tc->snd_wl1 - tc->irs,
+ s = format (s, " snd_wnd %u rcv_wnd %u rcv_wscale %u ",
+ tc->snd_wnd, tc->rcv_wnd, tc->rcv_wscale);
+ s = format (s, "snd_wl1 %u snd_wl2 %u\n", tc->snd_wl1 - tc->irs,
tc->snd_wl2 - tc->iss);
- s = format (s, " flight size %u send space %u rcv_wnd_av %d\n",
+ s = format (s, " flight size %u out space %u cc space %u rcv_wnd_av %u\n",
tcp_flight_size (tc), tcp_available_output_snd_space (tc),
- tcp_rcv_wnd_available (tc));
+ tcp_available_cc_snd_space (tc), tcp_rcv_wnd_available (tc));
s = format (s, " cong %U ", format_tcp_congestion_status, tc);
s = format (s, "cwnd %u ssthresh %u rtx_bytes %u bytes_acked %u\n",
tc->cwnd, tc->ssthresh, tc->snd_rxt_bytes, tc->bytes_acked);
return s;
}
-u8 *
+static u8 *
format_tcp_connection_id (u8 * s, va_list * args)
{
tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
return s;
}
-u8 *
+static u8 *
format_tcp_session (u8 * s, va_list * args)
{
u32 tci = va_arg (*args, u32);
return s;
}
-u8 *
+static u8 *
format_tcp_listener_session (u8 * s, va_list * args)
{
u32 tci = va_arg (*args, u32);
return format (s, "%U", format_tcp_connection_id, tc);
}
-u8 *
+static u8 *
format_tcp_half_open_session (u8 * s, va_list * args)
{
u32 tci = va_arg (*args, u32);
return s;
}
-u8 *
+static u8 *
format_tcp_sack_hole (u8 * s, va_list * args)
{
sack_scoreboard_hole_t *hole = va_arg (*args, sack_scoreboard_hole_t *);
return s;
}
-transport_connection_t *
+static transport_connection_t *
tcp_session_get_transport (u32 conn_index, u32 thread_index)
{
tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index);
return &tc->connection;
}
-transport_connection_t *
+static transport_connection_t *
tcp_half_open_session_get_transport (u32 conn_index)
{
tcp_connection_t *tc = tcp_half_open_connection_get (conn_index);
* the tcp options to be used in the next burst and subtracts their
* length from the connection's snd_mss.
*/
-u16
+static u16
tcp_session_send_mss (transport_connection_t * trans_conn)
{
tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
/* Ensure snd_mss does accurately reflect the amount of data we can push
* in a segment. This also makes sure that options are updated according to
* the current state of the connection. */
- tcp_update_snd_mss (tc);
+ tcp_update_burst_snd_vars (tc);
return tc->snd_mss;
}
* @param tc tcp connection
* @return number of bytes session is allowed to write
*/
-u32
-tcp_snd_space (tcp_connection_t * tc)
+static inline u32
+tcp_snd_space_inline (tcp_connection_t * tc)
{
int snd_space, snt_limited;
* bytes of previously unsent data. */
if (tcp_in_fastrecovery (tc) && !tcp_fastrecovery_sent_1_smss (tc))
{
- if (tcp_available_output_snd_space (tc) < tc->snd_mss)
+ if (tcp_available_cc_snd_space (tc) < tc->snd_mss)
return 0;
tcp_fastrecovery_1_smss_on (tc);
return tc->snd_mss;
}
 u32
-tcp_session_send_space (transport_connection_t * trans_conn)
+tcp_snd_space (tcp_connection_t * tc)
 {
- tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
- return clib_min (tcp_snd_space (tc),
- tc->snd_wnd - (tc->snd_nxt - tc->snd_una));
+ return tcp_snd_space_inline (tc);	/* non-static wrapper around the helper that tcp_session_send_space also calls */
 }
-i32
-tcp_rcv_wnd_available (tcp_connection_t * tc)
+static u32
+tcp_session_send_space (transport_connection_t * trans_conn)
 {
- return (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
+ tcp_connection_t *tc = (tcp_connection_t *) trans_conn;	/* the transport connection is cast to its tcp connection */
+ return clib_min (tcp_snd_space_inline (tc),	/* result is the smaller of the send-space estimate ... */
+ tc->snd_wnd - (tc->snd_nxt - tc->snd_una));	/* ... and snd_wnd minus the bytes sent past snd_una */
 }
-u32
+static u32
tcp_session_tx_fifo_offset (transport_connection_t * trans_conn)
{
tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
return (tc->snd_nxt - tc->snd_una);
}
-void
+static void
 tcp_update_time (f64 now, u8 thread_index)
 {
 tcp_set_time_now (thread_index);
- tw_timer_expire_timers_16t_2w_512sl (&tcp_main.timer_wheels[thread_index],
+ tw_timer_expire_timers_16t_2w_512sl (&tcp_main.
+ wrk_ctx[thread_index].timer_wheel,	/* timer wheel now lives in the wrk_ctx entry indexed by thread_index */
 now);
 tcp_flush_frames_to_output (thread_index);
 }
+/**
+ * Transport-layer adapter for tcp_push_header(): casts the transport
+ * connection to a tcp connection and forwards the call.
+ *
+ * @param tconn  transport connection, cast to tcp_connection_t
+ * @param b      buffer handed to tcp_push_header()
+ * @return value returned by tcp_push_header()
+ */
+static u32
+tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
+{
+  return tcp_push_header ((tcp_connection_t *) tconn, b);
+}
+
/* *INDENT-OFF* */
const static transport_proto_vft_t tcp_proto = {
.enable = vnet_tcp_enable_disable,
.bind = tcp_session_bind,
.unbind = tcp_session_unbind,
- .push_header = tcp_push_header,
+ .push_header = tcp_session_push_header,
.get_connection = tcp_session_get_transport,
.get_listener = tcp_session_get_listener,
.get_half_open = tcp_half_open_session_get_transport,
};
/* *INDENT-ON* */
-void
+static void
tcp_timer_keep_handler (u32 conn_index)
{
u32 thread_index = vlib_get_thread_index ();
tcp_connection_close (tc);
}
-void
+static void
tcp_timer_establish_handler (u32 conn_index)
{
tcp_connection_t *tc;
tcp_connection_cleanup (tc);
}
-void
+static void
tcp_timer_waitclose_handler (u32 conn_index)
{
u32 thread_index = vlib_get_thread_index ();
clib_warning ("FIN was sent and still in CLOSE WAIT. Weird!");
}
+ /* Make sure we don't try to send unsent data */
+ tcp_connection_timers_reset (tc);
+ tcp_cong_recovery_off (tc);
+ tc->snd_una_max = tc->snd_nxt = tc->snd_una;
tcp_send_fin (tc);
tc->state = TCP_STATE_LAST_ACK;
}
}
-void
+static void
tcp_initialize_timer_wheels (tcp_main_t * tm)
{
tw_timer_wheel_16t_2w_512sl_t *tw;
/* *INDENT-OFF* */
foreach_vlib_main (({
- tw = &tm->timer_wheels[ii];
+ tw = &tm->wrk_ctx[ii].timer_wheel;
tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch,
100e-3 /* timer period 100ms */ , ~0);
tw->last_run_time = vlib_time_now (this_vlib_main);
/* *INDENT-ON* */
}
-clib_error_t *
+static clib_error_t *
tcp_main_enable (vlib_main_t * vm)
{
tcp_main_t *tm = vnet_get_tcp_main ();
pool_init_fixed (tm->half_open_connections,
tm->preallocated_half_open_connections);
- /* Initialize per worker thread tx buffers (used for control messages) */
- vec_validate (tm->tx_buffers, num_threads - 1);
-
- /* Initialize timer wheels */
- vec_validate (tm->timer_wheels, num_threads - 1);
- tcp_initialize_timer_wheels (tm);
-
/* Initialize clocks per tick for TCP timestamp. Used to compute
* monotonically increasing timestamps. */
tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock
clib_spinlock_init (&tm->half_open_lock);
}
- vec_validate (tm->tx_frames[0], num_threads - 1);
- vec_validate (tm->tx_frames[1], num_threads - 1);
- vec_validate (tm->ip_lookup_tx_frames[0], num_threads - 1);
- vec_validate (tm->ip_lookup_tx_frames[1], num_threads - 1);
+ vec_validate (tm->wrk_ctx, num_threads - 1);
+ tcp_initialize_timer_wheels (tm);
tm->bytes_per_buffer = vlib_buffer_free_list_buffer_size
(vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
- vec_validate (tm->time_now, num_threads - 1);
return error;
}
tm->punt_unknown6 = is_add;
}
-clib_error_t *
+static clib_error_t *
tcp_init (vlib_main_t * vm)
{
tcp_main_t *tm = vnet_get_tcp_main ();
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat
- (input, "preallocated-connections %d",
- &tm->preallocated_connections))
+ if (unformat (input, "preallocated-connections %d",
+ &tm->preallocated_connections))
;
else if (unformat (input, "preallocated-half-open-connections %d",
&tm->preallocated_half_open_connections))
else if (unformat (input, "buffer-fail-fraction %f",
&tm->buffer_fail_fraction))
;
+ else if (unformat (input, "max-rx-fifo %U", unformat_memory_size,
+ &tm->max_rx_fifo))
+ ;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);