pool_put (wrk->connections, tc);
}
+/**
+ * Queue a connection for deferred cleanup.
+ *
+ * Appends a request to the worker's pending_cleanups fifo. The request
+ * records the connection's index and a free_time computed as the transport
+ * time for the connection's thread plus tcp_cfg.cleanup_time.
+ *
+ * @param wrk	worker whose pending_cleanups fifo receives the request
+ * @param tc	connection to be cleaned up
+ */
+void
+tcp_program_cleanup (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
+{
+  clib_time_type_t due_time;
+  tcp_cleanup_req_t *cleanup;
+
+  /* Sample the time first, then reserve the fifo slot and fill it in */
+  due_time = transport_time_now (tc->c_thread_index) + tcp_cfg.cleanup_time;
+
+  clib_fifo_add2 (wrk->pending_cleanups, cleanup);
+  cleanup->free_time = due_time;
+  cleanup->connection_index = tc->c_c_index;
+}
+
/**
* Begin connection closing procedure.
*
tcp_send_reset (tc);
tcp_connection_timers_reset (tc);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
session_transport_closed_notify (&tc->connection);
+ tcp_program_cleanup (tcp_get_worker (tc->c_thread_index), tc);
tcp_worker_stats_inc (tc->c_thread_index, rst_unread, 1);
break;
}
tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time);
break;
case TCP_STATE_CLOSED:
- tcp_connection_timers_reset (tc);
- /* Delete connection but instead of doing it now wait until next
- * dispatch cycle to give the session layer a chance to clear
- * unhandled events */
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ /* Cleanup should've been programmed already */
break;
default:
TCP_DBG ("state: %u", tc->state);
{
tcp_connection_t *tc;
tc = tcp_connection_get (conn_index, thread_index);
- session_transport_closed_notify (&tc->connection);
tcp_send_reset (tc);
tcp_connection_timers_reset (tc);
tcp_cong_recovery_off (tc);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ session_transport_closed_notify (&tc->connection);
+ tcp_program_cleanup (tcp_get_worker (thread_index), tc);
}
/**
tc->rto, tc->rto_boff, tc->srtt, tc->mrtt_us * 1000, tc->rttvar,
tc->rtt_ts);
s = format (s, " rtt_seq %u\n", tc->rtt_seq - tc->iss);
- s = format (s, " next_node %u opaque 0x%x\n", tc->next_node_index,
- tc->next_node_opaque);
+ s = format (s, " next_node %u opaque 0x%x fib_index %u\n",
+ tc->next_node_index, tc->next_node_opaque, tc->c_fib_index);
s = format (s, " cong: %U", format_tcp_congestion, tc);
if (tc->state >= TCP_STATE_ESTABLISHED)
return goal_size > tc->snd_mss ? goal_size : tc->snd_mss;
}
-/**
- * Compute maximum segment size for session layer.
- *
- * Since the result needs to be the actual data length, it first computes
- * the tcp options to be used in the next burst and subtracts their
- * length from the connection's snd_mss.
- */
-static u16
-tcp_session_send_mss (transport_connection_t * trans_conn)
-{
- tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
-
- /* Ensure snd_mss does accurately reflect the amount of data we can push
- * in a segment. This also makes sure that options are updated according to
- * the current state of the connection. */
- tcp_update_burst_snd_vars (tc);
-
- if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_TSO))
- return tcp_session_cal_goal_size (tc);
-
- return tc->snd_mss;
-}
-
always_inline u32
tcp_round_snd_space (tcp_connection_t * tc, u32 snd_space)
{
return tcp_snd_space_inline (tc);
}
-static u32
-tcp_session_send_space (transport_connection_t * trans_conn)
+static int
+tcp_session_send_params (transport_connection_t * trans_conn,
+ transport_send_params_t * sp)
{
tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
- return clib_min (tcp_snd_space_inline (tc),
- tc->snd_wnd - (tc->snd_nxt - tc->snd_una));
-}
-static u32
-tcp_session_tx_fifo_offset (transport_connection_t * trans_conn)
-{
- tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
+ /* Fill sp with this connection's snd_mss, snd_space, tx_offset and flags.
+ * Always returns 0.
+ *
+ * Ensure snd_mss does accurately reflect the amount of data we can push
+ * in a segment. This also makes sure that options are updated according to
+ * the current state of the connection. */
+ tcp_update_burst_snd_vars (tc);
- ASSERT (seq_geq (tc->snd_nxt, tc->snd_una));
+ if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_TSO))
+ sp->snd_mss = tcp_session_cal_goal_size (tc); /* TSO configured: report the computed goal size */
+ else
+ sp->snd_mss = tc->snd_mss;
+ /* Send space is the smaller of tcp_snd_space_inline () and what is left
+ * of snd_wnd after subtracting snd_nxt - snd_una */
+ sp->snd_space = clib_min (tcp_snd_space_inline (tc),
+ tc->snd_wnd - (tc->snd_nxt - tc->snd_una));
+ ASSERT (seq_geq (tc->snd_nxt, tc->snd_una));
/* This still works if fast retransmit is on */
- return (tc->snd_nxt - tc->snd_una);
+ sp->tx_offset = tc->snd_nxt - tc->snd_una;
+ /* Flags stay 0 unless snd_wnd is zero, in which case: DESCHED if the
+ * persist timer is active, POSTPONE otherwise */
+ sp->flags = 0;
+ if (!tc->snd_wnd)
+ {
+ if (tcp_timer_is_active (tc, TCP_TIMER_PERSIST))
+ sp->flags = TRANSPORT_SND_F_DESCHED;
+ else
+ sp->flags = TRANSPORT_SND_F_POSTPONE;
+ }
+
+ return 0;
}
static void
tcp_timer_waitclose_handler (tcp_connection_t * tc)
{
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
+ /* Waitclose handling depends on tc->state. In every handled state except
+ * CLOSE_WAIT with a FIN pending, the connection is moved to CLOSED and its
+ * cleanup is queued on wrk with tcp_program_cleanup (). CLOSE_WAIT with a
+ * FIN pending sends the FIN, moves to LAST_ACK and re-arms the timer.
+ * States without a case only log a warning. */
switch (tc->state)
{
case TCP_STATE_CLOSE_WAIT:
tcp_connection_timers_reset (tc);
- session_transport_closed_notify (&tc->connection);
/* App never returned with a close */
if (!(tc->flags & TCP_CONN_FINPNDG))
{
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
- tcp_worker_stats_inc (tc->c_thread_index, to_closewait, 1);
+ session_transport_closed_notify (&tc->connection); /* notify once the state is CLOSED */
+ tcp_program_cleanup (wrk, tc);
+ tcp_workerp_stats_inc (wrk, to_closewait, 1);
break;
}
tc->snd_nxt = tc->snd_una;
tcp_send_fin (tc);
tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
+ session_transport_closed_notify (&tc->connection); /* notify once the state is LAST_ACK */
/* Make sure we don't wait in LAST ACK forever */
tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
- tcp_worker_stats_inc (tc->c_thread_index, to_closewait2, 1);
+ tcp_workerp_stats_inc (wrk, to_closewait2, 1);
/* Don't delete the connection yet */
break;
case TCP_STATE_FIN_WAIT_1:
tcp_connection_timers_reset (tc);
- session_transport_closed_notify (&tc->connection);
if (tc->flags & TCP_CONN_FINPNDG)
{
/* If FIN pending, we haven't sent everything, but we did try.
* Notify session layer that transport is closed. */
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
tcp_send_reset (tc);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ tcp_program_cleanup (wrk, tc);
}
else
{
/* We've sent the fin but no progress. Close the connection and
* to make sure everything is flushed, setup a cleanup timer */
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ tcp_program_cleanup (wrk, tc);
}
- tcp_worker_stats_inc (tc->c_thread_index, to_finwait1, 1);
+ session_transport_closed_notify (&tc->connection); /* common to both branches above; state is CLOSED here */
+ tcp_workerp_stats_inc (wrk, to_finwait1, 1);
break;
case TCP_STATE_LAST_ACK:
tcp_connection_timers_reset (tc);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
session_transport_closed_notify (&tc->connection);
- tcp_worker_stats_inc (tc->c_thread_index, to_lastack, 1);
+ tcp_program_cleanup (wrk, tc);
+ tcp_workerp_stats_inc (wrk, to_lastack, 1);
break;
case TCP_STATE_CLOSING:
tcp_connection_timers_reset (tc);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
session_transport_closed_notify (&tc->connection);
- tcp_worker_stats_inc (tc->c_thread_index, to_closing, 1);
+ tcp_program_cleanup (wrk, tc);
+ tcp_workerp_stats_inc (wrk, to_closing, 1);
break;
case TCP_STATE_FIN_WAIT_2:
tcp_send_reset (tc);
tcp_connection_timers_reset (tc);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
session_transport_closed_notify (&tc->connection);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
- tcp_worker_stats_inc (tc->c_thread_index, to_finwait2, 1);
+ tcp_program_cleanup (wrk, tc);
+ tcp_workerp_stats_inc (wrk, to_finwait2, 1);
+ break;
+ case TCP_STATE_TIME_WAIT: /* only a state change and cleanup here; no closed notify, no stats */
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ tcp_program_cleanup (wrk, tc);
break;
default:
- tcp_connection_del (tc);
+ clib_warning ("waitclose in state: %U", format_tcp_state, tc->state); /* unexpected state: warn only, nothing is freed */
break;
}
}
return;
thread_index = wrk->vm->thread_index;
- for (i = 0; i < clib_min (n_timers, 32); i++)
+ for (i = 0; i < clib_min (n_timers, wrk->max_timers_per_loop); i++)
{
clib_fifo_sub1 (wrk->pending_timers, timer_handle);
connection_index = timer_handle & 0x0FFFFFFF;
(*timer_expiration_handlers[timer_id]) (tc);
}
+
+ if (thread_index == 0 && clib_fifo_elts (wrk->pending_timers))
+ vlib_process_signal_event_mt (wrk->vm, session_queue_process_node.index,
+ SESSION_Q_PROCESS_FLUSH_FRAMES, 0);
}
/**
}
}
+/**
+ * Free connections whose programmed cleanup time has been reached.
+ *
+ * Requests are consumed from the head of the worker's pending_cleanups fifo
+ * for as long as the head request's free_time is not later than now. For
+ * each consumed request, if the connection can still be retrieved, the
+ * session layer is notified of the delete and the connection is cleaned up.
+ *
+ * @param wrk	worker whose pending_cleanups fifo is processed
+ * @param now	time against which each request's free_time is compared
+ */
+static void
+tcp_handle_cleanups (tcp_worker_ctx_t * wrk, clib_time_type_t now)
+{
+  u32 wrk_index = wrk->vm->thread_index;
+  tcp_cleanup_req_t *pending;
+  tcp_connection_t *conn;
+
+  for (;;)
+    {
+      /* Nothing left to process */
+      if (!clib_fifo_elts (wrk->pending_cleanups))
+        return;
+
+      /* Stop at the first request that is not due yet */
+      pending = clib_fifo_head (wrk->pending_cleanups);
+      if (pending->free_time > now)
+        return;
+
+      clib_fifo_sub2 (wrk->pending_cleanups, pending);
+      conn = tcp_connection_get (pending->connection_index, wrk_index);
+
+      /* Skip requests whose connection can no longer be found */
+      if (PREDICT_FALSE (!conn))
+        continue;
+
+      session_transport_delete_notify (&conn->connection);
+      tcp_connection_cleanup (conn);
+    }
+}
+
static void
tcp_update_time (f64 now, u8 thread_index)
{
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
tcp_set_time_now (wrk);
+ tcp_handle_cleanups (wrk, now);
tw_timer_expire_timers_16t_2w_512sl (&wrk->timer_wheel, now);
tcp_dispatch_pending_timers (wrk);
tcp_flush_frames_to_output (wrk);
.close = tcp_session_close,
.cleanup = tcp_session_cleanup,
.reset = tcp_session_reset,
- .send_mss = tcp_session_send_mss,
- .send_space = tcp_session_send_space,
+ .send_params = tcp_session_send_params,
.update_time = tcp_update_time,
- .tx_fifo_offset = tcp_session_tx_fifo_offset,
.flush_data = tcp_session_flush_data,
.custom_tx = tcp_session_custom_tx,
.format_connection = format_tcp_session,
static void
tcp_expired_timers_dispatch (u32 * expired_timers)
{
- u32 thread_index = vlib_get_thread_index ();
- u32 connection_index, timer_id, n_expired;
+ u32 thread_index = vlib_get_thread_index (), n_left, max_per_loop;
+ u32 connection_index, timer_id, n_expired, max_loops;
tcp_worker_ctx_t *wrk;
tcp_connection_t *tc;
int i;
wrk = tcp_get_worker (thread_index);
n_expired = vec_len (expired_timers);
tcp_workerp_stats_inc (wrk, timer_expirations, n_expired);
+ n_left = clib_fifo_elts (wrk->pending_timers);
/*
* Invalidate all timer handles before dispatching. This avoids dangling
}
clib_fifo_add (wrk->pending_timers, expired_timers, n_expired);
+
+ max_loops = clib_max (1, 0.5 * TCP_TIMER_TICK * wrk->vm->loops_per_second);
+ max_per_loop = clib_max ((n_left + n_expired) / max_loops, 10);
+ max_per_loop = clib_min (max_per_loop, VLIB_FRAME_SIZE);
+ wrk->max_timers_per_loop = clib_max (n_left ? wrk->max_timers_per_loop : 0,
+ max_per_loop);
+
+ if (thread_index == 0)
+ vlib_process_signal_event_mt (wrk->vm, session_queue_process_node.index,
+ SESSION_Q_PROCESS_FLUSH_FRAMES, 0);
}
static void
vec_reset_length (wrk->pending_disconnects);
vec_reset_length (wrk->pending_resets);
wrk->vm = vlib_mains[thread];
+ wrk->max_timers_per_loop = 10;
/*
* Preallocate connections. Assume that thread 0 won't
tcp_cfg.lastack_time = 300; /* 30s */
tcp_cfg.finwait2_time = 300; /* 30s */
tcp_cfg.closing_time = 300; /* 30s */
- tcp_cfg.cleanup_time = 1; /* 0.1s */
+ tcp_cfg.cleanup_time = 0.1; /* 100ms */
}
static clib_error_t *
else if (unformat (input, "closing-time %u", &tmp_time))
tcp_cfg.closing_time = tmp_time / TCP_TIMER_TICK;
else if (unformat (input, "cleanup-time %u", &tmp_time))
- tcp_cfg.cleanup_time = tmp_time / TCP_TIMER_TICK;
+ tcp_cfg.cleanup_time = tmp_time / 1000.0;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);