2 * Copyright (c) 2017 SUSE LLC.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
15 #ifndef included_vnet_sctp_h
16 #define included_vnet_sctp_h
18 #include <vnet/vnet.h>
19 #include <vnet/ip/ip.h>
20 #include <sctp/sctp_timer.h>
21 #include <sctp/sctp_packet.h>
22 #include <vnet/session/transport.h>
23 #include <vnet/session/session.h>
25 /* SCTP buffer opaque definition */
31 u16 sid; /**< Stream ID */
32 u16 ssn; /**< Stream Sequence Number */
33 u32 tsn; /**< Transmission Sequence Number */
34 u16 hdr_offset; /**< offset relative to ip hdr */
35 u16 data_offset; /**< offset relative to ip hdr */
36 u16 data_len; /**< data len */
37 u8 subconn_idx; /**< index of the sub_connection being used */
40 } sctp_buffer_opaque_t;
42 STATIC_ASSERT (sizeof (sctp_buffer_opaque_t) <=
43 STRUCT_SIZE_OF (vnet_buffer_opaque_t, unused),
44 "sctp_buffer_opaque_t too large for vnet_buffer_opaque_t");
/**
 * Return the SCTP-private metadata area of buffer b as a
 * sctp_buffer_opaque_t pointer.
 *
 * The address is computed in bytes from the start of b->opaque, advanced by
 * the offset of the `unused` member of vnet_buffer_opaque_t.
 * b is evaluated once and must be a pointer to a buffer with an `opaque`
 * member.
 */
46 #define sctp_buffer_opaque(b) \
47 ((sctp_buffer_opaque_t *)((u8 *)((b)->opaque) + \
48 STRUCT_OFFSET_OF (vnet_buffer_opaque_t, unused)))
52 #define foreach_sctp_timer \
53 _(T1_INIT, "T1_INIT") \
54 _(T1_COOKIE, "T1_COOKIE") \
55 _(T2_SHUTDOWN, "T2_SHUTDOWN") \
56 _(T3_RXTX, "T3_RXTX") \
57 _(T4_HEARTBEAT, "T4_HB") \
58 _(T5_SHUTDOWN_GUARD, "T5_SHUTDOWN_GUARD")
60 typedef enum _sctp_timers
62 #define _(sym, str) SCTP_TIMER_##sym,
68 #define SCTP_TIMER_HANDLE_INVALID ((u32) ~0)
71 sctp_timer_to_string (u8 timer_id)
75 case SCTP_TIMER_T1_INIT:
76 return "SCTP_TIMER_T1_INIT";
77 case SCTP_TIMER_T1_COOKIE:
78 return "SCTP_TIMER_T1_COOKIE";
79 case SCTP_TIMER_T2_SHUTDOWN:
80 return "SCTP_TIMER_T2_SHUTDOWN";
81 case SCTP_TIMER_T3_RXTX:
82 return "SCTP_TIMER_T3_RXTX";
83 case SCTP_TIMER_T4_HEARTBEAT:
84 return "SCTP_TIMER_T4_HEARTBEAT";
85 case SCTP_TIMER_T5_SHUTDOWN_GUARD:
86 return "SCTP_TIMER_T5_SHUTDOWN_GUARD";
91 typedef enum _sctp_error
93 #define sctp_error(n,s) SCTP_ERROR_##n,
94 #include <sctp/sctp_error.def>
/*
 * Chunk-flag bit tests. Each macro yields a non-zero value when the named
 * bit is set in var and 0 otherwise; the result is the masked value, not a
 * normalized 0/1.
 * NOTE(review): T and E both test bit 0 -- presumably they apply to
 * different chunk types (see RFC4960 chunk flag definitions); confirm.
 */
101 #define IS_T_BIT_SET(var) ((var) & (1))
102 #define IS_E_BIT_SET(var) ((var) & (1))
103 #define IS_B_BIT_SET(var) ((var) & (1<<1))
104 #define IS_U_BIT_SET(var) ((var) & (1<<2))
106 #define MAX_SCTP_CONNECTIONS 8
107 #define SCTP_PRIMARY_PATH_IDX 0
109 #if (VLIB_BUFFER_TRACE_TRAJECTORY)
110 #define sctp_trajectory_add_start(b, start) \
112 (*vlib_buffer_trace_trajectory_cb) (b, start); \
115 #define sctp_trajectory_add_start(b, start)
118 enum _sctp_subconn_state
120 SCTP_SUBCONN_STATE_DOWN = 0,
121 SCTP_SUBCONN_STATE_UP,
122 SCTP_SUBCONN_STATE_ALLOW_HB,
123 SCTP_SUBCONN_AWAITING_SACK,
124 SCTP_SUBCONN_SACK_RECEIVED
127 #define SCTP_INITIAL_SSHTRESH 65535
128 typedef struct _sctp_sub_connection
130 transport_connection_t connection; /**< Common transport data. First! */
132 u8 subconn_idx; /**< This indicates the position of this sub-connection in the super-set container of connections pool */
133 u32 error_count; /**< The current error count for this destination. */
134 u32 error_threshold; /**< Current error threshold for this destination,
135 i.e. what value marks the destination down if error count reaches this value. */
136 u32 cwnd; /**< Congestion control window (cwnd, in bytes), which is adjusted by
137 the sender based on observed network conditions. */
138 u32 ssthresh; /**< Slow-start threshold (in bytes), which is used by the
139 sender to distinguish slow-start and congestion avoidance phases. */
141 u64 rtt_ts; /**< USED to hold the timestamp of when the packet has been sent */
143 u32 RTO; /**< The current retransmission timeout value. */
144 u64 SRTT; /**< The current smoothed round-trip time. */
145 f64 RTTVAR; /**< The current RTT variation. */
147 u32 partially_acked_bytes; /**< The tracking method for increase of cwnd when in
148 congestion avoidance mode (see Section 7.2.2).*/
150 u8 state; /**< The current state of this destination, i.e., DOWN, UP, ALLOW-HB, NO-HEARTBEAT, etc. */
152 u16 PMTU; /**< The current known path MTU. */
154 u32 timers[SCTP_N_TIMERS]; /**< A timer used by each destination. */
156 u8 RTO_pending; /**< A flag used to track if one of the DATA chunks sent to
157 this address is currently being used to compute an RTT.
158 If this flag is 0, the next DATA chunk sent to this destination
159 should be used to compute an RTT and this flag should be set.
160 Every time the RTT calculation completes (i.e., the DATA chunk is SACK'd),
163 u64 last_seen; /**< The time to which this destination was last sent a packet to.
164 This can be used to determine if a HEARTBEAT is needed. */
166 u64 last_data_ts; /**< Used to hold the timestamp value of last time we sent a DATA chunk */
168 u8 unacknowledged_hb; /**< Used to track how many unacknowledged heartbeats we had;
169 If more than SCTP_PATH_MAX_RETRANS then connection is considered unreachable. */
171 u8 is_retransmitting; /**< A flag (0 = no, 1 = yes) indicating whether the connection is retransmitting a previous packet */
173 u8 enqueue_state; /**< if set to 1 indicates that DATA is still being handled hence cannot shutdown this connection yet */
175 } sctp_sub_connection_t;
179 u32 a_rwnd; /**< Maximum segment size advertised */
183 /* Useful macros to deal with the out_of_order_map (array of bit) */
/*
 * Bitmap helpers for an array of 32-bit words A addressed by bit index k:
 * the bit lives in word k/32 at position k%32.
 *
 * Both arguments are parenthesized so that expressions such as `base + off`
 * select the intended bit, and the mask is built from an unsigned 1 so that
 * selecting bit 31 never shifts into the sign bit of an int.
 * k is evaluated twice: do not pass expressions with side effects.
 */
#define SET_BIT(A,k)     ( (A)[((k)/32)] |=  (1U << ((k)%32)) )
#define CLEAR_BIT(A,k)   ( (A)[((k)/32)] &= ~(1U << ((k)%32)) )
#define TEST_BIT(A,k)    ( (A)[((k)/32)] &   (1U << ((k)%32)) )
189 _bytes_swap (void *pv, size_t n)
193 for (lo = 0, hi = n - 1; hi > lo; lo++, hi--)
/*
 * Reverse the byte order of the object x in place by handing its address and
 * size to _bytes_swap().
 * x must be an lvalue; it is parenthesized so that arguments such as `*p` or
 * `a[i]` bind as intended.
 * The expansion deliberately keeps its trailing semicolon so that existing
 * call sites written without one continue to compile.
 */
#define ENDIANESS_SWAP(x) _bytes_swap(&(x), sizeof(x));
203 #define MAX_INFLIGHT_PACKETS 128
204 #define MAX_ENQUEABLE_SACKS 2
206 /* This parameter indicates to the receiver how much increment in
207 * milliseconds the sender wishes the receiver to add to its default
210 #define SUGGESTED_COOKIE_LIFE_SPAN_INCREMENT 1000
212 typedef struct _sctp_user_configuration
217 } sctp_user_configuration_t;
219 typedef struct _sctp_connection
221 /** Required for pool_get_aligned */
222 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
224 sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS]; /**< Common transport data. First! */
225 sctp_user_configuration_t conn_config; /**< Allows tuning of some SCTP behaviors */
227 u8 state; /**< SCTP state as per sctp_state_t */
228 u16 flags; /**< Chunk flag (see sctp_chunks_common_hdr_t) */
230 u32 local_tag; /**< INIT_TAG generated locally */
231 u32 remote_tag; /**< INIT_TAG generated by the remote peer */
233 u32 local_initial_tsn; /**< Initial TSN generated locally */
234 u32 remote_initial_tsn; /**< Initial TSN generated by the remote-peer */
236 u32 peer_cookie_life_span_increment;
238 u32 overall_err_count; /**< The overall association error count. */
239 u32 overall_err_treshold; /**< The threshold for this association that if the Overall Error Count
240 reaches will cause this association to be torn down. */
242 u8 init_retransmit_err; /**< Error counter for the INIT transmission phase */
244 u32 peer_rwnd; /**< Current calculated value of the peer's rwnd. */
246 u32 next_tsn; /**< The next TSN number to be assigned to a new DATA chunk.
247 This is sent in the INIT or INIT ACK chunk to the peer
248 and incremented each time a DATA chunk is assigned a
249 TSN (normally just prior to transmit or during
252 u32 last_unacked_tsn; /** < Last TSN number still unacked */
253 u32 next_tsn_expected; /**< The next TSN number expected to be received. */
255 u32 last_rcvd_tsn; /**< This is the last TSN received in sequence. This value
256 is set initially by taking the peer's initial TSN,
257 received in the INIT or INIT ACK chunk, and
258 subtracting one from it. */
260 u32 out_of_order_map[MAX_INFLIGHT_PACKETS]; /**< An array of bits or bytes indicating which out-of-order
261 TSNs have been received (relative to the Last Rcvd TSN).
262 If no gaps exist, i.e., no out-of-order packets have been received,
263 this array will be set to all zero. */
265 u8 ack_state; /**< This flag indicates if the next received packet is set to be responded to with a SACK.
266 This is initialized to 0. When a packet is received it is incremented.
267 If this value reaches 2 or more, a SACK is sent and the value is reset to 0.
268 Note: This is used only when no DATA chunks are received out-of-order.
269 When DATA chunks are out-of-order, SACKs are not delayed (see Section 6). */
271 u8 smallest_PMTU_idx; /** The index of the sub-connection with the smallest PMTU discovered across all peer's transport addresses. */
273 u8 overall_sending_status; /**< 0 indicates first fragment of a user message
274 1 indicates normal stream
275 2 indicates last fragment of a user message */
277 u8 forming_association_changed; /**< This is a flag indicating whether the original association has been modified during
278 the life-span of the association itself. For instance, a new sub-connection might have been added. */
280 sctp_state_cookie_param_t cookie_param; /**< Temporary location to save cookie information; it can be used to
281 when timeout expires and sending again a COOKIE is require. */
285 typedef void (sctp_timer_expiration_handler) (u32 conn_index, u32 timer_id);
287 sctp_connection_t *sctp_connection_new (u8 thread_index);
290 sctp_sub_connection_add_ip4 (vlib_main_t * vm,
291 ip4_address_t * lcl_addr,
292 ip4_address_t * rmt_addr);
295 sctp_sub_connection_add_ip6 (vlib_main_t * vm,
296 ip6_address_t * lcl_addr,
297 ip6_address_t * rmt_addr);
300 sctp_sub_connection_del_ip4 (ip4_address_t * lcl_addr,
301 ip4_address_t * rmt_addr);
304 sctp_sub_connection_del_ip6 (ip6_address_t * lcl_addr,
305 ip6_address_t * rmt_addr);
307 u8 sctp_configure (sctp_user_configuration_t config);
309 void sctp_connection_close (sctp_connection_t * sctp_conn);
310 void sctp_connection_cleanup (sctp_connection_t * sctp_conn);
311 void sctp_connection_del (sctp_connection_t * sctp_conn);
313 u32 sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b);
314 void sctp_send_init (sctp_connection_t * sctp_conn);
315 void sctp_send_cookie_echo (sctp_connection_t * sctp_conn);
316 void sctp_send_shutdown (sctp_connection_t * sctp_conn);
317 void sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx,
319 void sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx,
321 void sctp_send_heartbeat (sctp_connection_t * sctp_conn);
322 void sctp_data_retransmit (sctp_connection_t * sctp_conn);
323 void sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index,
325 void sctp_flush_frames_to_output (u8 thread_index);
326 void sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
328 format_function_t format_sctp_state;
330 u8 *format_sctp_connection_id (u8 * s, va_list * args);
331 u8 *format_sctp_connection (u8 * s, va_list * args);
332 u8 *format_sctp_scoreboard (u8 * s, va_list * args);
333 u8 *format_sctp_header (u8 * s, va_list * args);
334 u8 *format_sctp_tx_trace (u8 * s, va_list * args);
335 unformat_function_t unformat_pg_sctp_header;
337 clib_error_t *sctp_init (vlib_main_t * vm);
338 void sctp_connection_timers_init (sctp_connection_t * sctp_conn);
339 void sctp_connection_timers_reset (sctp_connection_t * sctp_conn);
340 void sctp_init_snd_vars (sctp_connection_t * sctp_conn);
341 void sctp_init_mss (sctp_connection_t * sctp_conn);
343 void sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx,
344 vlib_buffer_t * b, ip4_address_t * ip4_addr,
345 u8 add_ip4, ip6_address_t * ip6_addr,
347 void sctp_prepare_initack_chunk_for_collision (sctp_connection_t * sctp_conn,
348 u8 idx, vlib_buffer_t * b,
349 ip4_address_t * ip4_addr,
350 ip6_address_t * ip6_addr);
351 void sctp_prepare_abort_for_collision (sctp_connection_t * sctp_conn, u8 idx,
353 ip4_address_t * ip4_addr,
354 ip6_address_t * ip6_addr);
355 void sctp_prepare_operation_error (sctp_connection_t * sctp_conn, u8 idx,
356 vlib_buffer_t * b, u8 err_cause);
357 void sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx,
358 vlib_buffer_t * b, u8 reuse_buffer);
359 void sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
361 void sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx,
363 void sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
366 u16 sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn);
368 void sctp_api_reference (void);
370 #define IP_PROTOCOL_SCTP 132
372 /** SCTP FSM state definitions as per RFC4960. */
373 #define foreach_sctp_fsm_state \
374 _(CLOSED, "CLOSED") \
375 _(COOKIE_WAIT, "COOKIE_WAIT") \
376 _(COOKIE_ECHOED, "COOKIE_ECHOED") \
377 _(ESTABLISHED, "ESTABLISHED") \
378 _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING") \
379 _(SHUTDOWN_SENT, "SHUTDOWN_SENT") \
380 _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED") \
381 _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT")
383 typedef enum _sctp_state
385 #define _(sym, str) SCTP_STATE_##sym,
386 foreach_sctp_fsm_state
392 sctp_state_to_string (u8 state)
396 case SCTP_STATE_CLOSED:
397 return "SCTP_STATE_CLOSED";
398 case SCTP_STATE_COOKIE_WAIT:
399 return "SCTP_STATE_COOKIE_WAIT";
400 case SCTP_STATE_COOKIE_ECHOED:
401 return "SCTP_STATE_COOKIE_ECHOED";
402 case SCTP_STATE_ESTABLISHED:
403 return "SCTP_STATE_ESTABLISHED";
404 case SCTP_STATE_SHUTDOWN_PENDING:
405 return "SCTP_STATE_SHUTDOWN_PENDING";
406 case SCTP_STATE_SHUTDOWN_SENT:
407 return "SCTP_STATE_SHUTDOWN_SENT";
408 case SCTP_STATE_SHUTDOWN_RECEIVED:
409 return "SCTP_STATE_SHUTDOWN_RECEIVED";
410 case SCTP_STATE_SHUTDOWN_ACK_SENT:
411 return "SCTP_STATE_SHUTDOWN_ACK_SENT";
417 sctp_chunk_to_string (u8 type)
432 return "HEARTBEAT_ACK";
438 return "SHUTDOWN_ACK";
439 case OPERATION_ERROR:
440 return "OPERATION_ERROR";
442 return "COOKIE_ECHO";
449 case SHUTDOWN_COMPLETE:
450 return "SHUTDOWN_COMPLETE";
456 sctp_optparam_type_to_string (u8 type)
460 case SCTP_IPV4_ADDRESS_TYPE:
461 return "SCTP_IPV4_ADDRESS_TYPE";
462 case SCTP_IPV6_ADDRESS_TYPE:
463 return "SCTP_IPV6_ADDRESS_TYPE";
464 case SCTP_STATE_COOKIE_TYPE:
465 return "SCTP_STATE_COOKIE_TYPE";
466 case SCTP_UNRECOGNIZED_TYPE:
467 return "SCTP_UNRECOGNIZED_TYPE";
468 case SCTP_COOKIE_PRESERVATIVE_TYPE:
469 return "SCTP_COOKIE_PRESERVATIVE_TYPE";
470 case SCTP_HOSTNAME_ADDRESS_TYPE:
471 return "SCTP_HOSTNAME_ADDRESS_TYPE";
472 case SCTP_SUPPORTED_ADDRESS_TYPES:
473 return "SCTP_SUPPORTED_ADDRESS_TYPES";
#define SCTP_TICK 0.001			/**< SCTP tick period (s) */
#define SHZ ((u32) (1 / SCTP_TICK))	/**< SCTP tick frequency (ticks per second) */
#define SCTP_TSTAMP_RESOLUTION SCTP_TICK	/**< Time stamp resolution */

/*
 * Protocol parameters as per RFC4960, page 83.
 *
 * Every expression-valued macro below is fully parenthesized so that it keeps
 * its value when it appears inside a larger expression (for example
 * `x / SCTP_RTO_INIT` or `~SCTP_CONN_RECOVERY`).
 */
#define SCTP_RTO_INIT (3 * SHZ)	/* 3 seconds */
#define SCTP_RTO_MIN (1 * SHZ)	/* 1 second */
#define SCTP_RTO_MAX (60 * SHZ)	/* 60 seconds */
#define SCTP_RTO_BURST 4
/*
 * RTO.Alpha and RTO.Beta are fractional gains. They are written as
 * floating-point quotients because the integer expressions 1/8 and 1/4 both
 * evaluate to 0 in C, which would disable the smoothing entirely.
 */
#define SCTP_RTO_ALPHA (1.0 / 8)
#define SCTP_RTO_BETA (1.0 / 4)
#define SCTP_VALID_COOKIE_LIFE (60 * SHZ)	/* 60 seconds */
#define SCTP_ASSOCIATION_MAX_RETRANS 10	/* the overall connection */
#define SCTP_PATH_MAX_RETRANS 5	/* number of attempts per destination address */
#define SCTP_MAX_INIT_RETRANS 8	/* number of attempts */
#define SCTP_HB_INTERVAL (30 * SHZ)	/* 30 seconds */
#define SCTP_HB_MAX_BURST 1
#define SCTP_DATA_IDLE_INTERVAL (15 * SHZ)	/* 15 seconds; the time-interval after which the connection is considered IDLE */
#define SCTP_TO_TIMER_TICK (SCTP_TICK * 10)	/* Period for converting from SCTP_TICK */

/* Connection flag bits. */
#define SCTP_CONN_RECOVERY (1 << 1)
#define SCTP_FAST_RECOVERY (1 << 2)
501 typedef struct _sctp_lookup_dispatch
504 } sctp_lookup_dispatch_t;
506 typedef struct _sctp_main
508 /* Per-worker thread SCTP connection pools */
509 sctp_connection_t **connections;
511 /* Pool of listeners. */
512 sctp_connection_t *listener_pool;
514 /** Dispatch table by state and flags */
515 sctp_lookup_dispatch_t dispatch_table[SCTP_N_STATES][64];
517 u8 log2_tstamp_clocks_per_tick;
518 f64 tstamp_ticks_per_clock;
521 /** per-worker tx buffer free lists */
523 /** per-worker tx frames to SCTP 4/6 output nodes */
524 vlib_frame_t **tx_frames[2];
525 /** per-worker tx frames to ip 4/6 lookup nodes */
526 vlib_frame_t **ip_lookup_tx_frames[2];
528 /* Per worker-thread timer wheel for connections timers */
529 tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
531 /* Pool of half-open connections on which we've sent an INIT */
532 sctp_connection_t *half_open_connections;
533 clib_spinlock_t half_open_lock;
535 /* TODO: Congestion control algorithms registered */
536 /* sctp_cc_algorithm_t *cc_algos; */
538 /* Flag that indicates if stack is on or off */
542 /** Number of preallocated connections */
543 u32 preallocated_connections;
545 /** Transport table (preallocation) size parameters */
546 u32 local_endpoints_table_memory;
547 u32 local_endpoints_table_buckets;
549 /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
550 ip4_address_t *ip4_src_addresses;
551 u32 last_v4_address_rotor;
552 u32 last_v6_address_rotor;
553 ip6_address_t *ip6_src_addresses;
555 /** vlib buffer size */
556 u32 bytes_per_buffer;
561 u32 sctp4_established_phase_node_index;
562 u32 sctp6_established_phase_node_index;
567 extern sctp_main_t sctp_main;
568 extern vlib_node_registration_t sctp4_input_node;
569 extern vlib_node_registration_t sctp6_input_node;
570 extern vlib_node_registration_t sctp4_output_node;
571 extern vlib_node_registration_t sctp6_output_node;
573 always_inline sctp_main_t *
574 vnet_get_sctp_main ()
579 always_inline sctp_header_t *
580 sctp_buffer_hdr (vlib_buffer_t * b)
582 ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
583 return (sctp_header_t *) (b->data + b->current_data
584 + sctp_buffer_opaque (b)->sctp.hdr_offset);
587 clib_error_t *vnet_sctp_enable_disable (vlib_main_t * vm, u8 is_en);
588 clib_error_t *sctp_plugin_api_hookup (vlib_main_t * vm);
590 always_inline sctp_connection_t *
591 sctp_half_open_connection_get (u32 conn_index)
593 sctp_connection_t *tc = 0;
594 clib_spinlock_lock_if_init (&sctp_main.half_open_lock);
595 if (!pool_is_free_index (sctp_main.half_open_connections, conn_index))
596 tc = pool_elt_at_index (sctp_main.half_open_connections, conn_index);
597 tc->sub_conn[SCTP_PRIMARY_PATH_IDX].subconn_idx = SCTP_PRIMARY_PATH_IDX;
598 clib_spinlock_unlock_if_init (&sctp_main.half_open_lock);
603 * Cleanup half-open connection
607 sctp_half_open_connection_del (sctp_connection_t * tc)
609 sctp_main_t *sctp_main = vnet_get_sctp_main ();
610 u32 index = tc->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index;
611 clib_spinlock_lock_if_init (&sctp_main->half_open_lock);
613 clib_memset (tc, 0xFA, sizeof (*tc));
614 pool_put_index (sctp_main->half_open_connections, index);
615 clib_spinlock_unlock_if_init (&sctp_main->half_open_lock);
619 sctp_set_time_now (u32 thread_index)
621 sctp_main.time_now[thread_index] = clib_cpu_time_now ()
622 * sctp_main.tstamp_ticks_per_clock;
623 return sctp_main.time_now[thread_index];
627 sctp_timer_set (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
630 ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
631 vlib_get_thread_index ());
632 ASSERT (tc->sub_conn[conn_idx].timers[timer_id] ==
633 SCTP_TIMER_HANDLE_INVALID);
635 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
636 sub->timers[timer_id] =
637 tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
638 sub->c_c_index, timer_id, interval);
642 sctp_timer_reset (sctp_connection_t * tc, u8 conn_idx, u8 timer_id)
644 ASSERT (tc->sub_conn[conn_idx].c_thread_index == vlib_get_thread_index ());
645 if (tc->sub_conn[conn_idx].timers[timer_id] == SCTP_TIMER_HANDLE_INVALID)
648 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
650 tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
651 sub->timers[timer_id]);
652 sub->timers[timer_id] = SCTP_TIMER_HANDLE_INVALID;
656 * Try to cleanup half-open connection
658 * If called from a thread that doesn't own tc, the call won't have any
661 * @param tc - connection to be cleaned up
662 * @return non-zero if cleanup failed.
665 sctp_half_open_connection_cleanup (sctp_connection_t * tc)
667 /* Make sure this is the owning thread */
668 if (tc->sub_conn[SCTP_PRIMARY_PATH_IDX].c_thread_index !=
669 vlib_get_thread_index ())
671 sctp_timer_reset (tc, SCTP_PRIMARY_PATH_IDX, SCTP_TIMER_T1_INIT);
672 sctp_half_open_connection_del (tc);
679 return sizeof (sctp_header_t);
682 always_inline sctp_connection_t *
683 sctp_get_connection_from_transport (transport_connection_t * tconn)
685 ASSERT (tconn != NULL);
687 sctp_sub_connection_t *sub = (sctp_sub_connection_t *) tconn;
690 SCTP_ADV_DBG ("sub == NULL");
691 if (sub->parent == NULL)
692 SCTP_ADV_DBG ("sub->parent == NULL");
694 if (sub->subconn_idx > 0)
695 return (sctp_connection_t *) sub -
696 (sizeof (sctp_sub_connection_t) * (sub->subconn_idx - 1));
698 return (sctp_connection_t *) sub;
704 return sctp_main.time_now[vlib_get_thread_index ()];
/*
 * Absolute value of x.
 *
 * The whole expansion is parenthesized and carries no trailing semicolon, so
 * it can be used as an operand (e.g. `c * ABS (a - b)`) without the
 * conditional operator swallowing the surrounding expression.
 * x is evaluated twice: do not pass expressions with side effects.
 * For unsigned operands the result is x itself, since an unsigned value is
 * never negative.
 */
#define ABS(x) (((x) > 0) ? (x) : -(x))
710 sctp_calculate_rto (sctp_connection_t * sctp_conn, u8 conn_idx)
712 /* See RFC4960, 6.3.1. RTO Calculation */
715 u64 now = sctp_time_now ();
716 u64 prev_ts = sctp_conn->sub_conn[conn_idx].rtt_ts;
717 u64 R = prev_ts - now;
719 if (sctp_conn->sub_conn[conn_idx].RTO == 0) // C1: Let's initialize our RTO
721 sctp_conn->sub_conn[conn_idx].RTO = SCTP_RTO_MIN;
725 if (sctp_conn->sub_conn[conn_idx].RTO == SCTP_RTO_MIN && sctp_conn->sub_conn[conn_idx].SRTT == 0) // C2: First RTT calculation
727 sctp_conn->sub_conn[conn_idx].SRTT = R;
731 RTTVAR = 100e-3; /* 100 ms */
733 sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
735 else // C3: RTT already exists; let's recalculate
737 RTTVAR = (1 - SCTP_RTO_BETA) * sctp_conn->sub_conn[conn_idx].RTTVAR +
738 SCTP_RTO_BETA * ABS (sctp_conn->sub_conn[conn_idx].SRTT - R);
741 RTTVAR = 100e-3; /* 100 ms */
743 sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
745 sctp_conn->sub_conn[conn_idx].SRTT =
746 (1 - SCTP_RTO_ALPHA) * sctp_conn->sub_conn[conn_idx].SRTT +
751 sctp_conn->sub_conn[conn_idx].SRTT +
752 4 * sctp_conn->sub_conn[conn_idx].RTTVAR;
753 if (RTO < SCTP_RTO_MIN) // C6
756 if (RTO > SCTP_RTO_MAX) // C7
759 sctp_conn->sub_conn[conn_idx].RTO = RTO;
763 sctp_timer_update (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
766 ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
767 vlib_get_thread_index ());
768 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
770 if (tc->sub_conn[conn_idx].timers[timer_id] != SCTP_TIMER_HANDLE_INVALID)
771 tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
772 sub->timers[timer_id]);
774 tc->sub_conn[conn_idx].timers[timer_id] =
775 tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
776 sub->c_c_index, timer_id, interval);
779 always_inline sctp_connection_t *
780 sctp_listener_get (u32 tli)
782 return pool_elt_at_index (sctp_main.listener_pool, tli);
787 always_inline sctp_connection_t *
788 sctp_connection_get (u32 conn_index, u32 thread_index)
791 (pool_is_free_index (sctp_main.connections[thread_index], conn_index)))
793 return pool_elt_at_index (sctp_main.connections[thread_index], conn_index);
796 #define SELECT_MAX_RETRIES 8
799 sctp_data_subconn_select (sctp_connection_t * sctp_conn)
801 u32 sub = SCTP_PRIMARY_PATH_IDX;
802 u8 i, cwnd = sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].cwnd;
803 for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
805 if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
808 if (sctp_conn->sub_conn[i].cwnd > cwnd)
811 cwnd = sctp_conn->sub_conn[i].cwnd;
818 sctp_sub_conn_id_via_ip6h (sctp_connection_t * sctp_conn, ip6_header_t * ip6h)
822 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
824 if (sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[0] ==
825 ip6h->dst_address.as_u64[0] &&
826 sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[1] ==
827 ip6h->dst_address.as_u64[1] &&
828 sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[0] ==
829 ip6h->src_address.as_u64[0] &&
830 sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[1] ==
831 ip6h->src_address.as_u64[1])
834 clib_warning ("Did not find a sub-connection; defaulting to %u",
835 SCTP_PRIMARY_PATH_IDX);
836 return SCTP_PRIMARY_PATH_IDX;
840 sctp_sub_conn_id_via_ip4h (sctp_connection_t * sctp_conn, ip4_header_t * ip4h)
844 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
846 if (sctp_conn->sub_conn[i].connection.lcl_ip.ip4.as_u32 ==
847 ip4h->dst_address.as_u32
848 && sctp_conn->sub_conn[i].connection.rmt_ip.ip4.as_u32 ==
849 ip4h->src_address.as_u32)
852 clib_warning ("Did not find a sub-connection; defaulting to %u",
853 SCTP_PRIMARY_PATH_IDX);
854 return SCTP_PRIMARY_PATH_IDX;
858 * Push SCTP header to buffer
860 * @param vm - vlib_main
861 * @param b - buffer to write the header to
862 * @param sp_net - source port net order
863 * @param dp_net - destination port net order
864 * @param sctp_hdr_opts_len - header and options length in bytes
866 * @return - pointer to start of SCTP header
869 vlib_buffer_push_sctp_net_order (vlib_buffer_t * b, u16 sp, u16 dp,
870 u8 sctp_hdr_opts_len)
872 sctp_full_hdr_t *full_hdr;
874 full_hdr = vlib_buffer_push_uninit (b, sctp_hdr_opts_len);
876 full_hdr->hdr.src_port = sp;
877 full_hdr->hdr.dst_port = dp;
878 full_hdr->hdr.checksum = 0;
883 * Push SCTP header to buffer
885 * @param b - buffer to write the header to
886 * @param sp_net - source port net order
887 * @param dp_net - destination port net order
888 * @param sctp_hdr_opts_len - header and options length in bytes
890 * @return - pointer to start of SCTP header
893 vlib_buffer_push_sctp (vlib_buffer_t * b, u16 sp_net, u16 dp_net,
894 u8 sctp_hdr_opts_len)
896 return vlib_buffer_push_sctp_net_order (b, sp_net, dp_net,
901 sctp_next_avail_subconn (sctp_connection_t * sctp_conn)
905 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
907 if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
910 return MAX_SCTP_CONNECTIONS;
914 update_smallest_pmtu_idx (sctp_connection_t * sctp_conn)
917 u8 smallest_pmtu_index = SCTP_PRIMARY_PATH_IDX;
919 for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
921 if (sctp_conn->sub_conn[i].state != SCTP_SUBCONN_STATE_DOWN)
923 if (sctp_conn->sub_conn[i].PMTU <
924 sctp_conn->sub_conn[smallest_pmtu_index].PMTU)
925 smallest_pmtu_index = i;
929 sctp_conn->smallest_PMTU_idx = smallest_pmtu_index;
932 /* As per RFC4960; section 7.2.1: Slow-Start */
934 sctp_init_cwnd (sctp_connection_t * sctp_conn)
937 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
939 /* Section 7.2.1; point (1) */
940 sctp_conn->sub_conn[i].cwnd =
941 clib_min (4 * sctp_conn->sub_conn[i].PMTU,
942 clib_max (2 * sctp_conn->sub_conn[i].PMTU, 4380));
944 /* Section 7.2.1; point (3) */
945 sctp_conn->sub_conn[i].ssthresh = SCTP_INITIAL_SSHTRESH;
947 /* Section 7.2.2; point (1) */
948 sctp_conn->sub_conn[i].partially_acked_bytes = 0;
953 sctp_in_cong_recovery (sctp_connection_t * sctp_conn, u8 idx)
959 cwnd_fully_utilized (sctp_connection_t * sctp_conn, u8 idx)
961 if (sctp_conn->sub_conn[idx].cwnd == 0)
966 /* As per RFC4960; section 7.2.1: Slow-Start */
968 update_cwnd (sctp_connection_t * sctp_conn)
971 u32 inflight = sctp_conn->next_tsn - sctp_conn->last_unacked_tsn;
973 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
975 /* Section 7.2.1; point (2) */
976 if (sctp_conn->sub_conn[i].is_retransmitting)
978 sctp_conn->sub_conn[i].cwnd = 1 * sctp_conn->sub_conn[i].PMTU;
982 /* Section 7.2.2; point (4) */
983 if (sctp_conn->sub_conn[i].last_data_ts >
984 sctp_time_now () + SCTP_DATA_IDLE_INTERVAL)
986 sctp_conn->sub_conn[i].cwnd =
987 clib_max (sctp_conn->sub_conn[i].cwnd / 2,
988 4 * sctp_conn->sub_conn[i].PMTU);
992 /* Section 7.2.1; point (5) */
993 if (sctp_conn->sub_conn[i].cwnd <= sctp_conn->sub_conn[i].ssthresh)
995 if (!cwnd_fully_utilized (sctp_conn, i))
998 if (sctp_in_cong_recovery (sctp_conn, i))
1001 sctp_conn->sub_conn[i].cwnd =
1002 clib_min (sctp_conn->sub_conn[i].PMTU, 1);
1005 /* Section 6.1; point (D) */
1006 if ((inflight + SCTP_RTO_BURST * sctp_conn->sub_conn[i].PMTU) <
1007 sctp_conn->sub_conn[i].cwnd)
1008 sctp_conn->sub_conn[i].cwnd =
1009 inflight + SCTP_RTO_BURST * sctp_conn->sub_conn[i].PMTU;
1014 * fd.io coding-style-patch-verification: ON
1017 * eval: (c-set-style "gnu")