X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fsctp%2Fsctp.h;h=5f19566d240a99f384445db80cc1cc8d8804ad52;hb=eb987d3a09f669787014b1553f032219522149e1;hp=af652dc06dc8fb092958cb3396c1251d7ddba72a;hpb=54432f8c0ac1f680198afa6047ce74bc4a126f21;p=vpp.git diff --git a/src/vnet/sctp/sctp.h b/src/vnet/sctp/sctp.h index af652dc06dc..5f19566d240 100644 --- a/src/vnet/sctp/sctp.h +++ b/src/vnet/sctp/sctp.h @@ -77,8 +77,8 @@ typedef enum _sctp_error #define IS_B_BIT_SET(var) ((var) & (1<<1)) #define IS_U_BIT_SET(var) ((var) & (1<<2)) -#define MAX_SCTP_CONNECTIONS 32 -#define MAIN_SCTP_SUB_CONN_IDX 0 +#define MAX_SCTP_CONNECTIONS 8 +#define SCTP_PRIMARY_PATH_IDX 0 #if (VLIB_BUFFER_TRACE_TRAJECTORY) #define sctp_trajectory_add_start(b, start) \ @@ -93,19 +93,24 @@ enum _sctp_subconn_state { SCTP_SUBCONN_STATE_DOWN = 0, SCTP_SUBCONN_STATE_UP, - SCTP_SUBCONN_STATE_ALLOW_HB + SCTP_SUBCONN_STATE_ALLOW_HB, + SCTP_SUBCONN_AWAITING_SACK, + SCTP_SUBCONN_SACK_RECEIVED }; +#define SCTP_INITIAL_SSHTRESH 65535 typedef struct _sctp_sub_connection { transport_connection_t connection; /**< Common transport data. First! */ - void *parent; /**< Link to the parent-super connection */ + u8 subconn_idx; /**< This indicates the position of this sub-connection in the super-set container of connections pool */ u32 error_count; /**< The current error count for this destination. */ u32 error_threshold; /**< Current error threshold for this destination, i.e. what value marks the destination down if error count reaches this value. */ - u32 cwnd; /**< The current congestion window. */ - u32 ssthresh; /**< The current ssthresh value. */ + u32 cwnd; /**< Congestion control window (cwnd, in bytes), which is adjusted by + the sender based on observed network conditions. */ + u32 ssthresh; /**< Slow-start threshold (in bytes), which is used by the + sender to distinguish slow-start and congestion avoidance phases. */ u32 rtt_ts; /**< USED to hold the timestamp of when the packet has been sent */ @@ -132,12 +137,14 @@ typedef struct _sctp_sub_connection u32 last_seen; /**< The time to which this destination was last sent a packet to. This can be used to determine if a HEARTBEAT is needed. */ + u32 last_data_ts; /**< Used to hold the timestamp value of last time we sent a DATA chunk */ + u8 unacknowledged_hb; /**< Used to track how many unacknowledged heartbeats we had; If more than SCTP_PATH_MAX_RETRANS then connection is considered unreachable. */ u8 is_retransmitting; /**< A flag (0 = no, 1 = yes) indicating whether the connection is retransmitting a previous packet */ - u8 enqueue_state; + u8 enqueue_state; /**< if set to 1 indicates that DATA is still being handled hence cannot shutdown this connection yet */ } sctp_sub_connection_t; @@ -176,9 +183,20 @@ _bytes_swap (void *pv, size_t n) */ #define SUGGESTED_COOKIE_LIFE_SPAN_INCREMENT 1000 +typedef struct _sctp_user_configuration +{ + u8 never_delay_sack; + u8 never_bundle; + +} sctp_user_configuration_t; + typedef struct _sctp_connection { + /** Required for pool_get_aligned */ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS]; /**< Common transport data. First! */ + sctp_user_configuration_t conn_config; /**< Allows tuning of some SCTP behaviors */ u8 state; /**< SCTP state as per sctp_state_t */ u16 flags; /**< Chunk flag (see sctp_chunks_common_hdr_t) */ @@ -195,6 +213,8 @@ typedef struct _sctp_connection u32 overall_err_treshold; /**< The threshold for this association that if the Overall Error Count reaches will cause this association to be torn down. */ + u8 init_retransmit_err; /**< Error counter for the INIT transmission phase */ + u32 peer_rwnd; /**< Current calculated value of the peer's rwnd. */ u32 next_tsn; /**< The next TSN number to be assigned to a new DATA chunk. @@ -203,6 +223,7 @@ typedef struct _sctp_connection TSN (normally just prior to transmit or during fragmentation). */ + u32 last_unacked_tsn; /** < Last TSN number still unacked */ u32 next_tsn_expected; /**< The next TSN number expected to be received. */ u32 last_rcvd_tsn; /**< This is the last TSN received in sequence. This value @@ -221,48 +242,58 @@ typedef struct _sctp_connection Note: This is used only when no DATA chunks are received out-of-order. When DATA chunks are out-of-order, SACKs are not delayed (see Section 6). */ - u32 a_rwnd; /** This value represents the dedicated buffer space, in number of bytes, - the sender of the INIT has reserved in association with this window. - During the life of the association, this buffer space SHOULD NOT be lessened - (i.e., dedicated buffers taken away from this association); - however, an endpoint MAY change the value of a_rwnd it sends in SACK chunks. */ - - u32 smallest_PMTU; /** The smallest PMTU discovered for all of the peer's transport addresses. */ - - u32 rcv_a_rwnd; /**< LOCAL max seg size that includes options. To be updated by congestion algos, etc. */ - u32 snd_a_rwnd; /**< REMOTE max seg size that includes options. To be updated if peer pushes back on window, etc.*/ + u8 smallest_PMTU_idx; /** The index of the sub-connection with the smallest PMTU discovered across all peer's transport addresses. */ u8 overall_sending_status; /**< 0 indicates first fragment of a user message 1 indicates normal stream 2 indicates last fragment of a user message */ - sctp_options_t rcv_opts; - sctp_options_t snd_opts; + u8 forming_association_changed; /**< This is a flag indicating whether the original association has been modified during + the life-span of the association itself. For instance, a new sub-connection might have been added. */ - u32 snd_hdr_length; /**< BASE HEADER LENGTH for the DATA chunk when sending */ - u8 next_avail_sub_conn; /**< Represent the index of the next free slot in sub_conn */ + sctp_state_cookie_param_t cookie_param; /**< Temporary location to save cookie information; it can be used to + when timeout expires and sending again a COOKIE is require. */ } sctp_connection_t; typedef void (sctp_timer_expiration_handler) (u32 conn_index, u32 timer_id); sctp_connection_t *sctp_connection_new (u8 thread_index); -void sctp_sub_connection_add_ip4 (u8 thread_index, - sctp_ipv4_addr_param_t * ipv4_addr); -void sctp_sub_connection_add_ip6 (u8 thread_index, - sctp_ipv6_addr_param_t * ipv6_addr); + +u8 +sctp_sub_connection_add_ip4 (vlib_main_t * vm, + ip4_address_t * lcl_addr, + ip4_address_t * rmt_addr); + +u8 +sctp_sub_connection_add_ip6 (vlib_main_t * vm, + ip6_address_t * lcl_addr, + ip6_address_t * rmt_addr); + +u8 +sctp_sub_connection_del_ip4 (ip4_address_t * lcl_addr, + ip4_address_t * rmt_addr); + +u8 +sctp_sub_connection_del_ip6 (ip6_address_t * lcl_addr, + ip6_address_t * rmt_addr); + +u8 sctp_configure (sctp_user_configuration_t config); + void sctp_connection_close (sctp_connection_t * sctp_conn); void sctp_connection_cleanup (sctp_connection_t * sctp_conn); void sctp_connection_del (sctp_connection_t * sctp_conn); u32 sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b); void sctp_send_init (sctp_connection_t * sctp_conn); +void sctp_send_cookie_echo (sctp_connection_t * sctp_conn); void sctp_send_shutdown (sctp_connection_t * sctp_conn); void sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx, vlib_buffer_t * b); void sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx, vlib_buffer_t * b0); void sctp_send_heartbeat (sctp_connection_t * sctp_conn); +void sctp_data_retransmit (sctp_connection_t * sctp_conn); void sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index, u8 is_ip4); void sctp_flush_frames_to_output (u8 thread_index); @@ -284,10 +315,20 @@ void sctp_init_mss (sctp_connection_t * sctp_conn); void sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx, vlib_buffer_t * b, ip4_address_t * ip4_addr, - ip6_address_t * ip6_addr); + u8 add_ip4, ip6_address_t * ip6_addr, + u8 add_ip6); +void sctp_prepare_initack_chunk_for_collision (sctp_connection_t * sctp_conn, + u8 idx, vlib_buffer_t * b, + ip4_address_t * ip4_addr, + ip6_address_t * ip6_addr); +void sctp_prepare_abort_for_collision (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, + ip4_address_t * ip4_addr, + ip6_address_t * ip6_addr); +void sctp_prepare_operation_error (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, u8 err_cause); void sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx, - vlib_buffer_t * b, - sctp_state_cookie_param_t * sc); + vlib_buffer_t * b, u8 reuse_buffer); void sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx, vlib_buffer_t * b); void sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx, @@ -297,6 +338,8 @@ void sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx, u16 sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn); +void sctp_api_reference (void); + #define IP_PROTOCOL_SCTP 132 /** SSCTP FSM state definitions as per RFC4960. */ @@ -413,7 +456,7 @@ sctp_optparam_type_to_string (u8 type) #define SCTP_RTO_INIT 3 * SHZ /* 3 seconds */ #define SCTP_RTO_MIN 1 * SHZ /* 1 second */ #define SCTP_RTO_MAX 60 * SHZ /* 60 seconds */ -#define SCTP_RTO_BURST 4 +#define SCTP_RTO_BURST 4 #define SCTP_RTO_ALPHA 1/8 #define SCTP_RTO_BETA 1/4 #define SCTP_VALID_COOKIE_LIFE 60 * SHZ /* 60 seconds */ @@ -422,9 +465,12 @@ sctp_optparam_type_to_string (u8 type) #define SCTP_MAX_INIT_RETRANS 8 // number of attempts #define SCTP_HB_INTERVAL 30 * SHZ #define SCTP_HB_MAX_BURST 1 - +#define SCTP_DATA_IDLE_INTERVAL 15 * SHZ /* 15 seconds; the time-interval after which the connetion is considered IDLE */ #define SCTP_TO_TIMER_TICK SCTP_TICK*10 /* Period for converting from SCTP_TICK */ +#define SCTP_CONN_RECOVERY 1 << 1 +#define SCTP_FAST_RECOVERY 1 << 2 + typedef struct _sctp_lookup_dispatch { u8 next, error; @@ -515,7 +561,7 @@ sctp_half_open_connection_get (u32 conn_index) clib_spinlock_lock_if_init (&sctp_main.half_open_lock); if (!pool_is_free_index (sctp_main.half_open_connections, conn_index)) tc = pool_elt_at_index (sctp_main.half_open_connections, conn_index); - tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = tc; + tc->sub_conn[SCTP_PRIMARY_PATH_IDX].subconn_idx = SCTP_PRIMARY_PATH_IDX; clib_spinlock_unlock_if_init (&sctp_main.half_open_lock); return tc; } @@ -527,13 +573,13 @@ sctp_half_open_connection_get (u32 conn_index) always_inline void sctp_half_open_connection_del (sctp_connection_t * tc) { - sctp_main_t *tm = vnet_get_sctp_main (); - clib_spinlock_lock_if_init (&tm->half_open_lock); - pool_put_index (tm->half_open_connections, - tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index); + sctp_main_t *sctp_main = vnet_get_sctp_main (); + clib_spinlock_lock_if_init (&sctp_main->half_open_lock); + pool_put_index (sctp_main->half_open_connections, + tc->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index); if (CLIB_DEBUG) memset (tc, 0xFA, sizeof (*tc)); - clib_spinlock_unlock_if_init (&tm->half_open_lock); + clib_spinlock_unlock_if_init (&sctp_main->half_open_lock); } always_inline u32 @@ -586,10 +632,10 @@ always_inline int sctp_half_open_connection_cleanup (sctp_connection_t * tc) { /* Make sure this is the owning thread */ - if (tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index != + if (tc->sub_conn[SCTP_PRIMARY_PATH_IDX].c_thread_index != vlib_get_thread_index ()) return 1; - sctp_timer_reset (tc, MAIN_SCTP_SUB_CONN_IDX, SCTP_TIMER_T1_INIT); + sctp_timer_reset (tc, SCTP_PRIMARY_PATH_IDX, SCTP_TIMER_T1_INIT); sctp_half_open_connection_del (tc); return 0; } @@ -612,7 +658,11 @@ sctp_get_connection_from_transport (transport_connection_t * tconn) if (sub->parent == NULL) SCTP_ADV_DBG ("sub->parent == NULL"); #endif - return (sctp_connection_t *) sub->parent; + if (sub->subconn_idx > 0) + return (sctp_connection_t *) sub - + (sizeof (sctp_sub_connection_t) * (sub->subconn_idx - 1)); + + return (sctp_connection_t *) sub; } always_inline u32 @@ -715,17 +765,18 @@ sctp_connection_get (u32 conn_index, u32 thread_index) always_inline u8 sctp_data_subconn_select (sctp_connection_t * sctp_conn) { - u8 i = 0; - u8 state = SCTP_SUBCONN_STATE_DOWN; - u32 sub = MAIN_SCTP_SUB_CONN_IDX; - u32 data_subconn_seed = random_default_seed (); - - while (state == SCTP_SUBCONN_STATE_DOWN && i < SELECT_MAX_RETRIES) + u32 sub = SCTP_PRIMARY_PATH_IDX; + u8 i, cwnd = sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].cwnd; + for (i = 1; i < MAX_SCTP_CONNECTIONS; i++) { - u32 sub = random_u32 (&data_subconn_seed) % MAX_SCTP_CONNECTIONS; - if (sctp_conn->sub_conn[sub].state == SCTP_SUBCONN_STATE_UP) - break; - i++; + if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN) + continue; + + if (sctp_conn->sub_conn[i].cwnd > cwnd) + { + sub = i; + cwnd = sctp_conn->sub_conn[i].cwnd; + } } return sub; } @@ -748,8 +799,8 @@ sctp_sub_conn_id_via_ip6h (sctp_connection_t * sctp_conn, ip6_header_t * ip6h) return i; } clib_warning ("Did not find a sub-connection; defaulting to %u", - MAIN_SCTP_SUB_CONN_IDX); - return MAIN_SCTP_SUB_CONN_IDX; + SCTP_PRIMARY_PATH_IDX); + return SCTP_PRIMARY_PATH_IDX; } always_inline u8 @@ -766,8 +817,8 @@ sctp_sub_conn_id_via_ip4h (sctp_connection_t * sctp_conn, ip4_header_t * ip4h) return i; } clib_warning ("Did not find a sub-connection; defaulting to %u", - MAIN_SCTP_SUB_CONN_IDX); - return MAIN_SCTP_SUB_CONN_IDX; + SCTP_PRIMARY_PATH_IDX); + return SCTP_PRIMARY_PATH_IDX; } /** @@ -813,6 +864,119 @@ vlib_buffer_push_sctp (vlib_buffer_t * b, u16 sp_net, u16 dp_net, sctp_hdr_opts_len); } +always_inline u8 +sctp_next_avail_subconn (sctp_connection_t * sctp_conn) +{ + u8 i; + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN) + return i; + } + return MAX_SCTP_CONNECTIONS; +} + +always_inline void +update_smallest_pmtu_idx (sctp_connection_t * sctp_conn) +{ + u8 i; + u8 smallest_pmtu_index = SCTP_PRIMARY_PATH_IDX; + + for (i = 1; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state != SCTP_SUBCONN_STATE_DOWN) + { + if (sctp_conn->sub_conn[i].PMTU < + sctp_conn->sub_conn[smallest_pmtu_index].PMTU) + smallest_pmtu_index = i; + } + } + + sctp_conn->smallest_PMTU_idx = smallest_pmtu_index; +} + +/* As per RFC4960; section 7.2.1: Slow-Start */ +always_inline void +sctp_init_cwnd (sctp_connection_t * sctp_conn) +{ + u8 i; + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + /* Section 7.2.1; point (1) */ + sctp_conn->sub_conn[i].cwnd = + clib_min (4 * sctp_conn->sub_conn[i].PMTU, + clib_max (2 * sctp_conn->sub_conn[i].PMTU, 4380)); + + /* Section 7.2.1; point (3) */ + sctp_conn->sub_conn[i].ssthresh = SCTP_INITIAL_SSHTRESH; + + /* Section 7.2.2; point (1) */ + sctp_conn->sub_conn[i].partially_acked_bytes = 0; + } +} + +always_inline u8 +sctp_in_cong_recovery (sctp_connection_t * sctp_conn, u8 idx) +{ + return 0; +} + +always_inline u8 +cwnd_fully_utilized (sctp_connection_t * sctp_conn, u8 idx) +{ + if (sctp_conn->sub_conn[idx].cwnd == 0) + return 1; + return 0; +} + +/* As per RFC4960; section 7.2.1: Slow-Start */ +always_inline void +update_cwnd (sctp_connection_t * sctp_conn) +{ + u8 i; + u32 inflight = sctp_conn->next_tsn - sctp_conn->last_unacked_tsn; + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + /* Section 7.2.1; point (2) */ + if (sctp_conn->sub_conn[i].is_retransmitting) + { + sctp_conn->sub_conn[i].cwnd = 1 * sctp_conn->sub_conn[i].PMTU; + continue; + } + + /* Section 7.2.2; point (4) */ + if (sctp_conn->sub_conn[i].last_data_ts > + sctp_time_now () + SCTP_DATA_IDLE_INTERVAL) + { + sctp_conn->sub_conn[i].cwnd = + clib_max (sctp_conn->sub_conn[i].cwnd / 2, + 4 * sctp_conn->sub_conn[i].PMTU); + continue; + } + + /* Section 7.2.1; point (5) */ + if (sctp_conn->sub_conn[i].cwnd <= sctp_conn->sub_conn[i].ssthresh) + { + if (!cwnd_fully_utilized (sctp_conn, i)) + continue; + + if (sctp_in_cong_recovery (sctp_conn, i)) + continue; + + sctp_conn->sub_conn[i].cwnd = + clib_min (sctp_conn->sub_conn[i].PMTU, 1); + } + + /* Section 6.1; point (D) */ + if ((inflight + SCTP_RTO_BURST * sctp_conn->sub_conn[i].PMTU) < + sctp_conn->sub_conn[i].cwnd) + sctp_conn->sub_conn[i].cwnd = + inflight + SCTP_RTO_BURST * sctp_conn->sub_conn[i].PMTU; + } +} + /* * fd.io coding-style-patch-verification: ON *