X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fsctp%2Fsctp_output.c;h=aa0bb41e504a23d4abb7f17fba9dd09bc8380c73;hb=8797168fe2f4fd32d241126181ad0d06c62c4eb4;hp=841444e20a366c31417af3c2b274170c82221e0d;hpb=191a59401c0552e5ea79041f34456eb9fcc1f311;p=vpp.git diff --git a/src/vnet/sctp/sctp_output.c b/src/vnet/sctp/sctp_output.c index 841444e20a3..aa0bb41e504 100644 --- a/src/vnet/sctp/sctp_output.c +++ b/src/vnet/sctp/sctp_output.c @@ -15,6 +15,7 @@ #include #include #include +#include vlib_node_registration_t sctp4_output_node; vlib_node_registration_t sctp6_output_node; @@ -224,16 +225,17 @@ ip6_sctp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, } void -sctp_push_ip_hdr (sctp_main_t * tm, sctp_sub_connection_t * tc, +sctp_push_ip_hdr (sctp_main_t * tm, sctp_sub_connection_t * sctp_sub_conn, vlib_buffer_t * b) { sctp_header_t *th = vlib_buffer_get_current (b); vlib_main_t *vm = vlib_get_main (); - if (tc->c_is_ip4) + if (sctp_sub_conn->c_is_ip4) { ip4_header_t *ih; - ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4, - &tc->c_rmt_ip4, IP_PROTOCOL_SCTP, 1); + ih = vlib_buffer_push_ip4 (vm, b, &sctp_sub_conn->c_lcl_ip4, + &sctp_sub_conn->c_rmt_ip4, IP_PROTOCOL_SCTP, + 1); th->checksum = ip4_sctp_compute_checksum (vm, b, ih); } else @@ -241,8 +243,8 @@ sctp_push_ip_hdr (sctp_main_t * tm, sctp_sub_connection_t * tc, ip6_header_t *ih; int bogus = ~0; - ih = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip6, - &tc->c_rmt_ip6, IP_PROTOCOL_SCTP); + ih = vlib_buffer_push_ip6 (vm, b, &sctp_sub_conn->c_lcl_ip6, + &sctp_sub_conn->c_rmt_ip6, IP_PROTOCOL_SCTP); th->checksum = ip6_sctp_compute_checksum (vm, b, ih, &bogus); ASSERT (!bogus); } @@ -259,6 +261,7 @@ sctp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) b->current_length = 0; b->total_length_not_including_first_buffer = 0; vnet_buffer (b)->sctp.flags = 0; + vnet_buffer (b)->sctp.subconn_idx = MAX_SCTP_CONNECTIONS; /* Leave enough space for headers */ return vlib_buffer_make_headroom (b, MAX_HDRS_LEN); @@ -268,10 +271,11 @@ always_inline void * sctp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b) { ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - b->flags &= VLIB_BUFFER_FREE_LIST_INDEX_MASK; + b->flags &= VLIB_BUFFER_NON_DEFAULT_FREELIST; b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; b->total_length_not_including_first_buffer = 0; vnet_buffer (b)->sctp.flags = 0; + vnet_buffer (b)->sctp.subconn_idx = MAX_SCTP_CONNECTIONS; VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); /* Leave enough space for headers */ return vlib_buffer_make_headroom (b, MAX_HDRS_LEN); @@ -358,7 +362,7 @@ sctp_enqueue_to_output_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, always_inline void sctp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, - u8 is_ip4, u8 flush) + u8 is_ip4, u32 fib_index, u8 flush) { sctp_main_t *tm = vnet_get_sctp_main (); u32 thread_index = vlib_get_thread_index (); @@ -368,8 +372,8 @@ sctp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; b->error = 0; - /* Default FIB for now */ - vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; + vnet_buffer (b)->sw_if_index[VLIB_TX] = fib_index; + vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; /* Send to IP lookup */ next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index; @@ -399,28 +403,23 @@ sctp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, always_inline void sctp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, - u8 is_ip4) + u8 is_ip4, u32 fib_index) { - sctp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 0); -} - -always_inline void -sctp_enqueue_to_ip_lookup_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, - u8 is_ip4) -{ - sctp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 1); + sctp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, fib_index, 0); + if (vm->thread_index == 0 && vlib_num_workers ()) + session_flush_frames_main_thread (vm); } /** * Convert buffer to INIT */ void -sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b) +sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) { u32 random_seed = random_default_seed (); u16 alloc_bytes = sizeof (sctp_init_chunk_t); - sctp_sub_connection_t *sub_conn = - &sctp_conn->sub_conn[sctp_pick_conn_idx_on_chunk (INIT)]; + sctp_sub_connection_t *sub_conn = &sctp_conn->sub_conn[idx]; sctp_ipv4_addr_param_t *ip4_param = 0; sctp_ipv6_addr_param_t *ip6_param = 0; @@ -466,16 +465,24 @@ sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b) vnet_sctp_set_chunk_length (&init_chunk->chunk_hdr, chunk_len); vnet_sctp_common_hdr_params_host_to_net (&init_chunk->chunk_hdr); - init_chunk->a_rwnd = clib_host_to_net_u32 (DEFAULT_A_RWND); + sctp_init_cwnd (sctp_conn); + + init_chunk->a_rwnd = clib_host_to_net_u32 (sctp_conn->sub_conn[idx].cwnd); init_chunk->initiate_tag = clib_host_to_net_u32 (random_u32 (&random_seed)); init_chunk->inboud_streams_count = clib_host_to_net_u16 (INBOUND_STREAMS_COUNT); init_chunk->outbound_streams_count = clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT); + init_chunk->initial_tsn = + clib_host_to_net_u32 (sctp_conn->local_initial_tsn); + SCTP_CONN_TRACKING_DBG ("sctp_conn->local_initial_tsn = %u", + sctp_conn->local_initial_tsn); + sctp_conn->local_tag = init_chunk->initiate_tag; vnet_buffer (b)->sctp.connection_index = sub_conn->c_c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; SCTP_DBG_STATE_MACHINE ("CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), " "CHUNK_TYPE = %s, " @@ -488,17 +495,40 @@ sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b) init_chunk->sctp_hdr.dst_port); } -u64 -sctp_compute_mac () +void +sctp_compute_mac (sctp_connection_t * sctp_conn, + sctp_state_cookie_param_t * state_cookie) { - return 0x0; +#if OPENSSL_VERSION_NUMBER >= 0x10100000L + HMAC_CTX *ctx; +#else + HMAC_CTX ctx; +#endif + unsigned int len = 0; + const EVP_MD *md = EVP_sha1 (); +#if OPENSSL_VERSION_NUMBER >= 0x10100000L + ctx = HMAC_CTX_new (); + HMAC_Init_ex (ctx, &state_cookie->creation_time, + sizeof (state_cookie->creation_time), md, NULL); + HMAC_Update (ctx, (const unsigned char *) &sctp_conn, sizeof (sctp_conn)); + HMAC_Final (ctx, state_cookie->mac, &len); +#else + HMAC_CTX_init (&ctx); + HMAC_Init_ex (&ctx, &state_cookie->creation_time, + sizeof (state_cookie->creation_time), md, NULL); + HMAC_Update (&ctx, (const unsigned char *) &sctp_conn, sizeof (sctp_conn)); + HMAC_Final (&ctx, state_cookie->mac, &len); + HMAC_CTX_cleanup (&ctx); +#endif + + ENDIANESS_SWAP (state_cookie->mac); } void -sctp_prepare_cookie_ack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) +sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) { vlib_main_t *vm = vlib_get_main (); - u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ACK); sctp_reuse_buffer (vm, b); @@ -515,24 +545,27 @@ sctp_prepare_cookie_ack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) vlib_buffer_push_uninit (b, alloc_bytes); cookie_ack_chunk->sctp_hdr.checksum = 0; - cookie_ack_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; - cookie_ack_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; - cookie_ack_chunk->sctp_hdr.verification_tag = tc->remote_tag; + cookie_ack_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + cookie_ack_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + cookie_ack_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; vnet_sctp_set_chunk_type (&cookie_ack_chunk->chunk_hdr, COOKIE_ACK); vnet_sctp_set_chunk_length (&cookie_ack_chunk->chunk_hdr, chunk_len); vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; } void -sctp_prepare_cookie_echo_chunk (sctp_connection_t * tc, vlib_buffer_t * b, - sctp_state_cookie_param_t * sc) +sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, u8 reuse_buffer) { vlib_main_t *vm = vlib_get_main (); - u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ECHO); - sctp_reuse_buffer (vm, b); + if (reuse_buffer) + sctp_reuse_buffer (vm, b); /* The minimum size of the message is given by the sctp_init_ack_chunk_t */ u16 alloc_bytes = sizeof (sctp_cookie_echo_chunk_t); @@ -545,31 +578,164 @@ sctp_prepare_cookie_echo_chunk (sctp_connection_t * tc, vlib_buffer_t * b, vlib_buffer_push_uninit (b, alloc_bytes); cookie_echo_chunk->sctp_hdr.checksum = 0; cookie_echo_chunk->sctp_hdr.src_port = - tc->sub_conn[idx].connection.lcl_port; + sctp_conn->sub_conn[idx].connection.lcl_port; cookie_echo_chunk->sctp_hdr.dst_port = - tc->sub_conn[idx].connection.rmt_port; - cookie_echo_chunk->sctp_hdr.verification_tag = tc->remote_tag; + sctp_conn->sub_conn[idx].connection.rmt_port; + cookie_echo_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; vnet_sctp_set_chunk_type (&cookie_echo_chunk->chunk_hdr, COOKIE_ECHO); vnet_sctp_set_chunk_length (&cookie_echo_chunk->chunk_hdr, chunk_len); - clib_memcpy (&(cookie_echo_chunk->cookie), sc, + clib_memcpy (&(cookie_echo_chunk->cookie), &sctp_conn->cookie_param, sizeof (sctp_state_cookie_param_t)); + + vnet_buffer (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; +} + + +/* + * Send COOKIE_ECHO + */ +void +sctp_send_cookie_echo (sctp_connection_t * sctp_conn) +{ + vlib_buffer_t *b; + u32 bi; + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + + if (PREDICT_FALSE (sctp_conn->init_retransmit_err > SCTP_MAX_INIT_RETRANS)) + { + clib_warning ("Reached MAX_INIT_RETRANS times. Aborting connection."); + + session_stream_connect_notify (&sctp_conn->sub_conn + [SCTP_PRIMARY_PATH_IDX].connection, 1); + + sctp_connection_timers_reset (sctp_conn); + + sctp_connection_cleanup (sctp_conn); + } + + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return; + + b = vlib_get_buffer (vm, bi); + u8 idx = SCTP_PRIMARY_PATH_IDX; + + sctp_init_buffer (vm, b); + sctp_prepare_cookie_echo_chunk (sctp_conn, idx, b, 0); + sctp_enqueue_to_output_now (vm, b, bi, sctp_conn->sub_conn[idx].c_is_ip4); + + /* Start the T1_INIT timer */ + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T1_INIT, + sctp_conn->sub_conn[idx].RTO); + + /* Change state to COOKIE_WAIT */ + sctp_conn->state = SCTP_STATE_COOKIE_WAIT; + + /* Measure RTT with this */ + sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now (); +} + + +/** + * Convert buffer to ERROR + */ +void +sctp_prepare_operation_error (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, u8 err_cause) +{ + vlib_main_t *vm = vlib_get_main (); + + sctp_reuse_buffer (vm, b); + + /* The minimum size of the message is given by the sctp_operation_error_t */ + u16 alloc_bytes = + sizeof (sctp_operation_error_t) + sizeof (sctp_err_cause_param_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_operation_error_t *err_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + + /* src_port & dst_port are already in network byte-order */ + err_chunk->sctp_hdr.checksum = 0; + err_chunk->sctp_hdr.src_port = sctp_conn->sub_conn[idx].connection.lcl_port; + err_chunk->sctp_hdr.dst_port = sctp_conn->sub_conn[idx].connection.rmt_port; + /* As per RFC4960 Section 5.2.2: copy the INITIATE_TAG into the VERIFICATION_TAG of the ABORT chunk */ + err_chunk->sctp_hdr.verification_tag = sctp_conn->local_tag; + + err_chunk->err_causes[0].param_hdr.length = + clib_host_to_net_u16 (sizeof (err_chunk->err_causes[0].param_hdr.type) + + sizeof (err_chunk->err_causes[0].param_hdr.length)); + err_chunk->err_causes[0].param_hdr.type = clib_host_to_net_u16 (err_cause); + + vnet_sctp_set_chunk_type (&err_chunk->chunk_hdr, OPERATION_ERROR); + vnet_sctp_set_chunk_length (&err_chunk->chunk_hdr, chunk_len); + + vnet_buffer (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; +} + +/** + * Convert buffer to ABORT + */ +void +sctp_prepare_abort_for_collision (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, ip4_address_t * ip4_addr, + ip6_address_t * ip6_addr) +{ + vlib_main_t *vm = vlib_get_main (); + + sctp_reuse_buffer (vm, b); + + /* The minimum size of the message is given by the sctp_abort_chunk_t */ + u16 alloc_bytes = sizeof (sctp_abort_chunk_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_abort_chunk_t *abort_chunk = vlib_buffer_push_uninit (b, alloc_bytes); + + /* src_port & dst_port are already in network byte-order */ + abort_chunk->sctp_hdr.checksum = 0; + abort_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + abort_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + /* As per RFC4960 Section 5.2.2: copy the INITIATE_TAG into the VERIFICATION_TAG of the ABORT chunk */ + abort_chunk->sctp_hdr.verification_tag = sctp_conn->local_tag; + + vnet_sctp_set_chunk_type (&abort_chunk->chunk_hdr, ABORT); + vnet_sctp_set_chunk_length (&abort_chunk->chunk_hdr, chunk_len); + vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; } /** * Convert buffer to INIT-ACK */ void -sctp_prepare_initack_chunk (sctp_connection_t * tc, vlib_buffer_t * b, - ip4_address_t * ip4_addr, - ip6_address_t * ip6_addr) +sctp_prepare_initack_chunk_for_collision (sctp_connection_t * sctp_conn, + u8 idx, vlib_buffer_t * b, + ip4_address_t * ip4_addr, + ip6_address_t * ip6_addr) { vlib_main_t *vm = vlib_get_main (); sctp_ipv4_addr_param_t *ip4_param = 0; sctp_ipv6_addr_param_t *ip6_param = 0; - u8 idx = sctp_pick_conn_idx_on_chunk (INIT_ACK); - u32 random_seed = random_default_seed (); sctp_reuse_buffer (vm, b); @@ -588,7 +754,147 @@ sctp_prepare_initack_chunk (sctp_connection_t * tc, vlib_buffer_t * b, alloc_bytes += SCTP_IPV6_ADDRESS_TYPE_LENGTH; } - if (tc->sub_conn[idx].connection.is_ip4) + if (sctp_conn->sub_conn[idx].connection.is_ip4) + alloc_bytes += sizeof (sctp_ipv4_addr_param_t); + else + alloc_bytes += sizeof (sctp_ipv6_addr_param_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_init_ack_chunk_t *init_ack_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + + u16 pointer_offset = sizeof (sctp_init_ack_chunk_t); + + /* Create State Cookie parameter */ + sctp_state_cookie_param_t *state_cookie_param = + (sctp_state_cookie_param_t *) ((char *) init_ack_chunk + pointer_offset); + + state_cookie_param->param_hdr.type = + clib_host_to_net_u16 (SCTP_STATE_COOKIE_TYPE); + state_cookie_param->param_hdr.length = + clib_host_to_net_u16 (sizeof (sctp_state_cookie_param_t)); + state_cookie_param->creation_time = clib_host_to_net_u64 (sctp_time_now ()); + state_cookie_param->cookie_lifespan = + clib_host_to_net_u32 (SCTP_VALID_COOKIE_LIFE); + + sctp_compute_mac (sctp_conn, state_cookie_param); + + pointer_offset += sizeof (sctp_state_cookie_param_t); + + if (PREDICT_TRUE (ip4_addr != NULL)) + { + sctp_ipv4_addr_param_t *ipv4_addr = + (sctp_ipv4_addr_param_t *) init_ack_chunk + pointer_offset; + + ipv4_addr->param_hdr.type = + clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE); + ipv4_addr->param_hdr.length = + clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE_LENGTH); + ipv4_addr->address.as_u32 = ip4_addr->as_u32; + + pointer_offset += SCTP_IPV4_ADDRESS_TYPE_LENGTH; + } + if (PREDICT_TRUE (ip6_addr != NULL)) + { + sctp_ipv6_addr_param_t *ipv6_addr = + (sctp_ipv6_addr_param_t *) init_ack_chunk + pointer_offset; + + ipv6_addr->param_hdr.type = + clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE); + ipv6_addr->param_hdr.length = + clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE_LENGTH); + ipv6_addr->address.as_u64[0] = ip6_addr->as_u64[0]; + ipv6_addr->address.as_u64[1] = ip6_addr->as_u64[1]; + + pointer_offset += SCTP_IPV6_ADDRESS_TYPE_LENGTH; + } + + if (sctp_conn->sub_conn[idx].connection.is_ip4) + { + ip4_param = (sctp_ipv4_addr_param_t *) init_ack_chunk + pointer_offset; + ip4_param->address.as_u32 = + sctp_conn->sub_conn[idx].connection.lcl_ip.ip4.as_u32; + + pointer_offset += sizeof (sctp_ipv4_addr_param_t); + } + else + { + ip6_param = (sctp_ipv6_addr_param_t *) init_ack_chunk + pointer_offset; + ip6_param->address.as_u64[0] = + sctp_conn->sub_conn[idx].connection.lcl_ip.ip6.as_u64[0]; + ip6_param->address.as_u64[1] = + sctp_conn->sub_conn[idx].connection.lcl_ip.ip6.as_u64[1]; + + pointer_offset += sizeof (sctp_ipv6_addr_param_t); + } + + /* src_port & dst_port are already in network byte-order */ + init_ack_chunk->sctp_hdr.checksum = 0; + init_ack_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + init_ack_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + /* the sctp_conn->verification_tag is already in network byte-order (being a copy of the init_tag coming with the INIT chunk) */ + init_ack_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; + init_ack_chunk->initial_tsn = + clib_host_to_net_u32 (sctp_conn->local_initial_tsn); + SCTP_CONN_TRACKING_DBG ("init_ack_chunk->initial_tsn = %u", + init_ack_chunk->initial_tsn); + + vnet_sctp_set_chunk_type (&init_ack_chunk->chunk_hdr, INIT_ACK); + vnet_sctp_set_chunk_length (&init_ack_chunk->chunk_hdr, chunk_len); + + init_ack_chunk->initiate_tag = sctp_conn->local_tag; + + init_ack_chunk->a_rwnd = + clib_host_to_net_u32 (sctp_conn->sub_conn[idx].cwnd); + init_ack_chunk->inboud_streams_count = + clib_host_to_net_u16 (INBOUND_STREAMS_COUNT); + init_ack_chunk->outbound_streams_count = + clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT); + + vnet_buffer (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; +} + +/** + * Convert buffer to INIT-ACK + */ +void +sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, ip4_address_t * ip4_addr, + u8 add_ip4, ip6_address_t * ip6_addr, u8 add_ip6) +{ + vlib_main_t *vm = vlib_get_main (); + sctp_ipv4_addr_param_t *ip4_param = 0; + sctp_ipv6_addr_param_t *ip6_param = 0; + u32 random_seed = random_default_seed (); + + sctp_reuse_buffer (vm, b); + + /* The minimum size of the message is given by the sctp_init_ack_chunk_t */ + u16 alloc_bytes = + sizeof (sctp_init_ack_chunk_t) + sizeof (sctp_state_cookie_param_t); + + if (PREDICT_FALSE (add_ip4 == 1)) + { + /* Create room for variable-length fields in the INIT_ACK chunk */ + alloc_bytes += SCTP_IPV4_ADDRESS_TYPE_LENGTH; + } + if (PREDICT_FALSE (add_ip6 == 1)) + { + /* Create room for variable-length fields in the INIT_ACK chunk */ + alloc_bytes += SCTP_IPV6_ADDRESS_TYPE_LENGTH; + } + + if (sctp_conn->sub_conn[idx].connection.is_ip4) alloc_bytes += sizeof (sctp_ipv4_addr_param_t); else alloc_bytes += sizeof (sctp_ipv6_addr_param_t); @@ -613,10 +919,11 @@ sctp_prepare_initack_chunk (sctp_connection_t * tc, vlib_buffer_t * b, clib_host_to_net_u16 (SCTP_STATE_COOKIE_TYPE); state_cookie_param->param_hdr.length = clib_host_to_net_u16 (sizeof (sctp_state_cookie_param_t)); - state_cookie_param->creation_time = clib_host_to_net_u32 (sctp_time_now ()); + state_cookie_param->creation_time = clib_host_to_net_u64 (sctp_time_now ()); state_cookie_param->cookie_lifespan = clib_host_to_net_u32 (SCTP_VALID_COOKIE_LIFE); - state_cookie_param->mac = clib_host_to_net_u64 (sctp_compute_mac ()); + + sctp_compute_mac (sctp_conn, state_cookie_param); pointer_offset += sizeof (sctp_state_cookie_param_t); @@ -636,8 +943,7 @@ sctp_prepare_initack_chunk (sctp_connection_t * tc, vlib_buffer_t * b, if (PREDICT_TRUE (ip6_addr != NULL)) { sctp_ipv6_addr_param_t *ipv6_addr = - (sctp_ipv6_addr_param_t *) init_ack_chunk + - sizeof (sctp_init_chunk_t) + pointer_offset; + (sctp_ipv6_addr_param_t *) init_ack_chunk + pointer_offset; ipv6_addr->param_hdr.type = clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE); @@ -649,11 +955,11 @@ sctp_prepare_initack_chunk (sctp_connection_t * tc, vlib_buffer_t * b, pointer_offset += SCTP_IPV6_ADDRESS_TYPE_LENGTH; } - if (tc->sub_conn[idx].connection.is_ip4) + if (sctp_conn->sub_conn[idx].connection.is_ip4) { ip4_param = (sctp_ipv4_addr_param_t *) init_ack_chunk + pointer_offset; ip4_param->address.as_u32 = - tc->sub_conn[idx].connection.lcl_ip.ip4.as_u32; + sctp_conn->sub_conn[idx].connection.lcl_ip.ip4.as_u32; pointer_offset += sizeof (sctp_ipv4_addr_param_t); } @@ -661,51 +967,55 @@ sctp_prepare_initack_chunk (sctp_connection_t * tc, vlib_buffer_t * b, { ip6_param = (sctp_ipv6_addr_param_t *) init_ack_chunk + pointer_offset; ip6_param->address.as_u64[0] = - tc->sub_conn[idx].connection.lcl_ip.ip6.as_u64[0]; + sctp_conn->sub_conn[idx].connection.lcl_ip.ip6.as_u64[0]; ip6_param->address.as_u64[1] = - tc->sub_conn[idx].connection.lcl_ip.ip6.as_u64[1]; + sctp_conn->sub_conn[idx].connection.lcl_ip.ip6.as_u64[1]; pointer_offset += sizeof (sctp_ipv6_addr_param_t); } /* src_port & dst_port are already in network byte-order */ init_ack_chunk->sctp_hdr.checksum = 0; - init_ack_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; - init_ack_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; - /* the tc->verification_tag is already in network byte-order (being a copy of the init_tag coming with the INIT chunk) */ - init_ack_chunk->sctp_hdr.verification_tag = tc->remote_tag; + init_ack_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + init_ack_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + /* the sctp_conn->verification_tag is already in network byte-order (being a copy of the init_tag coming with the INIT chunk) */ + init_ack_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; + init_ack_chunk->initial_tsn = + clib_host_to_net_u32 (sctp_conn->local_initial_tsn); + SCTP_CONN_TRACKING_DBG ("init_ack_chunk->initial_tsn = %u", + init_ack_chunk->initial_tsn); vnet_sctp_set_chunk_type (&init_ack_chunk->chunk_hdr, INIT_ACK); vnet_sctp_set_chunk_length (&init_ack_chunk->chunk_hdr, chunk_len); init_ack_chunk->initiate_tag = clib_host_to_net_u32 (random_u32 (&random_seed)); - /* As per RFC 4960, the initial_tsn may be the same value as the initiate_tag */ - init_ack_chunk->initial_tsn = init_ack_chunk->initiate_tag; - init_ack_chunk->a_rwnd = clib_host_to_net_u32 (DEFAULT_A_RWND); + + init_ack_chunk->a_rwnd = + clib_host_to_net_u32 (sctp_conn->sub_conn[idx].cwnd); init_ack_chunk->inboud_streams_count = clib_host_to_net_u16 (INBOUND_STREAMS_COUNT); init_ack_chunk->outbound_streams_count = clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT); - tc->local_tag = init_ack_chunk->initiate_tag; + sctp_conn->local_tag = init_ack_chunk->initiate_tag; vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; } /** * Convert buffer to SHUTDOWN */ void -sctp_prepare_shutdown_chunk (sctp_connection_t * tc, vlib_buffer_t * b) +sctp_prepare_shutdown_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) { - vlib_main_t *vm = vlib_get_main (); - u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN); u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t); - b = sctp_reuse_buffer (vm, b); - /* As per RFC 4960 the chunk_length value does NOT contemplate * the size of the first header (see sctp_header_t) and any padding */ @@ -718,51 +1028,55 @@ sctp_prepare_shutdown_chunk (sctp_connection_t * tc, vlib_buffer_t * b) shutdown_chunk->sctp_hdr.checksum = 0; /* No need of host_to_net conversion, already in net-byte order */ - shutdown_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; - shutdown_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; - shutdown_chunk->sctp_hdr.verification_tag = tc->remote_tag; + shutdown_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + shutdown_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + shutdown_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; vnet_sctp_set_chunk_type (&shutdown_chunk->chunk_hdr, SHUTDOWN); vnet_sctp_set_chunk_length (&shutdown_chunk->chunk_hdr, chunk_len); - shutdown_chunk->cumulative_tsn_ack = tc->rcv_las; + shutdown_chunk->cumulative_tsn_ack = sctp_conn->last_rcvd_tsn; vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; } /* * Send SHUTDOWN */ void -sctp_send_shutdown (sctp_connection_t * tc) +sctp_send_shutdown (sctp_connection_t * sctp_conn) { vlib_buffer_t *b; u32 bi; sctp_main_t *tm = vnet_get_sctp_main (); vlib_main_t *vm = vlib_get_main (); - if (sctp_check_outstanding_data_chunks (tc) > 0) + if (sctp_check_outstanding_data_chunks (sctp_conn) > 0) return; if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) return; + u8 idx = SCTP_PRIMARY_PATH_IDX; + b = vlib_get_buffer (vm, bi); sctp_init_buffer (vm, b); - sctp_prepare_shutdown_chunk (tc, b); + sctp_prepare_shutdown_chunk (sctp_conn, idx, b); - u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN); - sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b); - sctp_enqueue_to_output_now (vm, b, bi, tc->sub_conn[idx].connection.is_ip4); + sctp_enqueue_to_output_now (vm, b, bi, + sctp_conn->sub_conn[idx].connection.is_ip4); } /** * Convert buffer to SHUTDOWN_ACK */ void -sctp_prepare_shutdown_ack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) +sctp_prepare_shutdown_ack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) { - u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_ACK); u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t); alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); @@ -774,56 +1088,44 @@ sctp_prepare_shutdown_ack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) shutdown_ack_chunk->sctp_hdr.checksum = 0; /* No need of host_to_net conversion, already in net-byte order */ shutdown_ack_chunk->sctp_hdr.src_port = - tc->sub_conn[idx].connection.lcl_port; + sctp_conn->sub_conn[idx].connection.lcl_port; shutdown_ack_chunk->sctp_hdr.dst_port = - tc->sub_conn[idx].connection.rmt_port; - shutdown_ack_chunk->sctp_hdr.verification_tag = tc->remote_tag; + sctp_conn->sub_conn[idx].connection.rmt_port; + shutdown_ack_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; vnet_sctp_set_chunk_type (&shutdown_ack_chunk->chunk_hdr, SHUTDOWN_ACK); vnet_sctp_set_chunk_length (&shutdown_ack_chunk->chunk_hdr, chunk_len); vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; } /* * Send SHUTDOWN_ACK */ void -sctp_send_shutdown_ack (sctp_connection_t * tc) +sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) { - vlib_buffer_t *b; - u32 bi; - sctp_main_t *tm = vnet_get_sctp_main (); vlib_main_t *vm = vlib_get_main (); - if (sctp_check_outstanding_data_chunks (tc) > 0) + if (sctp_check_outstanding_data_chunks (sctp_conn) > 0) return; - if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) - return; - - b = vlib_get_buffer (vm, bi); - sctp_init_buffer (vm, b); - sctp_prepare_shutdown_ack_chunk (tc, b); - - u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_ACK); - sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b); - sctp_enqueue_to_ip_lookup (vm, b, bi, tc->sub_conn[idx].connection.is_ip4); + sctp_reuse_buffer (vm, b); - /* Start the SCTP_TIMER_T2_SHUTDOWN timer */ - sctp_timer_set (tc, idx, SCTP_TIMER_T2_SHUTDOWN, SCTP_RTO_INIT); - tc->state = SCTP_STATE_SHUTDOWN_ACK_SENT; + sctp_prepare_shutdown_ack_chunk (sctp_conn, idx, b); } /** * Convert buffer to SACK */ void -sctp_prepare_sack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) +sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) { vlib_main_t *vm = vlib_get_main (); - u8 idx = sctp_pick_conn_idx_on_chunk (SACK); sctp_reuse_buffer (vm, b); @@ -839,24 +1141,136 @@ sctp_prepare_sack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) sctp_selective_ack_chunk_t *sack = vlib_buffer_push_uninit (b, alloc_bytes); sack->sctp_hdr.checksum = 0; - sack->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; - sack->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; - sack->sctp_hdr.verification_tag = tc->remote_tag; + sack->sctp_hdr.src_port = sctp_conn->sub_conn[idx].connection.lcl_port; + sack->sctp_hdr.dst_port = sctp_conn->sub_conn[idx].connection.rmt_port; + sack->sctp_hdr.verification_tag = sctp_conn->remote_tag; vnet_sctp_set_chunk_type (&sack->chunk_hdr, SACK); vnet_sctp_set_chunk_length (&sack->chunk_hdr, chunk_len); + sack->cumulative_tsn_ack = sctp_conn->next_tsn_expected; + + sctp_conn->ack_state = 0; + + vnet_buffer (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; +} + +/** + * Convert buffer to HEARTBEAT_ACK + */ +void +sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) +{ + vlib_main_t *vm = vlib_get_main (); + + u16 alloc_bytes = sizeof (sctp_hb_ack_chunk_t); + + sctp_reuse_buffer (vm, b); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_hb_ack_chunk_t *hb_ack = vlib_buffer_push_uninit (b, alloc_bytes); + + hb_ack->sctp_hdr.checksum = 0; + /* No need of host_to_net conversion, already in net-byte order */ + hb_ack->sctp_hdr.src_port = sctp_conn->sub_conn[idx].connection.lcl_port; + hb_ack->sctp_hdr.dst_port = sctp_conn->sub_conn[idx].connection.rmt_port; + hb_ack->sctp_hdr.verification_tag = sctp_conn->remote_tag; + hb_ack->hb_info.param_hdr.type = clib_host_to_net_u16 (1); + hb_ack->hb_info.param_hdr.length = + clib_host_to_net_u16 (sizeof (hb_ack->hb_info.hb_info)); + + vnet_sctp_set_chunk_type (&hb_ack->chunk_hdr, HEARTBEAT_ACK); + vnet_sctp_set_chunk_length (&hb_ack->chunk_hdr, chunk_len); + + vnet_buffer (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; +} + +/** + * Convert buffer to HEARTBEAT + */ +void +sctp_prepare_heartbeat_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) +{ + u16 alloc_bytes = sizeof (sctp_hb_req_chunk_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_hb_req_chunk_t *hb_req = vlib_buffer_push_uninit (b, alloc_bytes); + + hb_req->sctp_hdr.checksum = 0; + /* No need of host_to_net conversion, already in net-byte order */ + hb_req->sctp_hdr.src_port = sctp_conn->sub_conn[idx].connection.lcl_port; + hb_req->sctp_hdr.dst_port = sctp_conn->sub_conn[idx].connection.rmt_port; + hb_req->sctp_hdr.verification_tag = sctp_conn->remote_tag; + hb_req->hb_info.param_hdr.type = clib_host_to_net_u16 (1); + hb_req->hb_info.param_hdr.length = + clib_host_to_net_u16 (sizeof (hb_req->hb_info.hb_info)); + + vnet_sctp_set_chunk_type (&hb_req->chunk_hdr, HEARTBEAT); + vnet_sctp_set_chunk_length (&hb_req->chunk_hdr, chunk_len); + vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; +} + +void +sctp_send_heartbeat (sctp_connection_t * sctp_conn) +{ + vlib_buffer_t *b; + u32 bi; + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + + u8 i; + u64 now = sctp_time_now (); + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN) + continue; + + if (now > (sctp_conn->sub_conn[i].last_seen + SCTP_HB_INTERVAL)) + { + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return; + + b = vlib_get_buffer (vm, bi); + sctp_init_buffer (vm, b); + sctp_prepare_heartbeat_chunk (sctp_conn, i, b); + + sctp_enqueue_to_output_now (vm, b, bi, + sctp_conn->sub_conn[i]. + connection.is_ip4); + + sctp_conn->sub_conn[i].unacknowledged_hb += 1; + } + } } /** * Convert buffer to SHUTDOWN_COMPLETE */ void -sctp_prepare_shutdown_complete_chunk (sctp_connection_t * tc, +sctp_prepare_shutdown_complete_chunk (sctp_connection_t * sctp_conn, u8 idx, vlib_buffer_t * b) { - u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_COMPLETE); u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t); alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); @@ -868,92 +1282,92 @@ sctp_prepare_shutdown_complete_chunk (sctp_connection_t * tc, shutdown_complete->sctp_hdr.checksum = 0; /* No need of host_to_net conversion, already in net-byte order */ shutdown_complete->sctp_hdr.src_port = - tc->sub_conn[idx].connection.lcl_port; + sctp_conn->sub_conn[idx].connection.lcl_port; shutdown_complete->sctp_hdr.dst_port = - tc->sub_conn[idx].connection.rmt_port; - shutdown_complete->sctp_hdr.verification_tag = tc->remote_tag; + sctp_conn->sub_conn[idx].connection.rmt_port; + shutdown_complete->sctp_hdr.verification_tag = sctp_conn->remote_tag; vnet_sctp_set_chunk_type (&shutdown_complete->chunk_hdr, SHUTDOWN_COMPLETE); vnet_sctp_set_chunk_length (&shutdown_complete->chunk_hdr, chunk_len); vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; + vnet_buffer (b)->sctp.subconn_idx = idx; } void -sctp_send_shutdown_complete (sctp_connection_t * tc) +sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b0) { - vlib_buffer_t *b; - u32 bi; - sctp_main_t *tm = vnet_get_sctp_main (); vlib_main_t *vm = vlib_get_main (); - if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + if (sctp_check_outstanding_data_chunks (sctp_conn) > 0) return; - b = vlib_get_buffer (vm, bi); - sctp_init_buffer (vm, b); - sctp_prepare_shutdown_complete_chunk (tc, b); + sctp_reuse_buffer (vm, b0); - u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_COMPLETE); - sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b); - sctp_enqueue_to_ip_lookup (vm, b, bi, tc->sub_conn[idx].connection.is_ip4); - - tc->state = SCTP_STATE_CLOSED; + sctp_prepare_shutdown_complete_chunk (sctp_conn, idx, b0); } - /* * Send INIT */ void -sctp_send_init (sctp_connection_t * tc) +sctp_send_init (sctp_connection_t * sctp_conn) { vlib_buffer_t *b; u32 bi; sctp_main_t *tm = vnet_get_sctp_main (); vlib_main_t *vm = vlib_get_main (); + if (PREDICT_FALSE (sctp_conn->init_retransmit_err > SCTP_MAX_INIT_RETRANS)) + { + clib_warning ("Reached MAX_INIT_RETRANS times. Aborting connection."); + + session_stream_connect_notify (&sctp_conn->sub_conn + [SCTP_PRIMARY_PATH_IDX].connection, 1); + + sctp_connection_timers_reset (sctp_conn); + + sctp_connection_cleanup (sctp_conn); + + return; + } + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) return; b = vlib_get_buffer (vm, bi); - u8 idx = sctp_pick_conn_idx_on_chunk (INIT); + u8 idx = SCTP_PRIMARY_PATH_IDX; sctp_init_buffer (vm, b); - sctp_prepare_init_chunk (tc, b); - - /* Measure RTT with this */ - tc->rtt_ts = sctp_time_now (); - tc->rtt_seq = tc->snd_nxt; - tc->rto_boff = 0; + sctp_prepare_init_chunk (sctp_conn, idx, b); - sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b); - sctp_enqueue_to_ip_lookup_now (vm, b, bi, tc->sub_conn[idx].c_is_ip4); + sctp_push_ip_hdr (tm, &sctp_conn->sub_conn[idx], b); + sctp_enqueue_to_ip_lookup (vm, b, bi, sctp_conn->sub_conn[idx].c_is_ip4, + sctp_conn->sub_conn[idx].c_fib_index); /* Start the T1_INIT timer */ - sctp_timer_set (tc, idx, SCTP_TIMER_T1_INIT, SCTP_RTO_INIT); + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T1_INIT, + sctp_conn->sub_conn[idx].RTO); + /* Change state to COOKIE_WAIT */ - tc->state = SCTP_STATE_COOKIE_WAIT; -} + sctp_conn->state = SCTP_STATE_COOKIE_WAIT; -always_inline u8 -sctp_in_cong_recovery (sctp_connection_t * sctp_conn) -{ - return 0; + /* Measure RTT with this */ + sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now (); } /** * Push SCTP header and update connection variables */ static void -sctp_push_hdr_i (sctp_connection_t * tc, vlib_buffer_t * b, +sctp_push_hdr_i (sctp_connection_t * sctp_conn, vlib_buffer_t * b, sctp_state_t next_state) { - u8 idx = sctp_pick_conn_idx_on_chunk (DATA); - u16 data_len = b->current_length + b->total_length_not_including_first_buffer; + ASSERT (!b->total_length_not_including_first_buffer || (b->flags & VLIB_BUFFER_NEXT_PRESENT)); @@ -962,48 +1376,221 @@ sctp_push_hdr_i (sctp_connection_t * tc, vlib_buffer_t * b, "data_len = %u", b->current_length, b->current_data, data_len); + u16 data_padding = vnet_sctp_calculate_padding (b->current_length); + if (data_padding > 0) + { + u8 *p_tail = vlib_buffer_put_uninit (b, data_padding); + clib_memset_u8 (p_tail, 0, data_padding); + } + u16 bytes_to_add = sizeof (sctp_payload_data_chunk_t); u16 chunk_length = data_len + bytes_to_add - sizeof (sctp_header_t); - bytes_to_add += vnet_sctp_calculate_padding (bytes_to_add + data_len); - sctp_payload_data_chunk_t *data_chunk = vlib_buffer_push_uninit (b, bytes_to_add); + u8 idx = sctp_data_subconn_select (sctp_conn); + SCTP_DBG_OUTPUT + ("SCTP_CONN = %p, IDX = %u, S_INDEX = %u, C_INDEX = %u, sctp_conn->[...].LCL_PORT = %u, sctp_conn->[...].RMT_PORT = %u", + sctp_conn, idx, sctp_conn->sub_conn[idx].connection.s_index, + sctp_conn->sub_conn[idx].connection.c_index, + sctp_conn->sub_conn[idx].connection.lcl_port, + sctp_conn->sub_conn[idx].connection.rmt_port); data_chunk->sctp_hdr.checksum = 0; - data_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; - data_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; - data_chunk->sctp_hdr.verification_tag = tc->remote_tag; + data_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + data_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + data_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; - data_chunk->tsn = clib_host_to_net_u32 (0); + data_chunk->tsn = clib_host_to_net_u32 (sctp_conn->next_tsn); data_chunk->stream_id = clib_host_to_net_u16 (0); data_chunk->stream_seq = clib_host_to_net_u16 (0); vnet_sctp_set_chunk_type (&data_chunk->chunk_hdr, DATA); vnet_sctp_set_chunk_length (&data_chunk->chunk_hdr, chunk_length); + vnet_sctp_set_bbit (&data_chunk->chunk_hdr); + vnet_sctp_set_ebit (&data_chunk->chunk_hdr); + SCTP_ADV_DBG_OUTPUT ("POINTER_WITH_DATA = %p, DATA_OFFSET = %u", b->data, b->current_data); + if (sctp_conn->sub_conn[idx].state != SCTP_SUBCONN_AWAITING_SACK) + { + sctp_conn->sub_conn[idx].state = SCTP_SUBCONN_AWAITING_SACK; + sctp_conn->last_unacked_tsn = sctp_conn->next_tsn; + } + + sctp_conn->next_tsn += data_len; + + u32 inflight = sctp_conn->next_tsn - sctp_conn->last_unacked_tsn; + /* Section 7.2.2; point (3) */ + if (sctp_conn->sub_conn[idx].partially_acked_bytes >= + sctp_conn->sub_conn[idx].cwnd + && inflight >= sctp_conn->sub_conn[idx].cwnd) + { + sctp_conn->sub_conn[idx].cwnd += sctp_conn->sub_conn[idx].PMTU; + sctp_conn->sub_conn[idx].partially_acked_bytes -= + sctp_conn->sub_conn[idx].cwnd; + } + + sctp_conn->sub_conn[idx].last_data_ts = sctp_time_now (); + vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; + + vnet_buffer (b)->sctp.subconn_idx = idx; +} + +u32 +sctp_push_header (transport_connection_t * trans_conn, vlib_buffer_t * b) +{ + sctp_connection_t *sctp_conn = + sctp_get_connection_from_transport (trans_conn); + + SCTP_DBG_OUTPUT ("TRANS_CONN = %p, SCTP_CONN = %p, " + "S_INDEX = %u, C_INDEX = %u," + "trans_conn->LCL_PORT = %u, trans_conn->RMT_PORT = %u", + trans_conn, + sctp_conn, + trans_conn->s_index, + trans_conn->c_index, + trans_conn->lcl_port, trans_conn->rmt_port); + + sctp_push_hdr_i (sctp_conn, b, SCTP_STATE_ESTABLISHED); + + sctp_trajectory_add_start (b, 3); + + return 0; } u32 -sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +sctp_prepare_data_retransmit (sctp_connection_t * sctp_conn, + u8 idx, + u32 offset, + u32 max_deq_bytes, vlib_buffer_t ** b) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + int n_bytes = 0; + u32 bi, available_bytes, seg_size; + u8 *data; + + ASSERT (sctp_conn->state >= SCTP_STATE_ESTABLISHED); + ASSERT (max_deq_bytes != 0); + + /* + * Make sure we can retransmit something + */ + available_bytes = + session_tx_fifo_max_dequeue (&sctp_conn->sub_conn[idx].connection); + ASSERT (available_bytes >= offset); + available_bytes -= offset; + if (!available_bytes) + return 0; + max_deq_bytes = clib_min (sctp_conn->sub_conn[idx].cwnd, max_deq_bytes); + max_deq_bytes = clib_min (available_bytes, max_deq_bytes); + + seg_size = max_deq_bytes; + + /* + * Allocate and fill in buffer(s) + */ + + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return 0; + *b = vlib_get_buffer (vm, bi); + data = sctp_init_buffer (vm, *b); + + /* Easy case, buffer size greater than mss */ + if (PREDICT_TRUE (seg_size <= tm->bytes_per_buffer)) + { + n_bytes = + stream_session_peek_bytes (&sctp_conn->sub_conn[idx].connection, data, + offset, max_deq_bytes); + ASSERT (n_bytes == max_deq_bytes); + b[0]->current_length = n_bytes; + sctp_push_hdr_i (sctp_conn, *b, sctp_conn->state); + } + + return n_bytes; +} + +void +sctp_data_retransmit (sctp_connection_t * sctp_conn) { - sctp_connection_t *tc = sctp_get_connection_from_transport (tconn); - sctp_push_hdr_i (tc, b, SCTP_STATE_ESTABLISHED); + vlib_main_t *vm = vlib_get_main (); + vlib_buffer_t *b = 0; + u32 bi, n_bytes = 0; + + u8 idx = sctp_data_subconn_select (sctp_conn); - if (tc->rtt_ts == 0 && !sctp_in_cong_recovery (tc)) + SCTP_DBG_OUTPUT + ("SCTP_CONN = %p, IDX = %u, S_INDEX = %u, C_INDEX = %u, sctp_conn->[...].LCL_PORT = %u, sctp_conn->[...].RMT_PORT = %u", + sctp_conn, idx, sctp_conn->sub_conn[idx].connection.s_index, + sctp_conn->sub_conn[idx].connection.c_index, + sctp_conn->sub_conn[idx].connection.lcl_port, + sctp_conn->sub_conn[idx].connection.rmt_port); + + if (sctp_conn->state >= SCTP_STATE_ESTABLISHED) { - tc->rtt_ts = sctp_time_now (); - tc->rtt_seq = tc->snd_nxt; + return; } - sctp_trajectory_add_start (b0, 3); - return 0; + n_bytes = + sctp_prepare_data_retransmit (sctp_conn, idx, 0, + sctp_conn->sub_conn[idx].cwnd, &b); + if (n_bytes > 0) + SCTP_DBG_OUTPUT ("We have data (%u bytes) to retransmit", n_bytes); + + bi = vlib_get_buffer_index (vm, b); + sctp_enqueue_to_output_now (vm, b, bi, + sctp_conn->sub_conn[idx].connection.is_ip4); + + return; +} + +#if SCTP_DEBUG_STATE_MACHINE +always_inline u8 +sctp_validate_output_state_machine (sctp_connection_t * sctp_conn, + u8 chunk_type) +{ + u8 result = 0; + switch (sctp_conn->state) + { + case SCTP_STATE_CLOSED: + if (chunk_type != INIT && chunk_type != INIT_ACK) + result = 1; + break; + case SCTP_STATE_ESTABLISHED: + if (chunk_type != DATA && chunk_type != HEARTBEAT && + chunk_type != HEARTBEAT_ACK && chunk_type != SACK && + chunk_type != COOKIE_ACK && chunk_type != SHUTDOWN) + result = 1; + break; + case SCTP_STATE_COOKIE_WAIT: + if (chunk_type != COOKIE_ECHO) + result = 1; + break; + case SCTP_STATE_SHUTDOWN_SENT: + if (chunk_type != SHUTDOWN_COMPLETE) + result = 1; + break; + case SCTP_STATE_SHUTDOWN_RECEIVED: + if (chunk_type != SHUTDOWN_ACK) + result = 1; + break; + } + return result; +} +#endif + +always_inline u8 +sctp_is_retransmitting (sctp_connection_t * sctp_conn, u8 idx) +{ + return sctp_conn->sub_conn[idx].is_retransmitting; } always_inline uword @@ -1030,7 +1617,7 @@ sctp46_output_inline (vlib_main_t * vm, u32 bi0; vlib_buffer_t *b0; sctp_header_t *sctp_hdr = 0; - sctp_connection_t *tc0; + sctp_connection_t *sctp_conn; sctp_tx_trace_t *t0; sctp_header_t *th0 = 0; u32 error0 = SCTP_ERROR_PKTS_SENT, next0 = @@ -1048,85 +1635,97 @@ sctp46_output_inline (vlib_main_t * vm, n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); - tc0 = sctp_connection_get (vnet_buffer (b0)->sctp.connection_index, - my_thread_index); - if (PREDICT_FALSE (tc0 == 0)) + sctp_conn = + sctp_connection_get (vnet_buffer (b0)->sctp.connection_index, + my_thread_index); + + if (PREDICT_FALSE (sctp_conn == 0)) { error0 = SCTP_ERROR_INVALID_CONNECTION; next0 = SCTP_OUTPUT_NEXT_DROP; goto done; } - u8 idx = sctp_pick_conn_idx_on_state (tc0->state); + u8 idx = vnet_buffer (b0)->sctp.subconn_idx; th0 = vlib_buffer_get_current (b0); if (is_ip4) { - ip4_header_t *th0 = vlib_buffer_push_ip4 (vm, - b0, - &tc0->sub_conn - [idx].connection. - lcl_ip.ip4, - &tc0-> - sub_conn - [idx].connection. - rmt_ip.ip4, - IP_PROTOCOL_SCTP, 1); - - u32 checksum = ip4_sctp_compute_checksum (vm, b0, th0); - - sctp_hdr = ip4_next_header (th0); + ip4_header_t *iph4 = vlib_buffer_push_ip4 (vm, + b0, + &sctp_conn->sub_conn + [idx].connection. + lcl_ip.ip4, + &sctp_conn-> + sub_conn + [idx].connection. + rmt_ip.ip4, + IP_PROTOCOL_SCTP, 1); + + u32 checksum = ip4_sctp_compute_checksum (vm, b0, iph4); + + sctp_hdr = ip4_next_header (iph4); sctp_hdr->checksum = checksum; vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; - th0->checksum = 0; #if SCTP_DEBUG_STATE_MACHINE - packet_length = clib_net_to_host_u16 (th0->length); + packet_length = clib_net_to_host_u16 (iph4->length); #endif } else { - ip6_header_t *ih0; - ih0 = vlib_buffer_push_ip6 (vm, - b0, - &tc0->sub_conn[idx]. - connection.lcl_ip.ip6, - &tc0->sub_conn[idx]. - connection.rmt_ip.ip6, - IP_PROTOCOL_SCTP); + ip6_header_t *iph6 = vlib_buffer_push_ip6 (vm, + b0, + &sctp_conn->sub_conn + [idx]. + connection.lcl_ip. + ip6, + &sctp_conn->sub_conn + [idx]. + connection.rmt_ip. + ip6, + IP_PROTOCOL_SCTP); int bogus = ~0; - u32 checksum = ip6_sctp_compute_checksum (vm, b0, ih0, &bogus); + u32 checksum = ip6_sctp_compute_checksum (vm, b0, iph6, &bogus); ASSERT (!bogus); - sctp_hdr = ip6_next_header (ih0); + sctp_hdr = ip6_next_header (iph6); sctp_hdr->checksum = checksum; - vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data; + vnet_buffer (b0)->l3_hdr_offset = (u8 *) iph6 - b0->data; vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; - th0->checksum = 0; #if SCTP_DEBUG_STATE_MACHINE - packet_length = clib_net_to_host_u16 (ih0->payload_length); + packet_length = clib_net_to_host_u16 (iph6->payload_length); #endif } + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + u8 chunk_type = vnet_sctp_get_chunk_type (&full_hdr->common_hdr); + if (chunk_type >= UNKNOWN) + { + clib_warning + ("Trying to send an unrecognized chunk... something is really bad."); + error0 = SCTP_ERROR_UNKNOWN_CHUNK; + next0 = SCTP_OUTPUT_NEXT_DROP; + goto done; + } + +#if SCTP_DEBUG_STATE_MACHINE u8 is_valid = - (tc0->sub_conn[idx].connection.lcl_port == + (sctp_conn->sub_conn[idx].connection.lcl_port == sctp_hdr->src_port - || tc0->sub_conn[idx].connection.lcl_port == + || sctp_conn->sub_conn[idx].connection.lcl_port == sctp_hdr->dst_port) - && (tc0->sub_conn[idx].connection.rmt_port == + && (sctp_conn->sub_conn[idx].connection.rmt_port == sctp_hdr->dst_port - || tc0->sub_conn[idx].connection.rmt_port == + || sctp_conn->sub_conn[idx].connection.rmt_port == sctp_hdr->src_port); - sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; - u8 chunk_type = vnet_sctp_get_chunk_type (&full_hdr->common_hdr); - if (!is_valid) { SCTP_DBG_STATE_MACHINE ("BUFFER IS INCORRECT: conn_index = %u, " @@ -1134,106 +1733,113 @@ sctp46_output_inline (vlib_main_t * vm, "chunk_type = %u [%s], " "connection.lcl_port = %u, sctp_hdr->src_port = %u, " "connection.rmt_port = %u, sctp_hdr->dst_port = %u", - tc0->sub_conn - [idx].connection.c_index, packet_length, + sctp_conn->sub_conn[idx]. + connection.c_index, packet_length, chunk_type, sctp_chunk_to_string (chunk_type), - tc0->sub_conn[idx].connection.lcl_port, - sctp_hdr->src_port, - tc0->sub_conn[idx].connection.rmt_port, + sctp_conn->sub_conn[idx]. + connection.lcl_port, sctp_hdr->src_port, + sctp_conn->sub_conn[idx]. + connection.rmt_port, sctp_hdr->dst_port); - error0 = SCTP_ERROR_UNKOWN_CHUNK; + error0 = SCTP_ERROR_UNKNOWN_CHUNK; next0 = SCTP_OUTPUT_NEXT_DROP; goto done; } - +#endif SCTP_DBG_STATE_MACHINE - ("CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), " + ("SESSION_INDEX = %u, CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), " "CHUNK_TYPE = %s, " "SRC_PORT = %u, DST_PORT = %u", - tc0->sub_conn[idx].connection.c_index, - tc0->state, sctp_state_to_string (tc0->state), + sctp_conn->sub_conn[idx].connection.s_index, + sctp_conn->sub_conn[idx].connection.c_index, + sctp_conn->state, sctp_state_to_string (sctp_conn->state), sctp_chunk_to_string (chunk_type), full_hdr->hdr.src_port, full_hdr->hdr.dst_port); - if (chunk_type == DATA) - SCTP_ADV_DBG_OUTPUT ("PACKET_LENGTH = %u", packet_length); - /* Let's make sure the state-machine does not send anything crazy */ - switch (tc0->state) +#if SCTP_DEBUG_STATE_MACHINE + if (sctp_validate_output_state_machine (sctp_conn, chunk_type) != 0) { - case SCTP_STATE_CLOSED: - { - if (chunk_type != INIT && chunk_type != INIT_ACK) - { - SCTP_DBG_STATE_MACHINE - ("Sending the wrong chunk (%s) based on state-machine status (%s)", - sctp_chunk_to_string (chunk_type), - sctp_state_to_string (tc0->state)); - - error0 = SCTP_ERROR_UNKOWN_CHUNK; - next0 = SCTP_OUTPUT_NEXT_DROP; - goto done; - } - break; - } - case SCTP_STATE_ESTABLISHED: - if (chunk_type != DATA && chunk_type != HEARTBEAT && - chunk_type != HEARTBEAT_ACK && chunk_type != SACK && - chunk_type != COOKIE_ACK && chunk_type != SHUTDOWN) - { - SCTP_DBG_STATE_MACHINE - ("Sending the wrong chunk (%s) based on state-machine status (%s)", - sctp_chunk_to_string (chunk_type), - sctp_state_to_string (tc0->state)); - - error0 = SCTP_ERROR_UNKOWN_CHUNK; - next0 = SCTP_OUTPUT_NEXT_DROP; - goto done; - } - break; - case SCTP_STATE_COOKIE_WAIT: - if (chunk_type != COOKIE_ECHO) - { - SCTP_DBG_STATE_MACHINE - ("Sending the wrong chunk (%s) based on state-machine status (%s)", - sctp_chunk_to_string (chunk_type), - sctp_state_to_string (tc0->state)); - - error0 = SCTP_ERROR_UNKOWN_CHUNK; - next0 = SCTP_OUTPUT_NEXT_DROP; - goto done; - } - /* Change state */ - tc0->state = SCTP_STATE_COOKIE_ECHOED; - break; - default: SCTP_DBG_STATE_MACHINE - ("Sending chunk (%s) based on state-machine status (%s)", + ("Sending the wrong chunk (%s) based on state-machine status (%s)", sctp_chunk_to_string (chunk_type), - sctp_state_to_string (tc0->state)); - break; + sctp_state_to_string (sctp_conn->state)); + + error0 = SCTP_ERROR_UNKNOWN_CHUNK; + next0 = SCTP_OUTPUT_NEXT_DROP; + goto done; + } +#endif - if (chunk_type == SHUTDOWN) + /* Karn's algorithm: RTT measurements MUST NOT be made using + * packets that were retransmitted + */ + if (!sctp_is_retransmitting (sctp_conn, idx)) { - /* Start the SCTP_TIMER_T2_SHUTDOWN timer */ - sctp_timer_set (tc0, idx, SCTP_TIMER_T2_SHUTDOWN, - SCTP_RTO_INIT); - tc0->state = SCTP_STATE_SHUTDOWN_SENT; + /* Measure RTT with this */ + if (chunk_type == DATA + && sctp_conn->sub_conn[idx].RTO_pending == 0) + { + sctp_conn->sub_conn[idx].RTO_pending = 1; + sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now (); + } + else + sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now (); + } + + /* Let's take care of TIMERS */ + switch (chunk_type) + { + case COOKIE_ECHO: + { + sctp_conn->state = SCTP_STATE_COOKIE_ECHOED; + break; + } + case DATA: + { + SCTP_ADV_DBG_OUTPUT ("PACKET_LENGTH = %u", packet_length); + + sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T3_RXTX, + sctp_conn->sub_conn[idx].RTO); + break; + } + case SHUTDOWN: + { + /* Start the SCTP_TIMER_T2_SHUTDOWN timer */ + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T2_SHUTDOWN, + sctp_conn->sub_conn[idx].RTO); + sctp_conn->state = SCTP_STATE_SHUTDOWN_SENT; + break; + } + case SHUTDOWN_ACK: + { + /* Start the SCTP_TIMER_T2_SHUTDOWN timer */ + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T2_SHUTDOWN, + sctp_conn->sub_conn[idx].RTO); + sctp_conn->state = SCTP_STATE_SHUTDOWN_ACK_SENT; + break; + } + case SHUTDOWN_COMPLETE: + { + sctp_conn->state = SCTP_STATE_CLOSED; + break; + } } vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + sctp_conn->sub_conn[idx].c_fib_index; b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; - SCTP_DBG_STATE_MACHINE ("CONNECTION_INDEX = %u, " - "NEW_STATE = %s, " - "CHUNK_SENT = %s", - tc0->sub_conn[idx].connection.c_index, - sctp_state_to_string (tc0->state), - sctp_chunk_to_string (chunk_type)); + SCTP_DBG_STATE_MACHINE + ("SESSION_INDEX = %u, CONNECTION_INDEX = %u, " "NEW_STATE = %s, " + "CHUNK_SENT = %s", sctp_conn->sub_conn[idx].connection.s_index, + sctp_conn->sub_conn[idx].connection.c_index, + sctp_state_to_string (sctp_conn->state), + sctp_chunk_to_string (chunk_type)); vnet_sctp_common_hdr_params_host_to_net (&full_hdr->common_hdr); @@ -1249,9 +1855,9 @@ sctp46_output_inline (vlib_main_t * vm, } else { - memset (&t0->sctp_header, 0, sizeof (t0->sctp_header)); + clib_memset (&t0->sctp_header, 0, sizeof (t0->sctp_header)); } - clib_memcpy (&t0->sctp_connection, tc0, + clib_memcpy (&t0->sctp_connection, sctp_conn, sizeof (t0->sctp_connection)); }