tcp: timestamp adjustment
[vpp.git] / src / vnet / sctp / sctp_output.c
index ef5d3b7..3c2099a 100644 (file)
 #include <vppinfra/random.h>
 #include <openssl/hmac.h>
 
-vlib_node_registration_t sctp4_output_node;
-vlib_node_registration_t sctp6_output_node;
-
-typedef enum _sctp_output_next
-{
-  SCTP_OUTPUT_NEXT_DROP,
-  SCTP_OUTPUT_NEXT_IP_LOOKUP,
-  SCTP_OUTPUT_N_NEXT
-} sctp_output_next_t;
-
-#define foreach_sctp4_output_next                      \
-  _ (DROP, "error-drop")                        \
-  _ (IP_LOOKUP, "ip4-lookup")
-
-#define foreach_sctp6_output_next                      \
-  _ (DROP, "error-drop")                        \
-  _ (IP_LOOKUP, "ip6-lookup")
-
-static char *sctp_error_strings[] = {
-#define sctp_error(n,s) s,
-#include <vnet/sctp/sctp_error.def>
-#undef sctp_error
-};
-
-typedef struct
-{
-  sctp_header_t sctp_header;
-  sctp_connection_t sctp_connection;
-} sctp_tx_trace_t;
-
 /**
  * Flush tx frame populated by retransmits and timer pops
  */
@@ -261,22 +231,23 @@ sctp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b)
   b->current_length = 0;
   b->total_length_not_including_first_buffer = 0;
   vnet_buffer (b)->sctp.flags = 0;
+  vnet_buffer (b)->sctp.subconn_idx = MAX_SCTP_CONNECTIONS;
 
   /* Leave enough space for headers */
-  return vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
+  return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
 }
 
 /**
  * Initialise buffer b so an SCTP packet can be built into it.
  *
  * b must not be chained (asserted).  The buffer is marked as locally
  * originated, its chained-length counter and per-buffer SCTP flags are
  * cleared, and sctp.subconn_idx is set to MAX_SCTP_CONNECTIONS
  * (NOTE(review): presumably an out-of-range "no sub-connection selected"
  * marker -- confirm against the consumers of subconn_idx).
  *
  * @param vm vlib main (not referenced by the visible body)
  * @param b  buffer to initialise
  * @return   the value returned by vlib_buffer_make_headroom()
  */
 always_inline void *
 sctp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
 {
   ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
-  b->flags &= VLIB_BUFFER_FREE_LIST_INDEX_MASK;
   b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
   b->total_length_not_including_first_buffer = 0;
   vnet_buffer (b)->sctp.flags = 0;
+  vnet_buffer (b)->sctp.subconn_idx = MAX_SCTP_CONNECTIONS;
   VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
   /* Leave enough space for headers: the headroom requested changes from
    * MAX_HDRS_LEN to TRANSPORT_MAX_HDRS_LEN in this patch */
-  return vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
+  return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
 }
 
 always_inline int
@@ -351,13 +322,6 @@ sctp_enqueue_to_output_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
     }
 }
 
-always_inline void
-sctp_enqueue_to_output (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
-                       u8 is_ip4)
-{
-  sctp_enqueue_to_output_i (vm, b, bi, is_ip4, 0);
-}
-
 always_inline void
 sctp_enqueue_to_output_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
                            u8 is_ip4)
@@ -367,7 +331,7 @@ sctp_enqueue_to_output_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
 
 always_inline void
 sctp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
-                            u8 is_ip4, u8 flush)
+                            u8 is_ip4, u32 fib_index, u8 flush)
 {
   sctp_main_t *tm = vnet_get_sctp_main ();
   u32 thread_index = vlib_get_thread_index ();
@@ -377,8 +341,8 @@ sctp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
   b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
   b->error = 0;
 
-  /* Default FIB for now */
-  vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+  vnet_buffer (b)->sw_if_index[VLIB_TX] = fib_index;
+  vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
 
   /* Send to IP lookup */
   next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
@@ -408,21 +372,23 @@ sctp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
 
 /**
  * Enqueue buffer b (index bi) towards IP lookup without requesting a flush
  * (the trailing 0 is the `flush` argument of sctp_enqueue_to_ip_lookup_i).
  *
  * With this patch the caller supplies fib_index, which is forwarded to
  * sctp_enqueue_to_ip_lookup_i().  Additionally, when running on thread 0
  * and vlib_num_workers() is non-zero, session_flush_frames_main_thread()
  * is called right after enqueueing (NOTE(review): thread 0 is presumably
  * the main thread -- confirm).
  */
 always_inline void
 sctp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
-                          u8 is_ip4)
+                          u8 is_ip4, u32 fib_index)
 {
-  sctp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 0);
+  sctp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, fib_index, 0);
+  if (vm->thread_index == 0 && vlib_num_workers ())
+    session_flush_frames_main_thread (vm);
 }
 
 /**
  * Convert buffer to INIT
  */
 void
-sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b)
+sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, u8 idx,
+                        vlib_buffer_t * b)
 {
   u32 random_seed = random_default_seed ();
   u16 alloc_bytes = sizeof (sctp_init_chunk_t);
-  sctp_sub_connection_t *sub_conn =
-    &sctp_conn->sub_conn[sctp_pick_conn_idx_on_chunk (INIT)];
+  sctp_sub_connection_t *sub_conn = &sctp_conn->sub_conn[idx];
 
   sctp_ipv4_addr_param_t *ip4_param = 0;
   sctp_ipv6_addr_param_t *ip6_param = 0;
@@ -468,7 +434,9 @@ sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b)
   vnet_sctp_set_chunk_length (&init_chunk->chunk_hdr, chunk_len);
   vnet_sctp_common_hdr_params_host_to_net (&init_chunk->chunk_hdr);
 
-  init_chunk->a_rwnd = clib_host_to_net_u32 (DEFAULT_A_RWND);
+  sctp_init_cwnd (sctp_conn);
+
+  init_chunk->a_rwnd = clib_host_to_net_u32 (sctp_conn->sub_conn[idx].cwnd);
   init_chunk->initiate_tag = clib_host_to_net_u32 (random_u32 (&random_seed));
   init_chunk->inboud_streams_count =
     clib_host_to_net_u16 (INBOUND_STREAMS_COUNT);
@@ -483,6 +451,7 @@ sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b)
   sctp_conn->local_tag = init_chunk->initiate_tag;
 
   vnet_buffer (b)->sctp.connection_index = sub_conn->c_c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
 
   SCTP_DBG_STATE_MACHINE ("CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), "
                          "CHUNK_TYPE = %s, "
@@ -503,13 +472,12 @@ sctp_compute_mac (sctp_connection_t * sctp_conn,
   HMAC_CTX *ctx;
 #else
   HMAC_CTX ctx;
-  const EVP_MD *md = EVP_sha1 ();
 #endif
   unsigned int len = 0;
-
+  const EVP_MD *md = EVP_sha1 ();
 #if OPENSSL_VERSION_NUMBER >= 0x10100000L
   ctx = HMAC_CTX_new ();
-  HMAC_Init_ex (&ctx, &state_cookie->creation_time,
+  HMAC_Init_ex (ctx, &state_cookie->creation_time,
                sizeof (state_cookie->creation_time), md, NULL);
   HMAC_Update (ctx, (const unsigned char *) &sctp_conn, sizeof (sctp_conn));
   HMAC_Final (ctx, state_cookie->mac, &len);
@@ -517,7 +485,6 @@ sctp_compute_mac (sctp_connection_t * sctp_conn,
   HMAC_CTX_init (&ctx);
   HMAC_Init_ex (&ctx, &state_cookie->creation_time,
                sizeof (state_cookie->creation_time), md, NULL);
-
   HMAC_Update (&ctx, (const unsigned char *) &sctp_conn, sizeof (sctp_conn));
   HMAC_Final (&ctx, state_cookie->mac, &len);
   HMAC_CTX_cleanup (&ctx);
@@ -527,11 +494,10 @@ sctp_compute_mac (sctp_connection_t * sctp_conn,
 }
 
 void
-sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn,
+sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
                               vlib_buffer_t * b)
 {
   vlib_main_t *vm = vlib_get_main ();
-  u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ACK);
 
   sctp_reuse_buffer (vm, b);
 
@@ -556,22 +522,19 @@ sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn,
   vnet_sctp_set_chunk_type (&cookie_ack_chunk->chunk_hdr, COOKIE_ACK);
   vnet_sctp_set_chunk_length (&cookie_ack_chunk->chunk_hdr, chunk_len);
 
-  /* Measure RTT with this */
-  sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now ();
-
   vnet_buffer (b)->sctp.connection_index =
     sctp_conn->sub_conn[idx].connection.c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
 }
 
 void
-sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn,
-                               vlib_buffer_t * b,
-                               sctp_state_cookie_param_t * sc)
+sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx,
+                               vlib_buffer_t * b, u8 reuse_buffer)
 {
   vlib_main_t *vm = vlib_get_main ();
-  u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ECHO);
 
-  sctp_reuse_buffer (vm, b);
+  if (reuse_buffer)
+    sctp_reuse_buffer (vm, b);
 
   /* The minimum size of the message is given by the sctp_init_ack_chunk_t */
   u16 alloc_bytes = sizeof (sctp_cookie_echo_chunk_t);
@@ -590,29 +553,158 @@ sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn,
   cookie_echo_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag;
   vnet_sctp_set_chunk_type (&cookie_echo_chunk->chunk_hdr, COOKIE_ECHO);
   vnet_sctp_set_chunk_length (&cookie_echo_chunk->chunk_hdr, chunk_len);
-  clib_memcpy (&(cookie_echo_chunk->cookie), sc,
-              sizeof (sctp_state_cookie_param_t));
+  clib_memcpy_fast (&(cookie_echo_chunk->cookie), &sctp_conn->cookie_param,
+                   sizeof (sctp_state_cookie_param_t));
+
+  vnet_buffer (b)->sctp.connection_index =
+    sctp_conn->sub_conn[idx].connection.c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
+}
+
+
+/**
+ * Send COOKIE_ECHO on the primary path (SCTP_PRIMARY_PATH_IDX).
+ *
+ * If init_retransmit_err exceeds SCTP_MAX_INIT_RETRANS the connect attempt
+ * is reported as failed via session_stream_connect_notify(), the timers
+ * are reset, the connection is cleaned up and the function returns without
+ * sending anything.
+ *
+ * Otherwise a buffer is obtained (silently giving up if none is free),
+ * filled with the COOKIE_ECHO chunk and enqueued to output; the T1_INIT
+ * timer is armed with the sub-connection RTO, the connection state is
+ * updated and rtt_ts is stamped for RTT measurement.
+ *
+ * @param sctp_conn connection for which the COOKIE_ECHO is sent
+ */
+void
+sctp_send_cookie_echo (sctp_connection_t * sctp_conn)
+{
+  vlib_buffer_t *b;
+  u32 bi;
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  vlib_main_t *vm = vlib_get_main ();
+
+  if (PREDICT_FALSE (sctp_conn->init_retransmit_err > SCTP_MAX_INIT_RETRANS))
+    {
+      clib_warning ("Reached MAX_INIT_RETRANS times. Aborting connection.");
+
+      session_stream_connect_notify (&sctp_conn->sub_conn
+                                    [SCTP_PRIMARY_PATH_IDX].connection, 1);
+
+      sctp_connection_timers_reset (sctp_conn);
+
+      sctp_connection_cleanup (sctp_conn);
+
+      /* The connection has just been cleaned up: do not send, arm timers
+       * or write to sctp_conn below (same early exit as sctp_send_init) */
+      return;
+    }
+
+  if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi)))
+    return;
+
+  b = vlib_get_buffer (vm, bi);
+  u8 idx = SCTP_PRIMARY_PATH_IDX;
+
+  sctp_init_buffer (vm, b);
+  /* Buffer was just initialised, so ask the helper not to reset it again */
+  sctp_prepare_cookie_echo_chunk (sctp_conn, idx, b, 0);
+  sctp_enqueue_to_output_now (vm, b, bi, sctp_conn->sub_conn[idx].c_is_ip4);
+
+  /* Start the T1_INIT timer */
+  sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T1_INIT,
+                 sctp_conn->sub_conn[idx].RTO);
+
+  /* Change state to COOKIE_WAIT
+   * NOTE(review): RFC 4960 Section 5.1 names the state entered after
+   * sending COOKIE ECHO "COOKIE-ECHOED"; confirm COOKIE_WAIT is intended */
+  sctp_conn->state = SCTP_STATE_COOKIE_WAIT;
 
   /* Measure RTT with this */
   sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now ();
+}
+
+
+/**
+ * Convert buffer to ERROR
+ *
+ * Resets b and builds an OPERATION_ERROR chunk carrying a single error
+ * cause for sub-connection idx.
+ *
+ * @param sctp_conn connection the chunk belongs to
+ * @param idx       index into sctp_conn->sub_conn[] supplying the ports
+ *                  and the connection index recorded in the buffer
+ * @param b         buffer that is reset and filled with the chunk
+ * @param err_cause value written (in network byte-order) as the type of
+ *                  err_causes[0]
+ */
+void
+sctp_prepare_operation_error (sctp_connection_t * sctp_conn, u8 idx,
+                             vlib_buffer_t * b, u8 err_cause)
+{
+  vlib_main_t *vm = vlib_get_main ();
+
+  sctp_reuse_buffer (vm, b);
+
+  /* The minimum size of the message is given by the sctp_operation_error_t
+   * plus room for one sctp_err_cause_param_t */
+  u16 alloc_bytes =
+    sizeof (sctp_operation_error_t) + sizeof (sctp_err_cause_param_t);
+
+  /* As per RFC 4960 the chunk_length value does NOT contemplate
+   * the size of the first header (see sctp_header_t) and any padding
+   */
+  u16 chunk_len = alloc_bytes - sizeof (sctp_header_t);
+
+  alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
+
+  sctp_operation_error_t *err_chunk =
+    vlib_buffer_push_uninit (b, alloc_bytes);
+
+  /* src_port & dst_port are already in network byte-order */
+  err_chunk->sctp_hdr.checksum = 0;
+  err_chunk->sctp_hdr.src_port = sctp_conn->sub_conn[idx].connection.lcl_port;
+  err_chunk->sctp_hdr.dst_port = sctp_conn->sub_conn[idx].connection.rmt_port;
+  /* The verification tag is copied from the connection's local_tag.
+   * NOTE(review): the comment this replaces cited RFC4960 Section 5.2.2 and
+   * spoke of the ABORT chunk; confirm local_tag is the right tag for an
+   * OPERATION_ERROR chunk */
+  err_chunk->sctp_hdr.verification_tag = sctp_conn->local_tag;
+
+  /* The cause length advertised covers only the cause header's own
+   * type and length fields */
+  err_chunk->err_causes[0].param_hdr.length =
+    clib_host_to_net_u16 (sizeof (err_chunk->err_causes[0].param_hdr.type) +
+                         sizeof (err_chunk->err_causes[0].param_hdr.length));
+  err_chunk->err_causes[0].param_hdr.type = clib_host_to_net_u16 (err_cause);
+
+  vnet_sctp_set_chunk_type (&err_chunk->chunk_hdr, OPERATION_ERROR);
+  vnet_sctp_set_chunk_length (&err_chunk->chunk_hdr, chunk_len);
 
   vnet_buffer (b)->sctp.connection_index =
     sctp_conn->sub_conn[idx].connection.c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
+}
+
+/**
+ * Convert buffer to ABORT
+ *
+ * Resets b and builds an ABORT chunk (no error causes) for sub-connection
+ * idx, using the connection's local_tag as verification tag.
+ *
+ * @param sctp_conn connection the chunk belongs to
+ * @param idx       index into sctp_conn->sub_conn[] supplying the ports
+ *                  and the connection index recorded in the buffer
+ * @param b         buffer that is reset and filled with the chunk
+ * @param ip4_addr  not referenced by this function's body
+ * @param ip6_addr  not referenced by this function's body
+ */
+void
+sctp_prepare_abort_for_collision (sctp_connection_t * sctp_conn, u8 idx,
+                                 vlib_buffer_t * b, ip4_address_t * ip4_addr,
+                                 ip6_address_t * ip6_addr)
+{
+  vlib_main_t *vm = vlib_get_main ();
+
+  sctp_reuse_buffer (vm, b);
+
+  /* The minimum size of the message is given by the sctp_abort_chunk_t */
+  u16 alloc_bytes = sizeof (sctp_abort_chunk_t);
+
+  /* As per RFC 4960 the chunk_length value does NOT contemplate
+   * the size of the first header (see sctp_header_t) and any padding
+   */
+  u16 chunk_len = alloc_bytes - sizeof (sctp_header_t);
+
+  alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
+
+  sctp_abort_chunk_t *abort_chunk = vlib_buffer_push_uninit (b, alloc_bytes);
+
+  /* src_port & dst_port are already in network byte-order */
+  abort_chunk->sctp_hdr.checksum = 0;
+  abort_chunk->sctp_hdr.src_port =
+    sctp_conn->sub_conn[idx].connection.lcl_port;
+  abort_chunk->sctp_hdr.dst_port =
+    sctp_conn->sub_conn[idx].connection.rmt_port;
+  /* As per RFC4960 Section 5.2.2: copy the INITIATE_TAG into the VERIFICATION_TAG of the ABORT chunk */
+  abort_chunk->sctp_hdr.verification_tag = sctp_conn->local_tag;
+
+  vnet_sctp_set_chunk_type (&abort_chunk->chunk_hdr, ABORT);
+  vnet_sctp_set_chunk_length (&abort_chunk->chunk_hdr, chunk_len);
+
+  vnet_buffer (b)->sctp.connection_index =
+    sctp_conn->sub_conn[idx].connection.c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
 }
 
 /**
  * Convert buffer to INIT-ACK
  */
 void
-sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b,
-                           ip4_address_t * ip4_addr,
-                           ip6_address_t * ip6_addr)
+sctp_prepare_initack_chunk_for_collision (sctp_connection_t * sctp_conn,
+                                         u8 idx, vlib_buffer_t * b,
+                                         ip4_address_t * ip4_addr,
+                                         ip6_address_t * ip6_addr)
 {
   vlib_main_t *vm = vlib_get_main ();
   sctp_ipv4_addr_param_t *ip4_param = 0;
   sctp_ipv6_addr_param_t *ip6_param = 0;
-  u8 idx = sctp_pick_conn_idx_on_chunk (INIT_ACK);
-  u32 random_seed = random_default_seed ();
 
   sctp_reuse_buffer (vm, b);
 
@@ -656,7 +748,147 @@ sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b,
     clib_host_to_net_u16 (SCTP_STATE_COOKIE_TYPE);
   state_cookie_param->param_hdr.length =
     clib_host_to_net_u16 (sizeof (sctp_state_cookie_param_t));
-  state_cookie_param->creation_time = clib_host_to_net_u32 (sctp_time_now ());
+  state_cookie_param->creation_time = clib_host_to_net_u64 (sctp_time_now ());
+  state_cookie_param->cookie_lifespan =
+    clib_host_to_net_u32 (SCTP_VALID_COOKIE_LIFE);
+
+  sctp_compute_mac (sctp_conn, state_cookie_param);
+
+  pointer_offset += sizeof (sctp_state_cookie_param_t);
+
+  if (PREDICT_TRUE (ip4_addr != NULL))
+    {
+      sctp_ipv4_addr_param_t *ipv4_addr =
+       (sctp_ipv4_addr_param_t *) init_ack_chunk + pointer_offset;
+
+      ipv4_addr->param_hdr.type =
+       clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE);
+      ipv4_addr->param_hdr.length =
+       clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE_LENGTH);
+      ipv4_addr->address.as_u32 = ip4_addr->as_u32;
+
+      pointer_offset += SCTP_IPV4_ADDRESS_TYPE_LENGTH;
+    }
+  if (PREDICT_TRUE (ip6_addr != NULL))
+    {
+      sctp_ipv6_addr_param_t *ipv6_addr =
+       (sctp_ipv6_addr_param_t *) init_ack_chunk + pointer_offset;
+
+      ipv6_addr->param_hdr.type =
+       clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE);
+      ipv6_addr->param_hdr.length =
+       clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE_LENGTH);
+      ipv6_addr->address.as_u64[0] = ip6_addr->as_u64[0];
+      ipv6_addr->address.as_u64[1] = ip6_addr->as_u64[1];
+
+      pointer_offset += SCTP_IPV6_ADDRESS_TYPE_LENGTH;
+    }
+
+  if (sctp_conn->sub_conn[idx].connection.is_ip4)
+    {
+      ip4_param = (sctp_ipv4_addr_param_t *) init_ack_chunk + pointer_offset;
+      ip4_param->address.as_u32 =
+       sctp_conn->sub_conn[idx].connection.lcl_ip.ip4.as_u32;
+
+      pointer_offset += sizeof (sctp_ipv4_addr_param_t);
+    }
+  else
+    {
+      ip6_param = (sctp_ipv6_addr_param_t *) init_ack_chunk + pointer_offset;
+      ip6_param->address.as_u64[0] =
+       sctp_conn->sub_conn[idx].connection.lcl_ip.ip6.as_u64[0];
+      ip6_param->address.as_u64[1] =
+       sctp_conn->sub_conn[idx].connection.lcl_ip.ip6.as_u64[1];
+
+      pointer_offset += sizeof (sctp_ipv6_addr_param_t);
+    }
+
+  /* src_port & dst_port are already in network byte-order */
+  init_ack_chunk->sctp_hdr.checksum = 0;
+  init_ack_chunk->sctp_hdr.src_port =
+    sctp_conn->sub_conn[idx].connection.lcl_port;
+  init_ack_chunk->sctp_hdr.dst_port =
+    sctp_conn->sub_conn[idx].connection.rmt_port;
+  /* the sctp_conn->verification_tag is already in network byte-order (being a copy of the init_tag coming with the INIT chunk) */
+  init_ack_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag;
+  init_ack_chunk->initial_tsn =
+    clib_host_to_net_u32 (sctp_conn->local_initial_tsn);
+  SCTP_CONN_TRACKING_DBG ("init_ack_chunk->initial_tsn = %u",
+                         init_ack_chunk->initial_tsn);
+
+  vnet_sctp_set_chunk_type (&init_ack_chunk->chunk_hdr, INIT_ACK);
+  vnet_sctp_set_chunk_length (&init_ack_chunk->chunk_hdr, chunk_len);
+
+  init_ack_chunk->initiate_tag = sctp_conn->local_tag;
+
+  init_ack_chunk->a_rwnd =
+    clib_host_to_net_u32 (sctp_conn->sub_conn[idx].cwnd);
+  init_ack_chunk->inboud_streams_count =
+    clib_host_to_net_u16 (INBOUND_STREAMS_COUNT);
+  init_ack_chunk->outbound_streams_count =
+    clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT);
+
+  vnet_buffer (b)->sctp.connection_index =
+    sctp_conn->sub_conn[idx].connection.c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
+}
+
+/**
+ * Convert buffer to INIT-ACK
+ */
+void
+sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx,
+                           vlib_buffer_t * b, ip4_address_t * ip4_addr,
+                           u8 add_ip4, ip6_address_t * ip6_addr, u8 add_ip6)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  sctp_ipv4_addr_param_t *ip4_param = 0;
+  sctp_ipv6_addr_param_t *ip6_param = 0;
+  u32 random_seed = random_default_seed ();
+
+  sctp_reuse_buffer (vm, b);
+
+  /* The minimum size of the message is given by the sctp_init_ack_chunk_t */
+  u16 alloc_bytes =
+    sizeof (sctp_init_ack_chunk_t) + sizeof (sctp_state_cookie_param_t);
+
+  if (PREDICT_FALSE (add_ip4 == 1))
+    {
+      /* Create room for variable-length fields in the INIT_ACK chunk */
+      alloc_bytes += SCTP_IPV4_ADDRESS_TYPE_LENGTH;
+    }
+  if (PREDICT_FALSE (add_ip6 == 1))
+    {
+      /* Create room for variable-length fields in the INIT_ACK chunk */
+      alloc_bytes += SCTP_IPV6_ADDRESS_TYPE_LENGTH;
+    }
+
+  if (sctp_conn->sub_conn[idx].connection.is_ip4)
+    alloc_bytes += sizeof (sctp_ipv4_addr_param_t);
+  else
+    alloc_bytes += sizeof (sctp_ipv6_addr_param_t);
+
+  /* As per RFC 4960 the chunk_length value does NOT contemplate
+   * the size of the first header (see sctp_header_t) and any padding
+   */
+  u16 chunk_len = alloc_bytes - sizeof (sctp_header_t);
+
+  alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
+
+  sctp_init_ack_chunk_t *init_ack_chunk =
+    vlib_buffer_push_uninit (b, alloc_bytes);
+
+  u16 pointer_offset = sizeof (sctp_init_ack_chunk_t);
+
+  /* Create State Cookie parameter */
+  sctp_state_cookie_param_t *state_cookie_param =
+    (sctp_state_cookie_param_t *) ((char *) init_ack_chunk + pointer_offset);
+
+  state_cookie_param->param_hdr.type =
+    clib_host_to_net_u16 (SCTP_STATE_COOKIE_TYPE);
+  state_cookie_param->param_hdr.length =
+    clib_host_to_net_u16 (sizeof (sctp_state_cookie_param_t));
+  state_cookie_param->creation_time = clib_host_to_net_u64 (sctp_time_now ());
   state_cookie_param->cookie_lifespan =
     clib_host_to_net_u32 (SCTP_VALID_COOKIE_LIFE);
 
@@ -730,7 +962,8 @@ sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b,
   init_ack_chunk->initiate_tag =
     clib_host_to_net_u32 (random_u32 (&random_seed));
 
-  init_ack_chunk->a_rwnd = clib_host_to_net_u32 (DEFAULT_A_RWND);
+  init_ack_chunk->a_rwnd =
+    clib_host_to_net_u32 (sctp_conn->sub_conn[idx].cwnd);
   init_ack_chunk->inboud_streams_count =
     clib_host_to_net_u16 (INBOUND_STREAMS_COUNT);
   init_ack_chunk->outbound_streams_count =
@@ -738,25 +971,20 @@ sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b,
 
   sctp_conn->local_tag = init_ack_chunk->initiate_tag;
 
-  /* Measure RTT with this */
-  sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now ();
-
   vnet_buffer (b)->sctp.connection_index =
     sctp_conn->sub_conn[idx].connection.c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
 }
 
 /**
  * Convert buffer to SHUTDOWN
  */
 void
-sctp_prepare_shutdown_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b)
+sctp_prepare_shutdown_chunk (sctp_connection_t * sctp_conn, u8 idx,
+                            vlib_buffer_t * b)
 {
-  vlib_main_t *vm = vlib_get_main ();
-  u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN);
   u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t);
 
-  b = sctp_reuse_buffer (vm, b);
-
   /* As per RFC 4960 the chunk_length value does NOT contemplate
    * the size of the first header (see sctp_header_t) and any padding
    */
@@ -781,6 +1009,7 @@ sctp_prepare_shutdown_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b)
 
   vnet_buffer (b)->sctp.connection_index =
     sctp_conn->sub_conn[idx].connection.c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
 }
 
 /*
@@ -800,26 +1029,23 @@ sctp_send_shutdown (sctp_connection_t * sctp_conn)
   if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi)))
     return;
 
+  u8 idx = SCTP_PRIMARY_PATH_IDX;
+
   b = vlib_get_buffer (vm, bi);
   sctp_init_buffer (vm, b);
-  sctp_prepare_shutdown_chunk (sctp_conn, b);
+  sctp_prepare_shutdown_chunk (sctp_conn, idx, b);
 
-  u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN);
-  sctp_enqueue_to_output (vm, b, bi,
-                         sctp_conn->sub_conn[idx].connection.is_ip4);
-
-  /* Measure RTT with this */
-  sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now ();
+  sctp_enqueue_to_output_now (vm, b, bi,
+                             sctp_conn->sub_conn[idx].connection.is_ip4);
 }
 
 /**
  * Convert buffer to SHUTDOWN_ACK
  */
 void
-sctp_prepare_shutdown_ack_chunk (sctp_connection_t * sctp_conn,
+sctp_prepare_shutdown_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
                                 vlib_buffer_t * b)
 {
-  u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_ACK);
   u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t);
   alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
 
@@ -841,50 +1067,34 @@ sctp_prepare_shutdown_ack_chunk (sctp_connection_t * sctp_conn,
 
   vnet_buffer (b)->sctp.connection_index =
     sctp_conn->sub_conn[idx].connection.c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
 }
 
 /*
  * Send SHUTDOWN_ACK
  *
  * Builds a SHUTDOWN_ACK chunk for sub-connection idx in the caller-supplied
  * buffer b.  While sctp_check_outstanding_data_chunks() reports outstanding
  * data the function returns without touching b.
  *
  * As of this change the function only resets and fills b: it no longer
  * allocates a buffer, enqueues it to output, stamps rtt_ts, arms
  * SCTP_TIMER_T2_SHUTDOWN or moves the connection to
  * SCTP_STATE_SHUTDOWN_ACK_SENT.
  */
 void
-sctp_send_shutdown_ack (sctp_connection_t * sctp_conn)
+sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx,
+                       vlib_buffer_t * b)
 {
-  vlib_buffer_t *b;
-  u32 bi;
-  sctp_main_t *tm = vnet_get_sctp_main ();
   vlib_main_t *vm = vlib_get_main ();
 
   if (sctp_check_outstanding_data_chunks (sctp_conn) > 0)
     return;
 
-  if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi)))
-    return;
-
-  b = vlib_get_buffer (vm, bi);
-  sctp_init_buffer (vm, b);
-  sctp_prepare_shutdown_ack_chunk (sctp_conn, b);
-
-  u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_ACK);
-  sctp_enqueue_to_output (vm, b, bi,
-                         sctp_conn->sub_conn[idx].connection.is_ip4);
-
-  /* Measure RTT with this */
-  sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now ();
+  sctp_reuse_buffer (vm, b);
 
-  /* Start the SCTP_TIMER_T2_SHUTDOWN timer */
-  sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T2_SHUTDOWN,
-                 sctp_conn->sub_conn[idx].RTO);
-  sctp_conn->state = SCTP_STATE_SHUTDOWN_ACK_SENT;
+  sctp_prepare_shutdown_ack_chunk (sctp_conn, idx, b);
 }
 
 /**
  * Convert buffer to SACK
  */
 void
-sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b)
+sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx,
+                        vlib_buffer_t * b)
 {
   vlib_main_t *vm = vlib_get_main ();
-  u8 idx = sctp_pick_conn_idx_on_chunk (SACK);
 
   sctp_reuse_buffer (vm, b);
 
@@ -912,18 +1122,18 @@ sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b)
 
   vnet_buffer (b)->sctp.connection_index =
     sctp_conn->sub_conn[idx].connection.c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
 }
 
 /**
  * Convert buffer to HEARTBEAT_ACK
  */
 void
-sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn,
+sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
                                  vlib_buffer_t * b)
 {
   vlib_main_t *vm = vlib_get_main ();
 
-  u8 idx = sctp_pick_conn_idx_on_chunk (HEARTBEAT_ACK);
   u16 alloc_bytes = sizeof (sctp_hb_ack_chunk_t);
 
   sctp_reuse_buffer (vm, b);
@@ -951,16 +1161,16 @@ sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn,
 
   vnet_buffer (b)->sctp.connection_index =
     sctp_conn->sub_conn[idx].connection.c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
 }
 
 /**
  * Convert buffer to HEARTBEAT
  */
 void
-sctp_prepare_heartbeat_chunk (sctp_connection_t * sctp_conn,
+sctp_prepare_heartbeat_chunk (sctp_connection_t * sctp_conn, u8 idx,
                              vlib_buffer_t * b)
 {
-  u8 idx = sctp_pick_conn_idx_on_chunk (HEARTBEAT);
   u16 alloc_bytes = sizeof (sctp_hb_req_chunk_t);
 
   /* As per RFC 4960 the chunk_length value does NOT contemplate
@@ -986,6 +1196,7 @@ sctp_prepare_heartbeat_chunk (sctp_connection_t * sctp_conn,
 
   vnet_buffer (b)->sctp.connection_index =
     sctp_conn->sub_conn[idx].connection.c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
 }
 
 void
@@ -996,28 +1207,39 @@ sctp_send_heartbeat (sctp_connection_t * sctp_conn)
   sctp_main_t *tm = vnet_get_sctp_main ();
   vlib_main_t *vm = vlib_get_main ();
 
-  if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi)))
-    return;
+  u8 i;
+  u64 now = sctp_time_now ();
 
-  b = vlib_get_buffer (vm, bi);
-  sctp_init_buffer (vm, b);
-  sctp_prepare_heartbeat_chunk (sctp_conn, b);
+  for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+    {
+      if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
+       continue;
 
-  u8 idx = sctp_pick_conn_idx_on_state (SCTP_STATE_ESTABLISHED);
-  sctp_enqueue_to_output_now (vm, b, bi,
-                             sctp_conn->sub_conn[idx].connection.is_ip4);
+      if (now > (sctp_conn->sub_conn[i].last_seen + SCTP_HB_INTERVAL))
+       {
+         if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi)))
+           return;
 
-  sctp_conn->sub_conn[idx].unacknowledged_hb += 1;
+         b = vlib_get_buffer (vm, bi);
+         sctp_init_buffer (vm, b);
+         sctp_prepare_heartbeat_chunk (sctp_conn, i, b);
+
+         sctp_enqueue_to_output_now (vm, b, bi,
+                                     sctp_conn->sub_conn[i].
+                                     connection.is_ip4);
+
+         sctp_conn->sub_conn[i].unacknowledged_hb += 1;
+       }
+    }
 }
 
 /**
  * Convert buffer to SHUTDOWN_COMPLETE
  */
 void
-sctp_prepare_shutdown_complete_chunk (sctp_connection_t * sctp_conn,
+sctp_prepare_shutdown_complete_chunk (sctp_connection_t * sctp_conn, u8 idx,
                                      vlib_buffer_t * b)
 {
-  u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_COMPLETE);
   u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t);
   alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
 
@@ -1039,31 +1261,23 @@ sctp_prepare_shutdown_complete_chunk (sctp_connection_t * sctp_conn,
 
   vnet_buffer (b)->sctp.connection_index =
     sctp_conn->sub_conn[idx].connection.c_index;
+  vnet_buffer (b)->sctp.subconn_idx = idx;
 }
 
 /*
  * Send SHUTDOWN_COMPLETE
  *
  * Builds a SHUTDOWN_COMPLETE chunk for sub-connection idx in the
  * caller-supplied buffer b0.  With this change the function first checks
  * sctp_check_outstanding_data_chunks() and returns without touching b0
  * when it reports outstanding data.
  *
  * As of this change the function only resets and fills b0: it no longer
  * allocates a buffer, enqueues it to output or sets the connection state
  * to SCTP_STATE_CLOSED.
  */
 void
-sctp_send_shutdown_complete (sctp_connection_t * sctp_conn)
+sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx,
+                            vlib_buffer_t * b0)
 {
-  vlib_buffer_t *b;
-  u32 bi;
-  sctp_main_t *tm = vnet_get_sctp_main ();
   vlib_main_t *vm = vlib_get_main ();
 
-  if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi)))
+  if (sctp_check_outstanding_data_chunks (sctp_conn) > 0)
     return;
 
-  b = vlib_get_buffer (vm, bi);
-  sctp_init_buffer (vm, b);
-  sctp_prepare_shutdown_complete_chunk (sctp_conn, b);
-
-  u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_COMPLETE);
-  sctp_enqueue_to_output (vm, b, bi,
-                         sctp_conn->sub_conn[idx].connection.is_ip4);
+  sctp_reuse_buffer (vm, b0);
 
-  sctp_conn->state = SCTP_STATE_CLOSED;
+  sctp_prepare_shutdown_complete_chunk (sctp_conn, idx, b0);
 }
 
-
 /*
  *  Send INIT
  */
@@ -1075,20 +1289,32 @@ sctp_send_init (sctp_connection_t * sctp_conn)
   sctp_main_t *tm = vnet_get_sctp_main ();
   vlib_main_t *vm = vlib_get_main ();
 
+  if (PREDICT_FALSE (sctp_conn->init_retransmit_err > SCTP_MAX_INIT_RETRANS))
+    {
+      clib_warning ("Reached MAX_INIT_RETRANS times. Aborting connection.");
+
+      session_stream_connect_notify (&sctp_conn->sub_conn
+                                    [SCTP_PRIMARY_PATH_IDX].connection, 1);
+
+      sctp_connection_timers_reset (sctp_conn);
+
+      sctp_connection_cleanup (sctp_conn);
+
+      return;
+    }
+
   if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi)))
     return;
 
   b = vlib_get_buffer (vm, bi);
-  u8 idx = sctp_pick_conn_idx_on_chunk (INIT);
+  u8 idx = SCTP_PRIMARY_PATH_IDX;
 
   sctp_init_buffer (vm, b);
-  sctp_prepare_init_chunk (sctp_conn, b);
+  sctp_prepare_init_chunk (sctp_conn, idx, b);
 
   sctp_push_ip_hdr (tm, &sctp_conn->sub_conn[idx], b);
-  sctp_enqueue_to_ip_lookup (vm, b, bi, sctp_conn->sub_conn[idx].c_is_ip4);
-
-  /* Measure RTT with this */
-  sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now ();
+  sctp_enqueue_to_ip_lookup (vm, b, bi, sctp_conn->sub_conn[idx].c_is_ip4,
+                            sctp_conn->sub_conn[idx].c_fib_index);
 
   /* Start the T1_INIT timer */
   sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T1_INIT,
@@ -1096,33 +1322,52 @@ sctp_send_init (sctp_connection_t * sctp_conn)
 
   /* Change state to COOKIE_WAIT */
   sctp_conn->state = SCTP_STATE_COOKIE_WAIT;
+
+  /* Measure RTT with this */
+  sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now ();
 }
 
 /**
  * Push SCTP header and update connection variables
  */
 static void
-sctp_push_hdr_i (sctp_connection_t * sctp_conn, u8 idx, vlib_buffer_t * b,
+sctp_push_hdr_i (sctp_connection_t * sctp_conn, vlib_buffer_t * b,
                 sctp_state_t next_state)
 {
-  u16 data_len =
-    b->current_length + b->total_length_not_including_first_buffer;
+  u16 data_len = b->current_length;
+
+  if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)
+    data_len += b->total_length_not_including_first_buffer;
+
   ASSERT (!b->total_length_not_including_first_buffer
-         || (b->flags & VLIB_BUFFER_NEXT_PRESENT));
+         || (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+         || !(b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID));
 
   SCTP_ADV_DBG_OUTPUT ("b->current_length = %u, "
                       "b->current_data = %p "
                       "data_len = %u",
                       b->current_length, b->current_data, data_len);
 
+  u16 data_padding = vnet_sctp_calculate_padding (b->current_length);
+  if (data_padding > 0)
+    {
+      u8 *p_tail = vlib_buffer_put_uninit (b, data_padding);
+      clib_memset_u8 (p_tail, 0, data_padding);
+    }
+
   u16 bytes_to_add = sizeof (sctp_payload_data_chunk_t);
   u16 chunk_length = data_len + bytes_to_add - sizeof (sctp_header_t);
 
-  bytes_to_add += vnet_sctp_calculate_padding (bytes_to_add + data_len);
-
   sctp_payload_data_chunk_t *data_chunk =
     vlib_buffer_push_uninit (b, bytes_to_add);
 
+  u8 idx = sctp_data_subconn_select (sctp_conn);
+  SCTP_DBG_OUTPUT
+    ("SCTP_CONN = %p, IDX = %u, S_INDEX = %u, C_INDEX = %u, sctp_conn->[...].LCL_PORT = %u, sctp_conn->[...].RMT_PORT = %u",
+     sctp_conn, idx, sctp_conn->sub_conn[idx].connection.s_index,
+     sctp_conn->sub_conn[idx].connection.c_index,
+     sctp_conn->sub_conn[idx].connection.lcl_port,
+     sctp_conn->sub_conn[idx].connection.rmt_port);
   data_chunk->sctp_hdr.checksum = 0;
   data_chunk->sctp_hdr.src_port =
     sctp_conn->sub_conn[idx].connection.lcl_port;
@@ -1143,10 +1388,31 @@ sctp_push_hdr_i (sctp_connection_t * sctp_conn, u8 idx, vlib_buffer_t * b,
   SCTP_ADV_DBG_OUTPUT ("POINTER_WITH_DATA = %p, DATA_OFFSET = %u",
                       b->data, b->current_data);
 
+  if (sctp_conn->sub_conn[idx].state != SCTP_SUBCONN_AWAITING_SACK)
+    {
+      sctp_conn->sub_conn[idx].state = SCTP_SUBCONN_AWAITING_SACK;
+      sctp_conn->last_unacked_tsn = sctp_conn->next_tsn;
+    }
+
   sctp_conn->next_tsn += data_len;
 
+  u32 inflight = sctp_conn->next_tsn - sctp_conn->last_unacked_tsn;
+  /* RFC 4960, Section 7.2.2 (congestion avoidance), point (3) */
+  if (sctp_conn->sub_conn[idx].partially_acked_bytes >=
+      sctp_conn->sub_conn[idx].cwnd
+      && inflight >= sctp_conn->sub_conn[idx].cwnd)
+    {
+      sctp_conn->sub_conn[idx].cwnd += sctp_conn->sub_conn[idx].PMTU;
+      sctp_conn->sub_conn[idx].partially_acked_bytes -=
+       sctp_conn->sub_conn[idx].cwnd;
+    }
+
+  sctp_conn->sub_conn[idx].last_data_ts = sctp_time_now ();
+
   vnet_buffer (b)->sctp.connection_index =
     sctp_conn->sub_conn[idx].connection.c_index;
+
+  vnet_buffer (b)->sctp.subconn_idx = idx;
 }
 
 u32
@@ -1155,361 +1421,143 @@ sctp_push_header (transport_connection_t * trans_conn, vlib_buffer_t * b)
   sctp_connection_t *sctp_conn =
     sctp_get_connection_from_transport (trans_conn);
 
-  u8 idx = sctp_pick_conn_idx_on_chunk (DATA);
+  SCTP_DBG_OUTPUT ("TRANS_CONN = %p, SCTP_CONN = %p, "
+                  "S_INDEX = %u, C_INDEX = %u,"
+                  "trans_conn->LCL_PORT = %u, trans_conn->RMT_PORT = %u",
+                  trans_conn,
+                  sctp_conn,
+                  trans_conn->s_index,
+                  trans_conn->c_index,
+                  trans_conn->lcl_port, trans_conn->rmt_port);
 
-  if (sctp_conn->sub_conn[idx].unacknowledged_hb >
-      SCTP_ASSOCIATION_MAX_RETRANS)
-    {
-      // The remote-peer is considered to be unreachable hence shutting down
+  sctp_push_hdr_i (sctp_conn, b, SCTP_STATE_ESTABLISHED);
 
-      /* Start cleanup. App wasn't notified yet so use delete notify as
-       * opposed to delete to cleanup session layer state. */
-      stream_session_delete_notify (&sctp_conn->sub_conn
-                                   [MAIN_SCTP_SUB_CONN_IDX].connection);
-
-      sctp_connection_timers_reset (sctp_conn);
-
-      sctp_connection_cleanup (sctp_conn);
-    }
-
-  sctp_push_hdr_i (sctp_conn, idx, b, SCTP_STATE_ESTABLISHED);
-
-  if (sctp_conn->sub_conn[idx].RTO_pending == 0)
-    {
-      sctp_conn->sub_conn[idx].RTO_pending = 1;
-      sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now ();
-    }
-
-  sctp_trajectory_add_start (b0, 3);
+  sctp_trajectory_add_start (b, 3);
 
   return 0;
-
 }
 
-always_inline uword
-sctp46_output_inline (vlib_main_t * vm,
-                     vlib_node_runtime_t * node,
-                     vlib_frame_t * from_frame, int is_ip4)
+u32
+sctp_prepare_data_retransmit (sctp_connection_t * sctp_conn,
+                             u8 idx,
+                             u32 offset,
+                             u32 max_deq_bytes, vlib_buffer_t ** b)
 {
-  u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->thread_index;
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  vlib_main_t *vm = vlib_get_main ();
+  int n_bytes = 0;
+  u32 bi, available_bytes, seg_size;
+  u8 *data;
 
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-  next_index = node->cached_next_index;
-  sctp_set_time_now (my_thread_index);
+  ASSERT (sctp_conn->state >= SCTP_STATE_ESTABLISHED);
+  ASSERT (max_deq_bytes != 0);
 
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
+  /*
+   * Make sure we can retransmit something
+   */
+  available_bytes =
+    transport_max_tx_dequeue (&sctp_conn->sub_conn[idx].connection);
+  ASSERT (available_bytes >= offset);
+  available_bytes -= offset;
+  if (!available_bytes)
+    return 0;
+  max_deq_bytes = clib_min (sctp_conn->sub_conn[idx].cwnd, max_deq_bytes);
+  max_deq_bytes = clib_min (available_bytes, max_deq_bytes);
+
+  seg_size = max_deq_bytes;
+
+  /*
+   * Allocate and fill in buffer(s)
+   */
 
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+  if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi)))
+    return 0;
+  *b = vlib_get_buffer (vm, bi);
+  data = sctp_init_buffer (vm, *b);
 
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         u32 bi0;
-         vlib_buffer_t *b0;
-         sctp_header_t *sctp_hdr = 0;
-         sctp_connection_t *sctp_conn;
-         sctp_tx_trace_t *t0;
-         sctp_header_t *th0 = 0;
-         u32 error0 = SCTP_ERROR_PKTS_SENT, next0 =
-           SCTP_OUTPUT_NEXT_IP_LOOKUP;
+  /* Easy case: the whole segment fits in a single buffer */
+  if (PREDICT_TRUE (seg_size <= tm->bytes_per_buffer))
+    {
+      n_bytes =
+       session_tx_fifo_peek_bytes (&sctp_conn->sub_conn[idx].connection,
+                                   data, offset, max_deq_bytes);
+      ASSERT (n_bytes == max_deq_bytes);
+      b[0]->current_length = n_bytes;
+      sctp_push_hdr_i (sctp_conn, *b, sctp_conn->state);
+    }
 
-#if SCTP_DEBUG_STATE_MACHINE
-         u16 packet_length = 0;
-#endif
+  return n_bytes;
+}
 
-         bi0 = from[0];
-         to_next[0] = bi0;
-         from += 1;
-         to_next += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
-
-         b0 = vlib_get_buffer (vm, bi0);
-         sctp_conn =
-           sctp_connection_get (vnet_buffer (b0)->sctp.connection_index,
-                                my_thread_index);
-
-         if (PREDICT_FALSE (sctp_conn == 0))
-           {
-             error0 = SCTP_ERROR_INVALID_CONNECTION;
-             next0 = SCTP_OUTPUT_NEXT_DROP;
-             goto done;
-           }
-
-         u8 idx = sctp_pick_conn_idx_on_state (sctp_conn->state);
-
-         th0 = vlib_buffer_get_current (b0);
-
-         if (is_ip4)
-           {
-             ip4_header_t *iph4 = vlib_buffer_push_ip4 (vm,
-                                                        b0,
-                                                        &sctp_conn->sub_conn
-                                                        [idx].connection.
-                                                        lcl_ip.ip4,
-                                                        &sctp_conn->
-                                                        sub_conn
-                                                        [idx].connection.
-                                                        rmt_ip.ip4,
-                                                        IP_PROTOCOL_SCTP, 1);
-
-             u32 checksum = ip4_sctp_compute_checksum (vm, b0, iph4);
-
-             sctp_hdr = ip4_next_header (iph4);
-             sctp_hdr->checksum = checksum;
-
-             vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
+void
+sctp_data_retransmit (sctp_connection_t * sctp_conn)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vlib_buffer_t *b = 0;
+  u32 bi, n_bytes = 0;
 
-#if SCTP_DEBUG_STATE_MACHINE
-             packet_length = clib_net_to_host_u16 (iph4->length);
-#endif
-           }
-         else
-           {
-             ip6_header_t *iph6 = vlib_buffer_push_ip6 (vm,
-                                                        b0,
-                                                        &sctp_conn->sub_conn
-                                                        [idx].
-                                                        connection.lcl_ip.
-                                                        ip6,
-                                                        &sctp_conn->sub_conn
-                                                        [idx].
-                                                        connection.rmt_ip.
-                                                        ip6,
-                                                        IP_PROTOCOL_SCTP);
-
-             int bogus = ~0;
-             u32 checksum = ip6_sctp_compute_checksum (vm, b0, iph6, &bogus);
-             ASSERT (!bogus);
-
-             sctp_hdr = ip6_next_header (iph6);
-             sctp_hdr->checksum = checksum;
-
-             vnet_buffer (b0)->l3_hdr_offset = (u8 *) iph6 - b0->data;
-             vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
+  u8 idx = sctp_data_subconn_select (sctp_conn);
 
-#if SCTP_DEBUG_STATE_MACHINE
-             packet_length = clib_net_to_host_u16 (iph6->payload_length);
-#endif
-           }
-
-         u8 is_valid =
-           (sctp_conn->sub_conn[idx].connection.lcl_port ==
-            sctp_hdr->src_port
-            || sctp_conn->sub_conn[idx].connection.lcl_port ==
-            sctp_hdr->dst_port)
-           && (sctp_conn->sub_conn[idx].connection.rmt_port ==
-               sctp_hdr->dst_port
-               || sctp_conn->sub_conn[idx].connection.rmt_port ==
-               sctp_hdr->src_port);
-
-         sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr;
-         u8 chunk_type = vnet_sctp_get_chunk_type (&full_hdr->common_hdr);
-
-         if (!is_valid)
-           {
-             SCTP_DBG_STATE_MACHINE ("BUFFER IS INCORRECT: conn_index = %u, "
-                                     "packet_length = %u, "
-                                     "chunk_type = %u [%s], "
-                                     "connection.lcl_port = %u, sctp_hdr->src_port = %u, "
-                                     "connection.rmt_port = %u, sctp_hdr->dst_port = %u",
-                                     sctp_conn->sub_conn
-                                     [idx].connection.c_index, packet_length,
-                                     chunk_type,
-                                     sctp_chunk_to_string (chunk_type),
-                                     sctp_conn->sub_conn[idx].
-                                     connection.lcl_port, sctp_hdr->src_port,
-                                     sctp_conn->sub_conn[idx].
-                                     connection.rmt_port,
-                                     sctp_hdr->dst_port);
-
-             error0 = SCTP_ERROR_UNKOWN_CHUNK;
-             next0 = SCTP_OUTPUT_NEXT_DROP;
-             goto done;
-           }
-
-         SCTP_DBG_STATE_MACHINE
-           ("CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), "
-            "CHUNK_TYPE = %s, " "SRC_PORT = %u, DST_PORT = %u",
-            sctp_conn->sub_conn[idx].connection.c_index,
-            sctp_conn->state, sctp_state_to_string (sctp_conn->state),
-            sctp_chunk_to_string (chunk_type), full_hdr->hdr.src_port,
-            full_hdr->hdr.dst_port);
-
-         if (chunk_type == DATA)
-           SCTP_ADV_DBG_OUTPUT ("PACKET_LENGTH = %u", packet_length);
-
-         /* Let's make sure the state-machine does not send anything crazy */
-         switch (sctp_conn->state)
-           {
-           case SCTP_STATE_CLOSED:
-             {
-               if (chunk_type != INIT && chunk_type != INIT_ACK)
-                 {
-                   SCTP_DBG_STATE_MACHINE
-                     ("Sending the wrong chunk (%s) based on state-machine status (%s)",
-                      sctp_chunk_to_string (chunk_type),
-                      sctp_state_to_string (sctp_conn->state));
-
-                   error0 = SCTP_ERROR_UNKOWN_CHUNK;
-                   next0 = SCTP_OUTPUT_NEXT_DROP;
-                   goto done;
-                 }
-               break;
-             }
-           case SCTP_STATE_ESTABLISHED:
-             if (chunk_type != DATA && chunk_type != HEARTBEAT &&
-                 chunk_type != HEARTBEAT_ACK && chunk_type != SACK &&
-                 chunk_type != COOKIE_ACK && chunk_type != SHUTDOWN)
-               {
-                 SCTP_DBG_STATE_MACHINE
-                   ("Sending the wrong chunk (%s) based on state-machine status (%s)",
-                    sctp_chunk_to_string (chunk_type),
-                    sctp_state_to_string (sctp_conn->state));
-
-                 error0 = SCTP_ERROR_UNKOWN_CHUNK;
-                 next0 = SCTP_OUTPUT_NEXT_DROP;
-                 goto done;
-               }
-             break;
-           case SCTP_STATE_COOKIE_WAIT:
-             if (chunk_type != COOKIE_ECHO)
-               {
-                 SCTP_DBG_STATE_MACHINE
-                   ("Sending the wrong chunk (%s) based on state-machine status (%s)",
-                    sctp_chunk_to_string (chunk_type),
-                    sctp_state_to_string (sctp_conn->state));
-
-                 error0 = SCTP_ERROR_UNKOWN_CHUNK;
-                 next0 = SCTP_OUTPUT_NEXT_DROP;
-                 goto done;
-               }
-             /* Change state */
-             sctp_conn->state = SCTP_STATE_COOKIE_ECHOED;
-             break;
-           default:
-             SCTP_DBG_STATE_MACHINE
-               ("Sending chunk (%s) based on state-machine status (%s)",
-                sctp_chunk_to_string (chunk_type),
-                sctp_state_to_string (sctp_conn->state));
-             break;
-           }
-
-         if (chunk_type == SHUTDOWN)
-           {
-             /* Start the SCTP_TIMER_T2_SHUTDOWN timer */
-             sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T2_SHUTDOWN,
-                             sctp_conn->sub_conn[idx].RTO);
-             sctp_conn->state = SCTP_STATE_SHUTDOWN_SENT;
-           }
-
-         if (chunk_type == DATA)
-           {
-             sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T3_RXTX,
-                                sctp_conn->sub_conn[idx].RTO);
-           }
-
-         vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
-         vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
-
-         b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
-
-         SCTP_DBG_STATE_MACHINE ("CONNECTION_INDEX = %u, "
-                                 "NEW_STATE = %s, "
-                                 "CHUNK_SENT = %s",
-                                 sctp_conn->sub_conn[idx].connection.c_index,
-                                 sctp_state_to_string (sctp_conn->state),
-                                 sctp_chunk_to_string (chunk_type));
-
-         vnet_sctp_common_hdr_params_host_to_net (&full_hdr->common_hdr);
-
-       done:
-         b0->error = node->errors[error0];
-         if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-           {
-             t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
-             if (th0)
-               {
-                 clib_memcpy (&t0->sctp_header, th0,
-                              sizeof (t0->sctp_header));
-               }
-             else
-               {
-                 memset (&t0->sctp_header, 0, sizeof (t0->sctp_header));
-               }
-             clib_memcpy (&t0->sctp_connection, sctp_conn,
-                          sizeof (t0->sctp_connection));
-           }
-
-         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-                                          n_left_to_next, bi0, next0);
-       }
+  SCTP_DBG_OUTPUT
+    ("SCTP_CONN = %p, IDX = %u, S_INDEX = %u, C_INDEX = %u, sctp_conn->[...].LCL_PORT = %u, sctp_conn->[...].RMT_PORT = %u",
+     sctp_conn, idx, sctp_conn->sub_conn[idx].connection.s_index,
+     sctp_conn->sub_conn[idx].connection.c_index,
+     sctp_conn->sub_conn[idx].connection.lcl_port,
+     sctp_conn->sub_conn[idx].connection.rmt_port);
 
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+  if (sctp_conn->state >= SCTP_STATE_ESTABLISHED)
+    {
+      return;
     }
 
-  return from_frame->n_vectors;
-}
+  n_bytes =
+    sctp_prepare_data_retransmit (sctp_conn, idx, 0,
+                                 sctp_conn->sub_conn[idx].cwnd, &b);
+  if (n_bytes > 0)
+    SCTP_DBG_OUTPUT ("We have data (%u bytes) to retransmit", n_bytes);
 
-static uword
-sctp4_output (vlib_main_t * vm, vlib_node_runtime_t * node,
-             vlib_frame_t * from_frame)
-{
-  return sctp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
-}
+  bi = vlib_get_buffer_index (vm, b);
 
-static uword
-sctp6_output (vlib_main_t * vm, vlib_node_runtime_t * node,
-             vlib_frame_t * from_frame)
-{
-  return sctp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+  sctp_enqueue_to_output_now (vm, b, bi,
+                             sctp_conn->sub_conn[idx].connection.is_ip4);
+
+  return;
 }
 
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (sctp4_output_node) =
-{
-  .function = sctp4_output,.name = "sctp4-output",
-    /* Takes a vector of packets. */
-    .vector_size = sizeof (u32),
-    .n_errors = SCTP_N_ERROR,
-    .error_strings = sctp_error_strings,
-    .n_next_nodes = SCTP_OUTPUT_N_NEXT,
-    .next_nodes = {
-#define _(s,n) [SCTP_OUTPUT_NEXT_##s] = n,
-    foreach_sctp4_output_next
-#undef _
-    },
-    .format_buffer = format_sctp_header,
-    .format_trace = format_sctp_tx_trace,
-};
-/* *INDENT-ON* */
-
-VLIB_NODE_FUNCTION_MULTIARCH (sctp4_output_node, sctp4_output);
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (sctp6_output_node) =
+#if SCTP_DEBUG_STATE_MACHINE
+always_inline u8
+sctp_validate_output_state_machine (sctp_connection_t * sctp_conn,
+                                   u8 chunk_type)
 {
-  .function = sctp6_output,
-  .name = "sctp6-output",
-    /* Takes a vector of packets. */
-  .vector_size = sizeof (u32),
-  .n_errors = SCTP_N_ERROR,
-  .error_strings = sctp_error_strings,
-  .n_next_nodes = SCTP_OUTPUT_N_NEXT,
-  .next_nodes = {
-#define _(s,n) [SCTP_OUTPUT_NEXT_##s] = n,
-    foreach_sctp6_output_next
-#undef _
-  },
-  .format_buffer = format_sctp_header,
-  .format_trace = format_sctp_tx_trace,
-};
-/* *INDENT-ON* */
-
-VLIB_NODE_FUNCTION_MULTIARCH (sctp6_output_node, sctp6_output);
+  u8 result = 0;
+  switch (sctp_conn->state)
+    {
+    case SCTP_STATE_CLOSED:
+      if (chunk_type != INIT && chunk_type != INIT_ACK)
+       result = 1;
+      break;
+    case SCTP_STATE_ESTABLISHED:
+      if (chunk_type != DATA && chunk_type != HEARTBEAT &&
+         chunk_type != HEARTBEAT_ACK && chunk_type != SACK &&
+         chunk_type != COOKIE_ACK && chunk_type != SHUTDOWN)
+       result = 1;
+      break;
+    case SCTP_STATE_COOKIE_WAIT:
+      if (chunk_type != COOKIE_ECHO)
+       result = 1;
+      break;
+    case SCTP_STATE_SHUTDOWN_SENT:
+      if (chunk_type != SHUTDOWN_COMPLETE)
+       result = 1;
+      break;
+    case SCTP_STATE_SHUTDOWN_RECEIVED:
+      if (chunk_type != SHUTDOWN_ACK)
+       result = 1;
+      break;
+    }
+  return result;
+}
+#endif
 
 /*
  * fd.io coding-style-patch-verification: ON