VPP-1506: dump local punts and registered punt sockets
[vpp.git] / src / vnet / sctp / sctp.c
index 2e37a91..85ca9b8 100644 (file)
@@ -25,12 +25,13 @@ sctp_connection_bind (u32 session_index, transport_endpoint_t * tep)
   void *iface_ip;
 
   pool_get (tm->listener_pool, listener);
-  memset (listener, 0, sizeof (*listener));
+  clib_memset (listener, 0, sizeof (*listener));
 
-  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = listener;
-  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index =
+  listener->sub_conn[SCTP_PRIMARY_PATH_IDX].subconn_idx =
+    SCTP_PRIMARY_PATH_IDX;
+  listener->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index =
     listener - tm->listener_pool;
-  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.lcl_port = tep->port;
+  listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.lcl_port = tep->port;
 
   /* If we are provided a sw_if_index, bind using one of its IPs */
   if (ip_is_zero (&tep->ip, 1) && tep->sw_if_index != ENDPOINT_INVALID_INDEX)
@@ -39,20 +40,26 @@ sctp_connection_bind (u32 session_index, transport_endpoint_t * tep)
                                                 tep->is_ip4)))
        ip_set (&tep->ip, iface_ip, tep->is_ip4);
     }
-  ip_copy (&listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.lcl_ip,
+  ip_copy (&listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.lcl_ip,
           &tep->ip, tep->is_ip4);
 
-  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.is_ip4 = tep->is_ip4;
-  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.proto =
+  u32 mtu = tep->is_ip4 ? vnet_sw_interface_get_mtu (vnet_get_main (),
+                                                    tep->sw_if_index,
+                                                    VNET_MTU_IP4) :
+    vnet_sw_interface_get_mtu (vnet_get_main (), tep->sw_if_index,
+                              VNET_MTU_IP6);
+  listener->sub_conn[SCTP_PRIMARY_PATH_IDX].PMTU = mtu;
+  listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.is_ip4 = tep->is_ip4;
+  listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.proto =
     TRANSPORT_PROTO_SCTP;
-  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_s_index = session_index;
-  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.fib_index =
+  listener->sub_conn[SCTP_PRIMARY_PATH_IDX].c_s_index = session_index;
+  listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.fib_index =
     tep->fib_index;
   listener->state = SCTP_STATE_CLOSED;
 
   sctp_connection_timers_init (listener);
 
-  return listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index;
+  return listener->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index;
 }
 
 u32
@@ -65,13 +72,13 @@ static void
 sctp_connection_unbind (u32 listener_index)
 {
   sctp_main_t *tm = vnet_get_sctp_main ();
-  sctp_connection_t *tc;
+  sctp_connection_t *sctp_conn;
 
-  tc = pool_elt_at_index (tm->listener_pool, listener_index);
+  sctp_conn = pool_elt_at_index (tm->listener_pool, listener_index);
 
   /* Poison the entry */
   if (CLIB_DEBUG > 0)
-    memset (tc, 0xFA, sizeof (*tc));
+    clib_memset (sctp_conn, 0xFA, sizeof (*sctp_conn));
 
   pool_put_index (tm->listener_pool, listener_index);
 }
@@ -127,31 +134,33 @@ sctp_alloc_custom_local_endpoint (sctp_main_t * tm, ip46_address_t * lcl_addr,
  * Initialize all connection timers as invalid
  */
 void
-sctp_connection_timers_init (sctp_connection_t * tc)
+sctp_connection_timers_init (sctp_connection_t * sctp_conn)
 {
   int i, j;
 
   /* Set all to invalid */
   for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
-    for (j = 0; j < SCTP_N_TIMERS; j++)
-      {
-       tc->sub_conn[i].timers[j] = SCTP_TIMER_HANDLE_INVALID;
-      }
+    {
+      sctp_conn->sub_conn[i].RTO = SCTP_RTO_INIT;
 
-  tc->rto = SCTP_RTO_INIT;
+      for (j = 0; j < SCTP_N_TIMERS; j++)
+       {
+         sctp_conn->sub_conn[i].timers[j] = SCTP_TIMER_HANDLE_INVALID;
+       }
+    }
 }
 
 /**
  * Stop all connection timers
  */
 void
-sctp_connection_timers_reset (sctp_connection_t * tc)
+sctp_connection_timers_reset (sctp_connection_t * sctp_conn)
 {
   int i, j;
   for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
     {
       for (j = 0; j < SCTP_N_TIMERS; j++)
-       sctp_timer_reset (tc, i, j);
+       sctp_timer_reset (sctp_conn, i, j);
     }
 }
 
@@ -176,40 +185,59 @@ format_sctp_state (u8 * s, va_list * args)
+/**
+ * Format the endpoints of every sub-connection slot of an SCTP connection.
+ *
+ * Takes one variadic argument, a sctp_connection_t pointer; when it is NULL
+ * the vector is returned unchanged. For each of the MAX_SCTP_CONNECTIONS
+ * slots it appends "[#thread][T] local:port->remote:port", choosing the
+ * IPv4 or IPv6 address formatter from the slot's is_ip4 flag.
+ *
+ * @param s    vector being appended to
+ * @param args variadic list holding the connection pointer
+ * @return the vector returned by the last format () call
+ */
 u8 *
 format_sctp_connection_id (u8 * s, va_list * args)
 {
-  /*
-     sctp_connection_t *tc = va_arg (*args, sctp_connection_t *);
-     if (!tc)
-     return s;
-     if (tc->c_is_ip4)
-     {
-     s = format (s, "[#%d][%s] %U:%d->%U:%d", tc->c_thread_index, "T",
-     format_ip4_address, &tc->c_lcl_ip4,
-     clib_net_to_host_u16 (tc->c_lcl_port), format_ip4_address,
-     &tc->c_rmt_ip4, clib_net_to_host_u16 (tc->c_rmt_port));
-     }
-     else
-     {
-     s = format (s, "[#%d][%s] %U:%d->%U:%d", tc->c_thread_index, "T",
-     format_ip6_address, &tc->c_lcl_ip6,
-     clib_net_to_host_u16 (tc->c_lcl_port), format_ip6_address,
-     &tc->c_rmt_ip6, clib_net_to_host_u16 (tc->c_rmt_port));
-     }
-   */
+  sctp_connection_t *sctp_conn = va_arg (*args, sctp_connection_t *);
+  if (!sctp_conn)
+    return s;
+
+  u8 i;
+  for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+    {
+      /* format () appends to s by itself. s must not be handed over again
+       * through a leading %U: that conversion consumes a format-function
+       * pointer, so the vector would be called as a function. */
+      if (sctp_conn->sub_conn[i].connection.is_ip4)
+       {
+         s = format (s, "[#%d][%s] %U:%d->%U:%d",
+                     sctp_conn->sub_conn[i].connection.thread_index,
+                     "T",
+                     format_ip4_address,
+                     &sctp_conn->sub_conn[i].connection.lcl_ip.ip4,
+                     clib_net_to_host_u16 (sctp_conn->sub_conn[i].
+                                           connection.lcl_port),
+                     format_ip4_address,
+                     &sctp_conn->sub_conn[i].connection.rmt_ip.ip4,
+                     clib_net_to_host_u16 (sctp_conn->sub_conn[i].
+                                           connection.rmt_port));
+       }
+      else
+       {
+         s = format (s, "[#%d][%s] %U:%d->%U:%d",
+                     sctp_conn->sub_conn[i].connection.thread_index,
+                     "T",
+                     format_ip6_address,
+                     &sctp_conn->sub_conn[i].connection.lcl_ip.ip6,
+                     clib_net_to_host_u16 (sctp_conn->sub_conn[i].
+                                           connection.lcl_port),
+                     format_ip6_address,
+                     &sctp_conn->sub_conn[i].connection.rmt_ip.ip6,
+                     clib_net_to_host_u16 (sctp_conn->sub_conn[i].
+                                           connection.rmt_port));
+       }
+    }
   return s;
 }
 
 u8 *
 format_sctp_connection (u8 * s, va_list * args)
 {
-  sctp_connection_t *tc = va_arg (*args, sctp_connection_t *);
+  sctp_connection_t *sctp_conn = va_arg (*args, sctp_connection_t *);
   u32 verbose = va_arg (*args, u32);
 
-  if (!tc)
+  if (!sctp_conn)
     return s;
-  s = format (s, "%-50U", format_sctp_connection_id, tc);
+  s = format (s, "%-50U", format_sctp_connection_id, sctp_conn);
   if (verbose)
     {
-      s = format (s, "%-15U", format_sctp_state, tc->state);
+      s = format (s, "%-15U", format_sctp_state, sctp_conn->state);
     }
 
   return s;
@@ -219,154 +247,220 @@ format_sctp_connection (u8 * s, va_list * args)
  * Initialize connection send variables.
  */
 void
-sctp_init_snd_vars (sctp_connection_t * tc)
+sctp_init_snd_vars (sctp_connection_t * sctp_conn)
 {
   u32 time_now;
-
   /*
    * We use the time to randomize iss and for setting up the initial
    * timestamp. Make sure it's updated otherwise syn and ack in the
    * handshake may make it look as if time has flown in the opposite
    * direction for us.
    */
+
   sctp_set_time_now (vlib_get_thread_index ());
   time_now = sctp_time_now ();
 
-  tc->iss = random_u32 (&time_now);
-  tc->snd_una = tc->iss;
-  tc->snd_nxt = tc->iss + 1;
-  tc->snd_una_max = tc->snd_nxt;
+  sctp_conn->local_initial_tsn = random_u32 (&time_now);
+  sctp_conn->last_unacked_tsn = sctp_conn->local_initial_tsn;
+  sctp_conn->next_tsn = sctp_conn->local_initial_tsn + 1;
+
+  sctp_conn->remote_initial_tsn = 0x0;
+  sctp_conn->last_rcvd_tsn = sctp_conn->remote_initial_tsn;
 }
 
-/**
- * Update max segment size we're able to process.
- *
- * The value is constrained by our interface's MTU and IP options. It is
- * also what we advertise to our peer.
- */
-void
-sctp_update_rcv_mss (sctp_connection_t * tc)
+always_inline sctp_connection_t *
+sctp_sub_connection_add (u8 thread_index)
 {
-  /* TODO find our iface MTU */
-  tc->a_rwnd = DEFAULT_A_RWND - sizeof (sctp_full_hdr_t);
-  tc->rcv_opts.a_rwnd = tc->a_rwnd;
-  tc->rcv_a_rwnd = tc->a_rwnd; /* This will be updated by our congestion algos */
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  sctp_connection_t *sctp_conn = tm->connections[thread_index];
+
+  u8 subconn_idx = sctp_next_avail_subconn (sctp_conn);
+
+  ASSERT (subconn_idx < MAX_SCTP_CONNECTIONS);
+
+  sctp_conn->sub_conn[subconn_idx].connection.c_index =
+    sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.c_index;
+  sctp_conn->sub_conn[subconn_idx].connection.thread_index = thread_index;
+  sctp_conn->sub_conn[subconn_idx].subconn_idx = subconn_idx;
+
+  return sctp_conn;
 }
 
-void
-sctp_init_mss (sctp_connection_t * tc)
+/**
+ * Record an IPv4 local/remote address pair on a free sub-connection slot.
+ *
+ * The connection is the one returned by
+ * sctp_sub_connection_add (vm->thread_index); the slot is the one reported
+ * by sctp_next_avail_subconn ().
+ *
+ * @param vm       vlib main whose thread_index selects the connection
+ * @param lcl_addr local IPv4 address to store
+ * @param rmt_addr remote IPv4 address to store
+ * @return SCTP_ERROR_MAX_CONNECTIONS when no slot is available,
+ *         SCTP_ERROR_NONE otherwise
+ */
+u8
+sctp_sub_connection_add_ip4 (vlib_main_t * vm,
+                            ip4_address_t * lcl_addr,
+                            ip4_address_t * rmt_addr)
 {
-  SCTP_DBG ("CONN_INDEX = %u",
-           tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index);
+  sctp_connection_t *sctp_conn = sctp_sub_connection_add (vm->thread_index);
 
-  u16 default_a_rwnd = 536;
-  sctp_update_rcv_mss (tc);
+  u8 subconn_idx = sctp_next_avail_subconn (sctp_conn);
 
-  /* TODO cache mss and consider PMTU discovery */
-  tc->snd_a_rwnd = clib_min (tc->rcv_opts.a_rwnd, tc->a_rwnd);
+  if (subconn_idx == MAX_SCTP_CONNECTIONS)
+    return SCTP_ERROR_MAX_CONNECTIONS;
 
-  if (tc->snd_a_rwnd < sizeof (sctp_full_hdr_t))
-    {
-      SCTP_ADV_DBG ("tc->snd_a_rwnd < sizeof(sctp_full_hdr_t)");
-      /* Assume that at least the min default mss works */
-      tc->snd_a_rwnd = default_a_rwnd;
-      tc->rcv_opts.a_rwnd = default_a_rwnd;
-    }
+  /* Copy the address the pointer refers to (not the pointer variable
+   * itself) into the ip4 member, which is the member that
+   * sctp_sub_connection_del_ip4 () compares against. */
+  clib_memcpy (&sctp_conn->sub_conn[subconn_idx].connection.lcl_ip.ip4,
+              lcl_addr, sizeof (*lcl_addr));
 
-  ASSERT (tc->snd_a_rwnd > sizeof (sctp_full_hdr_t));
+  clib_memcpy (&sctp_conn->sub_conn[subconn_idx].connection.rmt_ip.ip4,
+              rmt_addr, sizeof (*rmt_addr));
+
+  /* sctp_sub_connection_del_ip4 () skips entries whose is_ip4 is unset */
+  sctp_conn->sub_conn[subconn_idx].connection.is_ip4 = 1;
+
+  sctp_conn->forming_association_changed = 1;
+
+  return SCTP_ERROR_NONE;
 }
 
-/** Initialize sctp connection variables
- *
- * Should be called after having received a msg from the peer, i.e., a SYN or
- * a SYNACK, such that connection options have already been exchanged. */
-void
-sctp_connection_init_vars (sctp_connection_t * tc)
+u8
+sctp_sub_connection_del_ip4 (ip4_address_t * lcl_addr,
+                            ip4_address_t * rmt_addr)
 {
-  sctp_init_mss (tc);
-  sctp_init_snd_vars (tc);
+  sctp_main_t *sctp_main = vnet_get_sctp_main ();
+
+  u32 thread_idx = vlib_get_thread_index ();
+  u8 i;
+
+  ASSERT (thread_idx == 0);
+
+  for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+    {
+      sctp_connection_t *sctp_conn = sctp_main->connections[thread_idx];
+      sctp_sub_connection_t *sub_conn =
+       &sctp_main->connections[thread_idx]->sub_conn[i];
+      ip46_address_t *lcl_ip =
+       &sctp_main->connections[thread_idx]->sub_conn[i].connection.lcl_ip;
+      ip46_address_t *rmt_ip =
+       &sctp_main->connections[thread_idx]->sub_conn[i].connection.rmt_ip;
+
+      if (!sub_conn->connection.is_ip4)
+       continue;
+      if (lcl_ip->ip4.as_u32 == lcl_addr->as_u32 &&
+         rmt_ip->ip4.as_u32 == rmt_addr->as_u32)
+       {
+         sub_conn->state = SCTP_SUBCONN_STATE_DOWN;
+         sctp_conn->forming_association_changed = 1;
+         break;
+       }
+    }
+  return SCTP_ERROR_NONE;
 }
 
-always_inline sctp_connection_t *
-sctp_sub_connection_add (u8 thread_index)
+/**
+ * Record an IPv6 local/remote address pair on a free sub-connection slot.
+ *
+ * The connection is the one returned by
+ * sctp_sub_connection_add (vm->thread_index); the slot is the one reported
+ * by sctp_next_avail_subconn ().
+ *
+ * @param vm       vlib main whose thread_index selects the connection
+ * @param lcl_addr local IPv6 address to store
+ * @param rmt_addr remote IPv6 address to store
+ * @return SCTP_ERROR_MAX_CONNECTIONS when no slot is available,
+ *         SCTP_ERROR_NONE otherwise
+ */
+u8
+sctp_sub_connection_add_ip6 (vlib_main_t * vm,
+                            ip6_address_t * lcl_addr,
+                            ip6_address_t * rmt_addr)
 {
-  sctp_main_t *tm = vnet_get_sctp_main ();
-  sctp_connection_t *tc = tm->connections[thread_index];
+  sctp_connection_t *sctp_conn = sctp_sub_connection_add (vm->thread_index);
 
-  tc->sub_conn[tc->next_avail_sub_conn].connection.c_index =
-    tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index;
-  tc->sub_conn[tc->next_avail_sub_conn].connection.thread_index =
-    thread_index;
-  tc->sub_conn[tc->next_avail_sub_conn].parent = tc;
+  u8 subconn_idx = sctp_next_avail_subconn (sctp_conn);
 
-  tc->next_avail_sub_conn += 1;
+  if (subconn_idx == MAX_SCTP_CONNECTIONS)
+    return SCTP_ERROR_MAX_CONNECTIONS;
 
-  return tc;
+  /* Copy the full address the pointer refers to; sizeof on the pointer
+   * variable would only cover pointer-sized bytes of the wrong object. */
+  clib_memcpy (&sctp_conn->sub_conn[subconn_idx].connection.lcl_ip.ip6,
+              lcl_addr, sizeof (*lcl_addr));
+
+  clib_memcpy (&sctp_conn->sub_conn[subconn_idx].connection.rmt_ip.ip6,
+              rmt_addr, sizeof (*rmt_addr));
+
+  /* Mark the slot as not IPv4 so family-based lookups classify it */
+  sctp_conn->sub_conn[subconn_idx].connection.is_ip4 = 0;
+
+  sctp_conn->forming_association_changed = 1;
+
+  return SCTP_ERROR_NONE;
 }
 
-void
-sctp_sub_connection_add_ip4 (u8 thread_index,
-                            sctp_ipv4_addr_param_t * ipv4_addr)
+/**
+ * Mark down the sub-connection that matches an IPv6 address pair.
+ *
+ * Scans the sub-connection slots of sctp_main->connections[thread_idx]
+ * (the calling thread, asserted to be 0). The first non-IPv4 slot whose
+ * local and remote addresses both equal the arguments is set to
+ * SCTP_SUBCONN_STATE_DOWN and forming_association_changed is raised.
+ *
+ * @param lcl_addr local IPv6 address to match
+ * @param rmt_addr remote IPv6 address to match
+ * @return SCTP_ERROR_NONE, whether or not a slot matched
+ */
+u8
+sctp_sub_connection_del_ip6 (ip6_address_t * lcl_addr,
+                            ip6_address_t * rmt_addr)
 {
-  sctp_connection_t *tc = sctp_sub_connection_add (thread_index);
+  sctp_main_t *sctp_main = vnet_get_sctp_main ();
 
-  clib_memcpy (&tc->sub_conn[tc->next_avail_sub_conn].connection.lcl_ip.ip4,
-              &ipv4_addr->address, sizeof (ipv4_addr->address));
+  u32 thread_idx = vlib_get_thread_index ();
+  u8 i;
+
+  ASSERT (thread_idx == 0);
+
+  for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+    {
+      sctp_connection_t *sctp_conn = sctp_main->connections[thread_idx];
+      sctp_sub_connection_t *sub_conn =
+       &sctp_main->connections[thread_idx]->sub_conn[i];
+      ip46_address_t *lcl_ip =
+       &sctp_main->connections[thread_idx]->sub_conn[i].connection.lcl_ip;
+      ip46_address_t *rmt_ip =
+       &sctp_main->connections[thread_idx]->sub_conn[i].connection.rmt_ip;
+
+      /* Only IPv6 slots can match; the IPv4 ones are skipped */
+      if (sub_conn->connection.is_ip4)
+       continue;
+      if ((lcl_ip->ip6.as_u64[0] == lcl_addr->as_u64[0]
+          && lcl_ip->ip6.as_u64[1] == lcl_addr->as_u64[1])
+         && (rmt_ip->ip6.as_u64[0] == rmt_addr->as_u64[0]
+             && rmt_ip->ip6.as_u64[1] == rmt_addr->as_u64[1]))
+       {
+         sub_conn->state = SCTP_SUBCONN_STATE_DOWN;
+         sctp_conn->forming_association_changed = 1;
+         break;
+       }
+    }
+  return SCTP_ERROR_NONE;
 }
 
-void
-sctp_sub_connection_add_ip6 (u8 thread_index,
-                            sctp_ipv6_addr_param_t * ipv6_addr)
+u8
+sctp_configure (sctp_user_configuration_t config)
 {
-  sctp_connection_t *tc = sctp_sub_connection_add (thread_index);
+  sctp_main_t *sctp_main = vnet_get_sctp_main ();
+
+  u32 thread_idx = vlib_get_thread_index ();
 
-  clib_memcpy (&tc->sub_conn[tc->next_avail_sub_conn].connection.lcl_ip.ip6,
-              &ipv6_addr->address, sizeof (ipv6_addr->address));
+  sctp_main->connections[thread_idx]->conn_config.never_delay_sack =
+    config.never_delay_sack;
+  sctp_main->connections[thread_idx]->conn_config.never_bundle =
+    config.never_bundle;
+
+  return 0;
 }
 
 sctp_connection_t *
 sctp_connection_new (u8 thread_index)
 {
-  sctp_main_t *tm = vnet_get_sctp_main ();
-  sctp_connection_t *tc;
+  sctp_main_t *sctp_main = vnet_get_sctp_main ();
+  sctp_connection_t *sctp_conn;
 
-  pool_get (tm->connections[thread_index], tc);
-  memset (tc, 0, sizeof (*tc));
-  tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = tc;
-  tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index =
-    tc - tm->connections[thread_index];
-  tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index = thread_index;
-  tc->local_tag = 0;
-  tc->next_avail_sub_conn = 1;
+  pool_get (sctp_main->connections[thread_index], sctp_conn);
+  clib_memset (sctp_conn, 0, sizeof (*sctp_conn));
+  sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].subconn_idx =
+    SCTP_PRIMARY_PATH_IDX;
+  sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index =
+    sctp_conn - sctp_main->connections[thread_index];
+  sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].c_thread_index = thread_index;
+  sctp_conn->local_tag = 0;
 
-  return tc;
+  return sctp_conn;
 }
 
 sctp_connection_t *
 sctp_half_open_connection_new (u8 thread_index)
 {
   sctp_main_t *tm = vnet_get_sctp_main ();
-  sctp_connection_t *tc = 0;
+  sctp_connection_t *sctp_conn = 0;
   ASSERT (vlib_get_thread_index () == 0);
-  pool_get (tm->half_open_connections, tc);
-  memset (tc, 0, sizeof (*tc));
-  tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index =
-    tc - tm->half_open_connections;
-  tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = tc;
-  return tc;
+  pool_get (tm->half_open_connections, sctp_conn);
+  clib_memset (sctp_conn, 0, sizeof (*sctp_conn));
+  sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index =
+    sctp_conn - tm->half_open_connections;
+  sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].subconn_idx =
+    SCTP_PRIMARY_PATH_IDX;
+  return sctp_conn;
 }
 
 static inline int
-sctp_connection_open (transport_endpoint_t * rmt)
+sctp_connection_open (transport_endpoint_cfg_t * rmt)
 {
   sctp_main_t *tm = vnet_get_sctp_main ();
-  sctp_connection_t *tc;
+  sctp_connection_t *sctp_conn;
   ip46_address_t lcl_addr;
   u16 lcl_port;
   uword thread_id;
   int rv;
 
-  u8 idx = sctp_pick_conn_idx_on_state (SCTP_STATE_CLOSED);
+  u8 idx = SCTP_PRIMARY_PATH_IDX;
 
   /*
    * Allocate local endpoint
@@ -389,27 +483,33 @@ sctp_connection_open (transport_endpoint_t * rmt)
   ASSERT (thread_id == 0);
 
   clib_spinlock_lock_if_init (&tm->half_open_lock);
-  tc = sctp_half_open_connection_new (thread_id);
-
-  transport_connection_t *t_conn = &tc->sub_conn[idx].connection;
-  ip_copy (&t_conn->rmt_ip, &rmt->ip, rmt->is_ip4);
-  ip_copy (&t_conn->lcl_ip, &lcl_addr, rmt->is_ip4);
-  tc->sub_conn[idx].parent = tc;
-  t_conn->rmt_port = rmt->port;
-  t_conn->lcl_port = clib_host_to_net_u16 (lcl_port);
-  t_conn->is_ip4 = rmt->is_ip4;
-  t_conn->proto = TRANSPORT_PROTO_SCTP;
-  t_conn->fib_index = rmt->fib_index;
-
-  sctp_connection_timers_init (tc);
+  sctp_conn = sctp_half_open_connection_new (thread_id);
+  u32 mtu = rmt->is_ip4 ? vnet_sw_interface_get_mtu (vnet_get_main (),
+                                                    rmt->peer.sw_if_index,
+                                                    VNET_MTU_IP4) :
+    vnet_sw_interface_get_mtu (vnet_get_main (), rmt->peer.sw_if_index,
+                              VNET_MTU_IP6);
+  sctp_conn->sub_conn[idx].PMTU = mtu;
+
+  transport_connection_t *trans_conn = &sctp_conn->sub_conn[idx].connection;
+  ip_copy (&trans_conn->rmt_ip, &rmt->ip, rmt->is_ip4);
+  ip_copy (&trans_conn->lcl_ip, &lcl_addr, rmt->is_ip4);
+  sctp_conn->sub_conn[idx].subconn_idx = idx;
+  trans_conn->rmt_port = rmt->port;
+  trans_conn->lcl_port = clib_host_to_net_u16 (lcl_port);
+  trans_conn->is_ip4 = rmt->is_ip4;
+  trans_conn->proto = TRANSPORT_PROTO_SCTP;
+  trans_conn->fib_index = rmt->fib_index;
+
+  sctp_connection_timers_init (sctp_conn);
   /* The other connection vars will be initialized after INIT_ACK chunk received */
-  sctp_init_snd_vars (tc);
+  sctp_init_snd_vars (sctp_conn);
 
-  sctp_send_init (tc);
+  sctp_send_init (sctp_conn);
 
   clib_spinlock_unlock_if_init (&tm->half_open_lock);
 
-  return tc->sub_conn[idx].connection.c_index;
+  return sctp_conn->sub_conn[idx].connection.c_index;
 }
 
 /**
@@ -418,7 +518,7 @@ sctp_connection_open (transport_endpoint_t * rmt)
  * No notifications.
  */
 void
-sctp_connection_cleanup (sctp_connection_t * tc)
+sctp_connection_cleanup (sctp_connection_t * sctp_conn)
 {
   sctp_main_t *tm = &sctp_main;
   u8 i;
@@ -426,50 +526,58 @@ sctp_connection_cleanup (sctp_connection_t * tc)
   /* Cleanup local endpoint if this was an active connect */
   for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
     transport_endpoint_cleanup (TRANSPORT_PROTO_SCTP,
-                               &tc->sub_conn[i].connection.lcl_ip,
-                               tc->sub_conn[i].connection.lcl_port);
-
-  /* Check if connection is not yet fully established */
-  if (tc->state == SCTP_STATE_COOKIE_WAIT)
-    {
+                               &sctp_conn->sub_conn[i].connection.lcl_ip,
+                               sctp_conn->sub_conn[i].connection.lcl_port);
 
-    }
-  else
-    {
-      int thread_index =
-       tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.thread_index;
+  int thread_index =
+    sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.thread_index;
 
-      /* Make sure all timers are cleared */
-      sctp_connection_timers_reset (tc);
+  /* Make sure all timers are cleared */
+  sctp_connection_timers_reset (sctp_conn);
 
-      /* Poison the entry */
-      if (CLIB_DEBUG > 0)
-       memset (tc, 0xFA, sizeof (*tc));
-      pool_put (tm->connections[thread_index], tc);
-    }
+  /* Poison the entry */
+  if (CLIB_DEBUG > 0)
+    clib_memset (sctp_conn, 0xFA, sizeof (*sctp_conn));
+  pool_put (tm->connections[thread_index], sctp_conn);
 }
 
 int
-sctp_session_open (transport_endpoint_t * tep)
+sctp_session_open (transport_endpoint_cfg_t * tep)
 {
   return sctp_connection_open (tep);
 }
 
 u16
-sctp_check_outstanding_data_chunks (sctp_connection_t * tc)
+sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn)
 {
+  u8 i;
+  for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+    {
+      if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
+       continue;
+
+      if (sctp_conn->sub_conn[i].is_retransmitting == 1 ||
+         sctp_conn->sub_conn[i].enqueue_state != SCTP_ERROR_ENQUEUED)
+       {
+         SCTP_DBG_OUTPUT
+           ("Connection %u has still DATA to be enqueued inboud / outboud",
+            sctp_conn->sub_conn[i].connection.c_index);
+         return 1;
+       }
+
+    }
   return 0;                    /* Indicates no more data to be read/sent */
 }
 
 void
-sctp_connection_close (sctp_connection_t * tc)
+sctp_connection_close (sctp_connection_t * sctp_conn)
 {
   SCTP_DBG ("Closing connection %u...",
-           tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index);
+           sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.c_index);
 
-  tc->state = SCTP_STATE_SHUTDOWN_PENDING;
+  sctp_conn->state = SCTP_STATE_SHUTDOWN_PENDING;
 
-  sctp_send_shutdown (tc);
+  sctp_send_shutdown (sctp_conn);
 }
 
 void
@@ -477,172 +585,196 @@ sctp_session_close (u32 conn_index, u32 thread_index)
 {
   ASSERT (thread_index == 0);
 
-  sctp_connection_t *tc;
-  tc = sctp_connection_get (conn_index, thread_index);
-  sctp_connection_close (tc);
+  sctp_connection_t *sctp_conn =
+    sctp_connection_get (conn_index, thread_index);
+  if (sctp_conn != NULL)
+    sctp_connection_close (sctp_conn);
 }
 
 void
 sctp_session_cleanup (u32 conn_index, u32 thread_index)
 {
-  sctp_connection_t *tc;
-  tc = sctp_connection_get (conn_index, thread_index);
-  sctp_connection_timers_reset (tc);
+  sctp_connection_t *sctp_conn =
+    sctp_connection_get (conn_index, thread_index);
 
-  /* Wait for the session tx events to clear */
-  tc->state = SCTP_STATE_CLOSED;
+  if (sctp_conn != NULL)
+    {
+      sctp_connection_timers_reset (sctp_conn);
+      /* Wait for the session tx events to clear */
+      sctp_conn->state = SCTP_STATE_CLOSED;
+    }
 }
 
 /**
- * Update snd_mss to reflect the effective segment size that we can send
+ * Compute maximum segment size for session layer.
  */
-void
-sctp_update_snd_mss (sctp_connection_t * tc)
-{
-  /* The overhead for the sctp_header_t and sctp_chunks_common_hdr_t
-   * (the sum equals to sctp_full_hdr_t) is already taken into account
-   * for the tc->a_rwnd computation.
-   * So let's not account it again here.
-   */
-  tc->snd_hdr_length =
-    sizeof (sctp_payload_data_chunk_t) - sizeof (sctp_full_hdr_t);
-  tc->snd_a_rwnd =
-    clib_min (tc->a_rwnd, tc->rcv_opts.a_rwnd) - tc->snd_hdr_length;
-
-  SCTP_DBG ("tc->snd_a_rwnd = %u, tc->snd_hdr_length = %u ",
-           tc->snd_a_rwnd, tc->snd_hdr_length);
-
-  ASSERT (tc->snd_a_rwnd > 0);
-}
-
 u16
 sctp_session_send_mss (transport_connection_t * trans_conn)
 {
-  SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index);
-
-  sctp_connection_t *tc = sctp_get_connection_from_transport (trans_conn);
+  sctp_connection_t *sctp_conn =
+    sctp_get_connection_from_transport (trans_conn);
 
-  if (trans_conn == NULL)
+  if (sctp_conn == NULL)
     {
-      SCTP_DBG ("trans_conn == NULL");
+      SCTP_DBG ("sctp_conn == NULL");
       return 0;
     }
 
-  if (tc == NULL)
-    {
-      SCTP_DBG ("tc == NULL");
-      return 0;
-    }
-  /* Ensure snd_mss does accurately reflect the amount of data we can push
-   * in a segment. This also makes sure that options are updated according to
-   * the current state of the connection. */
-  sctp_update_snd_mss (tc);
+  update_cwnd (sctp_conn);
+  update_smallest_pmtu_idx (sctp_conn);
 
-  return tc->snd_a_rwnd;
+  u8 idx = sctp_data_subconn_select (sctp_conn);
+  return sctp_conn->sub_conn[idx].cwnd;
 }
 
+/**
+ * Compute how many payload bytes may currently be sent on a connection.
+ *
+ * The usable window is min (peer_rwnd, cwnd of the sub-connection chosen
+ * by sctp_data_subconn_select ()) minus the data in flight
+ * (next_tsn - last_unacked_tsn) minus the DATA chunk header overhead.
+ *
+ * @param sctp_conn connection to evaluate; NULL yields 0
+ * @return sendable bytes, 0 when the window is closed or too small to
+ *         carry the headers, capped at the largest value a u16 can hold
+ */
 u16
 sctp_snd_space (sctp_connection_t * sctp_conn)
 {
-  /* TODO: This requires a real implementation */
-  if (sctp_conn == NULL)
-    {
-      SCTP_DBG ("sctp_conn == NULL");
-      return 0;
-    }
-
-  if (sctp_conn->state != SCTP_STATE_ESTABLISHED)
-    {
-      SCTP_DBG_STATE_MACHINE
-       ("Trying to send DATA while not in SCTP_STATE_ESTABLISHED");
-      return 0;
-    }
+  /* sctp_session_send_space () forwards a lookup result without checking */
+  if (sctp_conn == NULL)
+    return 0;
+
+  /* RFC 4960 Section 6.1; point (A) */
+  if (sctp_conn->peer_rwnd == 0)
+    return 0;
 
-  return sctp_conn->snd_a_rwnd;
-}
+  u8 idx = sctp_data_subconn_select (sctp_conn);
 
-u32
-sctp_session_send_space (transport_connection_t * trans_conn)
-{
-  SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index);
+  u32 available_wnd =
+    clib_min (sctp_conn->peer_rwnd, sctp_conn->sub_conn[idx].cwnd);
+  u32 flight_size = sctp_conn->next_tsn - sctp_conn->last_unacked_tsn;
 
-  sctp_connection_t *tc = sctp_get_connection_from_transport (trans_conn);
+  if (available_wnd <= flight_size)
+    return 0;
 
-  return sctp_snd_space (tc);
+  /* Finally, let's subtract the DATA chunk headers overhead. Bail out
+   * first when it does not fit, otherwise the unsigned subtraction wraps
+   * around and reports a huge window. */
+  u32 usable_wnd = available_wnd - flight_size;
+  u32 hdr_overhead =
+    sizeof (sctp_payload_data_chunk_t) + sizeof (sctp_full_hdr_t);
+
+  if (usable_wnd <= hdr_overhead)
+    return 0;
+
+  usable_wnd -= hdr_overhead;
+
+  /* The return type is u16: cap instead of truncating modulo 65536 */
+  if (usable_wnd > 0xFFFF)
+    return 0xFFFF;
+
+  return (u16) usable_wnd;
 }
 
+/**
+ * Compute TX window session is allowed to fill.
+ */
 u32
-sctp_session_tx_fifo_offset (transport_connection_t * trans_conn)
+sctp_session_send_space (transport_connection_t * trans_conn)
 {
-  SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index);
+  sctp_connection_t *sctp_conn =
+    sctp_get_connection_from_transport (trans_conn);
 
-  sctp_connection_t *tc = sctp_get_connection_from_transport (trans_conn);
-
-  if (tc == NULL)
-    {
-      SCTP_DBG ("tc == NULL");
-      return 0;
-    }
-
-  /* This still works if fast retransmit is on */
-  return (tc->snd_nxt - tc->snd_una);
+  return sctp_snd_space (sctp_conn);
 }
 
 transport_connection_t *
 sctp_session_get_transport (u32 conn_index, u32 thread_index)
 {
-  sctp_connection_t *tc = sctp_connection_get (conn_index, thread_index);
-  return &tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection;
+  sctp_connection_t *sctp_conn =
+    sctp_connection_get (conn_index, thread_index);
+
+  if (PREDICT_TRUE (sctp_conn != NULL))
+    return &sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection;
+
+  return NULL;
 }
 
 transport_connection_t *
 sctp_session_get_listener (u32 listener_index)
 {
   sctp_main_t *tm = vnet_get_sctp_main ();
-  sctp_connection_t *tc;
-  tc = pool_elt_at_index (tm->listener_pool, listener_index);
-  return &tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection;
+  sctp_connection_t *sctp_conn;
+  sctp_conn = pool_elt_at_index (tm->listener_pool, listener_index);
+  return &sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection;
 }
 
 u8 *
 format_sctp_session (u8 * s, va_list * args)
 {
-  return NULL;
+  u32 tci = va_arg (*args, u32);
+  u32 thread_index = va_arg (*args, u32);
+  u32 verbose = va_arg (*args, u32);
+  sctp_connection_t *tc;
+
+  tc = sctp_connection_get (tci, thread_index);
+  if (tc)
+    s = format (s, "%U", format_sctp_connection, tc, verbose);
+  else
+    s = format (s, "empty\n");
+  return s;
 }
 
 u8 *
 format_sctp_listener_session (u8 * s, va_list * args)
 {
-  return NULL;
+  u32 tci = va_arg (*args, u32);
+  sctp_connection_t *tc = sctp_listener_get (tci);
+  return format (s, "%U", format_sctp_connection_id, tc);
 }
 
 void
-sctp_timer_init_handler (u32 conn_index)
+sctp_expired_timers_cb (u32 conn_index, u32 timer_id)
 {
-  sctp_connection_t *tc;
+  sctp_connection_t *sctp_conn;
+  /* Called once per expired timer by sctp_expired_timers_dispatch (); acts on the connection by timer_id */
+  SCTP_DBG ("%s expired", sctp_timer_to_string (timer_id));
 
-  tc = sctp_connection_get (conn_index, vlib_get_thread_index ());
+  sctp_conn = sctp_connection_get (conn_index, vlib_get_thread_index ());
   /* note: the connection may have already disappeared */
-  if (PREDICT_FALSE (tc == 0))
+  if (PREDICT_FALSE (sctp_conn == 0))
     return;
-  ASSERT (tc->state == SCTP_STATE_COOKIE_ECHOED);
-  /* Start cleanup. App wasn't notified yet so use delete notify as
-   * opposed to delete to cleanup session layer state. */
-  stream_session_delete_notify (&tc->
-                               sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection);
-  tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].timers[SCTP_TIMER_T1_INIT] =
-    SCTP_TIMER_HANDLE_INVALID;
   /* NOTE(review): conn_index is the key given to sctp_connection_get () yet also indexes sub_conn[] below - confirm it is always < MAX_SCTP_CONNECTIONS */
-  sctp_connection_cleanup (tc);
-}
+  if (sctp_conn->sub_conn[conn_index].unacknowledged_hb >
+      SCTP_PATH_MAX_RETRANS)
+    {
+      // The remote-peer is considered to be unreachable hence shutting down
+      u8 i, total_subs_down = 1;	/* NOTE(review): tally starts at 1, not 0 - confirm intended */
+      for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+       {
+         if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
+           continue;
+         /* A slot is marked down once the current time exceeds last_seen + SCTP_HB_INTERVAL */
+         u32 now = sctp_time_now ();
+         if (now > (sctp_conn->sub_conn[i].last_seen + SCTP_HB_INTERVAL))
+           {
+             total_subs_down += 1;
+             sctp_conn->sub_conn[i].state = SCTP_SUBCONN_STATE_DOWN;
+           }
+       }
+      /* The connection is torn down only when the tally equals MAX_SCTP_CONNECTIONS */
+      if (total_subs_down == MAX_SCTP_CONNECTIONS)
+       {
+         /* Start cleanup. App wasn't notified yet so use delete notify as
+          * opposed to delete to cleanup session layer state. */
+         stream_session_delete_notify (&sctp_conn->sub_conn
+                                       [SCTP_PRIMARY_PATH_IDX].connection);
+
+         sctp_connection_timers_reset (sctp_conn);
+
+         sctp_connection_cleanup (sctp_conn);
+       }
+      return;
+    }
 
-/* *INDENT OFF* */
-static timer_expiration_handler *sctp_timer_expiration_handlers[SCTP_N_TIMERS]
-  = {
-  sctp_timer_init_handler
-};
+  switch (timer_id)
+    {
+    case SCTP_TIMER_T1_INIT:
+      sctp_send_init (sctp_conn);
+      break;
+    case SCTP_TIMER_T1_COOKIE:
+      sctp_send_cookie_echo (sctp_conn);
+      break;
+    case SCTP_TIMER_T2_SHUTDOWN:
+      sctp_send_shutdown (sctp_conn);
+      break;
+    case SCTP_TIMER_T3_RXTX:
+      sctp_timer_reset (sctp_conn, conn_index, timer_id);
+      sctp_conn->flags |= SCTP_CONN_RECOVERY;
+      sctp_data_retransmit (sctp_conn);
+      break;
+    case SCTP_TIMER_T4_HEARTBEAT:
+      sctp_timer_reset (sctp_conn, conn_index, timer_id);
+      goto heartbeat;
+    }
+  return;	/* timer ids without a case above fall out of the switch and end here */
   /* Only the SCTP_TIMER_T4_HEARTBEAT case jumps to the label below */
-/* *INDENT ON* */
+heartbeat:
+  sctp_send_heartbeat (sctp_conn);
+}
 
 static void
 sctp_expired_timers_dispatch (u32 * expired_timers)
@@ -656,8 +788,10 @@ sctp_expired_timers_dispatch (u32 * expired_timers)
       connection_index = expired_timers[i] & 0x0FFFFFFF;
       timer_id = expired_timers[i] >> 28;
 
+      SCTP_DBG ("Expired timer ID: %u", timer_id);
+
       /* Handle expiration */
-      (*sctp_timer_expiration_handlers[timer_id]) (connection_index);
+      sctp_expired_timers_cb (connection_index, timer_id);
     }
 }
 
@@ -683,7 +817,7 @@ sctp_main_enable (vlib_main_t * vm)
   clib_error_t *error = 0;
   u32 num_threads;
   int thread;
-  sctp_connection_t *tc __attribute__ ((unused));
+  sctp_connection_t *sctp_conn __attribute__ ((unused));
   u32 preallocated_connections_per_thread;
 
   if ((error = vlib_call_init_function (vm, ip_main_init)))
@@ -743,6 +877,7 @@ sctp_main_enable (vlib_main_t * vm)
 
   if (num_threads > 1)
     {
+      clib_spinlock_init (&tm->half_open_lock);
     }
 
   vec_validate (tm->tx_frames[0], num_threads - 1);
@@ -778,16 +913,25 @@ sctp_enable_disable (vlib_main_t * vm, u8 is_en)
 transport_connection_t *
 sctp_half_open_session_get_transport (u32 conn_index)
 {
-  sctp_connection_t *tc = sctp_half_open_connection_get (conn_index);
-  return &tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection;
+  sctp_connection_t *sctp_conn = sctp_half_open_connection_get (conn_index);
+  return &sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection;
 }
 
 u8 *
 format_sctp_half_open (u8 * s, va_list * args)
 {
   u32 tci = va_arg (*args, u32);
-  sctp_connection_t *tc = sctp_half_open_connection_get (tci);
-  return format (s, "%U", format_sctp_connection_id, tc);
+  sctp_connection_t *sctp_conn = sctp_half_open_connection_get (tci);
+  return format (s, "%U", format_sctp_connection_id, sctp_conn);
+}
+
+void
+sctp_update_time (f64 now, u8 thread_index)
+{
+  sctp_set_time_now (thread_index);
+  tw_timer_expire_timers_16t_2w_512sl (&sctp_main.timer_wheels[thread_index],
+                                      now);
+  sctp_flush_frames_to_output (thread_index);
 }
 
 /* *INDENT OFF* */
@@ -801,13 +945,15 @@ const static transport_proto_vft_t sctp_proto = {
   .push_header = sctp_push_header,
   .send_mss = sctp_session_send_mss,
   .send_space = sctp_session_send_space,
-  .tx_fifo_offset = NULL,      //sctp_session_tx_fifo_offset,
+  .update_time = sctp_update_time,
   .get_connection = sctp_session_get_transport,
   .get_listener = sctp_session_get_listener,
   .get_half_open = sctp_half_open_session_get_transport,
   .format_connection = format_sctp_session,
   .format_listener = format_sctp_listener_session,
   .format_half_open = format_sctp_half_open,
+  .tx_type = TRANSPORT_TX_DEQUEUE,
+  .service_type = TRANSPORT_SERVICE_VC,
 };
 
 /* *INDENT ON* */
@@ -834,11 +980,36 @@ sctp_init (vlib_main_t * vm)
   transport_register_protocol (TRANSPORT_PROTO_SCTP, &sctp_proto,
                               FIB_PROTOCOL_IP6, sctp6_output_node.index);
 
+  sctp_api_reference ();
+
   return 0;
 }
 
 VLIB_INIT_FUNCTION (sctp_init);
 
+/**
+ * CLI handler for "show sctp punt".
+ *
+ * Prints the punt_unknown4 and punt_unknown6 flags of sctp_main as
+ * "enabled" / "disabled". Any trailing input is rejected with an error.
+ *
+ * @param vm      vlib main used for CLI output
+ * @param input   remaining command-line input; must be empty
+ * @param cmd_arg CLI command descriptor (unused)
+ * @return 0 on success, a clib error when unexpected input is present
+ */
+static clib_error_t *
+show_sctp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
+                  vlib_cli_command_t * cmd_arg)
+{
+  sctp_main_t *tm = &sctp_main;
+  if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+                             input);
+  /* These flags belong to sctp_main, so label them SCTP rather than UDP */
+  vlib_cli_output (vm, "IPv4 SCTP punt: %s",
+                  tm->punt_unknown4 ? "enabled" : "disabled");
+  vlib_cli_output (vm, "IPv6 SCTP punt: %s",
+                  tm->punt_unknown6 ? "enabled" : "disabled");
+  return 0;
+}
+/* *INDENT-OFF* */
+/* Registers the "show sctp punt" CLI path; the static symbol is named
+ * after the protocol it serves. */
+VLIB_CLI_COMMAND (show_sctp_punt_command, static) =
+{
+  .path = "show sctp punt",
+  .short_help = "show sctp punt",
+  .function = show_sctp_punt_fn,
+};
+/* *INDENT-ON* */
+
 /*
  * fd.io coding-style-patch-verification: ON
  *